Snap for 8426163 from 5cfdcf077be7b3727160275e83ae6b754f01dd07 to mainline-tzdata2-release Change-Id: I3b65f438bbe39df2585f87debc1a1e590ecb30c3

commit: 18ba13d4cd30ecc17137028005700cc22e797dbd [log] [tgz]
author: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> Fri Apr 08 16:01:30 2022 +0000
committer: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> Fri Apr 08 16:01:30 2022 +0000
tree: e82e1600981b5545830c12f105b4e415ab7287af
parent: 27a1013439ea58ea4aaf49fc8601ec0b1773e839 [diff]
parent: 5cfdcf077be7b3727160275e83ae6b754f01dd07 [diff]
diff --git a/.appveyor.yml b/.appveyor.yml
index ea01077..a6f3c39 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml

@@ -34,7 +34,7 @@
   - ps: cd OpenCL-ICD-Loader
   - ps: mkdir build
   - ps: cd build
-  - cmake -A%PLATFORM% -DENABLE_OPENCL30_PROVISIONAL=1 -DOPENCL_ICD_LOADER_HEADERS_DIR=%TOP%/OpenCL-Headers/ ..
+  - cmake -A%PLATFORM% -DOPENCL_ICD_LOADER_HEADERS_DIR=%TOP%/OpenCL-Headers/ ..
   - cmake --build . --config %CONFIGURATION%
   - ps: cd $env:TOP
   # Get the libclcxx standard library:

diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml
deleted file mode 100644
index 0c1778e..0000000
--- a/.github/workflows/presubmit.yml
+++ /dev/null

@@ -1,41 +0,0 @@
-name: Presubmit
-on: [push, pull_request]
-
-jobs:
-  build:
-    name: Build ${{ matrix.os }} ${{ matrix.name }}
-    runs-on: ${{ matrix.os }}
-    env:
-      JOB_ARCHITECTURE: ${{ matrix.arch }}
-      JOB_ENABLE_GL: ${{ matrix.gl }}
-    strategy:
-      matrix:
-        mainmatrix: [true]
-        os: [ubuntu-20.04, macos-11.0]
-        include:
-          - os: ubuntu-20.04
-            mainmatrix: true
-            gl: 1
-          - os: ubuntu-20.04
-            mainmatrix: false
-            name: Arm
-            arch: arm
-          - os: ubuntu-20.04
-            mainmatrix: false
-            name: AArch64
-            arch: aarch64
-    steps:
-      - uses: actions/checkout@v2
-      - name: Build
-        run: ./presubmit.sh
-  formatcheck:
-    name: Check code format
-    runs-on: ubuntu-20.04
-    steps:
-      - name: Install packages
-        run: sudo apt install -y clang-format
-      - uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      - name: Check code format
-        run: ./check-format.sh

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b826e68
--- /dev/null
+++ b/.gitignore

@@ -0,0 +1,4 @@
+# build directories
+build/
+build_lnx/
+build_win/
\ No newline at end of file

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..98984d8
--- /dev/null
+++ b/.travis.yml

@@ -0,0 +1,22 @@
+language: cpp
+
+os:
+  - linux
+  - osx
+
+jobs:
+  include:
+  - os: linux
+    dist: bionic
+    env: JOB_CHECK_FORMAT=1
+    addons:
+        apt:
+          packages:
+            - clang-format-9
+  - os: linux
+    env: JOB_ARCHITECTURE=arm
+  - os: linux
+    env: JOB_ARCHITECTURE=aarch64
+
+script:
+  - ./travis.sh

diff --git a/Android.bp b/Android.bp
deleted file mode 100644
index f48dae0..0000000
--- a/Android.bp
+++ /dev/null

@@ -1,501 +0,0 @@
-// *** THIS PACKAGE HAS SPECIAL LICENSING CONDITIONS.  PLEASE
-//     CONSULT THE OWNERS AND opensource-licensing@google.com BEFORE
-//     DEPENDING ON IT IN YOUR PROJECT. ***
-package {
-    default_applicable_licenses: ["external_OpenCL-CTS_license"],
-}
-
-// Added automatically by a large-scale-change that took the approach of
-// 'apply every license found to every target'. While this makes sure we respect
-// every license restriction, it may not be entirely correct.
-//
-// e.g. GPL in an MIT project might only apply to the contrib/ directory.
-//
-// Please consider splitting the single license below into multiple licenses,
-// taking care not to lose any license_kind information, and overriding the
-// default license using the 'licenses: [...]' property on targets as needed.
-//
-// For unused files, consider creating a 'fileGroup' with "//visibility:private"
-// to attach the license to, and including a comment whether the files may be
-// used in the current project.
-// See: http://go/android-license-faq
-license {
-    name: "external_OpenCL-CTS_license",
-    visibility: [":__subpackages__"],
-    license_kinds: [
-        "SPDX-license-identifier-Apache-2.0",
-        "SPDX-license-identifier-BSD",
-        "SPDX-license-identifier-MIT",
-        "SPDX-license-identifier-Unlicense",
-        "legacy_by_exception_only", // by exception only
-        "legacy_proprietary", // by exception only
-        "legacy_unencumbered",
-    ],
-    license_text: [
-        "LICENSE.txt",
-    ],
-}
-
-cc_library_headers {
-    name: "ocl-harness-headers",
-    export_include_dirs: [
-        "test_common/harness",
-        "test_common"
-    ]
-}
-
-cc_defaults {
-    name: "ocl-harness-defaults",
-    header_libs: [
-        "ocl-harness-headers",
-    ],
-    export_header_lib_headers: [
-        "ocl-harness-headers",
-    ],
-    cflags: [
-        "-DCL_EXPERIMENTAL",
-        "-DCL_TARGET_OPENCL_VERSION=300",
-        "-Wno-#warnings",
-        "-Wno-absolute-value",
-        "-Wno-asm-operand-widths",
-        "-Wno-c++11-narrowing",
-        "-Wno-dangling-else",
-        "-Wno-date-time",
-        "-Wno-deprecated-declarations",
-        "-Wno-format",
-        "-Wno-ignored-pragmas",
-        "-Wno-ignored-qualifiers",
-        "-Wno-implicit-fallthrough",
-        "-Wno-logical-op-parentheses",
-        "-Wno-macro-redefined",
-        "-Wno-missing-braces",
-        "-Wno-missing-declarations",
-        "-Wno-missing-field-initializers",
-        "-Wno-non-virtual-dtor",
-        "-Wno-overloaded-virtual",
-        "-Wno-parentheses",
-        "-Wno-parentheses-equality",
-        "-Wno-reorder-ctor",
-        "-Wno-return-stack-address",
-        "-Wno-shift-negative-value",
-        "-Wno-sometimes-uninitialized",
-        "-Wno-switch",
-        "-Wno-unknown-pragmas",
-        "-Wno-unneeded-internal-declaration",
-        "-Wno-unused-function",
-        "-Wno-unused-label",
-        "-Wno-unused-parameter",
-        "-Wno-unused-variable",
-        "-Wno-writable-strings",
-        "-fexceptions",
-    ],
-    static_libs: [
-        "ocl-stubs"
-    ],
-}
-
-cc_library {
-    name: "ocl-harness",
-    srcs: [ "test_common/harness/*.cpp", ],
-    defaults: [ "ocl-harness-defaults" ],
-}
-
-cc_defaults {
-    name: "ocl-test-defaults",
-    defaults: [ "ocl-harness-defaults" ],
-    static_libs: [ "ocl-harness" ],
-    compile_multilib: "64",
-    multilib: {
-        lib64: {
-            suffix: "64",
-        },
-    },
-}
-
-cc_defaults {
-    name: "ocl-test-image-defaults",
-    srcs: [ "test_conformance/images/common.cpp" ],
-    export_include_dirs: [ "test_conformance/images" ],
-    defaults: [ "ocl-test-defaults" ],
-}
-
-
-cc_test {
-    name: "ocl-test-allocations",
-    srcs: [ "test_conformance/allocations/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-api",
-    srcs: [ "test_conformance/api/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-atomics",
-    srcs: [ "test_conformance/atomics/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-basic",
-    srcs: [ "test_conformance/basic/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-buffers",
-    srcs: [ "test_conformance/buffers/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-c11-atomics",
-    srcs: [ "test_conformance/c11_atomics/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-commonfns",
-    srcs: [ "test_conformance/commonfns/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-compiler",
-    srcs: [ "test_conformance/compiler/*.cpp" ],
-    data: [ "test_conformance/compiler/includeTestDirectory/testIncludeFile.h", "test_conformance/compiler/secondIncludeTestDirectory/testIncludeFile.h" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-computeinfo",
-    srcs: [ "test_conformance/computeinfo/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-contractions",
-    srcs: [ "test_conformance/contractions/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-conversions",
-    srcs: [ "test_conformance/conversions/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-device-execution",
-    srcs: [ "test_conformance/device_execution/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-device-partition",
-    srcs: [ "test_conformance/device_partition/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-device-timer",
-    srcs: [ "test_conformance/device_timer/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-events",
-    srcs: [ "test_conformance/events/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-generic-address-space",
-    srcs: [ "test_conformance/generic_address_space/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-geometrics",
-    srcs: [ "test_conformance/geometrics/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-half",
-    srcs: [ "test_conformance/half/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-integer-ops",
-    srcs: [ "test_conformance/integer_ops/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-math-brute-force",
-    srcs: [ "test_conformance/math_brute_force/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-mem-host-flags",
-    srcs: [ "test_conformance/mem_host_flags/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-multiple-device-context",
-    srcs: [ "test_conformance/multiple_device_context/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-non-uniform-work-group",
-    srcs: [ "test_conformance/non_uniform_work_group/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-pipes",
-    srcs: [ "test_conformance/pipes/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-printf",
-    srcs: [ "test_conformance/printf/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-profiling",
-    srcs: [ "test_conformance/profiling/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-relationals",
-    srcs: [ "test_conformance/relationals/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-select",
-    srcs: [ "test_conformance/select/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-spirv-new",
-    srcs: [ "test_conformance/spirv_new/*.cpp", "test_conformance/math_brute_force/reference_math.cpp", "test_conformance/math_brute_force/utility.cpp" ],
-    data: [ "test_conformance/spirv_new/spirv_asm/*", "test_conformance/spirv_new/spirv_bin/*" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-subgroups",
-    srcs: [ "test_conformance/subgroups/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-svm",
-    srcs: [ "test_conformance/SVM/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-thread-dimensions",
-    srcs: [ "test_conformance/thread_dimensions/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-vectors",
-    srcs: [ "test_conformance/vectors/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-image-clcopyimage",
-    srcs: [ "test_conformance/images/clCopyImage/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-image-clfillimage",
-    srcs: [ "test_conformance/images/clFillImage/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-image-clgetinfo",
-    srcs: [ "test_conformance/images/clGetInfo/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-image-clreadwriteimage",
-    srcs: [ "test_conformance/images/clReadWriteImage/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-image-kernel-image-methods",
-    srcs: [ "test_conformance/images/kernel_image_methods/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-image-kernel-read-write",
-    srcs: [ "test_conformance/images/kernel_read_write/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-
-cc_test {
-    name: "ocl-test-image-samplerlessreads",
-    srcs: [ "test_conformance/images/samplerlessReads/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-}
-
-python_test_host {
-    name: "opencl_cts",
-    main: "scripts/test_opencl_cts.py",
-    srcs: [ "scripts/test_opencl_cts.py" ],
-    data: [ "scripts/test_opencl_cts.xml" ],
-    test_config: "scripts/test_opencl_cts.xml",
-    version: {
-        py2: {
-            enabled: false,
-        },
-        py3: {
-            enabled: true
-        }
-    },
-    test_options: {
-        unit_test: false,
-    },
-}

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 083ea96..799460d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt

@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.5.1)
+cmake_minimum_required(VERSION 3.1)
 
 set( CONFORMANCE_SUFFIX "" )
 set(CLConform_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
@@ -17,7 +17,6 @@
 endif(CMAKE_BUILD_TYPE STREQUAL "release")
 
 add_definitions(-DCL_TARGET_OPENCL_VERSION=300)
-add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_2_APIS=1)
 add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_1_APIS=1)
 add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_0_APIS=1)
 add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_2_APIS=1)
@@ -60,15 +59,63 @@
 #-----------------------------------------------------------
 #Vendor Customization File can be included here to provide a way to automatically
 #build driver as a dependency of the conformance tests, or other such CMake customization
-include(CMakeVendor.txt OPTIONAL)
+option(USE_VENDOR_CUSTOM_FILE "Use Vendor Customization File" OFF)
+if(USE_VENDOR_CUSTOM_FILE)
+    include(CMakeVendor.txt OPTIONAL)
+endif(USE_VENDOR_CUSTOM_FILE)
 
+#-----------------------------------------------------------
+# Development options for OpenCL C++ tests
+#-----------------------------------------------------------
+# Use OpenCL C kernels instead of OpenCL C++ kernels
+option(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS "Use OpenCL C kernels in OpenCL C++ tests" OFF)
+if(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS)
+  set(CLPP_DEVELOPMENT_OPTIONS ${CLPP_DEVELOPMENT_OPTIONS} -DCLPP_DEVELOPMENT_USE_OPENCLC_KERNELS)
+endif(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS)
+# Only check if OpenCL C++ kernels compile to SPIR-V
+option(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION "Only check if OpenCL C++ kernels compile to SPIR-V" OFF)
+if(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION)
+  if(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS)
+    message(FATAL_ERROR "Can't use OpenCL C kernels and compile to SPIR-V.")
+  endif(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS)
+  set(CLPP_DEVELOPMENT_OPTIONS ${CLPP_DEVELOPMENT_OPTIONS} -DCLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION)
+endif(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION)
+#
+if(CLPP_DEVELOPMENT_OPTIONS)
+  add_definitions(-DCLPP_DEVELOPMENT_OPTIONS)
+  add_definitions(${CLPP_DEVELOPMENT_OPTIONS})
+endif(CLPP_DEVELOPMENT_OPTIONS)
+
+# Offline OpenCL C/C++ compiler provided by Khronos is the only supported
+# offline compiler.
+#
+# Path to offline OpenCL C/C++ compiler provided by Khronos.
+# See https://github.com/KhronosGroup/SPIR/ (spirv-1.1 branch or newer SPIR-V-ready
+# branch should be used).
+if(KHRONOS_OFFLINE_COMPILER)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DKHRONOS_OFFLINE_COMPILER=${KHRONOS_OFFLINE_COMPILER}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DKHRONOS_OFFLINE_COMPILER=${KHRONOS_OFFLINE_COMPILER}")
+    # Additional OpenCL C/C++ compiler option.
+    if(KHRONOS_OFFLINE_COMPILER_OPTIONS)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DKHRONOS_OFFLINE_COMPILER_OPTIONS=${KHRONOS_OFFLINE_COMPILER_OPTIONS}")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DKHRONOS_OFFLINE_COMPILER_OPTIONS=${KHRONOS_OFFLINE_COMPILER_OPTIONS}")
+    endif(KHRONOS_OFFLINE_COMPILER_OPTIONS)
+else(KHRONOS_OFFLINE_COMPILER)
+    message(WARNING "KHRONOS_OFFLINE_COMPILER is not defined!")
+    message(WARNING "Running CL C++ tests will not be possible.")
+endif(KHRONOS_OFFLINE_COMPILER)
+
+# CL_LIBCLCXX_DIR - path to dir with OpenCL C++ STL (libclcxx)
 # CL_INCLUDE_DIR - path to dir with OpenCL headers
-if(CL_INCLUDE_DIR AND CL_LIB_DIR)
+# CL_LIB_DIR - path to dir with OpenCL library
+if(CL_INCLUDE_DIR AND CL_LIB_DIR AND CL_LIBCLCXX_DIR)
     link_directories(${CL_LIB_DIR})
-else(CL_INCLUDE_DIR AND CL_LIB_DIR)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DCL_LIBCLCXX_DIR=${CL_LIBCLCXX_DIR}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCL_LIBCLCXX_DIR=${CL_LIBCLCXX_DIR}")
+else(CL_INCLUDE_DIR AND CL_LIB_DIR AND CL_LIBCLCXX_DIR)
     message(STATUS "OpenCL hasn't been found!")
-    message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR and -DCL_LIB_DIR")
-endif(CL_INCLUDE_DIR AND CL_LIB_DIR)
+    message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR, -DCL_LIB_DIR and -DCL_LIBCLCXX_DIR")
+endif(CL_INCLUDE_DIR AND CL_LIB_DIR AND CL_LIBCLCXX_DIR)
 
 # CLConform_GL_LIBRARIES_DIR - path to OpenGL libraries
 if(GL_IS_SUPPORTED AND CLConform_GL_LIBRARIES_DIR)
@@ -119,42 +166,23 @@
             STREQUAL "x86")
         set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse -msse2 -mfpmath=sse")
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse -msse2 -mfpmath=sse")
-
-        add_cxx_flag_if_supported(-frounding-math)
     endif()
 else()
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
 endif()
 
-if(MSVC)
-    # Don't warn when using standard non-secure functions.
-    add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
-endif()
-
 if( WIN32 AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel" )
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qlong-double -Qpc80 /DWIN32 /D_WINDOWS /W3 /GR /EHsc -nologo -Od -D_CRT_NONSTDC_NO_WARNINGS -EHsc -Wall -Qdiag-disable:68,111,177,186,161,869,1028,2259,2553,181,239,265,1188 -fp:strict -fp:source")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Qlong-double -Qpc80 /DWIN32 /D_WINDOWS /W3 /GR /EHsc -nologo -Od -D_CRT_NONSTDC_NO_WARNINGS -EHsc -Wall -Qdiag-disable:68,111,177,186,161,869,1028,2259,2553,181,239,265,1188 -fp:strict -fp:source")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qlong-double -Qpc80 /DWIN32 /D_WINDOWS /W3 /GR /EHsc -nologo -Od -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -EHsc -Wall -Qdiag-disable:68,111,177,186,161,869,1028,2259,2553,181,239,265,1188 -fp:strict -fp:source")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Qlong-double -Qpc80 /DWIN32 /D_WINDOWS /W3 /GR /EHsc -nologo -Od -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -EHsc -Wall -Qdiag-disable:68,111,177,186,161,869,1028,2259,2553,181,239,265,1188 -fp:strict -fp:source")
 endif()
 
 list(APPEND CLConform_LIBRARIES ${OPENCL_LIBRARIES})
 if(ANDROID)
     list(APPEND CLConform_LIBRARIES m)
-endif()
-if(NOT DEFINED LINK_PTHREAD)
-    if(ANDROID OR WIN32)
-        set(LINK_PTHREAD OFF)
-    else()
-        set(LINK_PTHREAD ON)
-    endif()
-endif()
-if(LINK_PTHREAD)
+elseif(NOT WIN32)
     list(APPEND CLConform_LIBRARIES pthread)
-endif()
-
-if(DEFINED USE_GLES3)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGLES3")
-endif()
+endif(ANDROID)
 
 if(APPLE)
     find_library(corefoundation CoreFoundation)
@@ -178,6 +206,7 @@
 
 add_subdirectory(test_common)
 add_subdirectory(test_conformance)
+add_subdirectory(test_extensions)
 
 # Support both VS2008 and VS2012.
 set (DLL_FILES "${VS_BUILD_DIR}/Debug/*.dll")
@@ -203,4 +232,17 @@
   add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} )
 endif(WIN32)
 
+# Copy required CL include directories into the build directory
+# as required for the compiler testing.
+
+# ... For running the compiler test on the command line.
+file(COPY "${CLConform_SOURCE_DIR}/test_conformance/compiler/includeTestDirectory" DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_conformance/compiler)
+file(COPY "${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory" DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_conformance/compiler)
+
+# ... For running the compiler test with VisualStudio.
+if(MSVC)
+  file(COPY "${CLConform_SOURCE_DIR}/test_conformance/compiler/includeTestDirectory" DESTINATION "${CLConform_SOURCE_DIR}/build/test_conformance/compiler")
+  file(COPY "${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory" DESTINATION "${CLConform_SOURCE_DIR}/build/test_conformance/compiler")
+endif(MSVC)
+
 set_property(TARGET COPY_FILES${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}")

diff --git a/CMakeVendor.txt b/CMakeVendor.txt
new file mode 100644
index 0000000..14486ec
--- /dev/null
+++ b/CMakeVendor.txt

@@ -0,0 +1,8 @@
+# We intentionally hardcode "_win32" to ensure backwards compatibility (to avoid breaking HAAVE)
+if(ANDROID)
+   if(ARM64_V8A)
+       set(ARCH "64")
+   else(ARM64_V8A)
+       set(ARCH "32")
+   endif(ARM64_V8A)
+endif (ANDROID)

diff --git a/METADATA b/METADATA
deleted file mode 100644
index 5c12860..0000000
--- a/METADATA
+++ /dev/null

@@ -1,3 +0,0 @@
-third_party {
-  license_type: BY_EXCEPTION_ONLY
-}

diff --git a/build_android.py b/build_android.py
new file mode 100755
index 0000000..e1b8c1e
--- /dev/null
+++ b/build_android.py

@@ -0,0 +1,161 @@
+#!/usr/bin/python
+
+
+#-------------------------------------------------------------------------------#
+# android-cmake and android-ndk based build script for conformance
+#-------------------------------------------------------------------------------#
+"""
+Dependencies:
+
+1) android-ndk version android-ndk-r10d or higher is required. Further, the environment
+   variable ANDROID_NDK should be defined to point to it.
+
+2) android-cmake should be installed (else the script can install it for you). If installed,
+   the environment variable ANDROID_CMAKE should point to install location, unless it is in the current
+   working directory in which case it is picked up by default.
+
+3) CL_INCLUDE_DIR should be defined to point to CL headers. Alternately, this can be provided
+   as an input (-I)
+
+4) Path to opencl library to link against (libOpenCL.so) can be provided using -L. If this isn't
+   available the script will try to use CL_LIB_DIR_64 or CL_LIB_DIR_32 environment variables -
+   if available - to pick up the right library for the architecture being built.
+
+
+"""
+
+import os
+import sys
+import subprocess
+import argparse
+import time
+import shlex
+
+start  = time.time()
+script = os.path.basename( sys.argv[ 0 ] )
+
+def die (msg):  # Print msg and terminate the script with a failing exit status.
+    print(msg)  # call form prints the same text on Python 2 and also parses on Python 3
+    sys.exit(-1)  # sys.exit does not depend on the site-provided exit() helper
+
+def execute (cmdline):  # Run cmdline (program name or argv list) and raise if it fails.
+    retcode = subprocess.call(cmdline)  # blocks until the child process exits
+    if retcode != 0:  # any non-zero exit status aborts the build
+        raise Exception("Failed to execute '%s', got %d" % (cmdline, retcode))
+
+def build(args):
+    if not (args.testDir):
+        print("building...")
+        execute("make")
+    else:
+        if os.path.exists( os.path.join(args.bld_dir, "test_conformance", args.testDir) ):
+            os.chdir( os.path.join("test_conformance",args.testDir) )
+            print("Building test: %s..." %args.testDir)
+            execute("make")
+            os.chdir(args.bld_dir)
+        else:
+            print ("Error: %s test doesn't exist" %args.testDir)
+
+
+def configure (args):
+    print("configuring...")
+    cmdline = []
+    cmdline.extend(['cmake', "-DCMAKE_TOOLCHAIN_FILE=" + os.path.join(args.android_cmake,"android.toolchain.cmake")])
+    for var in args.cmake_defs :
+        cmdline.extend([ '-D', var ])
+    cmdline.extend(['-DCL_INCLUDE_DIR=' + args.inc_dir])
+    cmdline.extend(['-DCL_LIB_DIR=' + args.lib_dir])
+    cmdline.extend(['-DANDROID_NATIVE_API_LEVEL=' + "android-21"])
+    if args.arch == "64":
+        cmdline.extend(['-DANDROID_ABI=arm64-v8a'])
+        cmdline.extend(['-DANDROID_SO_UNDEFINED=ON'])
+    cmdline.extend([args.src_dir])
+    execute(cmdline)
+
+def check_var (parser, args, name):
+    if not(args.__dict__[name]):
+        parser.error("%s needs to be defined" % name)
+
+def print_config(args):
+    print("----------CONFIGURATION--------------\n")
+    print("android_cmake: %s" % args.android_cmake)
+    print("android_ndk:   %s" % args.android_ndk)
+    print("lib_dir:       %s" % args.lib_dir)
+    print("inc_dir:       %s" % args.inc_dir)
+    if len(args.cmake_defs):
+        print("cmake options:" + "\n:".join( [ " `%s'" % dir for dir in args.cmake_defs ] ))
+    print("architecture:  %s" % args.arch)
+    print("-------------------------------------\n")
+
+def get_input():  # True for a yes/empty answer, False for no; anything else exits the script
+    yes = set(['yes','y', 'ye', ''])
+    no = set(['no','n'])
+    # raw_input only exists on Python 2 and input() evaluates there, so pick by interpreter version.
+    choice = (raw_input if sys.version_info[0] < 3 else input)().lower()
+    if choice in yes:
+        return True
+    elif choice in no:
+        return False
+    else:
+        sys.stdout.write("Please respond with 'yes' or 'no'")
+        exit()
+
+def install_android_cmake():  # Offer to git-clone android-cmake; exits the script if the user declines.
+    parser.print_help()  # parser and args are the module-level objects created in the try block below
+    print("\nandroid-cmake doesn't seem to be installed - It should be provided as a) cmdline input b) environment variable $ANDROID_CMAKE or c) present in the current directory\n")
+    print("if you would like to download and install it in the current directory please enter yes\n")
+    print("if you would like to provide an environment variable($ANDROID_CMAKE) or command-line input(--android_cmake) rerun the script enter no\n")
+    print("input: ")
+    if get_input():
+        print("installing android-cmake")
+        #subprocess.call(['git', 'clone', 'https://github.com/taka-no-me/android-cmake'])
+        # Use a newer fork of android-cmake which has been updated to support Clang. GCC is deprecated in newer NDKs and C11 atomics conformance doesn't build with NDK > 10.
+        args.android_cmake = os.path.join(args.src_dir,"android-cmake")  # same path the startup check probes
+        subprocess.call(['git', 'clone', 'https://github.com/daewoong-jang/android-cmake', args.android_cmake])  # clone to the recorded path, not the cwd
+    else:
+        exit()
+
+try:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--android_cmake', dest='android_cmake', default=os.environ.get('ANDROID_CMAKE'), help="Path to android-cmake (can also be set using environment variable $ANDROID_CMAKE).")
+    parser.add_argument('--android_ndk', dest='android_ndk', default=os.environ.get('ANDROID_NDK'), help="Path to android-ndk (can also be set using environment variable $ANDROID_NDK).")
+    parser.add_argument('-L','--lib_dir', dest='lib_dir', default="", help="Path to libOpenCL to link against (can also be set using environment variable $CL_LIB_DIR_32 and $CL_LIB_DIR_64).")
+    parser.add_argument('-I','--include_dir', dest='inc_dir', default=os.environ.get('CL_INCLUDE_DIR'), help="Path to headers (can also be set using environment variable $CL_INCLUDE_DIR).")
+    parser.add_argument('-D', dest='cmake_defs', action='append', default=[], help="Define CMAKE variable")
+    parser.add_argument('-a','--arch', default="32", help="Architecture to build for (32 or 64)")
+    parser.add_argument('-t','--test', dest='testDir', default="", help="Builds the given test")
+
+    args = parser.parse_args()
+
+    args.src_dir = os.path.realpath(os.path.dirname( sys.argv[ 0 ]))
+
+    if not (args.android_cmake):
+        if os.path.exists(os.path.join(args.src_dir,"android-cmake")):
+            args.android_cmake = os.path.join(args.src_dir,"android-cmake")
+        else:
+            install_android_cmake()
+
+    if not (args.lib_dir):
+        lib_var_name = "CL_LIB_DIR_" + ("32" if (args.arch == "32") else "64")
+        args.lib_dir = os.environ.get(lib_var_name)
+
+    check_var(parser, args, "android_cmake")
+    check_var(parser, args, "lib_dir")
+    check_var(parser, args, "inc_dir")
+    check_var(parser, args, "android_ndk")
+
+    print_config(args)
+
+    args.bld_dir = os.path.join(args.src_dir, 'bld_android_%s' % args.arch)
+    if not os.path.exists(args.bld_dir):
+        os.makedirs(args.bld_dir)
+    os.chdir(args.bld_dir)
+
+    configure(args)
+    build(args)
+
+    sys.exit( 0 )
+
+finally:
+    finish = time.time()
+    print("Elapsed time: %.0f s." % ( finish - start ) )

diff --git a/build_lnx.sh b/build_lnx.sh
new file mode 100755
index 0000000..f1c71f8
--- /dev/null
+++ b/build_lnx.sh

@@ -0,0 +1,6 @@
+#!/bin/sh
+# Configure and build in ./build_lnx; replace every <TO_SET> placeholder below before running.
+mkdir -p build_lnx
+cd build_lnx || exit 1
+cmake -G "Unix Makefiles" ../ -DKHRONOS_OFFLINE_COMPILER=<TO_SET> -DCL_LIBCLCXX_DIR=<TO_SET> -DCL_INCLUDE_DIR=<TO_SET> -DCL_LIB_DIR=<TO_SET> -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=. -DOPENCL_LIBRARIES=OpenCL
+make --jobs 8

diff --git a/build_win.bat b/build_win.bat
new file mode 100644
index 0000000..6ae3182
--- /dev/null
+++ b/build_win.bat

@@ -0,0 +1,32 @@
+@ECHO off
+setlocal ENABLEDELAYEDEXPANSION
+
+IF DEFINED ProgramFiles(x86) SET ProgFilesDir=%ProgramFiles(x86)%
+IF NOT DEFINED ProgFilesDir SET ProgFilesDir=%ProgramFiles%
+
+rem -------------------------------- Update these to match what's on your PC ------------------------------------------------
+
+SET VCPATH="%ProgFilesDir%\Microsoft Visual Studio 14.0\Common7\IDE\devenv.com"
+
+SET PATH=%CMAKEPATH%;%PATH%
+
+rem -------------------------------------------------------------------------------------------------------------------------
+
+setlocal ENABLEDELAYEDEXPANSION
+
+call "%VS140COMNTOOLS%\vsvars32.bat"
+
+mkdir build_win
+pushd build_win
+IF NOT EXIST CLConform.sln (
+   echo "Solution file not found, running Cmake"
+   cmake -G "Visual Studio 14 2015 Win64" ..\.  -DKHRONOS_OFFLINE_COMPILER=<TO_SET> -DCL_LIBCLCXX_DIR=<TO_SET> -DCL_INCLUDE_DIR=<TO_SET> -DCL_LIB_DIR=<TO_SET> -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=. -DOPENCL_LIBRARIES=OpenCL
+) else (
+   echo "Solution file found CLConform.sln "
+)
+
+echo Building CLConform.sln...
+%VCPATH% CLConform.sln /build
+
+
+GOTO:EOF

diff --git a/check-format.sh b/check-format.sh
index 7de2bd2..7eae2fd 100755
--- a/check-format.sh
+++ b/check-format.sh

@@ -2,10 +2,12 @@
 
 # Arg used to specify non-'origin/master' comparison branch
 ORIGIN_BRANCH=${1:-"origin/master"}
-CLANG_BINARY=${2:-"`which clang-format-9`"}
 
 # Run git-clang-format to check for violations
-CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp --binary $CLANG_BINARY)
+if [ "$TRAVIS" == "true" ]; then
+    EXTRA_OPTS="--binary `which clang-format-9`"
+fi
+CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp $EXTRA_OPTS)
 
 # Check for no-ops
 grep '^no modified files to format$' <<<"$CLANG_FORMAT_OUTPUT" && exit 0

diff --git a/dependencies/Android.bp b/dependencies/Android.bp
deleted file mode 100644
index a8dbeee..0000000
--- a/dependencies/Android.bp
+++ /dev/null

@@ -1,39 +0,0 @@
-package {
-    // See: http://go/android-license-faq
-    // A large-scale-change added 'default_applicable_licenses' to import
-    // all of the 'license_kinds' from "external_OpenCL-CTS_license"
-    // to get the below license kinds:
-    //   SPDX-license-identifier-Apache-2.0
-    default_applicable_licenses: ["external_OpenCL-CTS_license"],
-}
-
-cc_library_headers {
-    name: "ocl-headers",
-    export_include_dirs: [ "ocl-headers" ],
-}
-
-cc_library {
-    name: "ocl-stubs",
-    srcs: [ "ocl-stubs/stubs.cpp" ],
-    cflags: [
-        "-DCL_EXPERIMENTAL",
-        "-DCL_TARGET_OPENCL_VERSION=300",
-    ],
-    header_libs: [ "ocl-headers" ],
-    export_header_lib_headers: [ "ocl-headers" ],
-    generated_headers: [ "generated-ocl-apis" ],
-}
-
-genrule {
-    name: "generated-ocl-apis",
-    out: [ "apis.h" ],
-    tool_files: [ "ocl-stubs/apis_generator.py" ],
-    srcs: [
-        "ocl-headers/CL/cl.h",
-        "ocl-headers/CL/cl_gl.h",
-        "ocl-headers/CL/cl_egl.h",
-        "ocl-headers/CL/cl_ext.h",
-        "ocl-headers/CL/cl_gl_ext.h",
-    ],
-    cmd: "python3 $(location) $(in) > $(out)"
-}

diff --git a/dependencies/ocl-headers/CL/cl.h b/dependencies/ocl-headers/CL/cl.h
deleted file mode 100644
index 0018a0f..0000000
--- a/dependencies/ocl-headers/CL/cl.h
+++ /dev/null

@@ -1,1929 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2008-2020 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-#ifndef __OPENCL_CL_H
-#define __OPENCL_CL_H
-
-#include <CL/cl_version.h>
-#include <CL/cl_platform.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/******************************************************************************/
-
-typedef struct _cl_platform_id *    cl_platform_id;
-typedef struct _cl_device_id *      cl_device_id;
-typedef struct _cl_context *        cl_context;
-typedef struct _cl_command_queue *  cl_command_queue;
-typedef struct _cl_mem *            cl_mem;
-typedef struct _cl_program *        cl_program;
-typedef struct _cl_kernel *         cl_kernel;
-typedef struct _cl_event *          cl_event;
-typedef struct _cl_sampler *        cl_sampler;
-
-typedef cl_uint             cl_bool;                     /* WARNING!  Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
-typedef cl_ulong            cl_bitfield;
-typedef cl_ulong            cl_properties;
-typedef cl_bitfield         cl_device_type;
-typedef cl_uint             cl_platform_info;
-typedef cl_uint             cl_device_info;
-typedef cl_bitfield         cl_device_fp_config;
-typedef cl_uint             cl_device_mem_cache_type;
-typedef cl_uint             cl_device_local_mem_type;
-typedef cl_bitfield         cl_device_exec_capabilities;
-#ifdef CL_VERSION_2_0
-typedef cl_bitfield         cl_device_svm_capabilities;
-#endif
-typedef cl_bitfield         cl_command_queue_properties;
-#ifdef CL_VERSION_1_2
-typedef intptr_t            cl_device_partition_property;
-typedef cl_bitfield         cl_device_affinity_domain;
-#endif
-
-typedef intptr_t            cl_context_properties;
-typedef cl_uint             cl_context_info;
-#ifdef CL_VERSION_2_0
-typedef cl_properties       cl_queue_properties;
-#endif
-typedef cl_uint             cl_command_queue_info;
-typedef cl_uint             cl_channel_order;
-typedef cl_uint             cl_channel_type;
-typedef cl_bitfield         cl_mem_flags;
-#ifdef CL_VERSION_2_0
-typedef cl_bitfield         cl_svm_mem_flags;
-#endif
-typedef cl_uint             cl_mem_object_type;
-typedef cl_uint             cl_mem_info;
-#ifdef CL_VERSION_1_2
-typedef cl_bitfield         cl_mem_migration_flags;
-#endif
-typedef cl_uint             cl_image_info;
-#ifdef CL_VERSION_1_1
-typedef cl_uint             cl_buffer_create_type;
-#endif
-typedef cl_uint             cl_addressing_mode;
-typedef cl_uint             cl_filter_mode;
-typedef cl_uint             cl_sampler_info;
-typedef cl_bitfield         cl_map_flags;
-#ifdef CL_VERSION_2_0
-typedef intptr_t            cl_pipe_properties;
-typedef cl_uint             cl_pipe_info;
-#endif
-typedef cl_uint             cl_program_info;
-typedef cl_uint             cl_program_build_info;
-#ifdef CL_VERSION_1_2
-typedef cl_uint             cl_program_binary_type;
-#endif
-typedef cl_int              cl_build_status;
-typedef cl_uint             cl_kernel_info;
-#ifdef CL_VERSION_1_2
-typedef cl_uint             cl_kernel_arg_info;
-typedef cl_uint             cl_kernel_arg_address_qualifier;
-typedef cl_uint             cl_kernel_arg_access_qualifier;
-typedef cl_bitfield         cl_kernel_arg_type_qualifier;
-#endif
-typedef cl_uint             cl_kernel_work_group_info;
-#ifdef CL_VERSION_2_1
-typedef cl_uint             cl_kernel_sub_group_info;
-#endif
-typedef cl_uint             cl_event_info;
-typedef cl_uint             cl_command_type;
-typedef cl_uint             cl_profiling_info;
-#ifdef CL_VERSION_2_0
-typedef cl_properties       cl_sampler_properties;
-typedef cl_uint             cl_kernel_exec_info;
-#endif
-#ifdef CL_VERSION_3_0
-typedef cl_bitfield         cl_device_atomic_capabilities;
-typedef cl_bitfield         cl_device_device_enqueue_capabilities;
-typedef cl_uint             cl_khronos_vendor_id;
-typedef cl_properties       cl_mem_properties;
-typedef cl_uint             cl_version;
-#endif
-
-typedef struct _cl_image_format {
-    cl_channel_order        image_channel_order;
-    cl_channel_type         image_channel_data_type;
-} cl_image_format;
-
-#ifdef CL_VERSION_1_2
-
-typedef struct _cl_image_desc {
-    cl_mem_object_type      image_type;
-    size_t                  image_width;
-    size_t                  image_height;
-    size_t                  image_depth;
-    size_t                  image_array_size;
-    size_t                  image_row_pitch;
-    size_t                  image_slice_pitch;
-    cl_uint                 num_mip_levels;
-    cl_uint                 num_samples;
-#ifdef CL_VERSION_2_0
-#if defined(__GNUC__)
-    __extension__                   /* Prevents warnings about anonymous union in -pedantic builds */
-#endif
-#if defined(_MSC_VER) && !defined(__STDC__)
-#pragma warning( push )
-#pragma warning( disable : 4201 )   /* Prevents warning about nameless struct/union in /W4 builds */
-#endif
-#if defined(_MSC_VER) && defined(__STDC__)
-    /* Anonymous unions are not supported in /Za builds */
-#else
-    union {
-#endif
-#endif
-      cl_mem                  buffer;
-#ifdef CL_VERSION_2_0
-#if defined(_MSC_VER) && defined(__STDC__)
-    /* Anonymous unions are not supported in /Za builds */
-#else
-      cl_mem                  mem_object;
-    };
-#endif
-#if defined(_MSC_VER) && !defined(__STDC__)
-#pragma warning( pop )
-#endif
-#endif
-} cl_image_desc;
-
-#endif
-
-#ifdef CL_VERSION_1_1
-
-typedef struct _cl_buffer_region {
-    size_t                  origin;
-    size_t                  size;
-} cl_buffer_region;
-
-#endif
-
-#ifdef CL_VERSION_3_0
-
-#define CL_NAME_VERSION_MAX_NAME_SIZE 64
-
-typedef struct _cl_name_version {
-    cl_version              version;
-    char                    name[CL_NAME_VERSION_MAX_NAME_SIZE];
-} cl_name_version;
-
-#endif
-
-/******************************************************************************/
-
-/* Error Codes */
-#define CL_SUCCESS                                  0
-#define CL_DEVICE_NOT_FOUND                         -1
-#define CL_DEVICE_NOT_AVAILABLE                     -2
-#define CL_COMPILER_NOT_AVAILABLE                   -3
-#define CL_MEM_OBJECT_ALLOCATION_FAILURE            -4
-#define CL_OUT_OF_RESOURCES                         -5
-#define CL_OUT_OF_HOST_MEMORY                       -6
-#define CL_PROFILING_INFO_NOT_AVAILABLE             -7
-#define CL_MEM_COPY_OVERLAP                         -8
-#define CL_IMAGE_FORMAT_MISMATCH                    -9
-#define CL_IMAGE_FORMAT_NOT_SUPPORTED               -10
-#define CL_BUILD_PROGRAM_FAILURE                    -11
-#define CL_MAP_FAILURE                              -12
-#ifdef CL_VERSION_1_1
-#define CL_MISALIGNED_SUB_BUFFER_OFFSET             -13
-#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
-#endif
-#ifdef CL_VERSION_1_2
-#define CL_COMPILE_PROGRAM_FAILURE                  -15
-#define CL_LINKER_NOT_AVAILABLE                     -16
-#define CL_LINK_PROGRAM_FAILURE                     -17
-#define CL_DEVICE_PARTITION_FAILED                  -18
-#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE            -19
-#endif
-
-#define CL_INVALID_VALUE                            -30
-#define CL_INVALID_DEVICE_TYPE                      -31
-#define CL_INVALID_PLATFORM                         -32
-#define CL_INVALID_DEVICE                           -33
-#define CL_INVALID_CONTEXT                          -34
-#define CL_INVALID_QUEUE_PROPERTIES                 -35
-#define CL_INVALID_COMMAND_QUEUE                    -36
-#define CL_INVALID_HOST_PTR                         -37
-#define CL_INVALID_MEM_OBJECT                       -38
-#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR          -39
-#define CL_INVALID_IMAGE_SIZE                       -40
-#define CL_INVALID_SAMPLER                          -41
-#define CL_INVALID_BINARY                           -42
-#define CL_INVALID_BUILD_OPTIONS                    -43
-#define CL_INVALID_PROGRAM                          -44
-#define CL_INVALID_PROGRAM_EXECUTABLE               -45
-#define CL_INVALID_KERNEL_NAME                      -46
-#define CL_INVALID_KERNEL_DEFINITION                -47
-#define CL_INVALID_KERNEL                           -48
-#define CL_INVALID_ARG_INDEX                        -49
-#define CL_INVALID_ARG_VALUE                        -50
-#define CL_INVALID_ARG_SIZE                         -51
-#define CL_INVALID_KERNEL_ARGS                      -52
-#define CL_INVALID_WORK_DIMENSION                   -53
-#define CL_INVALID_WORK_GROUP_SIZE                  -54
-#define CL_INVALID_WORK_ITEM_SIZE                   -55
-#define CL_INVALID_GLOBAL_OFFSET                    -56
-#define CL_INVALID_EVENT_WAIT_LIST                  -57
-#define CL_INVALID_EVENT                            -58
-#define CL_INVALID_OPERATION                        -59
-#define CL_INVALID_GL_OBJECT                        -60
-#define CL_INVALID_BUFFER_SIZE                      -61
-#define CL_INVALID_MIP_LEVEL                        -62
-#define CL_INVALID_GLOBAL_WORK_SIZE                 -63
-#ifdef CL_VERSION_1_1
-#define CL_INVALID_PROPERTY                         -64
-#endif
-#ifdef CL_VERSION_1_2
-#define CL_INVALID_IMAGE_DESCRIPTOR                 -65
-#define CL_INVALID_COMPILER_OPTIONS                 -66
-#define CL_INVALID_LINKER_OPTIONS                   -67
-#define CL_INVALID_DEVICE_PARTITION_COUNT           -68
-#endif
-#ifdef CL_VERSION_2_0
-#define CL_INVALID_PIPE_SIZE                        -69
-#define CL_INVALID_DEVICE_QUEUE                     -70
-#endif
-#ifdef CL_VERSION_2_2
-#define CL_INVALID_SPEC_ID                          -71
-#define CL_MAX_SIZE_RESTRICTION_EXCEEDED            -72
-#endif
-
-
-/* cl_bool */
-#define CL_FALSE                                    0
-#define CL_TRUE                                     1
-#ifdef CL_VERSION_1_2
-#define CL_BLOCKING                                 CL_TRUE
-#define CL_NON_BLOCKING                             CL_FALSE
-#endif
-
-/* cl_platform_info */
-#define CL_PLATFORM_PROFILE                         0x0900
-#define CL_PLATFORM_VERSION                         0x0901
-#define CL_PLATFORM_NAME                            0x0902
-#define CL_PLATFORM_VENDOR                          0x0903
-#define CL_PLATFORM_EXTENSIONS                      0x0904
-#ifdef CL_VERSION_2_1
-#define CL_PLATFORM_HOST_TIMER_RESOLUTION           0x0905
-#endif
-#ifdef CL_VERSION_3_0
-#define CL_PLATFORM_NUMERIC_VERSION                 0x0906
-#define CL_PLATFORM_EXTENSIONS_WITH_VERSION         0x0907
-#endif
-
-/* cl_device_type - bitfield */
-#define CL_DEVICE_TYPE_DEFAULT                      (1 << 0)
-#define CL_DEVICE_TYPE_CPU                          (1 << 1)
-#define CL_DEVICE_TYPE_GPU                          (1 << 2)
-#define CL_DEVICE_TYPE_ACCELERATOR                  (1 << 3)
-#ifdef CL_VERSION_1_2
-#define CL_DEVICE_TYPE_CUSTOM                       (1 << 4)
-#endif
-#define CL_DEVICE_TYPE_ALL                          0xFFFFFFFF
-
-/* cl_device_info */
-#define CL_DEVICE_TYPE                                   0x1000
-#define CL_DEVICE_VENDOR_ID                              0x1001
-#define CL_DEVICE_MAX_COMPUTE_UNITS                      0x1002
-#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS               0x1003
-#define CL_DEVICE_MAX_WORK_GROUP_SIZE                    0x1004
-#define CL_DEVICE_MAX_WORK_ITEM_SIZES                    0x1005
-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR            0x1006
-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT           0x1007
-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT             0x1008
-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG            0x1009
-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT           0x100A
-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE          0x100B
-#define CL_DEVICE_MAX_CLOCK_FREQUENCY                    0x100C
-#define CL_DEVICE_ADDRESS_BITS                           0x100D
-#define CL_DEVICE_MAX_READ_IMAGE_ARGS                    0x100E
-#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS                   0x100F
-#define CL_DEVICE_MAX_MEM_ALLOC_SIZE                     0x1010
-#define CL_DEVICE_IMAGE2D_MAX_WIDTH                      0x1011
-#define CL_DEVICE_IMAGE2D_MAX_HEIGHT                     0x1012
-#define CL_DEVICE_IMAGE3D_MAX_WIDTH                      0x1013
-#define CL_DEVICE_IMAGE3D_MAX_HEIGHT                     0x1014
-#define CL_DEVICE_IMAGE3D_MAX_DEPTH                      0x1015
-#define CL_DEVICE_IMAGE_SUPPORT                          0x1016
-#define CL_DEVICE_MAX_PARAMETER_SIZE                     0x1017
-#define CL_DEVICE_MAX_SAMPLERS                           0x1018
-#define CL_DEVICE_MEM_BASE_ADDR_ALIGN                    0x1019
-#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE               0x101A
-#define CL_DEVICE_SINGLE_FP_CONFIG                       0x101B
-#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE                  0x101C
-#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE              0x101D
-#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE                  0x101E
-#define CL_DEVICE_GLOBAL_MEM_SIZE                        0x101F
-#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE               0x1020
-#define CL_DEVICE_MAX_CONSTANT_ARGS                      0x1021
-#define CL_DEVICE_LOCAL_MEM_TYPE                         0x1022
-#define CL_DEVICE_LOCAL_MEM_SIZE                         0x1023
-#define CL_DEVICE_ERROR_CORRECTION_SUPPORT               0x1024
-#define CL_DEVICE_PROFILING_TIMER_RESOLUTION             0x1025
-#define CL_DEVICE_ENDIAN_LITTLE                          0x1026
-#define CL_DEVICE_AVAILABLE                              0x1027
-#define CL_DEVICE_COMPILER_AVAILABLE                     0x1028
-#define CL_DEVICE_EXECUTION_CAPABILITIES                 0x1029
-#define CL_DEVICE_QUEUE_PROPERTIES                       0x102A    /* deprecated */
-#ifdef CL_VERSION_2_0
-#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES               0x102A
-#endif
-#define CL_DEVICE_NAME                                   0x102B
-#define CL_DEVICE_VENDOR                                 0x102C
-#define CL_DRIVER_VERSION                                0x102D
-#define CL_DEVICE_PROFILE                                0x102E
-#define CL_DEVICE_VERSION                                0x102F
-#define CL_DEVICE_EXTENSIONS                             0x1030
-#define CL_DEVICE_PLATFORM                               0x1031
-#ifdef CL_VERSION_1_2
-#define CL_DEVICE_DOUBLE_FP_CONFIG                       0x1032
-#endif
-/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG which is already defined in "cl_ext.h" */
-#ifdef CL_VERSION_1_1
-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF            0x1034
-#define CL_DEVICE_HOST_UNIFIED_MEMORY                    0x1035   /* deprecated */
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR               0x1036
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT              0x1037
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT                0x1038
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG               0x1039
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT              0x103A
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE             0x103B
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF               0x103C
-#define CL_DEVICE_OPENCL_C_VERSION                       0x103D
-#endif
-#ifdef CL_VERSION_1_2
-#define CL_DEVICE_LINKER_AVAILABLE                       0x103E
-#define CL_DEVICE_BUILT_IN_KERNELS                       0x103F
-#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE                  0x1040
-#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE                   0x1041
-#define CL_DEVICE_PARENT_DEVICE                          0x1042
-#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES              0x1043
-#define CL_DEVICE_PARTITION_PROPERTIES                   0x1044
-#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN              0x1045
-#define CL_DEVICE_PARTITION_TYPE                         0x1046
-#define CL_DEVICE_REFERENCE_COUNT                        0x1047
-#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC            0x1048
-#define CL_DEVICE_PRINTF_BUFFER_SIZE                     0x1049
-#endif
-#ifdef CL_VERSION_2_0
-#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT                  0x104A
-#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT           0x104B
-#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS              0x104C
-#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE               0x104D
-#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES             0x104E
-#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE         0x104F
-#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE               0x1050
-#define CL_DEVICE_MAX_ON_DEVICE_QUEUES                   0x1051
-#define CL_DEVICE_MAX_ON_DEVICE_EVENTS                   0x1052
-#define CL_DEVICE_SVM_CAPABILITIES                       0x1053
-#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE   0x1054
-#define CL_DEVICE_MAX_PIPE_ARGS                          0x1055
-#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS           0x1056
-#define CL_DEVICE_PIPE_MAX_PACKET_SIZE                   0x1057
-#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT    0x1058
-#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT      0x1059
-#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT       0x105A
-#endif
-#ifdef CL_VERSION_2_1
-#define CL_DEVICE_IL_VERSION                             0x105B
-#define CL_DEVICE_MAX_NUM_SUB_GROUPS                     0x105C
-#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D
-#endif
-#ifdef CL_VERSION_3_0
-#define CL_DEVICE_NUMERIC_VERSION                        0x105E
-#define CL_DEVICE_EXTENSIONS_WITH_VERSION                0x1060
-#define CL_DEVICE_ILS_WITH_VERSION                       0x1061
-#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION          0x1062
-#define CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES             0x1063
-#define CL_DEVICE_ATOMIC_FENCE_CAPABILITIES              0x1064
-#define CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT         0x1065
-#define CL_DEVICE_OPENCL_C_ALL_VERSIONS                  0x1066
-#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE     0x1067
-#define CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT 0x1068
-#define CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT          0x1069
-/* 0x106A to 0x106E - Reserved for upcoming KHR extension */
-#define CL_DEVICE_OPENCL_C_FEATURES                      0x106F
-#define CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES            0x1070
-#define CL_DEVICE_PIPE_SUPPORT                           0x1071
-#define CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED      0x1072
-#endif
-
-/* cl_device_fp_config - bitfield */
-#define CL_FP_DENORM                                (1 << 0)
-#define CL_FP_INF_NAN                               (1 << 1)
-#define CL_FP_ROUND_TO_NEAREST                      (1 << 2)
-#define CL_FP_ROUND_TO_ZERO                         (1 << 3)
-#define CL_FP_ROUND_TO_INF                          (1 << 4)
-#define CL_FP_FMA                                   (1 << 5)
-#ifdef CL_VERSION_1_1
-#define CL_FP_SOFT_FLOAT                            (1 << 6)
-#endif
-#ifdef CL_VERSION_1_2
-#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT         (1 << 7)
-#endif
-
-/* cl_device_mem_cache_type */
-#define CL_NONE                                     0x0
-#define CL_READ_ONLY_CACHE                          0x1
-#define CL_READ_WRITE_CACHE                         0x2
-
-/* cl_device_local_mem_type */
-#define CL_LOCAL                                    0x1
-#define CL_GLOBAL                                   0x2
-
-/* cl_device_exec_capabilities - bitfield */
-#define CL_EXEC_KERNEL                              (1 << 0)
-#define CL_EXEC_NATIVE_KERNEL                       (1 << 1)
-
-/* cl_command_queue_properties - bitfield */
-#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE      (1 << 0)
-#define CL_QUEUE_PROFILING_ENABLE                   (1 << 1)
-#ifdef CL_VERSION_2_0
-#define CL_QUEUE_ON_DEVICE                          (1 << 2)
-#define CL_QUEUE_ON_DEVICE_DEFAULT                  (1 << 3)
-#endif
-
-/* cl_context_info */
-#define CL_CONTEXT_REFERENCE_COUNT                  0x1080
-#define CL_CONTEXT_DEVICES                          0x1081
-#define CL_CONTEXT_PROPERTIES                       0x1082
-#ifdef CL_VERSION_1_1
-#define CL_CONTEXT_NUM_DEVICES                      0x1083
-#endif
-
-/* cl_context_properties */
-#define CL_CONTEXT_PLATFORM                         0x1084
-#ifdef CL_VERSION_1_2
-#define CL_CONTEXT_INTEROP_USER_SYNC                0x1085
-#endif
-
-#ifdef CL_VERSION_1_2
-
-/* cl_device_partition_property */
-#define CL_DEVICE_PARTITION_EQUALLY                 0x1086
-#define CL_DEVICE_PARTITION_BY_COUNTS               0x1087
-#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END      0x0
-#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN      0x1088
-
-#endif
-
-#ifdef CL_VERSION_1_2
-
-/* cl_device_affinity_domain */
-#define CL_DEVICE_AFFINITY_DOMAIN_NUMA               (1 << 0)
-#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE           (1 << 1)
-#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE           (1 << 2)
-#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE           (1 << 3)
-#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE           (1 << 4)
-#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
-
-#endif
-
-#ifdef CL_VERSION_2_0
-
-/* cl_device_svm_capabilities */
-#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER           (1 << 0)
-#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER             (1 << 1)
-#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM             (1 << 2)
-#define CL_DEVICE_SVM_ATOMICS                       (1 << 3)
-
-#endif
-
-/* cl_command_queue_info */
-#define CL_QUEUE_CONTEXT                            0x1090
-#define CL_QUEUE_DEVICE                             0x1091
-#define CL_QUEUE_REFERENCE_COUNT                    0x1092
-#define CL_QUEUE_PROPERTIES                         0x1093
-#ifdef CL_VERSION_2_0
-#define CL_QUEUE_SIZE                               0x1094
-#endif
-#ifdef CL_VERSION_2_1
-#define CL_QUEUE_DEVICE_DEFAULT                     0x1095
-#endif
-#ifdef CL_VERSION_3_0
-#define CL_QUEUE_PROPERTIES_ARRAY                   0x1098
-#endif
-
-/* cl_mem_flags and cl_svm_mem_flags - bitfield */
-#define CL_MEM_READ_WRITE                           (1 << 0)
-#define CL_MEM_WRITE_ONLY                           (1 << 1)
-#define CL_MEM_READ_ONLY                            (1 << 2)
-#define CL_MEM_USE_HOST_PTR                         (1 << 3)
-#define CL_MEM_ALLOC_HOST_PTR                       (1 << 4)
-#define CL_MEM_COPY_HOST_PTR                        (1 << 5)
-/* reserved                                         (1 << 6)    */
-#ifdef CL_VERSION_1_2
-#define CL_MEM_HOST_WRITE_ONLY                      (1 << 7)
-#define CL_MEM_HOST_READ_ONLY                       (1 << 8)
-#define CL_MEM_HOST_NO_ACCESS                       (1 << 9)
-#endif
-#ifdef CL_VERSION_2_0
-#define CL_MEM_SVM_FINE_GRAIN_BUFFER                (1 << 10)   /* used by cl_svm_mem_flags only */
-#define CL_MEM_SVM_ATOMICS                          (1 << 11)   /* used by cl_svm_mem_flags only */
-#define CL_MEM_KERNEL_READ_AND_WRITE                (1 << 12)
-#endif
-
-#ifdef CL_VERSION_1_2
-
-/* cl_mem_migration_flags - bitfield */
-#define CL_MIGRATE_MEM_OBJECT_HOST                  (1 << 0)
-#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED     (1 << 1)
-
-#endif
-
-/* cl_channel_order */
-#define CL_R                                        0x10B0
-#define CL_A                                        0x10B1
-#define CL_RG                                       0x10B2
-#define CL_RA                                       0x10B3
-#define CL_RGB                                      0x10B4
-#define CL_RGBA                                     0x10B5
-#define CL_BGRA                                     0x10B6
-#define CL_ARGB                                     0x10B7
-#define CL_INTENSITY                                0x10B8
-#define CL_LUMINANCE                                0x10B9
-#ifdef CL_VERSION_1_1
-#define CL_Rx                                       0x10BA
-#define CL_RGx                                      0x10BB
-#define CL_RGBx                                     0x10BC
-#endif
-#ifdef CL_VERSION_1_2
-#define CL_DEPTH                                    0x10BD
-#define CL_DEPTH_STENCIL                            0x10BE
-#endif
-#ifdef CL_VERSION_2_0
-#define CL_sRGB                                     0x10BF
-#define CL_sRGBx                                    0x10C0
-#define CL_sRGBA                                    0x10C1
-#define CL_sBGRA                                    0x10C2
-#define CL_ABGR                                     0x10C3
-#endif
-
-/* cl_channel_type */
-#define CL_SNORM_INT8                               0x10D0
-#define CL_SNORM_INT16                              0x10D1
-#define CL_UNORM_INT8                               0x10D2
-#define CL_UNORM_INT16                              0x10D3
-#define CL_UNORM_SHORT_565                          0x10D4
-#define CL_UNORM_SHORT_555                          0x10D5
-#define CL_UNORM_INT_101010                         0x10D6
-#define CL_SIGNED_INT8                              0x10D7
-#define CL_SIGNED_INT16                             0x10D8
-#define CL_SIGNED_INT32                             0x10D9
-#define CL_UNSIGNED_INT8                            0x10DA
-#define CL_UNSIGNED_INT16                           0x10DB
-#define CL_UNSIGNED_INT32                           0x10DC
-#define CL_HALF_FLOAT                               0x10DD
-#define CL_FLOAT                                    0x10DE
-#ifdef CL_VERSION_1_2
-#define CL_UNORM_INT24                              0x10DF
-#endif
-#ifdef CL_VERSION_2_1
-#define CL_UNORM_INT_101010_2                       0x10E0
-#endif
-
-/* cl_mem_object_type */
-#define CL_MEM_OBJECT_BUFFER                        0x10F0
-#define CL_MEM_OBJECT_IMAGE2D                       0x10F1
-#define CL_MEM_OBJECT_IMAGE3D                       0x10F2
-#ifdef CL_VERSION_1_2
-#define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
-#define CL_MEM_OBJECT_IMAGE1D                       0x10F4
-#define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
-#define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
-#endif
-#ifdef CL_VERSION_2_0
-#define CL_MEM_OBJECT_PIPE                          0x10F7
-#endif
-
-/* cl_mem_info */
-#define CL_MEM_TYPE                                 0x1100
-#define CL_MEM_FLAGS                                0x1101
-#define CL_MEM_SIZE                                 0x1102
-#define CL_MEM_HOST_PTR                             0x1103
-#define CL_MEM_MAP_COUNT                            0x1104
-#define CL_MEM_REFERENCE_COUNT                      0x1105
-#define CL_MEM_CONTEXT                              0x1106
-#ifdef CL_VERSION_1_1
-#define CL_MEM_ASSOCIATED_MEMOBJECT                 0x1107
-#define CL_MEM_OFFSET                               0x1108
-#endif
-#ifdef CL_VERSION_2_0
-#define CL_MEM_USES_SVM_POINTER                     0x1109
-#endif
-#ifdef CL_VERSION_3_0
-#define CL_MEM_PROPERTIES                           0x110A
-#endif
-
-/* cl_image_info */
-#define CL_IMAGE_FORMAT                             0x1110
-#define CL_IMAGE_ELEMENT_SIZE                       0x1111
-#define CL_IMAGE_ROW_PITCH                          0x1112
-#define CL_IMAGE_SLICE_PITCH                        0x1113
-#define CL_IMAGE_WIDTH                              0x1114
-#define CL_IMAGE_HEIGHT                             0x1115
-#define CL_IMAGE_DEPTH                              0x1116
-#ifdef CL_VERSION_1_2
-#define CL_IMAGE_ARRAY_SIZE                         0x1117
-#define CL_IMAGE_BUFFER                             0x1118
-#define CL_IMAGE_NUM_MIP_LEVELS                     0x1119
-#define CL_IMAGE_NUM_SAMPLES                        0x111A
-#endif
-
-
-/* cl_pipe_info */
-#ifdef CL_VERSION_2_0
-#define CL_PIPE_PACKET_SIZE                         0x1120
-#define CL_PIPE_MAX_PACKETS                         0x1121
-#endif
-#ifdef CL_VERSION_3_0
-#define CL_PIPE_PROPERTIES                          0x1122
-#endif
-
-/* cl_addressing_mode */
-#define CL_ADDRESS_NONE                             0x1130
-#define CL_ADDRESS_CLAMP_TO_EDGE                    0x1131
-#define CL_ADDRESS_CLAMP                            0x1132
-#define CL_ADDRESS_REPEAT                           0x1133
-#ifdef CL_VERSION_1_1
-#define CL_ADDRESS_MIRRORED_REPEAT                  0x1134
-#endif
-
-/* cl_filter_mode */
-#define CL_FILTER_NEAREST                           0x1140
-#define CL_FILTER_LINEAR                            0x1141
-
-/* cl_sampler_info */
-#define CL_SAMPLER_REFERENCE_COUNT                  0x1150
-#define CL_SAMPLER_CONTEXT                          0x1151
-#define CL_SAMPLER_NORMALIZED_COORDS                0x1152
-#define CL_SAMPLER_ADDRESSING_MODE                  0x1153
-#define CL_SAMPLER_FILTER_MODE                      0x1154
-#ifdef CL_VERSION_2_0
-/* These enumerants are for the cl_khr_mipmap_image extension.
-   They have since been added to cl_ext.h with an appropriate
-   KHR suffix, but are left here for backwards compatibility. */
-#define CL_SAMPLER_MIP_FILTER_MODE                  0x1155
-#define CL_SAMPLER_LOD_MIN                          0x1156
-#define CL_SAMPLER_LOD_MAX                          0x1157
-#endif
-#ifdef CL_VERSION_3_0
-#define CL_SAMPLER_PROPERTIES                       0x1158
-#endif
-
-/* cl_map_flags - bitfield */
-#define CL_MAP_READ                                 (1 << 0)
-#define CL_MAP_WRITE                                (1 << 1)
-#ifdef CL_VERSION_1_2
-#define CL_MAP_WRITE_INVALIDATE_REGION              (1 << 2)
-#endif
-
-/* cl_program_info */
-#define CL_PROGRAM_REFERENCE_COUNT                  0x1160
-#define CL_PROGRAM_CONTEXT                          0x1161
-#define CL_PROGRAM_NUM_DEVICES                      0x1162
-#define CL_PROGRAM_DEVICES                          0x1163
-#define CL_PROGRAM_SOURCE                           0x1164
-#define CL_PROGRAM_BINARY_SIZES                     0x1165
-#define CL_PROGRAM_BINARIES                         0x1166
-#ifdef CL_VERSION_1_2
-#define CL_PROGRAM_NUM_KERNELS                      0x1167
-#define CL_PROGRAM_KERNEL_NAMES                     0x1168
-#endif
-#ifdef CL_VERSION_2_1
-#define CL_PROGRAM_IL                               0x1169
-#endif
-#ifdef CL_VERSION_2_2
-#define CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT       0x116A
-#define CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT       0x116B
-#endif
-
-/* cl_program_build_info */
-#define CL_PROGRAM_BUILD_STATUS                     0x1181
-#define CL_PROGRAM_BUILD_OPTIONS                    0x1182
-#define CL_PROGRAM_BUILD_LOG                        0x1183
-#ifdef CL_VERSION_1_2
-#define CL_PROGRAM_BINARY_TYPE                      0x1184
-#endif
-#ifdef CL_VERSION_2_0
-#define CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE 0x1185
-#endif
-
-#ifdef CL_VERSION_1_2
-
-/* cl_program_binary_type */
-#define CL_PROGRAM_BINARY_TYPE_NONE                 0x0
-#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT      0x1
-#define CL_PROGRAM_BINARY_TYPE_LIBRARY              0x2
-#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE           0x4
-
-#endif
-
-/* cl_build_status */
-#define CL_BUILD_SUCCESS                            0
-#define CL_BUILD_NONE                               -1
-#define CL_BUILD_ERROR                              -2
-#define CL_BUILD_IN_PROGRESS                        -3
-
-/* cl_kernel_info */
-#define CL_KERNEL_FUNCTION_NAME                     0x1190
-#define CL_KERNEL_NUM_ARGS                          0x1191
-#define CL_KERNEL_REFERENCE_COUNT                   0x1192
-#define CL_KERNEL_CONTEXT                           0x1193
-#define CL_KERNEL_PROGRAM                           0x1194
-#ifdef CL_VERSION_1_2
-#define CL_KERNEL_ATTRIBUTES                        0x1195
-#endif
-
-#ifdef CL_VERSION_1_2
-
-/* cl_kernel_arg_info */
-#define CL_KERNEL_ARG_ADDRESS_QUALIFIER             0x1196
-#define CL_KERNEL_ARG_ACCESS_QUALIFIER              0x1197
-#define CL_KERNEL_ARG_TYPE_NAME                     0x1198
-#define CL_KERNEL_ARG_TYPE_QUALIFIER                0x1199
-#define CL_KERNEL_ARG_NAME                          0x119A
-
-#endif
-
-#ifdef CL_VERSION_1_2
-
-/* cl_kernel_arg_address_qualifier */
-#define CL_KERNEL_ARG_ADDRESS_GLOBAL                0x119B
-#define CL_KERNEL_ARG_ADDRESS_LOCAL                 0x119C
-#define CL_KERNEL_ARG_ADDRESS_CONSTANT              0x119D
-#define CL_KERNEL_ARG_ADDRESS_PRIVATE               0x119E
-
-#endif
-
-#ifdef CL_VERSION_1_2
-
-/* cl_kernel_arg_access_qualifier */
-#define CL_KERNEL_ARG_ACCESS_READ_ONLY              0x11A0
-#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY             0x11A1
-#define CL_KERNEL_ARG_ACCESS_READ_WRITE             0x11A2
-#define CL_KERNEL_ARG_ACCESS_NONE                   0x11A3
-
-#endif
-
-#ifdef CL_VERSION_1_2
-
-/* cl_kernel_arg_type_qualifier */
-#define CL_KERNEL_ARG_TYPE_NONE                     0
-#define CL_KERNEL_ARG_TYPE_CONST                    (1 << 0)
-#define CL_KERNEL_ARG_TYPE_RESTRICT                 (1 << 1)
-#define CL_KERNEL_ARG_TYPE_VOLATILE                 (1 << 2)
-#ifdef CL_VERSION_2_0
-#define CL_KERNEL_ARG_TYPE_PIPE                     (1 << 3)
-#endif
-
-#endif
-
-/* cl_kernel_work_group_info */
-#define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
-#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE           0x11B1
-#define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
-#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
-#define CL_KERNEL_PRIVATE_MEM_SIZE                  0x11B4
-#ifdef CL_VERSION_1_2
-#define CL_KERNEL_GLOBAL_WORK_SIZE                  0x11B5
-#endif
-
-#ifdef CL_VERSION_2_1
-
-/* cl_kernel_sub_group_info */
-#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE    0x2033
-#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE       0x2034
-#define CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT    0x11B8
-#define CL_KERNEL_MAX_NUM_SUB_GROUPS                0x11B9
-#define CL_KERNEL_COMPILE_NUM_SUB_GROUPS            0x11BA
-
-#endif
-
-#ifdef CL_VERSION_2_0
-
-/* cl_kernel_exec_info */
-#define CL_KERNEL_EXEC_INFO_SVM_PTRS                0x11B6
-#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM   0x11B7
-
-#endif
-
-/* cl_event_info */
-#define CL_EVENT_COMMAND_QUEUE                      0x11D0
-#define CL_EVENT_COMMAND_TYPE                       0x11D1
-#define CL_EVENT_REFERENCE_COUNT                    0x11D2
-#define CL_EVENT_COMMAND_EXECUTION_STATUS           0x11D3
-#ifdef CL_VERSION_1_1
-#define CL_EVENT_CONTEXT                            0x11D4
-#endif
-
-/* cl_command_type */
-#define CL_COMMAND_NDRANGE_KERNEL                   0x11F0
-#define CL_COMMAND_TASK                             0x11F1
-#define CL_COMMAND_NATIVE_KERNEL                    0x11F2
-#define CL_COMMAND_READ_BUFFER                      0x11F3
-#define CL_COMMAND_WRITE_BUFFER                     0x11F4
-#define CL_COMMAND_COPY_BUFFER                      0x11F5
-#define CL_COMMAND_READ_IMAGE                       0x11F6
-#define CL_COMMAND_WRITE_IMAGE                      0x11F7
-#define CL_COMMAND_COPY_IMAGE                       0x11F8
-#define CL_COMMAND_COPY_IMAGE_TO_BUFFER             0x11F9
-#define CL_COMMAND_COPY_BUFFER_TO_IMAGE             0x11FA
-#define CL_COMMAND_MAP_BUFFER                       0x11FB
-#define CL_COMMAND_MAP_IMAGE                        0x11FC
-#define CL_COMMAND_UNMAP_MEM_OBJECT                 0x11FD
-#define CL_COMMAND_MARKER                           0x11FE
-#define CL_COMMAND_ACQUIRE_GL_OBJECTS               0x11FF
-#define CL_COMMAND_RELEASE_GL_OBJECTS               0x1200
-#ifdef CL_VERSION_1_1
-#define CL_COMMAND_READ_BUFFER_RECT                 0x1201
-#define CL_COMMAND_WRITE_BUFFER_RECT                0x1202
-#define CL_COMMAND_COPY_BUFFER_RECT                 0x1203
-#define CL_COMMAND_USER                             0x1204
-#endif
-#ifdef CL_VERSION_1_2
-#define CL_COMMAND_BARRIER                          0x1205
-#define CL_COMMAND_MIGRATE_MEM_OBJECTS              0x1206
-#define CL_COMMAND_FILL_BUFFER                      0x1207
-#define CL_COMMAND_FILL_IMAGE                       0x1208
-#endif
-#ifdef CL_VERSION_2_0
-#define CL_COMMAND_SVM_FREE                         0x1209
-#define CL_COMMAND_SVM_MEMCPY                       0x120A
-#define CL_COMMAND_SVM_MEMFILL                      0x120B
-#define CL_COMMAND_SVM_MAP                          0x120C
-#define CL_COMMAND_SVM_UNMAP                        0x120D
-#endif
-#ifdef CL_VERSION_3_0
-#define CL_COMMAND_SVM_MIGRATE_MEM                  0x120E
-#endif
-
-/* command execution status */
-#define CL_COMPLETE                                 0x0
-#define CL_RUNNING                                  0x1
-#define CL_SUBMITTED                                0x2
-#define CL_QUEUED                                   0x3
-
-/* cl_buffer_create_type */
-#ifdef CL_VERSION_1_1
-#define CL_BUFFER_CREATE_TYPE_REGION                0x1220
-#endif
-
-/* cl_profiling_info */
-#define CL_PROFILING_COMMAND_QUEUED                 0x1280
-#define CL_PROFILING_COMMAND_SUBMIT                 0x1281
-#define CL_PROFILING_COMMAND_START                  0x1282
-#define CL_PROFILING_COMMAND_END                    0x1283
-#ifdef CL_VERSION_2_0
-#define CL_PROFILING_COMMAND_COMPLETE               0x1284
-#endif
-
-/* cl_device_atomic_capabilities - bitfield */
-#ifdef CL_VERSION_3_0
-#define CL_DEVICE_ATOMIC_ORDER_RELAXED          (1 << 0)
-#define CL_DEVICE_ATOMIC_ORDER_ACQ_REL          (1 << 1)
-#define CL_DEVICE_ATOMIC_ORDER_SEQ_CST          (1 << 2)
-#define CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM        (1 << 3)
-#define CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP       (1 << 4)
-#define CL_DEVICE_ATOMIC_SCOPE_DEVICE           (1 << 5)
-#define CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES      (1 << 6)
-#endif
-
-/* cl_device_device_enqueue_capabilities - bitfield */
-#ifdef CL_VERSION_3_0
-#define CL_DEVICE_QUEUE_SUPPORTED               (1 << 0)
-#define CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT     (1 << 1)
-#endif
-
-/* cl_khronos_vendor_id */
-#define CL_KHRONOS_VENDOR_ID_CODEPLAY               0x10004
-
-#ifdef CL_VERSION_3_0
-
-/* cl_version */
-#define CL_VERSION_MAJOR_BITS (10)
-#define CL_VERSION_MINOR_BITS (10)
-#define CL_VERSION_PATCH_BITS (12)
-
-#define CL_VERSION_MAJOR_MASK ((1 << CL_VERSION_MAJOR_BITS) - 1)
-#define CL_VERSION_MINOR_MASK ((1 << CL_VERSION_MINOR_BITS) - 1)
-#define CL_VERSION_PATCH_MASK ((1 << CL_VERSION_PATCH_BITS) - 1)
-
-#define CL_VERSION_MAJOR(version) \
-  ((version) >> (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS))
-
-#define CL_VERSION_MINOR(version) \
-  (((version) >> CL_VERSION_PATCH_BITS) & CL_VERSION_MINOR_MASK)
-
-#define CL_VERSION_PATCH(version) ((version) & CL_VERSION_PATCH_MASK)
-
-#define CL_MAKE_VERSION(major, minor, patch)                      \
-  ((((major) & CL_VERSION_MAJOR_MASK)                             \
-       << (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) |      \
-   (((minor) & CL_VERSION_MINOR_MASK) << CL_VERSION_PATCH_BITS) | \
-   ((patch) & CL_VERSION_PATCH_MASK))
-
-#endif
-
-/********************************************************************************************************/
-
-/* Platform API */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetPlatformIDs(cl_uint          num_entries,
-                 cl_platform_id * platforms,
-                 cl_uint *        num_platforms) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetPlatformInfo(cl_platform_id   platform,
-                  cl_platform_info param_name,
-                  size_t           param_value_size,
-                  void *           param_value,
-                  size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-/* Device APIs */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetDeviceIDs(cl_platform_id   platform,
-               cl_device_type   device_type,
-               cl_uint          num_entries,
-               cl_device_id *   devices,
-               cl_uint *        num_devices) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetDeviceInfo(cl_device_id    device,
-                cl_device_info  param_name,
-                size_t          param_value_size,
-                void *          param_value,
-                size_t *        param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_2
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clCreateSubDevices(cl_device_id                         in_device,
-                   const cl_device_partition_property * properties,
-                   cl_uint                              num_devices,
-                   cl_device_id *                       out_devices,
-                   cl_uint *                            num_devices_ret) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2;
-
-#endif
-
-#ifdef CL_VERSION_2_1
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetDefaultDeviceCommandQueue(cl_context           context,
-                               cl_device_id         device,
-                               cl_command_queue     command_queue) CL_API_SUFFIX__VERSION_2_1;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetDeviceAndHostTimer(cl_device_id    device,
-                        cl_ulong*       device_timestamp,
-                        cl_ulong*       host_timestamp) CL_API_SUFFIX__VERSION_2_1;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetHostTimer(cl_device_id device,
-               cl_ulong *   host_timestamp) CL_API_SUFFIX__VERSION_2_1;
-
-#endif
-
-/* Context APIs */
-extern CL_API_ENTRY cl_context CL_API_CALL
-clCreateContext(const cl_context_properties * properties,
-                cl_uint              num_devices,
-                const cl_device_id * devices,
-                void (CL_CALLBACK * pfn_notify)(const char * errinfo,
-                                                const void * private_info,
-                                                size_t       cb,
-                                                void *       user_data),
-                void *               user_data,
-                cl_int *             errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_context CL_API_CALL
-clCreateContextFromType(const cl_context_properties * properties,
-                        cl_device_type      device_type,
-                        void (CL_CALLBACK * pfn_notify)(const char * errinfo,
-                                                        const void * private_info,
-                                                        size_t       cb,
-                                                        void *       user_data),
-                        void *              user_data,
-                        cl_int *            errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainContext(cl_context context) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetContextInfo(cl_context         context,
-                 cl_context_info    param_name,
-                 size_t             param_value_size,
-                 void *             param_value,
-                 size_t *           param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_3_0
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetContextDestructorCallback(cl_context         context,
-                               void (CL_CALLBACK* pfn_notify)(cl_context context,
-                                                              void* user_data),
-                               void*              user_data) CL_API_SUFFIX__VERSION_3_0;
-
-#endif
-
-/* Command Queue APIs */
-
-#ifdef CL_VERSION_2_0
-
-extern CL_API_ENTRY cl_command_queue CL_API_CALL
-clCreateCommandQueueWithProperties(cl_context               context,
-                                   cl_device_id             device,
-                                   const cl_queue_properties *    properties,
-                                   cl_int *                 errcode_ret) CL_API_SUFFIX__VERSION_2_0;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetCommandQueueInfo(cl_command_queue      command_queue,
-                      cl_command_queue_info param_name,
-                      size_t                param_value_size,
-                      void *                param_value,
-                      size_t *              param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-/* Memory Object APIs */
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateBuffer(cl_context   context,
-               cl_mem_flags flags,
-               size_t       size,
-               void *       host_ptr,
-               cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_1
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateSubBuffer(cl_mem                   buffer,
-                  cl_mem_flags             flags,
-                  cl_buffer_create_type    buffer_create_type,
-                  const void *             buffer_create_info,
-                  cl_int *                 errcode_ret) CL_API_SUFFIX__VERSION_1_1;
-
-#endif
-
-#ifdef CL_VERSION_1_2
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateImage(cl_context              context,
-              cl_mem_flags            flags,
-              const cl_image_format * image_format,
-              const cl_image_desc *   image_desc,
-              void *                  host_ptr,
-              cl_int *                errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-#endif
-
-#ifdef CL_VERSION_2_0
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreatePipe(cl_context                 context,
-             cl_mem_flags               flags,
-             cl_uint                    pipe_packet_size,
-             cl_uint                    pipe_max_packets,
-             const cl_pipe_properties * properties,
-             cl_int *                   errcode_ret) CL_API_SUFFIX__VERSION_2_0;
-
-#endif
-
-#ifdef CL_VERSION_3_0
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateBufferWithProperties(cl_context                context,
-                             const cl_mem_properties * properties,
-                             cl_mem_flags              flags,
-                             size_t                    size,
-                             void *                    host_ptr,
-                             cl_int *                  errcode_ret) CL_API_SUFFIX__VERSION_3_0;
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateImageWithProperties(cl_context                context,
-                            const cl_mem_properties * properties,
-                            cl_mem_flags              flags,
-                            const cl_image_format *   image_format,
-                            const cl_image_desc *     image_desc,
-                            void *                    host_ptr,
-                            cl_int *                  errcode_ret) CL_API_SUFFIX__VERSION_3_0;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetSupportedImageFormats(cl_context           context,
-                           cl_mem_flags         flags,
-                           cl_mem_object_type   image_type,
-                           cl_uint              num_entries,
-                           cl_image_format *    image_formats,
-                           cl_uint *            num_image_formats) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetMemObjectInfo(cl_mem           memobj,
-                   cl_mem_info      param_name,
-                   size_t           param_value_size,
-                   void *           param_value,
-                   size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetImageInfo(cl_mem           image,
-               cl_image_info    param_name,
-               size_t           param_value_size,
-               void *           param_value,
-               size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_2_0
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetPipeInfo(cl_mem           pipe,
-              cl_pipe_info     param_name,
-              size_t           param_value_size,
-              void *           param_value,
-              size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_2_0;
-
-#endif
-
-#ifdef CL_VERSION_1_1
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetMemObjectDestructorCallback(cl_mem memobj,
-                                 void (CL_CALLBACK * pfn_notify)(cl_mem memobj,
-                                                                 void * user_data),
-                                 void * user_data) CL_API_SUFFIX__VERSION_1_1;
-
-#endif
-
-/* SVM Allocation APIs */
-
-#ifdef CL_VERSION_2_0
-
-extern CL_API_ENTRY void * CL_API_CALL
-clSVMAlloc(cl_context       context,
-           cl_svm_mem_flags flags,
-           size_t           size,
-           cl_uint          alignment) CL_API_SUFFIX__VERSION_2_0;
-
-extern CL_API_ENTRY void CL_API_CALL
-clSVMFree(cl_context        context,
-          void *            svm_pointer) CL_API_SUFFIX__VERSION_2_0;
-
-#endif
-
-/* Sampler APIs */
-
-#ifdef CL_VERSION_2_0
-
-extern CL_API_ENTRY cl_sampler CL_API_CALL
-clCreateSamplerWithProperties(cl_context                     context,
-                              const cl_sampler_properties *  sampler_properties,
-                              cl_int *                       errcode_ret) CL_API_SUFFIX__VERSION_2_0;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetSamplerInfo(cl_sampler         sampler,
-                 cl_sampler_info    param_name,
-                 size_t             param_value_size,
-                 void *             param_value,
-                 size_t *           param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-/* Program Object APIs */
-extern CL_API_ENTRY cl_program CL_API_CALL
-clCreateProgramWithSource(cl_context        context,
-                          cl_uint           count,
-                          const char **     strings,
-                          const size_t *    lengths,
-                          cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_program CL_API_CALL
-clCreateProgramWithBinary(cl_context                     context,
-                          cl_uint                        num_devices,
-                          const cl_device_id *           device_list,
-                          const size_t *                 lengths,
-                          const unsigned char **         binaries,
-                          cl_int *                       binary_status,
-                          cl_int *                       errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_2
-
-extern CL_API_ENTRY cl_program CL_API_CALL
-clCreateProgramWithBuiltInKernels(cl_context            context,
-                                  cl_uint               num_devices,
-                                  const cl_device_id *  device_list,
-                                  const char *          kernel_names,
-                                  cl_int *              errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-#endif
-
-#ifdef CL_VERSION_2_1
-
-extern CL_API_ENTRY cl_program CL_API_CALL
-clCreateProgramWithIL(cl_context    context,
-                     const void*    il,
-                     size_t         length,
-                     cl_int*        errcode_ret) CL_API_SUFFIX__VERSION_2_1;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clBuildProgram(cl_program           program,
-               cl_uint              num_devices,
-               const cl_device_id * device_list,
-               const char *         options,
-               void (CL_CALLBACK *  pfn_notify)(cl_program program,
-                                                void * user_data),
-               void *               user_data) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_2
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clCompileProgram(cl_program           program,
-                 cl_uint              num_devices,
-                 const cl_device_id * device_list,
-                 const char *         options,
-                 cl_uint              num_input_headers,
-                 const cl_program *   input_headers,
-                 const char **        header_include_names,
-                 void (CL_CALLBACK *  pfn_notify)(cl_program program,
-                                                  void * user_data),
-                 void *               user_data) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_program CL_API_CALL
-clLinkProgram(cl_context           context,
-              cl_uint              num_devices,
-              const cl_device_id * device_list,
-              const char *         options,
-              cl_uint              num_input_programs,
-              const cl_program *   input_programs,
-              void (CL_CALLBACK *  pfn_notify)(cl_program program,
-                                               void * user_data),
-              void *               user_data,
-              cl_int *             errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-#endif
-
-#ifdef CL_VERSION_2_2
-
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL
-clSetProgramReleaseCallback(cl_program          program,
-                            void (CL_CALLBACK * pfn_notify)(cl_program program,
-                                                            void * user_data),
-                            void *              user_data) CL_API_SUFFIX__VERSION_2_2_DEPRECATED;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetProgramSpecializationConstant(cl_program  program,
-                                   cl_uint     spec_id,
-                                   size_t      spec_size,
-                                   const void* spec_value) CL_API_SUFFIX__VERSION_2_2;
-
-#endif
-
-#ifdef CL_VERSION_1_2
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetProgramInfo(cl_program         program,
-                 cl_program_info    param_name,
-                 size_t             param_value_size,
-                 void *             param_value,
-                 size_t *           param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetProgramBuildInfo(cl_program            program,
-                      cl_device_id          device,
-                      cl_program_build_info param_name,
-                      size_t                param_value_size,
-                      void *                param_value,
-                      size_t *              param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-/* Kernel Object APIs */
-extern CL_API_ENTRY cl_kernel CL_API_CALL
-clCreateKernel(cl_program      program,
-               const char *    kernel_name,
-               cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clCreateKernelsInProgram(cl_program     program,
-                         cl_uint        num_kernels,
-                         cl_kernel *    kernels,
-                         cl_uint *      num_kernels_ret) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_2_1
-
-extern CL_API_ENTRY cl_kernel CL_API_CALL
-clCloneKernel(cl_kernel     source_kernel,
-              cl_int*       errcode_ret) CL_API_SUFFIX__VERSION_2_1;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainKernel(cl_kernel    kernel) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseKernel(cl_kernel   kernel) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetKernelArg(cl_kernel    kernel,
-               cl_uint      arg_index,
-               size_t       arg_size,
-               const void * arg_value) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_2_0
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetKernelArgSVMPointer(cl_kernel    kernel,
-                         cl_uint      arg_index,
-                         const void * arg_value) CL_API_SUFFIX__VERSION_2_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetKernelExecInfo(cl_kernel            kernel,
-                    cl_kernel_exec_info  param_name,
-                    size_t               param_value_size,
-                    const void *         param_value) CL_API_SUFFIX__VERSION_2_0;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelInfo(cl_kernel       kernel,
-                cl_kernel_info  param_name,
-                size_t          param_value_size,
-                void *          param_value,
-                size_t *        param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_2
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelArgInfo(cl_kernel       kernel,
-                   cl_uint         arg_indx,
-                   cl_kernel_arg_info  param_name,
-                   size_t          param_value_size,
-                   void *          param_value,
-                   size_t *        param_value_size_ret) CL_API_SUFFIX__VERSION_1_2;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelWorkGroupInfo(cl_kernel                  kernel,
-                         cl_device_id               device,
-                         cl_kernel_work_group_info  param_name,
-                         size_t                     param_value_size,
-                         void *                     param_value,
-                         size_t *                   param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_2_1
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelSubGroupInfo(cl_kernel                   kernel,
-                        cl_device_id                device,
-                        cl_kernel_sub_group_info    param_name,
-                        size_t                      input_value_size,
-                        const void*                 input_value,
-                        size_t                      param_value_size,
-                        void*                       param_value,
-                        size_t*                     param_value_size_ret) CL_API_SUFFIX__VERSION_2_1;
-
-#endif
-
-/* Event Object APIs */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clWaitForEvents(cl_uint             num_events,
-                const cl_event *    event_list) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetEventInfo(cl_event         event,
-               cl_event_info    param_name,
-               size_t           param_value_size,
-               void *           param_value,
-               size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_1
-
-extern CL_API_ENTRY cl_event CL_API_CALL
-clCreateUserEvent(cl_context    context,
-                  cl_int *      errcode_ret) CL_API_SUFFIX__VERSION_1_1;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_1
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetUserEventStatus(cl_event   event,
-                     cl_int     execution_status) CL_API_SUFFIX__VERSION_1_1;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetEventCallback(cl_event    event,
-                   cl_int      command_exec_callback_type,
-                   void (CL_CALLBACK * pfn_notify)(cl_event event,
-                                                   cl_int   event_command_status,
-                                                   void *   user_data),
-                   void *      user_data) CL_API_SUFFIX__VERSION_1_1;
-
-#endif
-
-/* Profiling APIs */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetEventProfilingInfo(cl_event            event,
-                        cl_profiling_info   param_name,
-                        size_t              param_value_size,
-                        void *              param_value,
-                        size_t *            param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-/* Flush and Finish APIs */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clFlush(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clFinish(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0;
-
-/* Enqueued Commands APIs */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReadBuffer(cl_command_queue    command_queue,
-                    cl_mem              buffer,
-                    cl_bool             blocking_read,
-                    size_t              offset,
-                    size_t              size,
-                    void *              ptr,
-                    cl_uint             num_events_in_wait_list,
-                    const cl_event *    event_wait_list,
-                    cl_event *          event) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_1
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReadBufferRect(cl_command_queue    command_queue,
-                        cl_mem              buffer,
-                        cl_bool             blocking_read,
-                        const size_t *      buffer_origin,
-                        const size_t *      host_origin,
-                        const size_t *      region,
-                        size_t              buffer_row_pitch,
-                        size_t              buffer_slice_pitch,
-                        size_t              host_row_pitch,
-                        size_t              host_slice_pitch,
-                        void *              ptr,
-                        cl_uint             num_events_in_wait_list,
-                        const cl_event *    event_wait_list,
-                        cl_event *          event) CL_API_SUFFIX__VERSION_1_1;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueWriteBuffer(cl_command_queue   command_queue,
-                     cl_mem             buffer,
-                     cl_bool            blocking_write,
-                     size_t             offset,
-                     size_t             size,
-                     const void *       ptr,
-                     cl_uint            num_events_in_wait_list,
-                     const cl_event *   event_wait_list,
-                     cl_event *         event) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_1
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueWriteBufferRect(cl_command_queue    command_queue,
-                         cl_mem              buffer,
-                         cl_bool             blocking_write,
-                         const size_t *      buffer_origin,
-                         const size_t *      host_origin,
-                         const size_t *      region,
-                         size_t              buffer_row_pitch,
-                         size_t              buffer_slice_pitch,
-                         size_t              host_row_pitch,
-                         size_t              host_slice_pitch,
-                         const void *        ptr,
-                         cl_uint             num_events_in_wait_list,
-                         const cl_event *    event_wait_list,
-                         cl_event *          event) CL_API_SUFFIX__VERSION_1_1;
-
-#endif
-
-#ifdef CL_VERSION_1_2
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueFillBuffer(cl_command_queue   command_queue,
-                    cl_mem             buffer,
-                    const void *       pattern,
-                    size_t             pattern_size,
-                    size_t             offset,
-                    size_t             size,
-                    cl_uint            num_events_in_wait_list,
-                    const cl_event *   event_wait_list,
-                    cl_event *         event) CL_API_SUFFIX__VERSION_1_2;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueCopyBuffer(cl_command_queue    command_queue,
-                    cl_mem              src_buffer,
-                    cl_mem              dst_buffer,
-                    size_t              src_offset,
-                    size_t              dst_offset,
-                    size_t              size,
-                    cl_uint             num_events_in_wait_list,
-                    const cl_event *    event_wait_list,
-                    cl_event *          event) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_1
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueCopyBufferRect(cl_command_queue    command_queue,
-                        cl_mem              src_buffer,
-                        cl_mem              dst_buffer,
-                        const size_t *      src_origin,
-                        const size_t *      dst_origin,
-                        const size_t *      region,
-                        size_t              src_row_pitch,
-                        size_t              src_slice_pitch,
-                        size_t              dst_row_pitch,
-                        size_t              dst_slice_pitch,
-                        cl_uint             num_events_in_wait_list,
-                        const cl_event *    event_wait_list,
-                        cl_event *          event) CL_API_SUFFIX__VERSION_1_1;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReadImage(cl_command_queue     command_queue,
-                   cl_mem               image,
-                   cl_bool              blocking_read,
-                   const size_t *       origin,
-                   const size_t *       region,
-                   size_t               row_pitch,
-                   size_t               slice_pitch,
-                   void *               ptr,
-                   cl_uint              num_events_in_wait_list,
-                   const cl_event *     event_wait_list,
-                   cl_event *           event) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueWriteImage(cl_command_queue    command_queue,
-                    cl_mem              image,
-                    cl_bool             blocking_write,
-                    const size_t *      origin,
-                    const size_t *      region,
-                    size_t              input_row_pitch,
-                    size_t              input_slice_pitch,
-                    const void *        ptr,
-                    cl_uint             num_events_in_wait_list,
-                    const cl_event *    event_wait_list,
-                    cl_event *          event) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_2
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueFillImage(cl_command_queue   command_queue,
-                   cl_mem             image,
-                   const void *       fill_color,
-                   const size_t *     origin,
-                   const size_t *     region,
-                   cl_uint            num_events_in_wait_list,
-                   const cl_event *   event_wait_list,
-                   cl_event *         event) CL_API_SUFFIX__VERSION_1_2;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueCopyImage(cl_command_queue     command_queue,
-                   cl_mem               src_image,
-                   cl_mem               dst_image,
-                   const size_t *       src_origin,
-                   const size_t *       dst_origin,
-                   const size_t *       region,
-                   cl_uint              num_events_in_wait_list,
-                   const cl_event *     event_wait_list,
-                   cl_event *           event) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
-                           cl_mem           src_image,
-                           cl_mem           dst_buffer,
-                           const size_t *   src_origin,
-                           const size_t *   region,
-                           size_t           dst_offset,
-                           cl_uint          num_events_in_wait_list,
-                           const cl_event * event_wait_list,
-                           cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueCopyBufferToImage(cl_command_queue command_queue,
-                           cl_mem           src_buffer,
-                           cl_mem           dst_image,
-                           size_t           src_offset,
-                           const size_t *   dst_origin,
-                           const size_t *   region,
-                           cl_uint          num_events_in_wait_list,
-                           const cl_event * event_wait_list,
-                           cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY void * CL_API_CALL
-clEnqueueMapBuffer(cl_command_queue command_queue,
-                   cl_mem           buffer,
-                   cl_bool          blocking_map,
-                   cl_map_flags     map_flags,
-                   size_t           offset,
-                   size_t           size,
-                   cl_uint          num_events_in_wait_list,
-                   const cl_event * event_wait_list,
-                   cl_event *       event,
-                   cl_int *         errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY void * CL_API_CALL
-clEnqueueMapImage(cl_command_queue  command_queue,
-                  cl_mem            image,
-                  cl_bool           blocking_map,
-                  cl_map_flags      map_flags,
-                  const size_t *    origin,
-                  const size_t *    region,
-                  size_t *          image_row_pitch,
-                  size_t *          image_slice_pitch,
-                  cl_uint           num_events_in_wait_list,
-                  const cl_event *  event_wait_list,
-                  cl_event *        event,
-                  cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueUnmapMemObject(cl_command_queue command_queue,
-                        cl_mem           memobj,
-                        void *           mapped_ptr,
-                        cl_uint          num_events_in_wait_list,
-                        const cl_event * event_wait_list,
-                        cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_2
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMigrateMemObjects(cl_command_queue       command_queue,
-                           cl_uint                num_mem_objects,
-                           const cl_mem *         mem_objects,
-                           cl_mem_migration_flags flags,
-                           cl_uint                num_events_in_wait_list,
-                           const cl_event *       event_wait_list,
-                           cl_event *             event) CL_API_SUFFIX__VERSION_1_2;
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueNDRangeKernel(cl_command_queue command_queue,
-                       cl_kernel        kernel,
-                       cl_uint          work_dim,
-                       const size_t *   global_work_offset,
-                       const size_t *   global_work_size,
-                       const size_t *   local_work_size,
-                       cl_uint          num_events_in_wait_list,
-                       const cl_event * event_wait_list,
-                       cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueNativeKernel(cl_command_queue  command_queue,
-                      void (CL_CALLBACK * user_func)(void *),
-                      void *            args,
-                      size_t            cb_args,
-                      cl_uint           num_mem_objects,
-                      const cl_mem *    mem_list,
-                      const void **     args_mem_loc,
-                      cl_uint           num_events_in_wait_list,
-                      const cl_event *  event_wait_list,
-                      cl_event *        event) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_2
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMarkerWithWaitList(cl_command_queue  command_queue,
-                            cl_uint           num_events_in_wait_list,
-                            const cl_event *  event_wait_list,
-                            cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueBarrierWithWaitList(cl_command_queue  command_queue,
-                             cl_uint           num_events_in_wait_list,
-                             const cl_event *  event_wait_list,
-                             cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-
-#endif
-
-#ifdef CL_VERSION_2_0
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMFree(cl_command_queue  command_queue,
-                 cl_uint           num_svm_pointers,
-                 void *            svm_pointers[],
-                 void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,
-                                                    cl_uint          num_svm_pointers,
-                                                    void *           svm_pointers[],
-                                                    void *           user_data),
-                 void *            user_data,
-                 cl_uint           num_events_in_wait_list,
-                 const cl_event *  event_wait_list,
-                 cl_event *        event) CL_API_SUFFIX__VERSION_2_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMemcpy(cl_command_queue  command_queue,
-                   cl_bool           blocking_copy,
-                   void *            dst_ptr,
-                   const void *      src_ptr,
-                   size_t            size,
-                   cl_uint           num_events_in_wait_list,
-                   const cl_event *  event_wait_list,
-                   cl_event *        event) CL_API_SUFFIX__VERSION_2_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMemFill(cl_command_queue  command_queue,
-                    void *            svm_ptr,
-                    const void *      pattern,
-                    size_t            pattern_size,
-                    size_t            size,
-                    cl_uint           num_events_in_wait_list,
-                    const cl_event *  event_wait_list,
-                    cl_event *        event) CL_API_SUFFIX__VERSION_2_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMap(cl_command_queue  command_queue,
-                cl_bool           blocking_map,
-                cl_map_flags      flags,
-                void *            svm_ptr,
-                size_t            size,
-                cl_uint           num_events_in_wait_list,
-                const cl_event *  event_wait_list,
-                cl_event *        event) CL_API_SUFFIX__VERSION_2_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMUnmap(cl_command_queue  command_queue,
-                  void *            svm_ptr,
-                  cl_uint           num_events_in_wait_list,
-                  const cl_event *  event_wait_list,
-                  cl_event *        event) CL_API_SUFFIX__VERSION_2_0;
-
-#endif
-
-#ifdef CL_VERSION_2_1
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMigrateMem(cl_command_queue         command_queue,
-                       cl_uint                  num_svm_pointers,
-                       const void **            svm_pointers,
-                       const size_t *           sizes,
-                       cl_mem_migration_flags   flags,
-                       cl_uint                  num_events_in_wait_list,
-                       const cl_event *         event_wait_list,
-                       cl_event *               event) CL_API_SUFFIX__VERSION_2_1;
-
-#endif
-
-#ifdef CL_VERSION_1_2
-
-/* Extension function access
- *
- * Returns the extension function address for the given function name,
- * or NULL if a valid function can not be found.  The client must
- * check to make sure the address is not NULL, before using or
- * calling the returned function address.
- */
-extern CL_API_ENTRY void * CL_API_CALL
-clGetExtensionFunctionAddressForPlatform(cl_platform_id platform,
-                                         const char *   func_name) CL_API_SUFFIX__VERSION_1_2;
-
-#endif
-
-#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
-    /*
-     *  WARNING:
-     *     This API introduces mutable state into the OpenCL implementation. It has been REMOVED
-     *  to better facilitate thread safety.  The 1.0 API is not thread safe. It is not tested by the
-     *  OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
-     *  It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
-     *
-     *  Software developers previously relying on this API are instructed to set the command queue
-     *  properties when creating the queue, instead.
-     */
-    extern CL_API_ENTRY cl_int CL_API_CALL
-    clSetCommandQueueProperty(cl_command_queue              command_queue,
-                              cl_command_queue_properties   properties,
-                              cl_bool                       enable,
-                              cl_command_queue_properties * old_properties) CL_API_SUFFIX__VERSION_1_0_DEPRECATED;
-#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
-
-/* Deprecated OpenCL 1.1 APIs */
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
-clCreateImage2D(cl_context              context,
-                cl_mem_flags            flags,
-                const cl_image_format * image_format,
-                size_t                  image_width,
-                size_t                  image_height,
-                size_t                  image_row_pitch,
-                void *                  host_ptr,
-                cl_int *                errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
-clCreateImage3D(cl_context              context,
-                cl_mem_flags            flags,
-                const cl_image_format * image_format,
-                size_t                  image_width,
-                size_t                  image_height,
-                size_t                  image_depth,
-                size_t                  image_row_pitch,
-                size_t                  image_slice_pitch,
-                void *                  host_ptr,
-                cl_int *                errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
-clEnqueueMarker(cl_command_queue    command_queue,
-                cl_event *          event) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
-clEnqueueWaitForEvents(cl_command_queue  command_queue,
-                        cl_uint          num_events,
-                        const cl_event * event_list) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
-clEnqueueBarrier(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
-clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL
-clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-
-/* Deprecated OpenCL 2.0 APIs */
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL
-clCreateCommandQueue(cl_context                     context,
-                     cl_device_id                   device,
-                     cl_command_queue_properties    properties,
-                     cl_int *                       errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED;
-
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL
-clCreateSampler(cl_context          context,
-                cl_bool             normalized_coords,
-                cl_addressing_mode  addressing_mode,
-                cl_filter_mode      filter_mode,
-                cl_int *            errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED;
-
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL
-clEnqueueTask(cl_command_queue  command_queue,
-              cl_kernel         kernel,
-              cl_uint           num_events_in_wait_list,
-              const cl_event *  event_wait_list,
-              cl_event *        event) CL_API_SUFFIX__VERSION_1_2_DEPRECATED;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  /* __OPENCL_CL_H */

diff --git a/dependencies/ocl-headers/CL/cl_egl.h b/dependencies/ocl-headers/CL/cl_egl.h
deleted file mode 100644
index 357a37c..0000000
--- a/dependencies/ocl-headers/CL/cl_egl.h
+++ /dev/null

@@ -1,120 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2008-2020 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-#ifndef __OPENCL_CL_EGL_H
-#define __OPENCL_CL_EGL_H
-
-#include <CL/cl.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
-#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR  0x202F
-#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR    0x202D
-#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR    0x202E
-
-/* Error type for clCreateFromEGLImageKHR */
-#define CL_INVALID_EGL_OBJECT_KHR             -1093
-#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR      -1092
-
-/* CLeglImageKHR is an opaque handle to an EGLImage */
-typedef void* CLeglImageKHR;
-
-/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
-typedef void* CLeglDisplayKHR;
-
-/* CLeglSyncKHR is an opaque handle to an EGLSync object */
-typedef void* CLeglSyncKHR;
-
-/* properties passed to clCreateFromEGLImageKHR */
-typedef intptr_t cl_egl_image_properties_khr;
-
-
-#define cl_khr_egl_image 1
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromEGLImageKHR(cl_context                  context,
-                        CLeglDisplayKHR             egldisplay,
-                        CLeglImageKHR               eglimage,
-                        cl_mem_flags                flags,
-                        const cl_egl_image_properties_khr * properties,
-                        cl_int *                    errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-typedef cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
-    cl_context                  context,
-    CLeglDisplayKHR             egldisplay,
-    CLeglImageKHR               eglimage,
-    cl_mem_flags                flags,
-    const cl_egl_image_properties_khr * properties,
-    cl_int *                    errcode_ret);
-
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
-                              cl_uint          num_objects,
-                              const cl_mem *   mem_objects,
-                              cl_uint          num_events_in_wait_list,
-                              const cl_event * event_wait_list,
-                              cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
-
-typedef cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
-    cl_command_queue command_queue,
-    cl_uint          num_objects,
-    const cl_mem *   mem_objects,
-    cl_uint          num_events_in_wait_list,
-    const cl_event * event_wait_list,
-    cl_event *       event);
-
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
-                              cl_uint          num_objects,
-                              const cl_mem *   mem_objects,
-                              cl_uint          num_events_in_wait_list,
-                              const cl_event * event_wait_list,
-                              cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
-
-typedef cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
-    cl_command_queue command_queue,
-    cl_uint          num_objects,
-    const cl_mem *   mem_objects,
-    cl_uint          num_events_in_wait_list,
-    const cl_event * event_wait_list,
-    cl_event *       event);
-
-
-#define cl_khr_egl_event 1
-
-extern CL_API_ENTRY cl_event CL_API_CALL
-clCreateEventFromEGLSyncKHR(cl_context      context,
-                            CLeglSyncKHR    sync,
-                            CLeglDisplayKHR display,
-                            cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-typedef cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
-    cl_context      context,
-    CLeglSyncKHR    sync,
-    CLeglDisplayKHR display,
-    cl_int *        errcode_ret);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* __OPENCL_CL_EGL_H */

diff --git a/dependencies/ocl-headers/CL/cl_ext.h b/dependencies/ocl-headers/CL/cl_ext.h
deleted file mode 100644
index 80a81de..0000000
--- a/dependencies/ocl-headers/CL/cl_ext.h
+++ /dev/null

@@ -1,1708 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2008-2020 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-/* cl_ext.h contains OpenCL extensions which don't have external */
-/* (OpenGL, D3D) dependencies.                                   */
-
-#ifndef __CL_EXT_H
-#define __CL_EXT_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <CL/cl.h>
-
-/* cl_khr_fp64 extension - no extension #define since it has no functions  */
-/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */
-
-#if CL_TARGET_OPENCL_VERSION <= 110
-#define CL_DEVICE_DOUBLE_FP_CONFIG                       0x1032
-#endif
-
-/* cl_khr_fp16 extension - no extension #define since it has no functions  */
-#define CL_DEVICE_HALF_FP_CONFIG                    0x1033
-
-/* Memory object destruction
- *
- * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
- *
- * Registers a user callback function that will be called when the memory object is deleted and its resources
- * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
- * stack associated with memobj. The registered user callback functions are called in the reverse order in
- * which they were registered. The user callback functions are called and then the memory object is deleted
- * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
- * notified when the memory referenced by host_ptr, specified when the memory object is created and used as
- * the storage bits for the memory object, can be reused or freed.
- *
- * The application may not call CL APIs with the cl_mem object passed to pfn_notify.
- *
- * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
- * before using.
- */
-#define cl_APPLE_SetMemObjectDestructor 1
-extern CL_API_ENTRY cl_int CL_API_CALL clSetMemObjectDestructorAPPLE(  cl_mem memobj,
-                                        void (* pfn_notify)(cl_mem memobj, void * user_data),
-                                        void * user_data)             CL_API_SUFFIX__VERSION_1_0;
-
-
-/* Context Logging Functions
- *
- * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
- * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
- * before using.
- *
- * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger
- */
-#define cl_APPLE_ContextLoggingFunctions 1
-extern CL_API_ENTRY void CL_API_CALL clLogMessagesToSystemLogAPPLE(  const char * errstr,
-                                            const void * private_info,
-                                            size_t       cb,
-                                            void *       user_data)  CL_API_SUFFIX__VERSION_1_0;
-
-/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
-extern CL_API_ENTRY void CL_API_CALL clLogMessagesToStdoutAPPLE(   const char * errstr,
-                                          const void * private_info,
-                                          size_t       cb,
-                                          void *       user_data)    CL_API_SUFFIX__VERSION_1_0;
-
-/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
-extern CL_API_ENTRY void CL_API_CALL clLogMessagesToStderrAPPLE(   const char * errstr,
-                                          const void * private_info,
-                                          size_t       cb,
-                                          void *       user_data)    CL_API_SUFFIX__VERSION_1_0;
-
-
-/************************
-* cl_khr_icd extension *
-************************/
-#define cl_khr_icd 1
-
-/* cl_platform_info                                                        */
-#define CL_PLATFORM_ICD_SUFFIX_KHR                  0x0920
-
-/* Additional Error Codes                                                  */
-#define CL_PLATFORM_NOT_FOUND_KHR                   -1001
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clIcdGetPlatformIDsKHR(cl_uint          num_entries,
-                       cl_platform_id * platforms,
-                       cl_uint *        num_platforms);
-
-typedef cl_int
-(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint          num_entries,
-                                         cl_platform_id * platforms,
-                                         cl_uint *        num_platforms);
-
-
-/*******************************
- * cl_khr_il_program extension *
- *******************************/
-#define cl_khr_il_program 1
-
-/* New property to clGetDeviceInfo for retrieving supported intermediate
- * languages
- */
-#define CL_DEVICE_IL_VERSION_KHR                    0x105B
-
-/* New property to clGetProgramInfo for retrieving the IL of a
- * program
- */
-#define CL_PROGRAM_IL_KHR                           0x1169
-
-extern CL_API_ENTRY cl_program CL_API_CALL
-clCreateProgramWithILKHR(cl_context   context,
-                         const void * il,
-                         size_t       length,
-                         cl_int *     errcode_ret);
-
-typedef cl_program
-(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context   context,
-                                           const void * il,
-                                           size_t       length,
-                                           cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-/* Extension: cl_khr_image2d_from_buffer
- *
- * This extension allows a 2D image to be created from a cl_mem buffer without
- * a copy. The type associated with a 2D image created from a buffer in an
- * OpenCL program is image2d_t. Both the sampler and sampler-less read_image
- * built-in functions are supported for 2D images and 2D images created from
- * a buffer.  Similarly, the write_image built-ins are also supported for 2D
- * images created from a buffer.
- *
- * When the 2D image from buffer is created, the client must specify the
- * width, height, image format (i.e. channel order and channel data type)
- * and optionally the row pitch.
- *
- * The pitch specified must be a multiple of
- * CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels.
- * The base address of the buffer must be aligned to
- * CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels.
- */
-
-#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR              0x104A
-#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR       0x104B
-
-
-/**************************************
- * cl_khr_initialize_memory extension *
- **************************************/
-
-#define CL_CONTEXT_MEMORY_INITIALIZE_KHR            0x2030
-
-
-/**************************************
- * cl_khr_terminate_context extension *
- **************************************/
-
-#define CL_CONTEXT_TERMINATED_KHR                   -1121
-
-#define CL_DEVICE_TERMINATE_CAPABILITY_KHR          0x2031
-#define CL_CONTEXT_TERMINATE_KHR                    0x2032
-
-#define cl_khr_terminate_context 1
-extern CL_API_ENTRY cl_int CL_API_CALL
-clTerminateContextKHR(cl_context context) CL_API_SUFFIX__VERSION_1_2;
-
-typedef cl_int
-(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_API_SUFFIX__VERSION_1_2;
-
-
-/*
- * Extension: cl_khr_spir
- *
- * This extension adds support to create an OpenCL program object from a
- * Standard Portable Intermediate Representation (SPIR) instance
- */
-
-#define CL_DEVICE_SPIR_VERSIONS                     0x40E0
-#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE         0x40E1
-
-
-/*****************************************
- * cl_khr_create_command_queue extension *
- *****************************************/
-#define cl_khr_create_command_queue 1
-
-typedef cl_properties cl_queue_properties_khr;
-
-extern CL_API_ENTRY cl_command_queue CL_API_CALL
-clCreateCommandQueueWithPropertiesKHR(cl_context context,
-                                      cl_device_id device,
-                                      const cl_queue_properties_khr* properties,
-                                      cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-typedef cl_command_queue
-(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context,
-                                                        cl_device_id device,
-                                                        const cl_queue_properties_khr* properties,
-                                                        cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-
-/******************************************
-* cl_nv_device_attribute_query extension *
-******************************************/
-
-/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
-#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV       0x4000
-#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV       0x4001
-#define CL_DEVICE_REGISTERS_PER_BLOCK_NV            0x4002
-#define CL_DEVICE_WARP_SIZE_NV                      0x4003
-#define CL_DEVICE_GPU_OVERLAP_NV                    0x4004
-#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV            0x4005
-#define CL_DEVICE_INTEGRATED_MEMORY_NV              0x4006
-
-
-/*********************************
-* cl_amd_device_attribute_query *
-*********************************/
-
-#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD            0x4036
-#define CL_DEVICE_TOPOLOGY_AMD                          0x4037
-#define CL_DEVICE_BOARD_NAME_AMD                        0x4038
-#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD                0x4039
-#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD             0x4040
-#define CL_DEVICE_SIMD_WIDTH_AMD                        0x4041
-#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD            0x4042
-#define CL_DEVICE_WAVEFRONT_WIDTH_AMD                   0x4043
-#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD               0x4044
-#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD          0x4045
-#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD     0x4046
-#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD   0x4047
-#define CL_DEVICE_LOCAL_MEM_BANKS_AMD                   0x4048
-#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD            0x4049
-#define CL_DEVICE_GFXIP_MAJOR_AMD                       0x404A
-#define CL_DEVICE_GFXIP_MINOR_AMD                       0x404B
-#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD            0x404C
-#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD         0x4030
-#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD               0x4031
-#define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD    0x4033
-#define CL_DEVICE_PCIE_ID_AMD                           0x4034
-
-
-/*********************************
-* cl_arm_printf extension
-*********************************/
-
-#define CL_PRINTF_CALLBACK_ARM                      0x40B0
-#define CL_PRINTF_BUFFERSIZE_ARM                    0x40B1
-
-
-/***********************************
-* cl_ext_device_fission extension
-***********************************/
-#define cl_ext_device_fission   1
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseDeviceEXT(cl_device_id device) CL_API_SUFFIX__VERSION_1_1;
-
-typedef cl_int
-(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_API_SUFFIX__VERSION_1_1;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainDeviceEXT(cl_device_id device) CL_API_SUFFIX__VERSION_1_1;
-
-typedef cl_int
-(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_API_SUFFIX__VERSION_1_1;
-
-typedef cl_ulong  cl_device_partition_property_ext;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clCreateSubDevicesEXT(cl_device_id   in_device,
-                      const cl_device_partition_property_ext * properties,
-                      cl_uint        num_entries,
-                      cl_device_id * out_devices,
-                      cl_uint *      num_devices) CL_API_SUFFIX__VERSION_1_1;
-
-typedef cl_int
-(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id   in_device,
-                                         const cl_device_partition_property_ext * properties,
-                                         cl_uint        num_entries,
-                                         cl_device_id * out_devices,
-                                         cl_uint *      num_devices) CL_API_SUFFIX__VERSION_1_1;
-
-/* cl_device_partition_property_ext */
-#define CL_DEVICE_PARTITION_EQUALLY_EXT             0x4050
-#define CL_DEVICE_PARTITION_BY_COUNTS_EXT           0x4051
-#define CL_DEVICE_PARTITION_BY_NAMES_EXT            0x4052
-#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT  0x4053
-
-/* clGetDeviceInfo selectors */
-#define CL_DEVICE_PARENT_DEVICE_EXT                 0x4054
-#define CL_DEVICE_PARTITION_TYPES_EXT               0x4055
-#define CL_DEVICE_AFFINITY_DOMAINS_EXT              0x4056
-#define CL_DEVICE_REFERENCE_COUNT_EXT               0x4057
-#define CL_DEVICE_PARTITION_STYLE_EXT               0x4058
-
-/* error codes */
-#define CL_DEVICE_PARTITION_FAILED_EXT              -1057
-#define CL_INVALID_PARTITION_COUNT_EXT              -1058
-#define CL_INVALID_PARTITION_NAME_EXT               -1059
-
-/* CL_AFFINITY_DOMAINs */
-#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT             0x1
-#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT             0x2
-#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT             0x3
-#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT             0x4
-#define CL_AFFINITY_DOMAIN_NUMA_EXT                 0x10
-#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT     0x100
-
-/* cl_device_partition_property_ext list terminators */
-#define CL_PROPERTIES_LIST_END_EXT                  ((cl_device_partition_property_ext) 0)
-#define CL_PARTITION_BY_COUNTS_LIST_END_EXT         ((cl_device_partition_property_ext) 0)
-#define CL_PARTITION_BY_NAMES_LIST_END_EXT          ((cl_device_partition_property_ext) 0 - 1)
-
-
-/***********************************
- * cl_ext_migrate_memobject extension definitions
- ***********************************/
-#define cl_ext_migrate_memobject 1
-
-typedef cl_bitfield cl_mem_migration_flags_ext;
-
-#define CL_MIGRATE_MEM_OBJECT_HOST_EXT              0x1
-
-#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT           0x4040
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue,
-                             cl_uint          num_mem_objects,
-                             const cl_mem *   mem_objects,
-                             cl_mem_migration_flags_ext flags,
-                             cl_uint          num_events_in_wait_list,
-                             const cl_event * event_wait_list,
-                             cl_event *       event);
-
-typedef cl_int
-(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue,
-                                               cl_uint          num_mem_objects,
-                                               const cl_mem *   mem_objects,
-                                               cl_mem_migration_flags_ext flags,
-                                               cl_uint          num_events_in_wait_list,
-                                               const cl_event * event_wait_list,
-                                               cl_event *       event);
-
-
-/*********************************
-* cl_ext_cxx_for_opencl extension
-*********************************/
-#define cl_ext_cxx_for_opencl 1
-
-#define CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT 0x4230
-
-/*********************************
-* cl_qcom_ext_host_ptr extension
-*********************************/
-#define cl_qcom_ext_host_ptr 1
-
-#define CL_MEM_EXT_HOST_PTR_QCOM                  (1 << 29)
-
-#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM   0x40A0
-#define CL_DEVICE_PAGE_SIZE_QCOM                  0x40A1
-#define CL_IMAGE_ROW_ALIGNMENT_QCOM               0x40A2
-#define CL_IMAGE_SLICE_ALIGNMENT_QCOM             0x40A3
-#define CL_MEM_HOST_UNCACHED_QCOM                 0x40A4
-#define CL_MEM_HOST_WRITEBACK_QCOM                0x40A5
-#define CL_MEM_HOST_WRITETHROUGH_QCOM             0x40A6
-#define CL_MEM_HOST_WRITE_COMBINING_QCOM          0x40A7
-
-typedef cl_uint                                   cl_image_pitch_info_qcom;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetDeviceImageInfoQCOM(cl_device_id             device,
-                         size_t                   image_width,
-                         size_t                   image_height,
-                         const cl_image_format   *image_format,
-                         cl_image_pitch_info_qcom param_name,
-                         size_t                   param_value_size,
-                         void                    *param_value,
-                         size_t                  *param_value_size_ret);
-
-typedef struct _cl_mem_ext_host_ptr
-{
-    /* Type of external memory allocation. */
-    /* Legal values will be defined in layered extensions. */
-    cl_uint  allocation_type;
-
-    /* Host cache policy for this external memory allocation. */
-    cl_uint  host_cache_policy;
-
-} cl_mem_ext_host_ptr;
-
-
-/*******************************************
-* cl_qcom_ext_host_ptr_iocoherent extension
-********************************************/
-
-/* Cache policy specifying io-coherence */
-#define CL_MEM_HOST_IOCOHERENT_QCOM               0x40A9
-
-
-/*********************************
-* cl_qcom_ion_host_ptr extension
-*********************************/
-
-#define CL_MEM_ION_HOST_PTR_QCOM                  0x40A8
-
-typedef struct _cl_mem_ion_host_ptr
-{
-    /* Type of external memory allocation. */
-    /* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
-    cl_mem_ext_host_ptr  ext_host_ptr;
-
-    /* ION file descriptor */
-    int                  ion_filedesc;
-
-    /* Host pointer to the ION allocated memory */
-    void*                ion_hostptr;
-
-} cl_mem_ion_host_ptr;
-
-
-/*********************************
-* cl_qcom_android_native_buffer_host_ptr extension
-*********************************/
-
-#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM                  0x40C6
-
-typedef struct _cl_mem_android_native_buffer_host_ptr
-{
-    /* Type of external memory allocation. */
-    /* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */
-    cl_mem_ext_host_ptr  ext_host_ptr;
-
-    /* Virtual pointer to the android native buffer */
-    void*                anb_ptr;
-
-} cl_mem_android_native_buffer_host_ptr;
-
-
-/******************************************
- * cl_img_yuv_image extension *
- ******************************************/
-
-/* Image formats used in clCreateImage */
-#define CL_NV21_IMG                                 0x40D0
-#define CL_YV12_IMG                                 0x40D1
-
-
-/******************************************
- * cl_img_cached_allocations extension *
- ******************************************/
-
-/* Flag values used by clCreateBuffer */
-#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG          (1 << 26)
-#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG            (1 << 27)
-
-
-/******************************************
- * cl_img_use_gralloc_ptr extension *
- ******************************************/
-#define cl_img_use_gralloc_ptr 1
-
-/* Flag values used by clCreateBuffer */
-#define CL_MEM_USE_GRALLOC_PTR_IMG                  (1 << 28)
-
-/* To be used by clGetEventInfo: */
-#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG      0x40D2
-#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG      0x40D3
-
-/* Error codes from clEnqueueAcquireGrallocObjectsIMG and clEnqueueReleaseGrallocObjectsIMG */
-#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG        0x40D4
-#define CL_INVALID_GRALLOC_OBJECT_IMG               0x40D5
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueAcquireGrallocObjectsIMG(cl_command_queue      command_queue,
-                                  cl_uint               num_objects,
-                                  const cl_mem *        mem_objects,
-                                  cl_uint               num_events_in_wait_list,
-                                  const cl_event *      event_wait_list,
-                                  cl_event *            event) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReleaseGrallocObjectsIMG(cl_command_queue      command_queue,
-                                  cl_uint               num_objects,
-                                  const cl_mem *        mem_objects,
-                                  cl_uint               num_events_in_wait_list,
-                                  const cl_event *      event_wait_list,
-                                  cl_event *            event) CL_API_SUFFIX__VERSION_1_2;
-
-/******************************************
- * cl_img_generate_mipmap extension *
- ******************************************/
-#define cl_img_generate_mipmap 1
-
-typedef cl_uint cl_mipmap_filter_mode_img;
-
-/* To be used by clEnqueueGenerateMipmapIMG */
-#define CL_MIPMAP_FILTER_ANY_IMG 0x0
-#define CL_MIPMAP_FILTER_BOX_IMG 0x1
-
-/* To be used by clGetEventInfo */
-#define CL_COMMAND_GENERATE_MIPMAP_IMG 0x40D6
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueGenerateMipmapIMG(cl_command_queue          command_queue,
-                           cl_mem                    src_image,
-                           cl_mem                    dst_image,
-                           cl_mipmap_filter_mode_img mipmap_filter_mode,
-                           const size_t              *array_region,
-                           const size_t              *mip_region,
-                           cl_uint                   num_events_in_wait_list,
-                           const cl_event            *event_wait_list,
-                           cl_event *event) CL_API_SUFFIX__VERSION_1_2;
-  
-/******************************************
- * cl_img_mem_properties extension *
- ******************************************/
-#define cl_img_mem_properties 1
-
-/* To be used by clCreateBufferWithProperties */
-#define CL_MEM_ALLOC_FLAGS_IMG 0x40D7
-
-/* To be used with the CL_MEM_ALLOC_FLAGS_IMG property */
-typedef cl_bitfield cl_mem_alloc_flags_img;
-
-/* To be used with cl_mem_alloc_flags_img */
-#define CL_MEM_ALLOC_RELAX_REQUIREMENTS_IMG (1 << 0)
-
-/*********************************
-* cl_khr_subgroups extension
-*********************************/
-#define cl_khr_subgroups 1
-
-#if !defined(CL_VERSION_2_1)
-/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h.
-   In hindsight, there should have been a khr suffix on this type for
-   the extension, but keeping it un-suffixed to maintain backwards
-   compatibility. */
-typedef cl_uint             cl_kernel_sub_group_info;
-#endif
-
-/* cl_kernel_sub_group_info */
-#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR    0x2033
-#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR       0x2034
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelSubGroupInfoKHR(cl_kernel    in_kernel,
-                           cl_device_id in_device,
-                           cl_kernel_sub_group_info param_name,
-                           size_t       input_value_size,
-                           const void * input_value,
-                           size_t       param_value_size,
-                           void *       param_value,
-                           size_t *     param_value_size_ret) CL_API_SUFFIX__VERSION_2_0_DEPRECATED;
-
-typedef cl_int
-(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel    in_kernel,
-                                              cl_device_id in_device,
-                                              cl_kernel_sub_group_info param_name,
-                                              size_t       input_value_size,
-                                              const void * input_value,
-                                              size_t       param_value_size,
-                                              void *       param_value,
-                                              size_t *     param_value_size_ret) CL_API_SUFFIX__VERSION_2_0_DEPRECATED;
-
-
-/*********************************
-* cl_khr_mipmap_image extension
-*********************************/
-
-/* cl_sampler_properties */
-#define CL_SAMPLER_MIP_FILTER_MODE_KHR              0x1155
-#define CL_SAMPLER_LOD_MIN_KHR                      0x1156
-#define CL_SAMPLER_LOD_MAX_KHR                      0x1157
-
-
-/*********************************
-* cl_khr_priority_hints extension
-*********************************/
-/* This extension define is for backwards compatibility.
-   It shouldn't be required since this extension has no new functions. */
-#define cl_khr_priority_hints 1
-
-typedef cl_uint  cl_queue_priority_khr;
-
-/* cl_command_queue_properties */
-#define CL_QUEUE_PRIORITY_KHR 0x1096
-
-/* cl_queue_priority_khr */
-#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
-#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
-#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
-
-
-/*********************************
-* cl_khr_throttle_hints extension
-*********************************/
-/* This extension define is for backwards compatibility.
-   It shouldn't be required since this extension has no new functions. */
-#define cl_khr_throttle_hints 1
-
-typedef cl_uint  cl_queue_throttle_khr;
-
-/* cl_command_queue_properties */
-#define CL_QUEUE_THROTTLE_KHR 0x1097
-
-/* cl_queue_throttle_khr */
-#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
-#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
-#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
-
-
-/*********************************
-* cl_khr_subgroup_named_barrier
-*********************************/
-/* This extension define is for backwards compatibility.
-   It shouldn't be required since this extension has no new functions. */
-#define cl_khr_subgroup_named_barrier 1
-
-/* cl_device_info */
-#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR       0x2035
-
-
-/*********************************
-* cl_khr_extended_versioning
-*********************************/
-
-#define cl_khr_extended_versioning 1
-
-#define CL_VERSION_MAJOR_BITS_KHR (10)
-#define CL_VERSION_MINOR_BITS_KHR (10)
-#define CL_VERSION_PATCH_BITS_KHR (12)
-
-#define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1)
-#define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1)
-#define CL_VERSION_PATCH_MASK_KHR ((1 << CL_VERSION_PATCH_BITS_KHR) - 1)
-
-#define CL_VERSION_MAJOR_KHR(version) ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR))
-#define CL_VERSION_MINOR_KHR(version) (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR)
-#define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR)
-
-#define CL_MAKE_VERSION_KHR(major, minor, patch) \
-    ((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \
-    (((minor) &  CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \
-    ((patch) & CL_VERSION_PATCH_MASK_KHR))
-
-typedef cl_uint cl_version_khr;
-
-#define CL_NAME_VERSION_MAX_NAME_SIZE_KHR 64
-
-typedef struct _cl_name_version_khr
-{
-    cl_version_khr version;
-    char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR];
-} cl_name_version_khr;
-
-/* cl_platform_info */
-#define CL_PLATFORM_NUMERIC_VERSION_KHR                  0x0906
-#define CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR          0x0907
-
-/* cl_device_info */
-#define CL_DEVICE_NUMERIC_VERSION_KHR                    0x105E
-#define CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR           0x105F
-#define CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR            0x1060
-#define CL_DEVICE_ILS_WITH_VERSION_KHR                   0x1061
-#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR      0x1062
-
-
-/*********************************
-* cl_khr_device_uuid extension
-*********************************/
-#define cl_khr_device_uuid 1
-
-#define CL_UUID_SIZE_KHR 16
-#define CL_LUID_SIZE_KHR 8
-
-#define CL_DEVICE_UUID_KHR          0x106A
-#define CL_DRIVER_UUID_KHR          0x106B
-#define CL_DEVICE_LUID_VALID_KHR    0x106C
-#define CL_DEVICE_LUID_KHR          0x106D
-#define CL_DEVICE_NODE_MASK_KHR     0x106E
-
-
-/***************************************************************
-* cl_khr_pci_bus_info
-***************************************************************/
-#define cl_khr_pci_bus_info 1
-
-typedef struct _cl_device_pci_bus_info_khr {
-    cl_uint pci_domain;
-    cl_uint pci_bus;
-    cl_uint pci_device;
-    cl_uint pci_function;
-} cl_device_pci_bus_info_khr;
-
-/* cl_device_info */
-#define CL_DEVICE_PCI_BUS_INFO_KHR                          0x410F
-
-
-/***************************************************************
-* cl_khr_suggested_local_work_size
-***************************************************************/
-#define cl_khr_suggested_local_work_size 1
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelSuggestedLocalWorkSizeKHR(
-    cl_command_queue command_queue,
-    cl_kernel kernel,
-    cl_uint work_dim,
-    const size_t* global_work_offset,
-    const size_t* global_work_size,
-    size_t* suggested_local_work_size) CL_API_SUFFIX__VERSION_3_0;
-
-typedef cl_int (CL_API_CALL *
-clGetKernelSuggestedLocalWorkSizeKHR_fn)(
-    cl_command_queue command_queue,
-    cl_kernel kernel,
-    cl_uint work_dim,
-    const size_t* global_work_offset,
-    const size_t* global_work_size,
-    size_t* suggested_local_work_size) CL_API_SUFFIX__VERSION_3_0;
-
-
-/**********************************
- * cl_arm_import_memory extension *
- **********************************/
-#define cl_arm_import_memory 1
-
-typedef intptr_t cl_import_properties_arm;
-
-/* Default and valid property name for cl_arm_import_memory */
-#define CL_IMPORT_TYPE_ARM                        0x40B2
-
-/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */
-#define CL_IMPORT_TYPE_HOST_ARM                   0x40B3
-
-/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
-#define CL_IMPORT_TYPE_DMA_BUF_ARM                0x40B4
-
-/* Protected memory property */
-#define CL_IMPORT_TYPE_PROTECTED_ARM              0x40B5
-
-/* Android hardware buffer type value for CL_IMPORT_TYPE_ARM property */
-#define CL_IMPORT_TYPE_ANDROID_HARDWARE_BUFFER_ARM 0x41E2
-
-/* Data consistency with host property */
-#define CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM 0x41E3
-
-/* Index of plane in a multiplanar hardware buffer */
-#define CL_IMPORT_ANDROID_HARDWARE_BUFFER_PLANE_INDEX_ARM 0x41EF
-
-/* Index of layer in a multilayer hardware buffer */
-#define CL_IMPORT_ANDROID_HARDWARE_BUFFER_LAYER_INDEX_ARM 0x41F0
-
-/* Import memory size value to indicate a size for the whole buffer */
-#define CL_IMPORT_MEMORY_WHOLE_ALLOCATION_ARM SIZE_MAX
-
-/* This extension adds a new function that allows for direct memory import into
- * OpenCL via the clImportMemoryARM function.
- *
- * Memory imported through this interface will be mapped into the device's page
- * tables directly, providing zero copy access. It will never fall back to copy
- * operations and aliased buffers.
- *
- * Types of memory supported for import are specified as additional extension
- * strings.
- *
- * This extension produces cl_mem allocations which are compatible with all other
- * users of cl_mem in the standard API.
- *
- * This extension maps pages with the same properties as the normal buffer creation
- * function clCreateBuffer.
- */
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clImportMemoryARM( cl_context context,
-                   cl_mem_flags flags,
-                   const cl_import_properties_arm *properties,
-                   void *memory,
-                   size_t size,
-                   cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-
-/******************************************
- * cl_arm_shared_virtual_memory extension *
- ******************************************/
-#define cl_arm_shared_virtual_memory 1
-
-/* Used by clGetDeviceInfo */
-#define CL_DEVICE_SVM_CAPABILITIES_ARM                  0x40B6
-
-/* Used by clGetMemObjectInfo */
-#define CL_MEM_USES_SVM_POINTER_ARM                     0x40B7
-
-/* Used by clSetKernelExecInfoARM: */
-#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM                0x40B8
-#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM   0x40B9
-
-/* To be used by clGetEventInfo: */
-#define CL_COMMAND_SVM_FREE_ARM                         0x40BA
-#define CL_COMMAND_SVM_MEMCPY_ARM                       0x40BB
-#define CL_COMMAND_SVM_MEMFILL_ARM                      0x40BC
-#define CL_COMMAND_SVM_MAP_ARM                          0x40BD
-#define CL_COMMAND_SVM_UNMAP_ARM                        0x40BE
-
-/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */
-#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM           (1 << 0)
-#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM             (1 << 1)
-#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM             (1 << 2)
-#define CL_DEVICE_SVM_ATOMICS_ARM                       (1 << 3)
-
-/* Flag values used by clSVMAllocARM: */
-#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM                (1 << 10)
-#define CL_MEM_SVM_ATOMICS_ARM                          (1 << 11)
-
-typedef cl_bitfield cl_svm_mem_flags_arm;
-typedef cl_uint     cl_kernel_exec_info_arm;
-typedef cl_bitfield cl_device_svm_capabilities_arm;
-
-extern CL_API_ENTRY void * CL_API_CALL
-clSVMAllocARM(cl_context       context,
-              cl_svm_mem_flags_arm flags,
-              size_t           size,
-              cl_uint          alignment) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY void CL_API_CALL
-clSVMFreeARM(cl_context        context,
-             void *            svm_pointer) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMFreeARM(cl_command_queue  command_queue,
-                    cl_uint           num_svm_pointers,
-                    void *            svm_pointers[],
-                    void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,
-                                                       cl_uint          num_svm_pointers,
-                                                       void *           svm_pointers[],
-                                                       void *           user_data),
-                    void *            user_data,
-                    cl_uint           num_events_in_wait_list,
-                    const cl_event *  event_wait_list,
-                    cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMemcpyARM(cl_command_queue  command_queue,
-                      cl_bool           blocking_copy,
-                      void *            dst_ptr,
-                      const void *      src_ptr,
-                      size_t            size,
-                      cl_uint           num_events_in_wait_list,
-                      const cl_event *  event_wait_list,
-                      cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMemFillARM(cl_command_queue  command_queue,
-                       void *            svm_ptr,
-                       const void *      pattern,
-                       size_t            pattern_size,
-                       size_t            size,
-                       cl_uint           num_events_in_wait_list,
-                       const cl_event *  event_wait_list,
-                       cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMapARM(cl_command_queue  command_queue,
-                   cl_bool           blocking_map,
-                   cl_map_flags      flags,
-                   void *            svm_ptr,
-                   size_t            size,
-                   cl_uint           num_events_in_wait_list,
-                   const cl_event *  event_wait_list,
-                   cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMUnmapARM(cl_command_queue  command_queue,
-                     void *            svm_ptr,
-                     cl_uint           num_events_in_wait_list,
-                     const cl_event *  event_wait_list,
-                     cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetKernelArgSVMPointerARM(cl_kernel    kernel,
-                            cl_uint      arg_index,
-                            const void * arg_value) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetKernelExecInfoARM(cl_kernel            kernel,
-                       cl_kernel_exec_info_arm  param_name,
-                       size_t               param_value_size,
-                       const void *         param_value) CL_API_SUFFIX__VERSION_1_2;
-
-/********************************
- * cl_arm_get_core_id extension *
- ********************************/
-
-#ifdef CL_VERSION_1_2
-
-#define cl_arm_get_core_id 1
-
-/* Device info property for bitfield of cores present */
-#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM      0x40BF
-
-#endif  /* CL_VERSION_1_2 */
-
-/*********************************
-* cl_arm_job_slot_selection
-*********************************/
-
-#define cl_arm_job_slot_selection 1
-
-/* cl_device_info */
-#define CL_DEVICE_JOB_SLOTS_ARM                   0x41E0
-
-/* cl_command_queue_properties */
-#define CL_QUEUE_JOB_SLOT_ARM                     0x41E1
-
-/*********************************
-* cl_arm_scheduling_controls
-*********************************/
-
-#define cl_arm_scheduling_controls 1
-
-typedef cl_bitfield cl_device_scheduling_controls_capabilities_arm;
-
-/* cl_device_info */
-#define CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM          0x41E4
-
-#define CL_DEVICE_SCHEDULING_KERNEL_BATCHING_ARM               (1 << 0)
-#define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_ARM          (1 << 1)
-#define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_MODIFIER_ARM (1 << 2)
-#define CL_DEVICE_SCHEDULING_DEFERRED_FLUSH_ARM                (1 << 3)
-#define CL_DEVICE_SCHEDULING_REGISTER_ALLOCATION_ARM           (1 << 4)
-
-#define CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM            0x41EB
-
-/* cl_kernel_info */
-#define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM            0x41E5
-#define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM   0x41E6
-
-/* cl_queue_properties */
-#define CL_QUEUE_KERNEL_BATCHING_ARM                            0x41E7
-#define CL_QUEUE_DEFERRED_FLUSH_ARM                             0x41EC
-
-/**************************************
-* cl_arm_controlled_kernel_termination
-***************************************/
-
-#define cl_arm_controlled_kernel_termination 1
-
-/* Error code to indicate kernel terminated with failure */
-#define CL_COMMAND_TERMINATED_ITSELF_WITH_FAILURE_ARM -1108
-
-/* cl_device_info */
-#define CL_DEVICE_CONTROLLED_TERMINATION_CAPABILITIES_ARM 0x41EE
-
-/* Bit fields for controlled termination feature query */
-typedef cl_bitfield cl_device_controlled_termination_capabilities_arm;
-
-#define CL_DEVICE_CONTROLLED_TERMINATION_SUCCESS_ARM (1 << 0)
-#define CL_DEVICE_CONTROLLED_TERMINATION_FAILURE_ARM (1 << 1)
-#define CL_DEVICE_CONTROLLED_TERMINATION_QUERY_ARM (1 << 2)
-
-/* cl_event_info */
-#define CL_EVENT_COMMAND_TERMINATION_REASON_ARM 0x41ED
-
-/* Values returned for event termination reason query */
-typedef cl_uint cl_command_termination_reason_arm;
-
-#define CL_COMMAND_TERMINATION_COMPLETION_ARM  0
-#define CL_COMMAND_TERMINATION_CONTROLLED_SUCCESS_ARM 1
-#define CL_COMMAND_TERMINATION_CONTROLLED_FAILURE_ARM 2
-#define CL_COMMAND_TERMINATION_ERROR_ARM 3
-
-/***************************************
-* cl_intel_thread_local_exec extension *
-****************************************/
-
-#define cl_intel_thread_local_exec 1
-
-#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL      (((cl_bitfield)1) << 31)
-
-/***********************************************
-* cl_intel_device_partition_by_names extension *
-************************************************/
-
-#define cl_intel_device_partition_by_names 1
-
-#define CL_DEVICE_PARTITION_BY_NAMES_INTEL          0x4052
-#define CL_PARTITION_BY_NAMES_LIST_END_INTEL        -1
-
-/************************************************
-* cl_intel_accelerator extension                *
-* cl_intel_motion_estimation extension          *
-* cl_intel_advanced_motion_estimation extension *
-*************************************************/
-
-#define cl_intel_accelerator 1
-#define cl_intel_motion_estimation 1
-#define cl_intel_advanced_motion_estimation 1
-
-typedef struct _cl_accelerator_intel* cl_accelerator_intel;
-typedef cl_uint cl_accelerator_type_intel;
-typedef cl_uint cl_accelerator_info_intel;
-
-typedef struct _cl_motion_estimation_desc_intel {
-    cl_uint mb_block_type;
-    cl_uint subpixel_mode;
-    cl_uint sad_adjust_mode;
-    cl_uint search_path_type;
-} cl_motion_estimation_desc_intel;
-
-/* error codes */
-#define CL_INVALID_ACCELERATOR_INTEL                              -1094
-#define CL_INVALID_ACCELERATOR_TYPE_INTEL                         -1095
-#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL                   -1096
-#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL                   -1097
-
-/* cl_accelerator_type_intel */
-#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL               0x0
-
-/* cl_accelerator_info_intel */
-#define CL_ACCELERATOR_DESCRIPTOR_INTEL                           0x4090
-#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL                      0x4091
-#define CL_ACCELERATOR_CONTEXT_INTEL                              0x4092
-#define CL_ACCELERATOR_TYPE_INTEL                                 0x4093
-
-/* cl_motion_detect_desc_intel flags */
-#define CL_ME_MB_TYPE_16x16_INTEL                                 0x0
-#define CL_ME_MB_TYPE_8x8_INTEL                                   0x1
-#define CL_ME_MB_TYPE_4x4_INTEL                                   0x2
-
-#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL                         0x0
-#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL                            0x1
-#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL                            0x2
-
-#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL                          0x0
-#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL                          0x1
-
-#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL                        0x0
-#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL                        0x1
-#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL                      0x5
-
-#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL                         0x0
-#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL                  0x1
-#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL                    0x2
-#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL                           0x4
-
-#define CL_ME_FORWARD_INPUT_MODE_INTEL                            0x1
-#define CL_ME_BACKWARD_INPUT_MODE_INTEL                           0x2
-#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL                        0x3
-
-#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL                          16
-#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL                            21
-#define CL_ME_BIDIR_WEIGHT_HALF_INTEL                             32
-#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL                        43
-#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL                    48
-
-#define CL_ME_COST_PENALTY_NONE_INTEL                             0x0
-#define CL_ME_COST_PENALTY_LOW_INTEL                              0x1
-#define CL_ME_COST_PENALTY_NORMAL_INTEL                           0x2
-#define CL_ME_COST_PENALTY_HIGH_INTEL                             0x3
-
-#define CL_ME_COST_PRECISION_QPEL_INTEL                           0x0
-#define CL_ME_COST_PRECISION_HPEL_INTEL                           0x1
-#define CL_ME_COST_PRECISION_PEL_INTEL                            0x2
-#define CL_ME_COST_PRECISION_DPEL_INTEL                           0x3
-
-#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL                  0x0
-#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL                0x1
-#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL                        0x2
-#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL        0x3
-
-#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL       0x4
-#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL                     0x4
-#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL            0x5
-#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL           0x6
-#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL             0x7
-#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL             0x8
-
-#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL                      0x0
-#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL              0x1
-#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL                0x2
-#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL                   0x3
-
-/* cl_device_info */
-#define CL_DEVICE_ME_VERSION_INTEL                                0x407E
-
-#define CL_ME_VERSION_LEGACY_INTEL                                0x0
-#define CL_ME_VERSION_ADVANCED_VER_1_INTEL                        0x1
-#define CL_ME_VERSION_ADVANCED_VER_2_INTEL                        0x2
-
-extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
-clCreateAcceleratorINTEL(
-    cl_context                   context,
-    cl_accelerator_type_intel    accelerator_type,
-    size_t                       descriptor_size,
-    const void*                  descriptor,
-    cl_int*                      errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-typedef cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
-    cl_context                   context,
-    cl_accelerator_type_intel    accelerator_type,
-    size_t                       descriptor_size,
-    const void*                  descriptor,
-    cl_int*                      errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetAcceleratorInfoINTEL(
-    cl_accelerator_intel         accelerator,
-    cl_accelerator_info_intel    param_name,
-    size_t                       param_value_size,
-    void*                        param_value,
-    size_t*                      param_value_size_ret) CL_API_SUFFIX__VERSION_1_2;
-
-typedef cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
-    cl_accelerator_intel         accelerator,
-    cl_accelerator_info_intel    param_name,
-    size_t                       param_value_size,
-    void*                        param_value,
-    size_t*                      param_value_size_ret) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainAcceleratorINTEL(
-    cl_accelerator_intel         accelerator) CL_API_SUFFIX__VERSION_1_2;
-
-typedef cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
-    cl_accelerator_intel         accelerator) CL_API_SUFFIX__VERSION_1_2;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseAcceleratorINTEL(
-    cl_accelerator_intel         accelerator) CL_API_SUFFIX__VERSION_1_2;
-
-typedef cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
-    cl_accelerator_intel         accelerator) CL_API_SUFFIX__VERSION_1_2;
-
-/******************************************
-* cl_intel_simultaneous_sharing extension *
-*******************************************/
-
-#define cl_intel_simultaneous_sharing 1
-
-#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL            0x4104
-#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL        0x4105
-
-/***********************************
-* cl_intel_egl_image_yuv extension *
-************************************/
-
-#define cl_intel_egl_image_yuv 1
-
-#define CL_EGL_YUV_PLANE_INTEL                           0x4107
-
-/********************************
-* cl_intel_packed_yuv extension *
-*********************************/
-
-#define cl_intel_packed_yuv 1
-
-#define CL_YUYV_INTEL                                    0x4076
-#define CL_UYVY_INTEL                                    0x4077
-#define CL_YVYU_INTEL                                    0x4078
-#define CL_VYUY_INTEL                                    0x4079
-
-/********************************************
-* cl_intel_required_subgroup_size extension *
-*********************************************/
-
-#define cl_intel_required_subgroup_size 1
-
-#define CL_DEVICE_SUB_GROUP_SIZES_INTEL                  0x4108
-#define CL_KERNEL_SPILL_MEM_SIZE_INTEL                   0x4109
-#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL           0x410A
-
-/****************************************
-* cl_intel_driver_diagnostics extension *
-*****************************************/
-
-#define cl_intel_driver_diagnostics 1
-
-typedef cl_uint cl_diagnostics_verbose_level;
-
-#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL                0x4106
-
-#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL           ( 0xff )
-#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL          ( 1 )
-#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL           ( 1 << 1 )
-#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL       ( 1 << 2 )
-
-/********************************
-* cl_intel_planar_yuv extension *
-*********************************/
-
-#define CL_NV12_INTEL                                       0x410E
-
-#define CL_MEM_NO_ACCESS_INTEL                              ( 1 << 24 )
-#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL              ( 1 << 25 )
-
-#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL                0x417E
-#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL               0x417F
-
-/*******************************************************
-* cl_intel_device_side_avc_motion_estimation extension *
-********************************************************/
-
-#define CL_DEVICE_AVC_ME_VERSION_INTEL                      0x410B
-#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
-#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL          0x410D
-
-#define CL_AVC_ME_VERSION_0_INTEL                           0x0   /* No support. */
-#define CL_AVC_ME_VERSION_1_INTEL                           0x1   /* First supported version. */
-
-#define CL_AVC_ME_MAJOR_16x16_INTEL                         0x0
-#define CL_AVC_ME_MAJOR_16x8_INTEL                          0x1
-#define CL_AVC_ME_MAJOR_8x16_INTEL                          0x2
-#define CL_AVC_ME_MAJOR_8x8_INTEL                           0x3
-
-#define CL_AVC_ME_MINOR_8x8_INTEL                           0x0
-#define CL_AVC_ME_MINOR_8x4_INTEL                           0x1
-#define CL_AVC_ME_MINOR_4x8_INTEL                           0x2
-#define CL_AVC_ME_MINOR_4x4_INTEL                           0x3
-
-#define CL_AVC_ME_MAJOR_FORWARD_INTEL                       0x0
-#define CL_AVC_ME_MAJOR_BACKWARD_INTEL                      0x1
-#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL                 0x2
-
-#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL                  0x0
-#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL                0x7E
-#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL                 0x7D
-#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL                 0x7B
-#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL                  0x77
-#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL                  0x6F
-#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL                  0x5F
-#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL                  0x3F
-
-#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL            0x0
-#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL                 0x1
-#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL                  0x2
-#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL            0x3
-#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL               0x4
-#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL         0x5
-#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL             0x6
-#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL             0x7
-#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL                0x8
-#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL          0x9
-#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL            0x2
-#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL            0xa
-
-#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL                0x0
-#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL                0x2
-
-#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL               0x0
-#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL                  0x1
-#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL                  0x3
-
-#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL                 0x0
-#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL                 0x1
-#define CL_AVC_ME_COST_PRECISION_PEL_INTEL                  0x2
-#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL                 0x3
-
-#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL                0x10
-#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL                  0x15
-#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL                   0x20
-#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL              0x2B
-#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL          0x30
-
-#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL                 0x0
-#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL                0x2
-#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL                  0x4
-#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL               0x8
-
-#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL          0x0
-#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL            0x4000
-
-#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL     ( 0x1 << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL    ( 0x2 << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL        ( 0x3 << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL       ( 0x55 << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL      ( 0xAA << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL          ( 0xFF << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL     ( 0x1 << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL    ( 0x2 << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL     ( 0x1 << 26 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL    ( 0x2 << 26 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL     ( 0x1 << 28 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL    ( 0x2 << 28 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL     ( 0x1 << 30 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL    ( 0x2 << 30 )
-
-#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL                0x00
-#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL                0x80
-
-#define CL_AVC_ME_INTRA_16x16_INTEL                         0x0
-#define CL_AVC_ME_INTRA_8x8_INTEL                           0x1
-#define CL_AVC_ME_INTRA_4x4_INTEL                           0x2
-
-#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL     0x6
-#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL       0x5
-#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL       0x3
-
-#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL         0x60
-#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL        0x10
-#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL  0x8
-#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL   0x4
-
-#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL            0x0
-#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL          0x1
-#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL                  0x2
-#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL  0x3
-#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
-#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL               0x4
-#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL      0x5
-#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL     0x6
-#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL       0x7
-#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL       0x8
-#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL                0x0
-#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL        0x1
-#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL          0x2
-#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL             0x3
-
-#define CL_AVC_ME_FRAME_FORWARD_INTEL                       0x1
-#define CL_AVC_ME_FRAME_BACKWARD_INTEL                      0x2
-#define CL_AVC_ME_FRAME_DUAL_INTEL                          0x3
-
-#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL                     0x0
-#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL                    0x1
-#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL                    0x2
-
-#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL           0x0
-#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL        0x1
-
-/*******************************************
-* cl_intel_unified_shared_memory extension *
-********************************************/
-
-/* These APIs are in sync with Revision Q of the cl_intel_unified_shared_memory spec! */
-
-#define cl_intel_unified_shared_memory 1
-
-/* cl_device_info */
-#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL                   0x4190
-#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL                 0x4191
-#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL   0x4192
-#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL    0x4193
-#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL          0x4194
-
-typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel;
-
-/* cl_device_unified_shared_memory_capabilities_intel - bitfield */
-#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL                   (1 << 0)
-#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL            (1 << 1)
-#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL        (1 << 2)
-#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3)
-
-typedef cl_properties cl_mem_properties_intel;
-
-/* cl_mem_properties_intel */
-#define CL_MEM_ALLOC_FLAGS_INTEL        0x4195
-
-typedef cl_bitfield cl_mem_alloc_flags_intel;
-
-/* cl_mem_alloc_flags_intel - bitfield */
-#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL               (1 << 0)
-
-typedef cl_uint cl_mem_info_intel;
-
-/* cl_mem_alloc_info_intel */
-#define CL_MEM_ALLOC_TYPE_INTEL         0x419A
-#define CL_MEM_ALLOC_BASE_PTR_INTEL     0x419B
-#define CL_MEM_ALLOC_SIZE_INTEL         0x419C
-#define CL_MEM_ALLOC_DEVICE_INTEL       0x419D
-/* Enum values 0x419E-0x419F are reserved for future queries. */
-
-typedef cl_uint cl_unified_shared_memory_type_intel;
-
-/* cl_unified_shared_memory_type_intel */
-#define CL_MEM_TYPE_UNKNOWN_INTEL       0x4196
-#define CL_MEM_TYPE_HOST_INTEL          0x4197
-#define CL_MEM_TYPE_DEVICE_INTEL        0x4198
-#define CL_MEM_TYPE_SHARED_INTEL        0x4199
-
-typedef cl_uint cl_mem_advice_intel;
-
-/* cl_mem_advice_intel */
-/* Enum values 0x4208-0x420F are reserved for future memory advices. */
-
-/* cl_kernel_exec_info */
-#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL      0x4200
-#define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL    0x4201
-#define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL    0x4202
-#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL                  0x4203
-
-/* cl_command_type */
-#define CL_COMMAND_MEMFILL_INTEL        0x4204
-#define CL_COMMAND_MEMCPY_INTEL         0x4205
-#define CL_COMMAND_MIGRATEMEM_INTEL     0x4206
-#define CL_COMMAND_MEMADVISE_INTEL      0x4207
-
-extern CL_API_ENTRY void* CL_API_CALL
-clHostMemAllocINTEL(
-            cl_context context,
-            const cl_mem_properties_intel* properties,
-            size_t size,
-            cl_uint alignment,
-            cl_int* errcode_ret);
-
-typedef void* (CL_API_CALL *
-clHostMemAllocINTEL_fn)(
-            cl_context context,
-            const cl_mem_properties_intel* properties,
-            size_t size,
-            cl_uint alignment,
-            cl_int* errcode_ret);
-
-extern CL_API_ENTRY void* CL_API_CALL
-clDeviceMemAllocINTEL(
-            cl_context context,
-            cl_device_id device,
-            const cl_mem_properties_intel* properties,
-            size_t size,
-            cl_uint alignment,
-            cl_int* errcode_ret);
-
-typedef void* (CL_API_CALL *
-clDeviceMemAllocINTEL_fn)(
-            cl_context context,
-            cl_device_id device,
-            const cl_mem_properties_intel* properties,
-            size_t size,
-            cl_uint alignment,
-            cl_int* errcode_ret);
-
-extern CL_API_ENTRY void* CL_API_CALL
-clSharedMemAllocINTEL(
-            cl_context context,
-            cl_device_id device,
-            const cl_mem_properties_intel* properties,
-            size_t size,
-            cl_uint alignment,
-            cl_int* errcode_ret);
-
-typedef void* (CL_API_CALL *
-clSharedMemAllocINTEL_fn)(
-            cl_context context,
-            cl_device_id device,
-            const cl_mem_properties_intel* properties,
-            size_t size,
-            cl_uint alignment,
-            cl_int* errcode_ret);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clMemFreeINTEL(
-            cl_context context,
-            void* ptr);
-
-typedef cl_int (CL_API_CALL *
-clMemFreeINTEL_fn)(
-            cl_context context,
-            void* ptr);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clMemBlockingFreeINTEL(
-            cl_context context,
-            void* ptr);
-
-typedef cl_int (CL_API_CALL *
-clMemBlockingFreeINTEL_fn)(
-            cl_context context,
-            void* ptr);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetMemAllocInfoINTEL(
-            cl_context context,
-            const void* ptr,
-            cl_mem_info_intel param_name,
-            size_t param_value_size,
-            void* param_value,
-            size_t* param_value_size_ret);
-
-typedef cl_int (CL_API_CALL *
-clGetMemAllocInfoINTEL_fn)(
-            cl_context context,
-            const void* ptr,
-            cl_mem_info_intel param_name,
-            size_t param_value_size,
-            void* param_value,
-            size_t* param_value_size_ret);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetKernelArgMemPointerINTEL(
-            cl_kernel kernel,
-            cl_uint arg_index,
-            const void* arg_value);
-
-typedef cl_int (CL_API_CALL *
-clSetKernelArgMemPointerINTEL_fn)(
-            cl_kernel kernel,
-            cl_uint arg_index,
-            const void* arg_value);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMemsetINTEL(
-            cl_command_queue command_queue,
-            void* dst_ptr,
-            cl_int value,
-            size_t size,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-
-typedef cl_int (CL_API_CALL *
-clEnqueueMemsetINTEL_fn)(
-            cl_command_queue command_queue,
-            void* dst_ptr,
-            cl_int value,
-            size_t size,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMemFillINTEL(
-            cl_command_queue command_queue,
-            void* dst_ptr,
-            const void* pattern,
-            size_t pattern_size,
-            size_t size,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-
-typedef cl_int (CL_API_CALL *
-clEnqueueMemFillINTEL_fn)(
-            cl_command_queue command_queue,
-            void* dst_ptr,
-            const void* pattern,
-            size_t pattern_size,
-            size_t size,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMemcpyINTEL(
-            cl_command_queue command_queue,
-            cl_bool blocking,
-            void* dst_ptr,
-            const void* src_ptr,
-            size_t size,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-
-typedef cl_int (CL_API_CALL *
-clEnqueueMemcpyINTEL_fn)(
-            cl_command_queue command_queue,
-            cl_bool blocking,
-            void* dst_ptr,
-            const void* src_ptr,
-            size_t size,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-
-#ifdef CL_VERSION_1_2
-
-/* Because these APIs use cl_mem_migration_flags, they require
-   OpenCL 1.2: */
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMigrateMemINTEL(
-            cl_command_queue command_queue,
-            const void* ptr,
-            size_t size,
-            cl_mem_migration_flags flags,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-
-typedef cl_int (CL_API_CALL *
-clEnqueueMigrateMemINTEL_fn)(
-            cl_command_queue command_queue,
-            const void* ptr,
-            size_t size,
-            cl_mem_migration_flags flags,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-
-#endif
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMemAdviseINTEL(
-            cl_command_queue command_queue,
-            const void* ptr,
-            size_t size,
-            cl_mem_advice_intel advice,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-
-typedef cl_int (CL_API_CALL *
-clEnqueueMemAdviseINTEL_fn)(
-            cl_command_queue command_queue,
-            const void* ptr,
-            size_t size,
-            cl_mem_advice_intel advice,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-
-/***************************************************
-* cl_intel_create_buffer_with_properties extension *
-****************************************************/
-
-#define cl_intel_create_buffer_with_properties 1
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateBufferWithPropertiesINTEL(
-    cl_context   context,
-    const cl_mem_properties_intel* properties,
-    cl_mem_flags flags,
-    size_t       size,
-    void *       host_ptr,
-    cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-typedef cl_mem (CL_API_CALL *
-clCreateBufferWithPropertiesINTEL_fn)(
-    cl_context   context,
-    const cl_mem_properties_intel* properties,
-    cl_mem_flags flags,
-    size_t       size,
-    void *       host_ptr,
-    cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-/******************************************
-* cl_intel_mem_channel_property extension *
-*******************************************/
-
-#define CL_MEM_CHANNEL_INTEL            0x4213
-
-/*********************************
-* cl_intel_mem_force_host_memory *
-**********************************/
-
-#define cl_intel_mem_force_host_memory 1
-
-/* cl_mem_flags */
-#define CL_MEM_FORCE_HOST_MEMORY_INTEL                      (1 << 20)
-
-/***************************************************************
-* cl_intel_command_queue_families
-***************************************************************/
-#define cl_intel_command_queue_families 1
-
-typedef cl_bitfield         cl_command_queue_capabilities_intel;
-
-#define CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL                 64
-
-typedef struct _cl_queue_family_properties_intel {
-    cl_command_queue_properties properties;
-    cl_command_queue_capabilities_intel capabilities;
-    cl_uint count;
-    char name[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL];
-} cl_queue_family_properties_intel;
-
-/* cl_device_info */
-#define CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL             0x418B
-
-/* cl_queue_properties */
-#define CL_QUEUE_FAMILY_INTEL                               0x418C
-#define CL_QUEUE_INDEX_INTEL                                0x418D
-
-/* cl_command_queue_capabilities_intel */
-#define CL_QUEUE_DEFAULT_CAPABILITIES_INTEL                 0
-#define CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL (1 << 0)
-#define CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL (1 << 1)
-#define CL_QUEUE_CAPABILITY_SINGLE_QUEUE_EVENT_WAIT_LIST_INTEL (1 << 2)
-#define CL_QUEUE_CAPABILITY_CROSS_QUEUE_EVENT_WAIT_LIST_INTEL (1 << 3)
-#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL           (1 << 8)
-#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL      (1 << 9)
-#define CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL                (1 << 10)
-#define CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL               (1 << 11)
-#define CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL            (1 << 12)
-#define CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL                 (1 << 13)
-#define CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL                (1 << 14)
-#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_IMAGE_INTEL     (1 << 15)
-#define CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_BUFFER_INTEL     (1 << 16)
-#define CL_QUEUE_CAPABILITY_MARKER_INTEL                    (1 << 24)
-#define CL_QUEUE_CAPABILITY_BARRIER_INTEL                   (1 << 25)
-#define CL_QUEUE_CAPABILITY_KERNEL_INTEL                    (1 << 26)
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif /* __CL_EXT_H */

diff --git a/dependencies/ocl-headers/CL/cl_gl.h b/dependencies/ocl-headers/CL/cl_gl.h
deleted file mode 100644
index 5ea0fd8..0000000
--- a/dependencies/ocl-headers/CL/cl_gl.h
+++ /dev/null

@@ -1,169 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2008-2021 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-#ifndef __OPENCL_CL_GL_H
-#define __OPENCL_CL_GL_H
-
-#include <CL/cl.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef cl_uint     cl_gl_object_type;
-typedef cl_uint     cl_gl_texture_info;
-typedef cl_uint     cl_gl_platform_info;
-typedef struct __GLsync *cl_GLsync;
-
-/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken           */
-#define CL_GL_OBJECT_BUFFER                     0x2000
-#define CL_GL_OBJECT_TEXTURE2D                  0x2001
-#define CL_GL_OBJECT_TEXTURE3D                  0x2002
-#define CL_GL_OBJECT_RENDERBUFFER               0x2003
-#ifdef CL_VERSION_1_2
-#define CL_GL_OBJECT_TEXTURE2D_ARRAY            0x200E
-#define CL_GL_OBJECT_TEXTURE1D                  0x200F
-#define CL_GL_OBJECT_TEXTURE1D_ARRAY            0x2010
-#define CL_GL_OBJECT_TEXTURE_BUFFER             0x2011
-#endif
-
-/* cl_gl_texture_info           */
-#define CL_GL_TEXTURE_TARGET                    0x2004
-#define CL_GL_MIPMAP_LEVEL                      0x2005
-#ifdef CL_VERSION_1_2
-#define CL_GL_NUM_SAMPLES                       0x2012
-#endif
-
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromGLBuffer(cl_context     context,
-                     cl_mem_flags   flags,
-                     cl_GLuint      bufobj,
-                     cl_int *       errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-#ifdef CL_VERSION_1_2
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromGLTexture(cl_context      context,
-                      cl_mem_flags    flags,
-                      cl_GLenum       target,
-                      cl_GLint        miplevel,
-                      cl_GLuint       texture,
-                      cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-
-#endif
-
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromGLRenderbuffer(cl_context   context,
-                           cl_mem_flags flags,
-                           cl_GLuint    renderbuffer,
-                           cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetGLObjectInfo(cl_mem                memobj,
-                  cl_gl_object_type *   gl_object_type,
-                  cl_GLuint *           gl_object_name) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetGLTextureInfo(cl_mem               memobj,
-                   cl_gl_texture_info   param_name,
-                   size_t               param_value_size,
-                   void *               param_value,
-                   size_t *             param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueAcquireGLObjects(cl_command_queue      command_queue,
-                          cl_uint               num_objects,
-                          const cl_mem *        mem_objects,
-                          cl_uint               num_events_in_wait_list,
-                          const cl_event *      event_wait_list,
-                          cl_event *            event) CL_API_SUFFIX__VERSION_1_0;
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReleaseGLObjects(cl_command_queue      command_queue,
-                          cl_uint               num_objects,
-                          const cl_mem *        mem_objects,
-                          cl_uint               num_events_in_wait_list,
-                          const cl_event *      event_wait_list,
-                          cl_event *            event) CL_API_SUFFIX__VERSION_1_0;
-
-
-/* Deprecated OpenCL 1.1 APIs */
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
-clCreateFromGLTexture2D(cl_context      context,
-                        cl_mem_flags    flags,
-                        cl_GLenum       target,
-                        cl_GLint        miplevel,
-                        cl_GLuint       texture,
-                        cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-
-extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
-clCreateFromGLTexture3D(cl_context      context,
-                        cl_mem_flags    flags,
-                        cl_GLenum       target,
-                        cl_GLint        miplevel,
-                        cl_GLuint       texture,
-                        cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-
-/* cl_khr_gl_sharing extension  */
-
-#define cl_khr_gl_sharing 1
-
-typedef cl_uint     cl_gl_context_info;
-
-/* Additional Error Codes  */
-#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR  -1000
-
-/* cl_gl_context_info  */
-#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR    0x2006
-#define CL_DEVICES_FOR_GL_CONTEXT_KHR           0x2007
-
-/* Additional cl_context_properties  */
-#define CL_GL_CONTEXT_KHR                       0x2008
-#define CL_EGL_DISPLAY_KHR                      0x2009
-#define CL_GLX_DISPLAY_KHR                      0x200A
-#define CL_WGL_HDC_KHR                          0x200B
-#define CL_CGL_SHAREGROUP_KHR                   0x200C
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetGLContextInfoKHR(const cl_context_properties * properties,
-                      cl_gl_context_info            param_name,
-                      size_t                        param_value_size,
-                      void *                        param_value,
-                      size_t *                      param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-
-typedef cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
-    const cl_context_properties * properties,
-    cl_gl_context_info            param_name,
-    size_t                        param_value_size,
-    void *                        param_value,
-    size_t *                      param_value_size_ret);
-
-/* 
- *  cl_khr_gl_event extension
- */
-#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR     0x200D
-
-extern CL_API_ENTRY cl_event CL_API_CALL
-clCreateEventFromGLsyncKHR(cl_context context,
-                           cl_GLsync  sync,
-                           cl_int *   errcode_ret) CL_API_SUFFIX__VERSION_1_1;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  /* __OPENCL_CL_GL_H */

diff --git a/dependencies/ocl-headers/CL/cl_gl_ext.h b/dependencies/ocl-headers/CL/cl_gl_ext.h
deleted file mode 100644
index 8ec8181..0000000
--- a/dependencies/ocl-headers/CL/cl_gl_ext.h
+++ /dev/null

@@ -1,18 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2008-2021 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-#include <CL/cl_gl.h>
-#pragma message("All OpenGL-related extensions have been moved into cl_gl.h.  Please include cl_gl.h directly.")

diff --git a/dependencies/ocl-headers/CL/cl_half.h b/dependencies/ocl-headers/CL/cl_half.h
deleted file mode 100644
index ecc4223..0000000
--- a/dependencies/ocl-headers/CL/cl_half.h
+++ /dev/null

@@ -1,440 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2019-2020 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-/**
- * This is a header-only utility library that provides OpenCL host code with
- * routines for converting to/from cl_half values.
- *
- * Example usage:
- *
- *    #include <CL/cl_half.h>
- *    ...
- *    cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
- *    cl_float f = cl_half_to_float(h);
- */
-
-#ifndef OPENCL_CL_HALF_H
-#define OPENCL_CL_HALF_H
-
-#include <CL/cl_platform.h>
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/**
- * Rounding mode used when converting to cl_half.
- */
-typedef enum
-{
-  CL_HALF_RTE, // round to nearest even
-  CL_HALF_RTZ, // round towards zero
-  CL_HALF_RTP, // round towards positive infinity
-  CL_HALF_RTN, // round towards negative infinity
-} cl_half_rounding_mode;
-
-
-/* Private utility macros. */
-#define CL_HALF_EXP_MASK 0x7C00
-#define CL_HALF_MAX_FINITE_MAG 0x7BFF
-
-
-/*
- * Utility to deal with values that overflow when converting to half precision.
- */
-static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
-                                              uint16_t sign)
-{
-  if (rounding_mode == CL_HALF_RTZ)
-  {
-    // Round overflow towards zero -> largest finite number (preserving sign)
-    return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
-  }
-  else if (rounding_mode == CL_HALF_RTP && sign)
-  {
-    // Round negative overflow towards positive infinity -> most negative finite number
-    return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
-  }
-  else if (rounding_mode == CL_HALF_RTN && !sign)
-  {
-    // Round positive overflow towards negative infinity -> largest finite number
-    return CL_HALF_MAX_FINITE_MAG;
-  }
-
-  // Overflow to infinity
-  return (sign << 15) | CL_HALF_EXP_MASK;
-}
-
-/*
- * Utility to deal with values that underflow when converting to half precision.
- */
-static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
-                                               uint16_t sign)
-{
-  if (rounding_mode == CL_HALF_RTP && !sign)
-  {
-    // Round underflow towards positive infinity -> smallest positive value
-    return (sign << 15) | 1;
-  }
-  else if (rounding_mode == CL_HALF_RTN && sign)
-  {
-    // Round underflow towards negative infinity -> largest negative value
-    return (sign << 15) | 1;
-  }
-
-  // Flush to zero
-  return (sign << 15);
-}
-
-
-/**
- * Convert a cl_float to a cl_half.
- */
-static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
-{
-  // Type-punning to get direct access to underlying bits
-  union
-  {
-    cl_float f;
-    uint32_t i;
-  } f32;
-  f32.f = f;
-
-  // Extract sign bit
-  uint16_t sign = f32.i >> 31;
-
-  // Extract FP32 exponent and mantissa
-  uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
-  uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
-
-  // Remove FP32 exponent bias
-  int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
-
-  // Add FP16 exponent bias
-  uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
-
-  // Position of the bit that will become the FP16 mantissa LSB
-  uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
-
-  // Check for NaN / infinity
-  if (f_exp == 0xFF)
-  {
-    if (f_mant)
-    {
-      // NaN -> propagate mantissa and silence it
-      uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
-      h_mant |= 0x200;
-      return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
-    }
-    else
-    {
-      // Infinity -> zero mantissa
-      return (sign << 15) | CL_HALF_EXP_MASK;
-    }
-  }
-
-  // Check for zero
-  if (!f_exp && !f_mant)
-  {
-    return (sign << 15);
-  }
-
-  // Check for overflow
-  if (exp >= CL_HALF_MAX_EXP)
-  {
-    return cl_half_handle_overflow(rounding_mode, sign);
-  }
-
-  // Check for underflow
-  if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
-  {
-    return cl_half_handle_underflow(rounding_mode, sign);
-  }
-
-  // Check for value that will become denormal
-  if (exp < -14)
-  {
-    // Denormal -> include the implicit 1 from the FP32 mantissa
-    h_exp = 0;
-    f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
-
-    // Mantissa shift amount depends on exponent
-    lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
-  }
-
-  // Generate FP16 mantissa by shifting FP32 mantissa
-  uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
-
-  // Check whether we need to round
-  uint32_t halfway = 1 << (lsb_pos - 1);
-  uint32_t mask = (halfway << 1) - 1;
-  switch (rounding_mode)
-  {
-    case CL_HALF_RTE:
-      if ((f_mant & mask) > halfway)
-      {
-        // More than halfway -> round up
-        h_mant += 1;
-      }
-      else if ((f_mant & mask) == halfway)
-      {
-        // Exactly halfway -> round to nearest even
-        if (h_mant & 0x1)
-          h_mant += 1;
-      }
-      break;
-    case CL_HALF_RTZ:
-      // Mantissa has already been truncated -> do nothing
-      break;
-    case CL_HALF_RTP:
-      if ((f_mant & mask) && !sign)
-      {
-        // Round positive numbers up
-        h_mant += 1;
-      }
-      break;
-    case CL_HALF_RTN:
-      if ((f_mant & mask) && sign)
-      {
-        // Round negative numbers down
-        h_mant += 1;
-      }
-      break;
-  }
-
-  // Check for mantissa overflow
-  if (h_mant & 0x400)
-  {
-    h_exp += 1;
-    h_mant = 0;
-  }
-
-  return (sign << 15) | (h_exp << 10) | h_mant;
-}
-
-
-/**
- * Convert a cl_double to a cl_half.
- */
-static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
-{
-  // Type-punning to get direct access to underlying bits
-  union
-  {
-    cl_double d;
-    uint64_t i;
-  } f64;
-  f64.d = d;
-
-  // Extract sign bit
-  uint16_t sign = f64.i >> 63;
-
-  // Extract FP64 exponent and mantissa
-  uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
-  uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
-
-  // Remove FP64 exponent bias
-  int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
-
-  // Add FP16 exponent bias
-  uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
-
-  // Position of the bit that will become the FP16 mantissa LSB
-  uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
-
-  // Check for NaN / infinity
-  if (d_exp == 0x7FF)
-  {
-    if (d_mant)
-    {
-      // NaN -> propagate mantissa and silence it
-      uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
-      h_mant |= 0x200;
-      return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
-    }
-    else
-    {
-      // Infinity -> zero mantissa
-      return (sign << 15) | CL_HALF_EXP_MASK;
-    }
-  }
-
-  // Check for zero
-  if (!d_exp && !d_mant)
-  {
-    return (sign << 15);
-  }
-
-  // Check for overflow
-  if (exp >= CL_HALF_MAX_EXP)
-  {
-    return cl_half_handle_overflow(rounding_mode, sign);
-  }
-
-  // Check for underflow
-  if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
-  {
-    return cl_half_handle_underflow(rounding_mode, sign);
-  }
-
-  // Check for value that will become denormal
-  if (exp < -14)
-  {
-    // Include the implicit 1 from the FP64 mantissa
-    h_exp = 0;
-    d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
-
-    // Mantissa shift amount depends on exponent
-    lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
-  }
-
-  // Generate FP16 mantissa by shifting FP64 mantissa
-  uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
-
-  // Check whether we need to round
-  uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
-  uint64_t mask = (halfway << 1) - 1;
-  switch (rounding_mode)
-  {
-    case CL_HALF_RTE:
-      if ((d_mant & mask) > halfway)
-      {
-        // More than halfway -> round up
-        h_mant += 1;
-      }
-      else if ((d_mant & mask) == halfway)
-      {
-        // Exactly halfway -> round to nearest even
-        if (h_mant & 0x1)
-          h_mant += 1;
-      }
-      break;
-    case CL_HALF_RTZ:
-      // Mantissa has already been truncated -> do nothing
-      break;
-    case CL_HALF_RTP:
-      if ((d_mant & mask) && !sign)
-      {
-        // Round positive numbers up
-        h_mant += 1;
-      }
-      break;
-    case CL_HALF_RTN:
-      if ((d_mant & mask) && sign)
-      {
-        // Round negative numbers down
-        h_mant += 1;
-      }
-      break;
-  }
-
-  // Check for mantissa overflow
-  if (h_mant & 0x400)
-  {
-    h_exp += 1;
-    h_mant = 0;
-  }
-
-  return (sign << 15) | (h_exp << 10) | h_mant;
-}
-
-
-/**
- * Convert a cl_half to a cl_float.
- */
-static inline cl_float cl_half_to_float(cl_half h)
-{
-  // Type-punning to get direct access to underlying bits
-  union
-  {
-    cl_float f;
-    uint32_t i;
-  } f32;
-
-  // Extract sign bit
-  uint16_t sign = h >> 15;
-
-  // Extract FP16 exponent and mantissa
-  uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
-  uint16_t h_mant = h & 0x3FF;
-
-  // Remove FP16 exponent bias
-  int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
-
-  // Add FP32 exponent bias
-  uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
-
-  // Check for NaN / infinity
-  if (h_exp == 0x1F)
-  {
-    if (h_mant)
-    {
-      // NaN -> propagate mantissa and silence it
-      uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
-      f_mant |= 0x400000;
-      f32.i = (sign << 31) | 0x7F800000 | f_mant;
-      return f32.f;
-    }
-    else
-    {
-      // Infinity -> zero mantissa
-      f32.i = (sign << 31) | 0x7F800000;
-      return f32.f;
-    }
-  }
-
-  // Check for zero / denormal
-  if (h_exp == 0)
-  {
-    if (h_mant == 0)
-    {
-      // Zero -> zero exponent
-      f_exp = 0;
-    }
-    else
-    {
-      // Denormal -> normalize it
-      // - Shift mantissa to make most-significant 1 implicit
-      // - Adjust exponent accordingly
-      uint32_t shift = 0;
-      while ((h_mant & 0x400) == 0)
-      {
-        h_mant <<= 1;
-        shift++;
-      }
-      h_mant &= 0x3FF;
-      f_exp -= shift - 1;
-    }
-  }
-
-  f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
-  return f32.f;
-}
-
-
-#undef CL_HALF_EXP_MASK
-#undef CL_HALF_MAX_FINITE_MAG
-
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif  /* OPENCL_CL_HALF_H */

diff --git a/dependencies/ocl-headers/CL/cl_platform.h b/dependencies/ocl-headers/CL/cl_platform.h
deleted file mode 100644
index 8ae655d..0000000
--- a/dependencies/ocl-headers/CL/cl_platform.h
+++ /dev/null

@@ -1,1404 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2008-2020 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-#ifndef __CL_PLATFORM_H
-#define __CL_PLATFORM_H
-
-#include <CL/cl_version.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(_WIN32)
-    #if !defined(CL_API_ENTRY)
-        #define CL_API_ENTRY
-    #endif
-    #if !defined(CL_API_CALL)
-        #define CL_API_CALL     __stdcall
-    #endif
-    #if !defined(CL_CALLBACK)
-        #define CL_CALLBACK     __stdcall
-    #endif
-#else
-    #if !defined(CL_API_ENTRY)
-        #define CL_API_ENTRY
-    #endif
-    #if !defined(CL_API_CALL)
-        #define CL_API_CALL
-    #endif
-    #if !defined(CL_CALLBACK)
-        #define CL_CALLBACK
-    #endif
-#endif
-
-/*
- * Deprecation flags refer to the last version of the header in which the
- * feature was not deprecated.
- *
- * E.g. VERSION_1_1_DEPRECATED means the feature is present in 1.1 without
- * deprecation but is deprecated in versions later than 1.1.
- */
-
-#ifndef CL_API_SUFFIX_USER
-#define CL_API_SUFFIX_USER
-#endif
-
-#ifndef CL_API_PREFIX_USER
-#define CL_API_PREFIX_USER
-#endif
-
-#define CL_API_SUFFIX_COMMON CL_API_SUFFIX_USER
-#define CL_API_PREFIX_COMMON CL_API_PREFIX_USER
-
-#define CL_API_SUFFIX__VERSION_1_0 CL_API_SUFFIX_COMMON
-#define CL_API_SUFFIX__VERSION_1_1 CL_API_SUFFIX_COMMON
-#define CL_API_SUFFIX__VERSION_1_2 CL_API_SUFFIX_COMMON
-#define CL_API_SUFFIX__VERSION_2_0 CL_API_SUFFIX_COMMON
-#define CL_API_SUFFIX__VERSION_2_1 CL_API_SUFFIX_COMMON
-#define CL_API_SUFFIX__VERSION_2_2 CL_API_SUFFIX_COMMON
-#define CL_API_SUFFIX__VERSION_3_0 CL_API_SUFFIX_COMMON
-#define CL_API_SUFFIX__EXPERIMENTAL CL_API_SUFFIX_COMMON
-
-
-#ifdef __GNUC__
-  #define CL_API_SUFFIX_DEPRECATED __attribute__((deprecated))
-  #define CL_API_PREFIX_DEPRECATED
-#elif defined(_WIN32)
-  #define CL_API_SUFFIX_DEPRECATED
-  #define CL_API_PREFIX_DEPRECATED __declspec(deprecated)
-#else
-  #define CL_API_SUFFIX_DEPRECATED
-  #define CL_API_PREFIX_DEPRECATED
-#endif
-
-#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
-    #define CL_API_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX_COMMON
-    #define CL_API_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX_COMMON
-#else
-    #define CL_API_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED
-    #define CL_API_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED
-#endif
-
-#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
-    #define CL_API_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX_COMMON
-    #define CL_API_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX_COMMON
-#else
-    #define CL_API_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED
-    #define CL_API_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED
-#endif
-
-#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS
-    #define CL_API_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX_COMMON
-    #define CL_API_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX_COMMON
-#else
-    #define CL_API_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED
-    #define CL_API_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED
- #endif
-
-#ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS
-    #define CL_API_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX_COMMON
-    #define CL_API_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX_COMMON
-#else
-    #define CL_API_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED
-    #define CL_API_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED
-#endif
-
-#ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS
-    #define CL_API_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX_COMMON
-    #define CL_API_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX_COMMON
-#else
-    #define CL_API_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED
-    #define CL_API_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED
-#endif
-
-#ifdef CL_USE_DEPRECATED_OPENCL_2_2_APIS
-    #define CL_API_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX_COMMON
-    #define CL_API_PREFIX__VERSION_2_2_DEPRECATED CL_API_PREFIX_COMMON
-#else
-    #define CL_API_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED
-    #define CL_API_PREFIX__VERSION_2_2_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED
-#endif
-
-#if (defined (_WIN32) && defined(_MSC_VER))
-
-/* intptr_t is used in cl.h and provided by stddef.h in Visual C++, but not in clang */
-/* stdint.h was missing before Visual Studio 2010, include it for later versions and for clang */
-#if defined(__clang__) || _MSC_VER >= 1600
-    #include <stdint.h>
-#endif
-
-/* scalar types  */
-typedef signed   __int8         cl_char;
-typedef unsigned __int8         cl_uchar;
-typedef signed   __int16        cl_short;
-typedef unsigned __int16        cl_ushort;
-typedef signed   __int32        cl_int;
-typedef unsigned __int32        cl_uint;
-typedef signed   __int64        cl_long;
-typedef unsigned __int64        cl_ulong;
-
-typedef unsigned __int16        cl_half;
-typedef float                   cl_float;
-typedef double                  cl_double;
-
-/* Macro names and corresponding values defined by OpenCL */
-#define CL_CHAR_BIT         8
-#define CL_SCHAR_MAX        127
-#define CL_SCHAR_MIN        (-127-1)
-#define CL_CHAR_MAX         CL_SCHAR_MAX
-#define CL_CHAR_MIN         CL_SCHAR_MIN
-#define CL_UCHAR_MAX        255
-#define CL_SHRT_MAX         32767
-#define CL_SHRT_MIN         (-32767-1)
-#define CL_USHRT_MAX        65535
-#define CL_INT_MAX          2147483647
-#define CL_INT_MIN          (-2147483647-1)
-#define CL_UINT_MAX         0xffffffffU
-#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
-#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
-#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
-
-#define CL_FLT_DIG          6
-#define CL_FLT_MANT_DIG     24
-#define CL_FLT_MAX_10_EXP   +38
-#define CL_FLT_MAX_EXP      +128
-#define CL_FLT_MIN_10_EXP   -37
-#define CL_FLT_MIN_EXP      -125
-#define CL_FLT_RADIX        2
-#define CL_FLT_MAX          340282346638528859811704183484516925440.0f
-#define CL_FLT_MIN          1.175494350822287507969e-38f
-#define CL_FLT_EPSILON      1.1920928955078125e-7f
-
-#define CL_HALF_DIG          3
-#define CL_HALF_MANT_DIG     11
-#define CL_HALF_MAX_10_EXP   +4
-#define CL_HALF_MAX_EXP      +16
-#define CL_HALF_MIN_10_EXP   -4
-#define CL_HALF_MIN_EXP      -13
-#define CL_HALF_RADIX        2
-#define CL_HALF_MAX          65504.0f
-#define CL_HALF_MIN          6.103515625e-05f
-#define CL_HALF_EPSILON      9.765625e-04f
-
-#define CL_DBL_DIG          15
-#define CL_DBL_MANT_DIG     53
-#define CL_DBL_MAX_10_EXP   +308
-#define CL_DBL_MAX_EXP      +1024
-#define CL_DBL_MIN_10_EXP   -307
-#define CL_DBL_MIN_EXP      -1021
-#define CL_DBL_RADIX        2
-#define CL_DBL_MAX          1.7976931348623158e+308
-#define CL_DBL_MIN          2.225073858507201383090e-308
-#define CL_DBL_EPSILON      2.220446049250313080847e-16
-
-#define CL_M_E              2.7182818284590452354
-#define CL_M_LOG2E          1.4426950408889634074
-#define CL_M_LOG10E         0.43429448190325182765
-#define CL_M_LN2            0.69314718055994530942
-#define CL_M_LN10           2.30258509299404568402
-#define CL_M_PI             3.14159265358979323846
-#define CL_M_PI_2           1.57079632679489661923
-#define CL_M_PI_4           0.78539816339744830962
-#define CL_M_1_PI           0.31830988618379067154
-#define CL_M_2_PI           0.63661977236758134308
-#define CL_M_2_SQRTPI       1.12837916709551257390
-#define CL_M_SQRT2          1.41421356237309504880
-#define CL_M_SQRT1_2        0.70710678118654752440
-
-#define CL_M_E_F            2.718281828f
-#define CL_M_LOG2E_F        1.442695041f
-#define CL_M_LOG10E_F       0.434294482f
-#define CL_M_LN2_F          0.693147181f
-#define CL_M_LN10_F         2.302585093f
-#define CL_M_PI_F           3.141592654f
-#define CL_M_PI_2_F         1.570796327f
-#define CL_M_PI_4_F         0.785398163f
-#define CL_M_1_PI_F         0.318309886f
-#define CL_M_2_PI_F         0.636619772f
-#define CL_M_2_SQRTPI_F     1.128379167f
-#define CL_M_SQRT2_F        1.414213562f
-#define CL_M_SQRT1_2_F      0.707106781f
-
-#define CL_NAN              (CL_INFINITY - CL_INFINITY)
-#define CL_HUGE_VALF        ((cl_float) 1e50)
-#define CL_HUGE_VAL         ((cl_double) 1e500)
-#define CL_MAXFLOAT         CL_FLT_MAX
-#define CL_INFINITY         CL_HUGE_VALF
-
-#else
-
-#include <stdint.h>
-
-/* scalar types  */
-typedef int8_t          cl_char;
-typedef uint8_t         cl_uchar;
-typedef int16_t         cl_short;
-typedef uint16_t        cl_ushort;
-typedef int32_t         cl_int;
-typedef uint32_t        cl_uint;
-typedef int64_t         cl_long;
-typedef uint64_t        cl_ulong;
-
-typedef uint16_t        cl_half;
-typedef float           cl_float;
-typedef double          cl_double;
-
-/* Macro names and corresponding values defined by OpenCL */
-#define CL_CHAR_BIT         8
-#define CL_SCHAR_MAX        127
-#define CL_SCHAR_MIN        (-127-1)
-#define CL_CHAR_MAX         CL_SCHAR_MAX
-#define CL_CHAR_MIN         CL_SCHAR_MIN
-#define CL_UCHAR_MAX        255
-#define CL_SHRT_MAX         32767
-#define CL_SHRT_MIN         (-32767-1)
-#define CL_USHRT_MAX        65535
-#define CL_INT_MAX          2147483647
-#define CL_INT_MIN          (-2147483647-1)
-#define CL_UINT_MAX         0xffffffffU
-#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
-#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
-#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
-
-#define CL_FLT_DIG          6
-#define CL_FLT_MANT_DIG     24
-#define CL_FLT_MAX_10_EXP   +38
-#define CL_FLT_MAX_EXP      +128
-#define CL_FLT_MIN_10_EXP   -37
-#define CL_FLT_MIN_EXP      -125
-#define CL_FLT_RADIX        2
-#define CL_FLT_MAX          340282346638528859811704183484516925440.0f
-#define CL_FLT_MIN          1.175494350822287507969e-38f
-#define CL_FLT_EPSILON      1.1920928955078125e-7f
-
-#define CL_HALF_DIG          3
-#define CL_HALF_MANT_DIG     11
-#define CL_HALF_MAX_10_EXP   +4
-#define CL_HALF_MAX_EXP      +16
-#define CL_HALF_MIN_10_EXP   -4
-#define CL_HALF_MIN_EXP      -13
-#define CL_HALF_RADIX        2
-#define CL_HALF_MAX          65504.0f
-#define CL_HALF_MIN          6.103515625e-05f
-#define CL_HALF_EPSILON      9.765625e-04f
-
-#define CL_DBL_DIG          15
-#define CL_DBL_MANT_DIG     53
-#define CL_DBL_MAX_10_EXP   +308
-#define CL_DBL_MAX_EXP      +1024
-#define CL_DBL_MIN_10_EXP   -307
-#define CL_DBL_MIN_EXP      -1021
-#define CL_DBL_RADIX        2
-#define CL_DBL_MAX          179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
-#define CL_DBL_MIN          2.225073858507201383090e-308
-#define CL_DBL_EPSILON      2.220446049250313080847e-16
-
-#define CL_M_E              2.7182818284590452354
-#define CL_M_LOG2E          1.4426950408889634074
-#define CL_M_LOG10E         0.43429448190325182765
-#define CL_M_LN2            0.69314718055994530942
-#define CL_M_LN10           2.30258509299404568402
-#define CL_M_PI             3.14159265358979323846
-#define CL_M_PI_2           1.57079632679489661923
-#define CL_M_PI_4           0.78539816339744830962
-#define CL_M_1_PI           0.31830988618379067154
-#define CL_M_2_PI           0.63661977236758134308
-#define CL_M_2_SQRTPI       1.12837916709551257390
-#define CL_M_SQRT2          1.41421356237309504880
-#define CL_M_SQRT1_2        0.70710678118654752440
-
-#define CL_M_E_F            2.718281828f
-#define CL_M_LOG2E_F        1.442695041f
-#define CL_M_LOG10E_F       0.434294482f
-#define CL_M_LN2_F          0.693147181f
-#define CL_M_LN10_F         2.302585093f
-#define CL_M_PI_F           3.141592654f
-#define CL_M_PI_2_F         1.570796327f
-#define CL_M_PI_4_F         0.785398163f
-#define CL_M_1_PI_F         0.318309886f
-#define CL_M_2_PI_F         0.636619772f
-#define CL_M_2_SQRTPI_F     1.128379167f
-#define CL_M_SQRT2_F        1.414213562f
-#define CL_M_SQRT1_2_F      0.707106781f
-
-#if defined( __GNUC__ )
-   #define CL_HUGE_VALF     __builtin_huge_valf()
-   #define CL_HUGE_VAL      __builtin_huge_val()
-   #define CL_NAN           __builtin_nanf( "" )
-#else
-   #define CL_HUGE_VALF     ((cl_float) 1e50)
-   #define CL_HUGE_VAL      ((cl_double) 1e500)
-   float nanf( const char * );
-   #define CL_NAN           nanf( "" )
-#endif
-#define CL_MAXFLOAT         CL_FLT_MAX
-#define CL_INFINITY         CL_HUGE_VALF
-
-#endif
-
-#include <stddef.h>
-
-/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */
-typedef unsigned int cl_GLuint;
-typedef int          cl_GLint;
-typedef unsigned int cl_GLenum;
-
-/*
- * Vector types
- *
- *  Note:   OpenCL requires that all types be naturally aligned.
- *          This means that vector types must be naturally aligned.
- *          For example, a vector of four floats must be aligned to
- *          a 16 byte boundary (calculated as 4 * the natural 4-byte
- *          alignment of the float).  The alignment qualifiers here
- *          will only function properly if your compiler supports them
- *          and if you don't actively work to defeat them.  For example,
- *          in order for a cl_float4 to be 16 byte aligned in a struct,
- *          the start of the struct must itself be 16-byte aligned.
- *
- *          Maintaining proper alignment is the user's responsibility.
- */
-
-/* Define basic vector types */
-#if defined( __VEC__ )
-  #if !defined(__clang__)
-     #include <altivec.h>   /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
-  #endif
-   typedef __vector unsigned char     __cl_uchar16;
-   typedef __vector signed char       __cl_char16;
-   typedef __vector unsigned short    __cl_ushort8;
-   typedef __vector signed short      __cl_short8;
-   typedef __vector unsigned int      __cl_uint4;
-   typedef __vector signed int        __cl_int4;
-   typedef __vector float             __cl_float4;
-   #define  __CL_UCHAR16__  1
-   #define  __CL_CHAR16__   1
-   #define  __CL_USHORT8__  1
-   #define  __CL_SHORT8__   1
-   #define  __CL_UINT4__    1
-   #define  __CL_INT4__     1
-   #define  __CL_FLOAT4__   1
-#endif
-
-#if defined( __SSE__ )
-    #if defined( __MINGW64__ )
-        #include <intrin.h>
-    #else
-        #include <xmmintrin.h>
-    #endif
-    #if defined( __GNUC__ )
-        typedef float __cl_float4   __attribute__((vector_size(16)));
-    #else
-        typedef __m128 __cl_float4;
-    #endif
-    #define __CL_FLOAT4__   1
-#endif
-
-#if defined( __SSE2__ )
-    #if defined( __MINGW64__ )
-        #include <intrin.h>
-    #else
-        #include <emmintrin.h>
-    #endif
-    #if defined( __GNUC__ )
-        typedef cl_uchar    __cl_uchar16    __attribute__((vector_size(16)));
-        typedef cl_char     __cl_char16     __attribute__((vector_size(16)));
-        typedef cl_ushort   __cl_ushort8    __attribute__((vector_size(16)));
-        typedef cl_short    __cl_short8     __attribute__((vector_size(16)));
-        typedef cl_uint     __cl_uint4      __attribute__((vector_size(16)));
-        typedef cl_int      __cl_int4       __attribute__((vector_size(16)));
-        typedef cl_ulong    __cl_ulong2     __attribute__((vector_size(16)));
-        typedef cl_long     __cl_long2      __attribute__((vector_size(16)));
-        typedef cl_double   __cl_double2    __attribute__((vector_size(16)));
-    #else
-        typedef __m128i __cl_uchar16;
-        typedef __m128i __cl_char16;
-        typedef __m128i __cl_ushort8;
-        typedef __m128i __cl_short8;
-        typedef __m128i __cl_uint4;
-        typedef __m128i __cl_int4;
-        typedef __m128i __cl_ulong2;
-        typedef __m128i __cl_long2;
-        typedef __m128d __cl_double2;
-    #endif
-    #define __CL_UCHAR16__  1
-    #define __CL_CHAR16__   1
-    #define __CL_USHORT8__  1
-    #define __CL_SHORT8__   1
-    #define __CL_INT4__     1
-    #define __CL_UINT4__    1
-    #define __CL_ULONG2__   1
-    #define __CL_LONG2__    1
-    #define __CL_DOUBLE2__  1
-#endif
-
-#if defined( __MMX__ )
-    #include <mmintrin.h>
-    #if defined( __GNUC__ )
-        typedef cl_uchar    __cl_uchar8     __attribute__((vector_size(8)));
-        typedef cl_char     __cl_char8      __attribute__((vector_size(8)));
-        typedef cl_ushort   __cl_ushort4    __attribute__((vector_size(8)));
-        typedef cl_short    __cl_short4     __attribute__((vector_size(8)));
-        typedef cl_uint     __cl_uint2      __attribute__((vector_size(8)));
-        typedef cl_int      __cl_int2       __attribute__((vector_size(8)));
-        typedef cl_ulong    __cl_ulong1     __attribute__((vector_size(8)));
-        typedef cl_long     __cl_long1      __attribute__((vector_size(8)));
-        typedef cl_float    __cl_float2     __attribute__((vector_size(8)));
-    #else
-        typedef __m64       __cl_uchar8;
-        typedef __m64       __cl_char8;
-        typedef __m64       __cl_ushort4;
-        typedef __m64       __cl_short4;
-        typedef __m64       __cl_uint2;
-        typedef __m64       __cl_int2;
-        typedef __m64       __cl_ulong1;
-        typedef __m64       __cl_long1;
-        typedef __m64       __cl_float2;
-    #endif
-    #define __CL_UCHAR8__   1
-    #define __CL_CHAR8__    1
-    #define __CL_USHORT4__  1
-    #define __CL_SHORT4__   1
-    #define __CL_INT2__     1
-    #define __CL_UINT2__    1
-    #define __CL_ULONG1__   1
-    #define __CL_LONG1__    1
-    #define __CL_FLOAT2__   1
-#endif
-
-#if defined( __AVX__ )
-    #if defined( __MINGW64__ )
-        #include <intrin.h>
-    #else
-        #include <immintrin.h>
-    #endif
-    #if defined( __GNUC__ )
-        typedef cl_float    __cl_float8     __attribute__((vector_size(32)));
-        typedef cl_double   __cl_double4    __attribute__((vector_size(32)));
-    #else
-        typedef __m256      __cl_float8;
-        typedef __m256d     __cl_double4;
-    #endif
-    #define __CL_FLOAT8__   1
-    #define __CL_DOUBLE4__  1
-#endif
-
-/* Define capabilities for anonymous struct members. */
-#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
-#define  __CL_HAS_ANON_STRUCT__ 1
-#define  __CL_ANON_STRUCT__
-#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
-#define  __CL_HAS_ANON_STRUCT__ 1
-#define  __CL_ANON_STRUCT__ __extension__
-#elif defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__)
-    #if _MSC_VER >= 1500
-   /* Microsoft Developer Studio 2008 supports anonymous structs, but
-    * complains by default. */
-    #define  __CL_HAS_ANON_STRUCT__ 1
-    #define  __CL_ANON_STRUCT__
-   /* Disable warning C4201: nonstandard extension used : nameless
-    * struct/union */
-    #pragma warning( push )
-    #pragma warning( disable : 4201 )
-    #endif
-#else
-#define  __CL_HAS_ANON_STRUCT__ 0
-#define  __CL_ANON_STRUCT__
-#endif
-
-/* Define alignment keys */
-#if defined( __GNUC__ ) || defined(__INTEGRITY)
-    #define CL_ALIGNED(_x)          __attribute__ ((aligned(_x)))
-#elif defined( _WIN32) && (_MSC_VER)
-    /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements     */
-    /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx                                                 */
-    /* #include <crtdefs.h>                                                                                             */
-    /* #define CL_ALIGNED(_x)          _CRT_ALIGN(_x)                                                                   */
-    #define CL_ALIGNED(_x)
-#else
-   #warning  Need to implement some method to align data here
-   #define  CL_ALIGNED(_x)
-#endif
-
-/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
-#if __CL_HAS_ANON_STRUCT__
-    /* .xyzw and .s0123...{f|F} are supported */
-    #define CL_HAS_NAMED_VECTOR_FIELDS 1
-    /* .hi and .lo are supported */
-    #define CL_HAS_HI_LO_VECTOR_FIELDS 1
-#endif
-
-/* Define cl_vector types */
-
-/* ---- cl_charn ---- */
-typedef union
-{
-    cl_char  CL_ALIGNED(2) s[2];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_char  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_char  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_char  lo, hi; };
-#endif
-#if defined( __CL_CHAR2__)
-    __cl_char2     v2;
-#endif
-}cl_char2;
-
-typedef union
-{
-    cl_char  CL_ALIGNED(4) s[4];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_char  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_char  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; };
-#endif
-#if defined( __CL_CHAR2__)
-    __cl_char2     v2[2];
-#endif
-#if defined( __CL_CHAR4__)
-    __cl_char4     v4;
-#endif
-}cl_char4;
-
-/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */
-typedef  cl_char4  cl_char3;
-
-typedef union
-{
-    cl_char   CL_ALIGNED(8) s[8];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_char  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; };
-#endif
-#if defined( __CL_CHAR2__)
-    __cl_char2     v2[4];
-#endif
-#if defined( __CL_CHAR4__)
-    __cl_char4     v4[2];
-#endif
-#if defined( __CL_CHAR8__ )
-    __cl_char8     v8;
-#endif
-}cl_char8;
-
-typedef union
-{
-    cl_char  CL_ALIGNED(16) s[16];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_char  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; };
-#endif
-#if defined( __CL_CHAR2__)
-    __cl_char2     v2[8];
-#endif
-#if defined( __CL_CHAR4__)
-    __cl_char4     v4[4];
-#endif
-#if defined( __CL_CHAR8__ )
-    __cl_char8     v8[2];
-#endif
-#if defined( __CL_CHAR16__ )
-    __cl_char16    v16;
-#endif
-}cl_char16;
-
-
-/* ---- cl_ucharn ---- */
-typedef union
-{
-    cl_uchar  CL_ALIGNED(2) s[2];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_uchar  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar  lo, hi; };
-#endif
-#if defined( __cl_uchar2__)
-    __cl_uchar2     v2;
-#endif
-}cl_uchar2;
-
-typedef union
-{
-    cl_uchar  CL_ALIGNED(4) s[4];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_uchar  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; };
-#endif
-#if defined( __CL_UCHAR2__)
-    __cl_uchar2     v2[2];
-#endif
-#if defined( __CL_UCHAR4__)
-    __cl_uchar4     v4;
-#endif
-}cl_uchar4;
-
-/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */
-typedef  cl_uchar4  cl_uchar3;
-
-typedef union
-{
-    cl_uchar   CL_ALIGNED(8) s[8];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_uchar  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; };
-#endif
-#if defined( __CL_UCHAR2__)
-    __cl_uchar2     v2[4];
-#endif
-#if defined( __CL_UCHAR4__)
-    __cl_uchar4     v4[2];
-#endif
-#if defined( __CL_UCHAR8__ )
-    __cl_uchar8     v8;
-#endif
-}cl_uchar8;
-
-typedef union
-{
-    cl_uchar  CL_ALIGNED(16) s[16];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_uchar  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; };
-#endif
-#if defined( __CL_UCHAR2__)
-    __cl_uchar2     v2[8];
-#endif
-#if defined( __CL_UCHAR4__)
-    __cl_uchar4     v4[4];
-#endif
-#if defined( __CL_UCHAR8__ )
-    __cl_uchar8     v8[2];
-#endif
-#if defined( __CL_UCHAR16__ )
-    __cl_uchar16    v16;
-#endif
-}cl_uchar16;
-
-
-/* ---- cl_shortn ---- */
-typedef union
-{
-    cl_short  CL_ALIGNED(4) s[2];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_short  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_short  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_short  lo, hi; };
-#endif
-#if defined( __CL_SHORT2__)
-    __cl_short2     v2;
-#endif
-}cl_short2;
-
-typedef union
-{
-    cl_short  CL_ALIGNED(8) s[4];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_short  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_short  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; };
-#endif
-#if defined( __CL_SHORT2__)
-    __cl_short2     v2[2];
-#endif
-#if defined( __CL_SHORT4__)
-    __cl_short4     v4;
-#endif
-}cl_short4;
-
-/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */
-typedef  cl_short4  cl_short3;
-
-typedef union
-{
-    cl_short   CL_ALIGNED(16) s[8];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_short  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; };
-#endif
-#if defined( __CL_SHORT2__)
-    __cl_short2     v2[4];
-#endif
-#if defined( __CL_SHORT4__)
-    __cl_short4     v4[2];
-#endif
-#if defined( __CL_SHORT8__ )
-    __cl_short8     v8;
-#endif
-}cl_short8;
-
-typedef union
-{
-    cl_short  CL_ALIGNED(32) s[16];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_short  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; };
-#endif
-#if defined( __CL_SHORT2__)
-    __cl_short2     v2[8];
-#endif
-#if defined( __CL_SHORT4__)
-    __cl_short4     v4[4];
-#endif
-#if defined( __CL_SHORT8__ )
-    __cl_short8     v8[2];
-#endif
-#if defined( __CL_SHORT16__ )
-    __cl_short16    v16;
-#endif
-}cl_short16;
-
-
-/* ---- cl_ushortn ---- */
-typedef union
-{
-    cl_ushort  CL_ALIGNED(4) s[2];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_ushort  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort  lo, hi; };
-#endif
-#if defined( __CL_USHORT2__)
-    __cl_ushort2     v2;
-#endif
-}cl_ushort2;
-
-typedef union
-{
-    cl_ushort  CL_ALIGNED(8) s[4];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_ushort  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; };
-#endif
-#if defined( __CL_USHORT2__)
-    __cl_ushort2     v2[2];
-#endif
-#if defined( __CL_USHORT4__)
-    __cl_ushort4     v4;
-#endif
-}cl_ushort4;
-
-/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */
-typedef  cl_ushort4  cl_ushort3;
-
-typedef union
-{
-    cl_ushort   CL_ALIGNED(16) s[8];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_ushort  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; };
-#endif
-#if defined( __CL_USHORT2__)
-    __cl_ushort2     v2[4];
-#endif
-#if defined( __CL_USHORT4__)
-    __cl_ushort4     v4[2];
-#endif
-#if defined( __CL_USHORT8__ )
-    __cl_ushort8     v8;
-#endif
-}cl_ushort8;
-
-typedef union
-{
-    cl_ushort  CL_ALIGNED(32) s[16];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_ushort  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; };
-#endif
-#if defined( __CL_USHORT2__)
-    __cl_ushort2     v2[8];
-#endif
-#if defined( __CL_USHORT4__)
-    __cl_ushort4     v4[4];
-#endif
-#if defined( __CL_USHORT8__ )
-    __cl_ushort8     v8[2];
-#endif
-#if defined( __CL_USHORT16__ )
-    __cl_ushort16    v16;
-#endif
-}cl_ushort16;
-
-
-/* ---- cl_halfn ---- */
-typedef union
-{
-    cl_half  CL_ALIGNED(4) s[2];
-#if __CL_HAS_ANON_STRUCT__
-    __CL_ANON_STRUCT__ struct{ cl_half  x, y; };
-    __CL_ANON_STRUCT__ struct{ cl_half  s0, s1; };
-    __CL_ANON_STRUCT__ struct{ cl_half  lo, hi; };
-#endif
-#if defined( __CL_HALF2__)
-    __cl_half2     v2;
-#endif
-}cl_half2;
-
-typedef union
-{
-    cl_half  CL_ALIGNED(8) s[4];
-#if __CL_HAS_ANON_STRUCT__
-    __CL_ANON_STRUCT__ struct{ cl_half  x, y, z, w; };
-    __CL_ANON_STRUCT__ struct{ cl_half  s0, s1, s2, s3; };
-    __CL_ANON_STRUCT__ struct{ cl_half2 lo, hi; };
-#endif
-#if defined( __CL_HALF2__)
-    __cl_half2     v2[2];
-#endif
-#if defined( __CL_HALF4__)
-    __cl_half4     v4;
-#endif
-}cl_half4;
-
-/* cl_half3 is identical in size, alignment and behavior to cl_half4. See section 6.1.5. */
-typedef  cl_half4  cl_half3;
-
-typedef union
-{
-    cl_half   CL_ALIGNED(16) s[8];
-#if __CL_HAS_ANON_STRUCT__
-    __CL_ANON_STRUCT__ struct{ cl_half  x, y, z, w; };
-    __CL_ANON_STRUCT__ struct{ cl_half  s0, s1, s2, s3, s4, s5, s6, s7; };
-    __CL_ANON_STRUCT__ struct{ cl_half4 lo, hi; };
-#endif
-#if defined( __CL_HALF2__)
-    __cl_half2     v2[4];
-#endif
-#if defined( __CL_HALF4__)
-    __cl_half4     v4[2];
-#endif
-#if defined( __CL_HALF8__ )
-    __cl_half8     v8;
-#endif
-}cl_half8;
-
-typedef union
-{
-    cl_half  CL_ALIGNED(32) s[16];
-#if __CL_HAS_ANON_STRUCT__
-    __CL_ANON_STRUCT__ struct{ cl_half  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-    __CL_ANON_STRUCT__ struct{ cl_half  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-    __CL_ANON_STRUCT__ struct{ cl_half8 lo, hi; };
-#endif
-#if defined( __CL_HALF2__)
-    __cl_half2     v2[8];
-#endif
-#if defined( __CL_HALF4__)
-    __cl_half4     v4[4];
-#endif
-#if defined( __CL_HALF8__ )
-    __cl_half8     v8[2];
-#endif
-#if defined( __CL_HALF16__ )
-    __cl_half16    v16;
-#endif
-}cl_half16;
-
-/* ---- cl_intn ---- */
-typedef union
-{
-    cl_int  CL_ALIGNED(8) s[2];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_int  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_int  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_int  lo, hi; };
-#endif
-#if defined( __CL_INT2__)
-    __cl_int2     v2;
-#endif
-}cl_int2;
-
-typedef union
-{
-    cl_int  CL_ALIGNED(16) s[4];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_int  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_int  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; };
-#endif
-#if defined( __CL_INT2__)
-    __cl_int2     v2[2];
-#endif
-#if defined( __CL_INT4__)
-    __cl_int4     v4;
-#endif
-}cl_int4;
-
-/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */
-typedef  cl_int4  cl_int3;
-
-typedef union
-{
-    cl_int   CL_ALIGNED(32) s[8];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_int  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; };
-#endif
-#if defined( __CL_INT2__)
-    __cl_int2     v2[4];
-#endif
-#if defined( __CL_INT4__)
-    __cl_int4     v4[2];
-#endif
-#if defined( __CL_INT8__ )
-    __cl_int8     v8;
-#endif
-}cl_int8;
-
-typedef union
-{
-    cl_int  CL_ALIGNED(64) s[16];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_int  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; };
-#endif
-#if defined( __CL_INT2__)
-    __cl_int2     v2[8];
-#endif
-#if defined( __CL_INT4__)
-    __cl_int4     v4[4];
-#endif
-#if defined( __CL_INT8__ )
-    __cl_int8     v8[2];
-#endif
-#if defined( __CL_INT16__ )
-    __cl_int16    v16;
-#endif
-}cl_int16;
-
-
-/* ---- cl_uintn ---- */
-typedef union
-{
-    cl_uint  CL_ALIGNED(8) s[2];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_uint  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_uint  lo, hi; };
-#endif
-#if defined( __CL_UINT2__)
-    __cl_uint2     v2;
-#endif
-}cl_uint2;
-
-typedef union
-{
-    cl_uint  CL_ALIGNED(16) s[4];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_uint  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; };
-#endif
-#if defined( __CL_UINT2__)
-    __cl_uint2     v2[2];
-#endif
-#if defined( __CL_UINT4__)
-    __cl_uint4     v4;
-#endif
-}cl_uint4;
-
-/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */
-typedef  cl_uint4  cl_uint3;
-
-typedef union
-{
-    cl_uint   CL_ALIGNED(32) s[8];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_uint  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; };
-#endif
-#if defined( __CL_UINT2__)
-    __cl_uint2     v2[4];
-#endif
-#if defined( __CL_UINT4__)
-    __cl_uint4     v4[2];
-#endif
-#if defined( __CL_UINT8__ )
-    __cl_uint8     v8;
-#endif
-}cl_uint8;
-
-typedef union
-{
-    cl_uint  CL_ALIGNED(64) s[16];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_uint  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; };
-#endif
-#if defined( __CL_UINT2__)
-    __cl_uint2     v2[8];
-#endif
-#if defined( __CL_UINT4__)
-    __cl_uint4     v4[4];
-#endif
-#if defined( __CL_UINT8__ )
-    __cl_uint8     v8[2];
-#endif
-#if defined( __CL_UINT16__ )
-    __cl_uint16    v16;
-#endif
-}cl_uint16;
-
-/* ---- cl_longn ---- */
-typedef union
-{
-    cl_long  CL_ALIGNED(16) s[2];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_long  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_long  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_long  lo, hi; };
-#endif
-#if defined( __CL_LONG2__)
-    __cl_long2     v2;
-#endif
-}cl_long2;
-
-typedef union
-{
-    cl_long  CL_ALIGNED(32) s[4];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_long  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_long  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; };
-#endif
-#if defined( __CL_LONG2__)
-    __cl_long2     v2[2];
-#endif
-#if defined( __CL_LONG4__)
-    __cl_long4     v4;
-#endif
-}cl_long4;
-
-/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */
-typedef  cl_long4  cl_long3;
-
-typedef union
-{
-    cl_long   CL_ALIGNED(64) s[8];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_long  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; };
-#endif
-#if defined( __CL_LONG2__)
-    __cl_long2     v2[4];
-#endif
-#if defined( __CL_LONG4__)
-    __cl_long4     v4[2];
-#endif
-#if defined( __CL_LONG8__ )
-    __cl_long8     v8;
-#endif
-}cl_long8;
-
-typedef union
-{
-    cl_long  CL_ALIGNED(128) s[16];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_long  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; };
-#endif
-#if defined( __CL_LONG2__)
-    __cl_long2     v2[8];
-#endif
-#if defined( __CL_LONG4__)
-    __cl_long4     v4[4];
-#endif
-#if defined( __CL_LONG8__ )
-    __cl_long8     v8[2];
-#endif
-#if defined( __CL_LONG16__ )
-    __cl_long16    v16;
-#endif
-}cl_long16;
-
-
-/* ---- cl_ulongn ---- */
-typedef union
-{
-    cl_ulong  CL_ALIGNED(16) s[2];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_ulong  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong  lo, hi; };
-#endif
-#if defined( __CL_ULONG2__)
-    __cl_ulong2     v2;
-#endif
-}cl_ulong2;
-
-typedef union
-{
-    cl_ulong  CL_ALIGNED(32) s[4];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_ulong  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; };
-#endif
-#if defined( __CL_ULONG2__)
-    __cl_ulong2     v2[2];
-#endif
-#if defined( __CL_ULONG4__)
-    __cl_ulong4     v4;
-#endif
-}cl_ulong4;
-
-/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */
-typedef  cl_ulong4  cl_ulong3;
-
-typedef union
-{
-    cl_ulong   CL_ALIGNED(64) s[8];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_ulong  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; };
-#endif
-#if defined( __CL_ULONG2__)
-    __cl_ulong2     v2[4];
-#endif
-#if defined( __CL_ULONG4__)
-    __cl_ulong4     v4[2];
-#endif
-#if defined( __CL_ULONG8__ )
-    __cl_ulong8     v8;
-#endif
-}cl_ulong8;
-
-typedef union
-{
-    cl_ulong  CL_ALIGNED(128) s[16];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_ulong  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; };
-#endif
-#if defined( __CL_ULONG2__)
-    __cl_ulong2     v2[8];
-#endif
-#if defined( __CL_ULONG4__)
-    __cl_ulong4     v4[4];
-#endif
-#if defined( __CL_ULONG8__ )
-    __cl_ulong8     v8[2];
-#endif
-#if defined( __CL_ULONG16__ )
-    __cl_ulong16    v16;
-#endif
-}cl_ulong16;
-
-
-/* --- cl_floatn ---- */
-
-typedef union
-{
-    cl_float  CL_ALIGNED(8) s[2];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_float  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_float  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_float  lo, hi; };
-#endif
-#if defined( __CL_FLOAT2__)
-    __cl_float2     v2;
-#endif
-}cl_float2;
-
-typedef union
-{
-    cl_float  CL_ALIGNED(16) s[4];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_float   x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_float   s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_float2  lo, hi; };
-#endif
-#if defined( __CL_FLOAT2__)
-    __cl_float2     v2[2];
-#endif
-#if defined( __CL_FLOAT4__)
-    __cl_float4     v4;
-#endif
-}cl_float4;
-
-/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */
-typedef  cl_float4  cl_float3;
-
-typedef union
-{
-    cl_float   CL_ALIGNED(32) s[8];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_float   x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_float   s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_float4  lo, hi; };
-#endif
-#if defined( __CL_FLOAT2__)
-    __cl_float2     v2[4];
-#endif
-#if defined( __CL_FLOAT4__)
-    __cl_float4     v4[2];
-#endif
-#if defined( __CL_FLOAT8__ )
-    __cl_float8     v8;
-#endif
-}cl_float8;
-
-typedef union
-{
-    cl_float  CL_ALIGNED(64) s[16];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_float  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_float  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; };
-#endif
-#if defined( __CL_FLOAT2__)
-    __cl_float2     v2[8];
-#endif
-#if defined( __CL_FLOAT4__)
-    __cl_float4     v4[4];
-#endif
-#if defined( __CL_FLOAT8__ )
-    __cl_float8     v8[2];
-#endif
-#if defined( __CL_FLOAT16__ )
-    __cl_float16    v16;
-#endif
-}cl_float16;
-
-/* --- cl_doublen ---- */
-
-typedef union
-{
-    cl_double  CL_ALIGNED(16) s[2];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_double  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_double s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_double lo, hi; };
-#endif
-#if defined( __CL_DOUBLE2__)
-    __cl_double2     v2;
-#endif
-}cl_double2;
-
-typedef union
-{
-    cl_double  CL_ALIGNED(32) s[4];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_double  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_double  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; };
-#endif
-#if defined( __CL_DOUBLE2__)
-    __cl_double2     v2[2];
-#endif
-#if defined( __CL_DOUBLE4__)
-    __cl_double4     v4;
-#endif
-}cl_double4;
-
-/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */
-typedef  cl_double4  cl_double3;
-
-typedef union
-{
-    cl_double   CL_ALIGNED(64) s[8];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_double  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; };
-#endif
-#if defined( __CL_DOUBLE2__)
-    __cl_double2     v2[4];
-#endif
-#if defined( __CL_DOUBLE4__)
-    __cl_double4     v4[2];
-#endif
-#if defined( __CL_DOUBLE8__ )
-    __cl_double8     v8;
-#endif
-}cl_double8;
-
-typedef union
-{
-    cl_double  CL_ALIGNED(128) s[16];
-#if __CL_HAS_ANON_STRUCT__
-   __CL_ANON_STRUCT__ struct{ cl_double  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; };
-#endif
-#if defined( __CL_DOUBLE2__)
-    __cl_double2     v2[8];
-#endif
-#if defined( __CL_DOUBLE4__)
-    __cl_double4     v4[4];
-#endif
-#if defined( __CL_DOUBLE8__ )
-    __cl_double8     v8[2];
-#endif
-#if defined( __CL_DOUBLE16__ )
-    __cl_double16    v16;
-#endif
-}cl_double16;
-
-/* Macro to facilitate debugging
- * Usage:
- *   Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source.
- *   The first line ends with:   CL_PROGRAM_STRING_DEBUG_INFO \"
- *   Each line thereafter of OpenCL C source must end with: \n\
- *   The last line ends in ";
- *
- *   Example:
- *
- *   const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
- *   kernel void foo( int a, float * b )             \n\
- *   {                                               \n\
- *      // my comment                                \n\
- *      *b[ get_global_id(0)] = a;                   \n\
- *   }                                               \n\
- *   ";
- *
- * This should correctly set up the line, (column) and file information for your source
- * string so you can do source level debugging.
- */
-#define  __CL_STRINGIFY( _x )               # _x
-#define  _CL_STRINGIFY( _x )                __CL_STRINGIFY( _x )
-#define  CL_PROGRAM_STRING_DEBUG_INFO       "#line "  _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n"
-
-#ifdef __cplusplus
-}
-#endif
-
-#if defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__)
-    #if _MSC_VER >=1500
-    #pragma warning( pop )
-    #endif
-#endif
-
-#endif  /* __CL_PLATFORM_H  */

diff --git a/dependencies/ocl-headers/CL/cl_version.h b/dependencies/ocl-headers/CL/cl_version.h
deleted file mode 100644
index 3844938..0000000
--- a/dependencies/ocl-headers/CL/cl_version.h
+++ /dev/null

@@ -1,81 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2018-2020 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-#ifndef __CL_VERSION_H
-#define __CL_VERSION_H
-
-/* Detect which version to target */
-#if !defined(CL_TARGET_OPENCL_VERSION)
-#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 300 (OpenCL 3.0)")
-#define CL_TARGET_OPENCL_VERSION 300
-#endif
-#if CL_TARGET_OPENCL_VERSION != 100 && \
-    CL_TARGET_OPENCL_VERSION != 110 && \
-    CL_TARGET_OPENCL_VERSION != 120 && \
-    CL_TARGET_OPENCL_VERSION != 200 && \
-    CL_TARGET_OPENCL_VERSION != 210 && \
-    CL_TARGET_OPENCL_VERSION != 220 && \
-    CL_TARGET_OPENCL_VERSION != 300
-#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 300 (OpenCL 3.0)")
-#undef CL_TARGET_OPENCL_VERSION
-#define CL_TARGET_OPENCL_VERSION 300
-#endif
-
-
-/* OpenCL Version */
-#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0)
-#define CL_VERSION_3_0  1
-#endif
-#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
-#define CL_VERSION_2_2  1
-#endif
-#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
-#define CL_VERSION_2_1  1
-#endif
-#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
-#define CL_VERSION_2_0  1
-#endif
-#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
-#define CL_VERSION_1_2  1
-#endif
-#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
-#define CL_VERSION_1_1  1
-#endif
-#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
-#define CL_VERSION_1_0  1
-#endif
-
-/* Allow deprecated APIs for older OpenCL versions. */
-#if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS)
-#define CL_USE_DEPRECATED_OPENCL_2_2_APIS
-#endif
-#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
-#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
-#endif
-#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
-#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
-#endif
-#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
-#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
-#endif
-#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
-#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
-#endif
-#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
-#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
-#endif
-
-#endif  /* __CL_VERSION_H */

diff --git a/dependencies/ocl-headers/CL/opencl.h b/dependencies/ocl-headers/CL/opencl.h
deleted file mode 100644
index ef8dd1e..0000000
--- a/dependencies/ocl-headers/CL/opencl.h
+++ /dev/null

@@ -1,32 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2008-2021 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-#ifndef __OPENCL_H
-#define __OPENCL_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <CL/cl.h>
-#include <CL/cl_gl.h>
-#include <CL/cl_ext.h>
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  /* __OPENCL_H   */

diff --git a/dependencies/ocl-stubs/apis_generator.py b/dependencies/ocl-stubs/apis_generator.py
deleted file mode 100644
index 8cc0954..0000000
--- a/dependencies/ocl-stubs/apis_generator.py
+++ /dev/null

@@ -1,164 +0,0 @@
-#!/usr/bin/env python3
-"""apis_generator.py - Generate a C++ interface that automates loading OpenCL.
-
-Usage: apis_generator.py <headerPaths...>
-
-The generated code looks roughly like this:
-------------------------------------------------------------------------
-
-// apis.h
-
-CL_MACRO ( returnType, funcname, (fargs...), (callArgs...) )
-
-"""
-
-import os.path
-import re
-import sys
-
-GENERATED_FILE_WARNING = """
-/*
- * This file is generated by {}
- * Do not edit this file directly.
- */""".format(os.path.basename(__file__))
-
-MACRO_GUARD = """
-#ifndef CL_MACRO
-#error You need to define CL_MACRO before including apis
-#endif"""
-
-
-def include_for_header(header):
-  return '#include <CL/{}>'.format(header)
-
-
-def extract_license_lines(lines):
-  license_lines = []
-  for line in lines:
-    license_lines.append(line)
-    if line.find('*/') != -1:
-      return license_lines
-  sys.exit("License text didn't terminate")
-
-
-assert (extract_license_lines(['/* LICENSE */',
-                               'something']) == ['/* LICENSE */'])
-assert (extract_license_lines(['/* LICENSE', ' * TEXT */',
-                               'something']) == ['/* LICENSE', ' * TEXT */'])
-assert (extract_license_lines(['/* LICENSE', ' * TEXT', ' */', 'something'
-                              ]) == ['/* LICENSE', ' * TEXT', ' */'])
-
-
-def parse_arg_strs(str):
-  paren_depth = 0
-  current_arg = ''
-  ret = []
-  for c in str:
-    if c == '(':
-      paren_depth += 1
-    elif c == ')':
-      paren_depth -= 1
-    if c == ',' and paren_depth == 0:
-      ret.append(current_arg)
-      current_arg = ''
-    else:
-      current_arg += c
-  if current_arg != '':
-    ret.append(current_arg)
-  return ret
-
-
-def process_type(raw):
-  # strip redundant [] (where one is before the name)
-  raw = re.sub(r'(\[[0-9]*\])\s*(\w+)\s*\[[0-9]*\]', r'\2\1', raw)
-  # strip cases where the name comment hinted at the number of elements in an array
-  raw = re.sub(r'\*\s*(\w+)\s*\[[0-9]+\]', r'*\1', raw)
-  raw = ' '.join(raw.split())
-  return raw
-
-
-def parse_api(api_signature):
-  m = None
-
-  api_signature = re.sub('extern', '', api_signature)
-  api_signature = re.sub('CL_\w+', '', api_signature)
-
-  m = re.match(r'\s*(.*)\s+(\w+)\((.*)\)\s*;', api_signature)
-  if m == None:
-    print(api_signature)
-
-  assert (m is not None)
-  assert (len(m.groups()) == 3)
-  arg_strs = None
-  if re.match('\s*void\s*', m.group(3)):
-    arg_strs = []
-  else:
-    arg_strs = parse_arg_strs(m.group(3))
-  args = []
-  for arg_str in arg_strs:
-    nm = re.search(r'(\w+)\s*(\)|\[|$)', arg_str)
-    assert (nm is not None)
-    args.append({'type': process_type(arg_str), 'name': nm.group(1)})
-  return {'return': m.group(1).strip(), 'name': m.group(2), 'args': args}
-
-
-def extract_apis(lines):
-  state = 'scanning'
-  apis = []
-  api_signature = ''
-  for line in lines:
-    if state == 'scanning':
-      if line.find('CL_API_ENTRY') != -1 and line.find('typedef') == -1:
-        api_signature = line
-        if line.find(';') != -1:
-          apis.append(
-              parse_api(
-                  api_signature.replace('/*', '').replace('*/', '').replace(
-                      'CL_CALLBACK ', '')))
-          api_signature = ''
-        else:
-          state = 'expectAPILine'
-    elif state == 'expectAPILine':
-      api_signature += ' ' + line
-      if line.find(';') != -1:
-        apis.append(
-            parse_api(
-                api_signature.replace('/*', '').replace('*/', '').replace(
-                    'CL_CALLBACK ', '')))
-        api_signature = ''
-        state = 'scanning'
-  return apis
-
-
-def generate_apis(apis):
-  print(GENERATED_FILE_WARNING)
-  print()
-  print(MACRO_GUARD)
-  print()
-
-  for api in apis:
-    fargs = (arg['type'] for arg in api['args'])
-    cargs = (arg['name'] for arg in api['args'])
-    print('CL_MACRO( {}, {}, ({}), ({}) )\n'.format(api['return'], api['name'],
-                                                    ', '.join(fargs),
-                                                    ', '.join(cargs)))
-
-
-def main():
-  headers = sys.argv[1:]
-  apis = []
-
-  with open(headers[0]) as header:
-    lines = [line.strip() for line in header.readlines()]
-    license_lines = extract_license_lines(lines)
-
-  for header_name in headers:
-    with open(header_name) as header:
-      lines = [line.strip() for line in header.readlines()]
-      apis = apis + extract_apis(lines)
-
-  generate_apis(apis)
-
-
-if __name__ == '__main__':
-  main()

diff --git a/dependencies/ocl-stubs/stubs.cpp b/dependencies/ocl-stubs/stubs.cpp
deleted file mode 100644
index 2cf3700..0000000
--- a/dependencies/ocl-stubs/stubs.cpp
+++ /dev/null

@@ -1,34 +0,0 @@
-#include <CL/cl.h>
-#include <CL/cl_gl.h>
-#include <CL/cl_egl.h>
-#include <CL/cl_ext.h>
-#include <CL/cl_gl_ext.h>
-#include <dlfcn.h>
-
-
-void* libHandle = nullptr;
-
-#define FUNC_TYPES(rettype, fname, fargs, callArgs)     \
-typedef rettype (* fname ## _t) fargs;
-
-#define FUNC_SYM(rettype, fname, fargs, callArgs)                                                     \
-rettype fname fargs {                                                                                 \
-  if (!libHandle)                                                                                     \
-    libHandle = dlopen( "libOpenCL.so", RTLD_NOW | RTLD_GLOBAL );                                     \
-                                                                                                      \
-  static fname ## _t func = nullptr;                                                                  \
-  if (!func)                                                                                          \
-    func = reinterpret_cast< fname ## _t >(dlsym(libHandle, #fname));                                 \
-                                                                                                      \
-  return func callArgs;                                                                               \
-                                                                                                      \
-}
-
-#define CL_MACRO FUNC_TYPES
-#include "apis.h"
-#undef CL_MACRO
-
-#define CL_MACRO FUNC_SYM
-#include "apis.h"
-#undef CL_MACRO
-

diff --git a/presubmit.sh b/presubmit.sh
deleted file mode 100755
index 6fc037c..0000000
--- a/presubmit.sh
+++ /dev/null

@@ -1,72 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-export TOP=$(pwd)
-
-TOOLCHAIN_URL_arm="https://releases.linaro.org/components/toolchain/binaries/7.5-2019.12/arm-linux-gnueabihf/gcc-linaro-7.5.0-2019.12-x86_64_arm-linux-gnueabihf.tar.xz"
-TOOLCHAIN_URL_aarch64="https://releases.linaro.org/components/toolchain/binaries/7.5-2019.12/aarch64-linux-gnu/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu.tar.xz"
-
-TOOLCHAIN_PREFIX_arm=arm-linux-gnueabihf
-TOOLCHAIN_PREFIX_aarch64=aarch64-linux-gnu
-
-TOOLCHAIN_FILE=${TOP}/toolchain.cmake
-touch ${TOOLCHAIN_FILE}
-BUILD_OPENGL_TEST="OFF"
-
-# Prepare toolchain if needed
-if [[ ${JOB_ARCHITECTURE} != "" ]]; then
-    TOOLCHAIN_URL_VAR=TOOLCHAIN_URL_${JOB_ARCHITECTURE}
-    TOOLCHAIN_URL=${!TOOLCHAIN_URL_VAR}
-    wget ${TOOLCHAIN_URL}
-    TOOLCHAIN_ARCHIVE=${TOOLCHAIN_URL##*/}
-    tar xf ${TOOLCHAIN_ARCHIVE}
-    TOOLCHAIN_DIR=${TOP}/${TOOLCHAIN_ARCHIVE%.tar.xz}
-    export PATH=${TOOLCHAIN_DIR}/bin:${PATH}
-
-    TOOLCHAIN_PREFIX_VAR=TOOLCHAIN_PREFIX_${JOB_ARCHITECTURE}
-    TOOLCHAIN_PREFIX=${!TOOLCHAIN_PREFIX_VAR}
-
-    echo "SET(CMAKE_SYSTEM_NAME Linux)" >> ${TOOLCHAIN_FILE}
-    echo "SET(CMAKE_SYSTEM_PROCESSOR ${JOB_ARCHITECTURE})" >> ${TOOLCHAIN_FILE}
-    echo "SET(CMAKE_C_COMPILER   ${TOOLCHAIN_PREFIX}-gcc)" >> ${TOOLCHAIN_FILE}
-    echo "SET(CMAKE_CXX_COMPILER ${TOOLCHAIN_PREFIX}-g++)" >> ${TOOLCHAIN_FILE}
-    echo "SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> ${TOOLCHAIN_FILE}
-    echo "SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> ${TOOLCHAIN_FILE}
-    echo "SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> ${TOOLCHAIN_FILE}
-fi
-
-if [[ ( ${JOB_ARCHITECTURE} == "" && ${JOB_ENABLE_GL} == "1" ) ]]; then
-    BUILD_OPENGL_TEST="ON"
-    sudo apt-get update
-    sudo apt-get -y install libglu1-mesa-dev freeglut3-dev mesa-common-dev libglew-dev
-fi
-# Prepare headers
-git clone https://github.com/KhronosGroup/OpenCL-Headers.git
-cd OpenCL-Headers
-ln -s CL OpenCL # For OSX builds
-cd ..
-
-# Get and build loader
-git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git
-cd ${TOP}/OpenCL-ICD-Loader
-mkdir build
-cd build
-cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ ..
-make
-
-# Build CTS
-cd ${TOP}
-ls -l
-mkdir build
-cd build
-cmake -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \
-      -DCL_LIB_DIR=${TOP}/OpenCL-ICD-Loader/build \
-      -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
-      -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \
-      -DOPENCL_LIBRARIES="-lOpenCL -lpthread" \
-      -DUSE_CL_EXPERIMENTAL=ON \
-      -DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} \
-      ..
-make -j2
-

diff --git a/scripts/android_bp_head b/scripts/android_bp_head
deleted file mode 100644
index c5cd394..0000000
--- a/scripts/android_bp_head
+++ /dev/null

@@ -1,121 +0,0 @@
-// *** THIS PACKAGE HAS SPECIAL LICENSING CONDITIONS.  PLEASE
-//     CONSULT THE OWNERS AND opensource-licensing@google.com BEFORE
-//     DEPENDING ON IT IN YOUR PROJECT. ***
-package {
-    default_applicable_licenses: ["external_OpenCL-CTS_license"],
-}
-
-// Added automatically by a large-scale-change that took the approach of
-// 'apply every license found to every target'. While this makes sure we respect
-// every license restriction, it may not be entirely correct.
-//
-// e.g. GPL in an MIT project might only apply to the contrib/ directory.
-//
-// Please consider splitting the single license below into multiple licenses,
-// taking care not to lose any license_kind information, and overriding the
-// default license using the 'licenses: [...]' property on targets as needed.
-//
-// For unused files, consider creating a 'fileGroup' with "//visibility:private"
-// to attach the license to, and including a comment whether the files may be
-// used in the current project.
-// See: http://go/android-license-faq
-license {
-    name: "external_OpenCL-CTS_license",
-    visibility: [":__subpackages__"],
-    license_kinds: [
-        "SPDX-license-identifier-Apache-2.0",
-        "SPDX-license-identifier-BSD",
-        "SPDX-license-identifier-MIT",
-        "SPDX-license-identifier-Unlicense",
-        "legacy_by_exception_only", // by exception only
-        "legacy_proprietary", // by exception only
-        "legacy_unencumbered",
-    ],
-    license_text: [
-        "LICENSE.txt",
-    ],
-}
-
-cc_library_headers {
-    name: "ocl-harness-headers",
-    export_include_dirs: [
-        "test_common/harness",
-        "test_common"
-    ]
-}
-
-cc_defaults {
-    name: "ocl-harness-defaults",
-    header_libs: [
-        "ocl-harness-headers",
-    ],
-    export_header_lib_headers: [
-        "ocl-harness-headers",
-    ],
-    cflags: [
-        "-DCL_EXPERIMENTAL",
-        "-DCL_TARGET_OPENCL_VERSION=300",
-        "-Wno-#warnings",
-        "-Wno-absolute-value",
-        "-Wno-asm-operand-widths",
-        "-Wno-c++11-narrowing",
-        "-Wno-dangling-else",
-        "-Wno-date-time",
-        "-Wno-deprecated-declarations",
-        "-Wno-format",
-        "-Wno-ignored-pragmas",
-        "-Wno-ignored-qualifiers",
-        "-Wno-implicit-fallthrough",
-        "-Wno-logical-op-parentheses",
-        "-Wno-macro-redefined",
-        "-Wno-missing-braces",
-        "-Wno-missing-declarations",
-        "-Wno-missing-field-initializers",
-        "-Wno-non-virtual-dtor",
-        "-Wno-overloaded-virtual",
-        "-Wno-parentheses",
-        "-Wno-parentheses-equality",
-        "-Wno-reorder-ctor",
-        "-Wno-return-stack-address",
-        "-Wno-shift-negative-value",
-        "-Wno-sometimes-uninitialized",
-        "-Wno-switch",
-        "-Wno-unknown-pragmas",
-        "-Wno-unneeded-internal-declaration",
-        "-Wno-unused-function",
-        "-Wno-unused-label",
-        "-Wno-unused-parameter",
-        "-Wno-unused-variable",
-        "-Wno-writable-strings",
-        "-fexceptions",
-    ],
-    static_libs: [
-        "ocl-stubs"
-    ],
-}
-
-cc_library {
-    name: "ocl-harness",
-    srcs: [ "test_common/harness/*.cpp", ],
-    defaults: [ "ocl-harness-defaults" ],
-}
-
-cc_defaults {
-    name: "ocl-test-defaults",
-    defaults: [ "ocl-harness-defaults" ],
-    static_libs: [ "ocl-harness" ],
-    compile_multilib: "64",
-    multilib: {
-        lib64: {
-            suffix: "64",
-        },
-    },
-}
-
-cc_defaults {
-    name: "ocl-test-image-defaults",
-    srcs: [ "test_conformance/images/common.cpp" ],
-    export_include_dirs: [ "test_conformance/images" ],
-    defaults: [ "ocl-test-defaults" ],
-}
-

diff --git a/scripts/android_bp_tail b/scripts/android_bp_tail
deleted file mode 100644
index c295fc1..0000000
--- a/scripts/android_bp_tail
+++ /dev/null

@@ -1,18 +0,0 @@
-python_test_host {
-    name: "opencl_cts",
-    main: "scripts/test_opencl_cts.py",
-    srcs: [ "scripts/test_opencl_cts.py" ],
-    data: [ "scripts/test_opencl_cts.xml" ],
-    test_config: "scripts/test_opencl_cts.xml",
-    version: {
-        py2: {
-            enabled: false,
-        },
-        py3: {
-            enabled: true
-        }
-    },
-    test_options: {
-        unit_test: false,
-    },
-}

diff --git a/scripts/generate_test_files.py b/scripts/generate_test_files.py
deleted file mode 100644
index 3d916f6..0000000
--- a/scripts/generate_test_files.py
+++ /dev/null

@@ -1,158 +0,0 @@
-import json
-import os
-import re
-from xml.dom import minidom
-from xml.etree import ElementTree
-
-
-SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
-TEST_JSON = 'tests.json'
-TEST_JSON_PATH = os.path.join(SCRIPT_DIR, TEST_JSON)
-
-
-def write_one_cc_test(test_details, f):
-  # TODO(b/161524664): Remove this exception for spir
-  if test_details['test_name'] == 'spir':
-    return
-
-  stringified_sources = map(lambda s: f'"{s}"', test_details['srcs'])
-  stringified_data = map(lambda s: f'"{s}"', test_details.get('data', []))
-  stringified_cflags = map(lambda s: f'"{s}"', test_details.get('cflags', []))
-
-  default = "ocl-test-defaults"
-  if test_details.get('image_type', False):
-    default = "ocl-test-image-defaults"
-
-  rtti = test_details.get('rtti', False)
-
-  cc_test_string = """
-cc_test {{
-    name: "{}",
-    srcs: [ {} ],
-    data: [ {} ],
-    cflags: [ {} ],
-    defaults: [ "{}" ],
-    rtti: {},
-    gtest: false
-}}
-
-""".format(test_details['binary_name'],
-           ", ".join(stringified_sources),
-           ", ".join(stringified_data),
-           ", ".join(stringified_cflags),
-           default,
-           (str(rtti)).lower())
-
-  empty_field_regex = re.compile("^\s*\w+: \[\s*\],?$")
-  cc_test_string = '\n'.join([line for line in cc_test_string.split('\n')
-                                   if not empty_field_regex.match(line)])
-  f.write(cc_test_string)
-
-
-def generate_android_bp():
-  android_bp_head_path = os.path.join(SCRIPT_DIR, 'android_bp_head')
-  android_bp_tail_path = os.path.join(SCRIPT_DIR, 'android_bp_tail')
-
-  with open('Android.bp', 'w') as android_bp:
-    with open(android_bp_head_path, 'r') as android_bp_head:
-      android_bp.write(android_bp_head.read())
-
-    with open(TEST_JSON_PATH) as f:
-      tests = json.load(f)
-    for test in tests:
-      write_one_cc_test(test, android_bp)
-
-    with open(android_bp_tail_path, 'r') as android_bp_tail:
-      android_bp.write(android_bp_tail.read())
-
-
-def create_subelement_with_attribs(element, tag, attribs):
-  subelement = ElementTree.SubElement(element, tag)
-
-  for key, value in attribs.items():
-    subelement.attrib[key] = value
-
-  return subelement
-
-
-def generate_push_file_rules(configuration):
-  create_subelement_with_attribs(configuration, 'target_preparer',
-      { 'class': "com.android.tradefed.targetprep.RootTargetPreparer" })
-  file_pusher = create_subelement_with_attribs(configuration, 'target_preparer',
-      { 'class': "com.android.compatibility.common.tradefed.targetprep.FilePusher" })
-  create_subelement_with_attribs(file_pusher, 'option',
-      { 'name': "cleanup", 'value': "true" })
-  create_subelement_with_attribs(file_pusher, 'option',
-      { 'name': "append-bitness", 'value': "true" })
-
-  with open(TEST_JSON_PATH, "r") as f:
-    tests = json.load(f)
-
-  for test in tests:
-    if test.get('manual_only', False):
-      continue
-
-    create_subelement_with_attribs(file_pusher, 'option',
-        {
-          'name': "push-file",
-          'key': test['binary_name'],
-          'value': "/data/nativetest64/unrestricted/{}".format(test['binary_name'])
-        })
-
-
-def generate_test_rules(configuration):
-  with open(TEST_JSON_PATH, "r") as f:
-    tests = json.load(f)
-
-  for test in tests:
-    if test.get('manual_only', False):
-      continue
-
-    test_rule = create_subelement_with_attribs(configuration, 'test',
-        { 'class': "com.android.tradefed.testtype.python.PythonBinaryHostTest" })
-
-    create_subelement_with_attribs(test_rule, 'option',
-        { 'name': "par-file-name", 'value': "opencl_cts" })
-    create_subelement_with_attribs(test_rule, 'option',
-        { 'name': "inject-android-serial", 'value': "true" })
-    create_subelement_with_attribs(test_rule, 'option',
-        { 'name': "test-timeout", 'value': test.get('timeout', "30m") })
-    create_subelement_with_attribs(test_rule, 'option',
-        { 'name': "python-options", 'value': test["test_name"] })
-    create_subelement_with_attribs(test_rule, 'option',
-        { 'name': "python-options",
-          'value': "/data/nativetest64/unrestricted/{}".format(test['binary_name']) })
-
-    for arg in test.get('arguments', []):
-      create_subelement_with_attribs(test_rule, 'option',
-          { 'name': "python-options", 'value': arg })
-
-
-def generate_test_xml():
-  configuration = ElementTree.Element('configuration')
-  configuration.attrib['description'] = "Config to run OpenCL CTS"
-
-  logcat = ElementTree.SubElement(configuration, 'option')
-  logcat.attrib['name'] = "logcat-on-failure"
-  logcat.attrib['value'] = "false"
-
-  generate_push_file_rules(configuration)
-  generate_test_rules(configuration)
-
-  stringified_configuration = ElementTree.tostring(configuration, 'utf-8')
-  reparsed_configuration = minidom.parseString(stringified_configuration)
-  with open('test_opencl_cts.xml', 'w') as f:
-    f.write(reparsed_configuration.toprettyxml(indent=" "*4))
-
-
-def main():
-  generate_android_bp()
-  generate_test_xml()
-
-  print("Don't forget to move -")
-  print("    Android.bp -> {ANDROID_ROOT}/external/OpenCL-CTS/Android.bp")
-  print("    test_opencl_cts.xml -> {ANDROID_ROOT}/external/OpenCL-CTS/scripts/test_opencl_cts.xml")
-
-
-if __name__ == '__main__':
-  main()

diff --git a/scripts/test_opencl_cts.py b/scripts/test_opencl_cts.py
deleted file mode 100644
index afe0c34..0000000
--- a/scripts/test_opencl_cts.py
+++ /dev/null

@@ -1,132 +0,0 @@
-#!/usr/bin/env python3
-
-from typing import List, Optional, Tuple
-
-import argparse
-import os
-import pipes
-import subprocess
-import sys
-import unittest
-
-ANDROID_RUNNER_REQUIRED_VERBOSITY = 2
-
-
-def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
-  parser = argparse.ArgumentParser()
-  parser.add_argument('test_name', type=str, help="Name of the test")
-  parser.add_argument('binary_path', type=str,
-                      help="Full path to the binary on device")
-  parser.add_argument('--subtests', type=str, nargs='*',
-                      help="Specific subtests to run")
-  parser.add_argument('--test_args', type=str, nargs='*',
-                      help="Unfiltered arguments to pass to the run command")
-
-  args = parser.parse_args(args)
-  args.subtests = args.subtests or []
-  args.test_args = args.test_args or []
-
-  return args
-
-
-def run_command(command: str) -> Tuple[int, str, str]:
-  serial_number = os.environ.get("ANDROID_SERIAL", "")
-  if not serial_number:
-    raise "$ANDROID_SERIAL is empty, device must be specified"
-
-  full_command = ["adb", "-s", serial_number, "shell", command]
-  ret = subprocess.run(
-      full_command, capture_output=True, universal_newlines=True)
-  return ret.returncode, ret.stdout, ret.stderr
-
-
-def get_all_subtests(binary_path: str) -> List[str]:
-  retcode, output, _ = run_command(f'{binary_path} --help')
-
-  test_name_line = "Test names"
-  index = output.find(test_name_line)
-  if index == -1:
-    return []
-
-  test_names_output = output[index:]
-  test_names = []
-  # Skip the first line which starts with "Test names"
-  for test_name in test_names_output.splitlines()[1:]:
-    if not test_name.startswith((" ", "\t")):
-      break
-    test_names.append(test_name.strip())
-
-  return test_names
-
-
-def get_subtests(binary_path: str, subtests: List[str]) -> List[str]:
-  all_subtests = set(get_all_subtests(binary_path))
-  if not subtests:
-    return all_subtests
-
-  subtests = set(subtests)
-  selected_subtests = subtests & all_subtests
-  remaining_subtests = subtests - all_subtests
-
-  if remaining_subtests:
-    print("Could not find subtests: {}".format(', '.join(remaining_subtests)),
-          file=sys.stderr)
-
-  return sorted(list(selected_subtests))
-
-
-class OpenCLTest(unittest.TestCase):
-
-  def __init__(self, test_name: str, binary_path: str, args: List[str]):
-
-    self._test_name = test_name
-    self._binary_path = binary_path
-    self._args = args
-
-    self.command = " ".join(
-        [self._binary_path, self._test_name] +
-        list(map(pipes.quote, self._args))
-    )
-
-    self.test_func_name = self._test_name
-    setattr(self, self.test_func_name, self.genericTest)
-
-    super().__init__(methodName=self.test_func_name)
-
-  def genericTest(self):
-    retcode, output, oerror = run_command(self.command)
-
-    # TODO(layog): CTS currently return non-zero return code if the
-    # implementation is missing for some API even if the API is not supported by
-    # the version reported by the driver. Need to patch upstream.
-    missing_line = f"ERROR: Test '{self._test_name}' is missing implementation"
-    if missing_line in output or missing_line in oerror:
-      self.skipTest(f"{self._test_name} API not available in the driver")
-
-    self.assertFalse(retcode, "Test exited with non-zero status")
-
-    # TODO(b/158646251): Update upstream to exit with proper error code
-    passed_line = "PASSED test."
-    self.assertTrue(passed_line in output)
-
-
-def main():
-  """main entrypoint for test runner"""
-  args = parse_args(sys.argv[1:])
-
-  # HACK: Name hack to report the actual test name
-  OpenCLTest.__name__ = args.test_name
-  OpenCLTest.__qualname__ = args.test_name
-
-  suite = unittest.TestSuite()
-  subtests = get_subtests(args.binary_path, args.subtests)
-  for subtest in subtests:
-    suite.addTest(OpenCLTest(subtest, args.binary_path, args.test_args))
-
-  runner = unittest.TextTestRunner(
-      stream=sys.stderr, verbosity=ANDROID_RUNNER_REQUIRED_VERBOSITY)
-  runner.run(suite)
-
-
-if __name__ == "__main__":
-  main()

diff --git a/scripts/test_opencl_cts.xml b/scripts/test_opencl_cts.xml
deleted file mode 100644
index de9c610..0000000
--- a/scripts/test_opencl_cts.xml
+++ /dev/null

@@ -1,257 +0,0 @@
-<?xml version="1.0" ?>
-<configuration description="Config to run OpenCL CTS">
-    <option name="logcat-on-failure" value="false"/>
-    <target_preparer class="com.android.tradefed.targetprep.RootTargetPreparer"/>
-    <target_preparer class="com.android.compatibility.common.tradefed.targetprep.FilePusher">
-        <option name="cleanup" value="true"/>
-        <option name="append-bitness" value="true"/>
-        <option key="ocl-test-allocations" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-allocations"/>
-        <option key="ocl-test-api" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-api"/>
-        <option key="ocl-test-atomics" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-atomics"/>
-        <option key="ocl-test-basic" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-basic"/>
-        <option key="ocl-test-buffers" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-buffers"/>
-        <option key="ocl-test-c11-atomics" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-c11-atomics"/>
-        <option key="ocl-test-commonfns" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-commonfns"/>
-        <option key="ocl-test-computeinfo" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-computeinfo"/>
-        <option key="ocl-test-contractions" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-contractions"/>
-        <option key="ocl-test-device-execution" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-device-execution"/>
-        <option key="ocl-test-device-partition" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-device-partition"/>
-        <option key="ocl-test-device-timer" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-device-timer"/>
-        <option key="ocl-test-events" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-events"/>
-        <option key="ocl-test-generic-address-space" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-generic-address-space"/>
-        <option key="ocl-test-geometrics" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-geometrics"/>
-        <option key="ocl-test-mem-host-flags" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-mem-host-flags"/>
-        <option key="ocl-test-multiple-device-context" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-multiple-device-context"/>
-        <option key="ocl-test-non-uniform-work-group" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-non-uniform-work-group"/>
-        <option key="ocl-test-pipes" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-pipes"/>
-        <option key="ocl-test-profiling" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-profiling"/>
-        <option key="ocl-test-relationals" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-relationals"/>
-        <option key="ocl-test-subgroups" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-subgroups"/>
-        <option key="ocl-test-svm" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-svm"/>
-        <option key="ocl-test-thread-dimensions" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-thread-dimensions"/>
-        <option key="ocl-test-vectors" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-vectors"/>
-        <option key="ocl-test-image-clcopyimage" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-image-clcopyimage"/>
-        <option key="ocl-test-image-clfillimage" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-image-clfillimage"/>
-        <option key="ocl-test-image-clgetinfo" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-image-clgetinfo"/>
-        <option key="ocl-test-image-clreadwriteimage" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-image-clreadwriteimage"/>
-        <option key="ocl-test-image-kernel-image-methods" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-image-kernel-image-methods"/>
-        <option key="ocl-test-image-samplerlessreads" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-image-samplerlessreads"/>
-    </target_preparer>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="allocations"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-allocations"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="api"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-api"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="atomics"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-atomics"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="basic"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-basic"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="buffers"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-buffers"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="120m"/>
-        <option name="python-options" value="c11-atomics"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-c11-atomics"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="commonfns"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-commonfns"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="computeinfo"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-computeinfo"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="contractions"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-contractions"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="device-execution"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-device-execution"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="device-partition"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-device-partition"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="device-timer"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-device-timer"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="events"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-events"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="generic-address-space"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-generic-address-space"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="geometrics"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-geometrics"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="mem-host-flags"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-mem-host-flags"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="multiple-device-context"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-multiple-device-context"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="non-uniform-work-group"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-non-uniform-work-group"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="pipes"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-pipes"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="profiling"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-profiling"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="relationals"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-relationals"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="subgroups"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-subgroups"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="svm"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-svm"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="thread-dimensions"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-thread-dimensions"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="vectors"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-vectors"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="clcopyimage"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-image-clcopyimage"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="clfillimage"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-image-clfillimage"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="clgetinfo"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-image-clgetinfo"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="clreadwriteimage"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-image-clreadwriteimage"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="kernel-image-methods"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-image-kernel-image-methods"/>
-    </test>
-    <test class="com.android.tradefed.testtype.python.PythonBinaryHostTest">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="samplerlessreads"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-image-samplerlessreads"/>
-    </test>
-</configuration>

diff --git a/scripts/tests.json b/scripts/tests.json
deleted file mode 100644
index 769c1d7..0000000
--- a/scripts/tests.json
+++ /dev/null

@@ -1,325 +0,0 @@
-[
-    {
-        "test_name": "allocations",
-        "binary_name": "ocl-test-allocations",
-        "srcs": [
-            "test_conformance/allocations/*.cpp"
-        ]
-    },
-    {
-        "test_name": "api",
-        "binary_name": "ocl-test-api",
-        "srcs": [
-            "test_conformance/api/*.cpp"
-        ]
-    },
-    {
-        "test_name": "atomics",
-        "binary_name": "ocl-test-atomics",
-        "srcs": [
-            "test_conformance/atomics/*.cpp"
-        ]
-    },
-    {
-        "test_name": "basic",
-        "binary_name": "ocl-test-basic",
-        "srcs": [
-            "test_conformance/basic/*.cpp"
-        ]
-    },
-    {
-        "test_name": "buffers",
-        "binary_name": "ocl-test-buffers",
-        "srcs": [
-            "test_conformance/buffers/*.cpp"
-        ]
-    },
-    {
-        "test_name": "c11-atomics",
-        "binary_name": "ocl-test-c11-atomics",
-        "srcs": [
-            "test_conformance/c11_atomics/*.cpp"
-        ],
-        "timeout": "120m"
-    },
-    {
-        "test_name": "commonfns",
-        "binary_name": "ocl-test-commonfns",
-        "srcs": [
-            "test_conformance/commonfns/*.cpp"
-        ]
-    },
-    {
-        "test_name": "compiler",
-        "binary_name": "ocl-test-compiler",
-        "srcs": [
-            "test_conformance/compiler/*.cpp"
-        ],
-        "data": [
-            "test_conformance/compiler/includeTestDirectory/testIncludeFile.h",
-            "test_conformance/compiler/secondIncludeTestDirectory/testIncludeFile.h"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "computeinfo",
-        "binary_name": "ocl-test-computeinfo",
-        "srcs": [
-            "test_conformance/computeinfo/*.cpp"
-        ]
-    },
-    {
-        "test_name": "contractions",
-        "binary_name": "ocl-test-contractions",
-        "srcs": [
-            "test_conformance/contractions/*.cpp"
-        ]
-    },
-    {
-        "test_name": "conversions",
-        "binary_name": "ocl-test-conversions",
-        "srcs": [
-            "test_conformance/conversions/*.cpp"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "device-execution",
-        "binary_name": "ocl-test-device-execution",
-        "srcs": [
-            "test_conformance/device_execution/*.cpp"
-        ]
-    },
-    {
-        "test_name": "device-partition",
-        "binary_name": "ocl-test-device-partition",
-        "srcs": [
-            "test_conformance/device_partition/*.cpp"
-        ]
-    },
-    {
-        "test_name": "device-timer",
-        "binary_name": "ocl-test-device-timer",
-        "srcs": [
-            "test_conformance/device_timer/*.cpp"
-        ]
-    },
-    {
-        "test_name": "events",
-        "binary_name": "ocl-test-events",
-        "srcs": [
-            "test_conformance/events/*.cpp"
-        ]
-    },
-    {
-        "test_name": "generic-address-space",
-        "binary_name": "ocl-test-generic-address-space",
-        "srcs": [
-            "test_conformance/generic_address_space/*.cpp"
-        ]
-    },
-    {
-        "test_name": "geometrics",
-        "binary_name": "ocl-test-geometrics",
-        "srcs": [
-            "test_conformance/geometrics/*.cpp"
-        ]
-    },
-    {
-        "test_name": "half",
-        "binary_name": "ocl-test-half",
-        "srcs": [
-            "test_conformance/half/*.cpp"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "integer-ops",
-        "binary_name": "ocl-test-integer-ops",
-        "srcs": [
-            "test_conformance/integer_ops/*.cpp"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "math-brute-force",
-        "binary_name": "ocl-test-math-brute-force",
-        "srcs": [
-            "test_conformance/math_brute_force/*.cpp"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "mem-host-flags",
-        "binary_name": "ocl-test-mem-host-flags",
-        "srcs": [
-            "test_conformance/mem_host_flags/*.cpp"
-        ]
-    },
-    {
-        "test_name": "multiple-device-context",
-        "binary_name": "ocl-test-multiple-device-context",
-        "srcs": [
-            "test_conformance/multiple_device_context/*.cpp"
-        ]
-    },
-    {
-        "test_name": "non-uniform-work-group",
-        "binary_name": "ocl-test-non-uniform-work-group",
-        "srcs": [
-            "test_conformance/non_uniform_work_group/*.cpp"
-        ]
-    },
-    {
-        "test_name": "pipes",
-        "binary_name": "ocl-test-pipes",
-        "srcs": [
-            "test_conformance/pipes/*.cpp"
-        ]
-    },
-    {
-        "test_name": "printf",
-        "binary_name": "ocl-test-printf",
-        "srcs": [
-            "test_conformance/printf/*.cpp"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "profiling",
-        "binary_name": "ocl-test-profiling",
-        "srcs": [
-            "test_conformance/profiling/*.cpp"
-        ]
-    },
-    {
-        "test_name": "relationals",
-        "binary_name": "ocl-test-relationals",
-        "srcs": [
-            "test_conformance/relationals/*.cpp"
-        ]
-    },
-    {
-        "test_name": "select",
-        "binary_name": "ocl-test-select",
-        "srcs": [
-            "test_conformance/select/*.cpp"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "spir",
-        "binary_name": "ocl-test-spir",
-        "srcs": [
-            "test_conformance/spir/*.cpp",
-            "test_conformance/math_brute_force/FunctionList.cpp",
-            "test_common/miniz/miniz.c"
-        ],
-        "data": [ "test_conformance/spir/*.zip" ],
-        "cflags": [
-            "-DFUNCTION_LIST_ULPS_ONLY",
-            "-Wno-unused-private-field"
-        ],
-        "rtti": true,
-        "manual_only": true
-    },
-    {
-        "test_name": "spirv-new",
-        "binary_name": "ocl-test-spirv-new",
-        "srcs": [
-            "test_conformance/spirv_new/*.cpp",
-            "test_conformance/math_brute_force/reference_math.cpp",
-            "test_conformance/math_brute_force/utility.cpp"
-        ],
-        "data": [
-            "test_conformance/spirv_new/spirv_asm/*",
-            "test_conformance/spirv_new/spirv_bin/*"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "subgroups",
-        "binary_name": "ocl-test-subgroups",
-        "srcs": [
-            "test_conformance/subgroups/*.cpp"
-        ]
-    },
-    {
-        "test_name": "svm",
-        "binary_name": "ocl-test-svm",
-        "srcs": [
-            "test_conformance/SVM/*.cpp"
-        ]
-    },
-    {
-        "test_name": "thread-dimensions",
-        "binary_name": "ocl-test-thread-dimensions",
-        "srcs": [
-            "test_conformance/thread_dimensions/*.cpp"
-        ]
-    },
-    {
-        "test_name": "vectors",
-        "binary_name": "ocl-test-vectors",
-        "srcs": [
-            "test_conformance/vectors/*.cpp"
-        ]
-    },
-    {
-        "test_name": "clcopyimage",
-        "binary_name": "ocl-test-image-clcopyimage",
-        "srcs": [
-            "test_conformance/images/clCopyImage/*.cpp"
-        ],
-        "image_type": true
-    },
-    {
-        "test_name": "clfillimage",
-        "binary_name": "ocl-test-image-clfillimage",
-        "srcs": [
-            "test_conformance/images/clFillImage/*.cpp"
-        ],
-        "image_type": true
-    },
-    {
-        "test_name": "clgetinfo",
-        "binary_name": "ocl-test-image-clgetinfo",
-        "srcs": [
-            "test_conformance/images/clGetInfo/*.cpp"
-        ],
-        "image_type": true
-    },
-    {
-        "test_name": "clreadwriteimage",
-        "binary_name": "ocl-test-image-clreadwriteimage",
-        "srcs": [
-            "test_conformance/images/clReadWriteImage/*.cpp"
-        ],
-        "image_type": true
-    },
-    {
-        "test_name": "kernel-image-methods",
-        "binary_name": "ocl-test-image-kernel-image-methods",
-        "srcs": [
-            "test_conformance/images/kernel_image_methods/*.cpp"
-        ],
-        "image_type": true
-    },
-    {
-        "test_name": "kernel-read-write",
-        "binary_name": "ocl-test-image-kernel-read-write",
-        "srcs": [
-            "test_conformance/images/kernel_read_write/*.cpp"
-        ],
-        "manual_only": true,
-        "image_type": true
-    },
-    {
-        "test_name": "samplerlessreads",
-        "binary_name": "ocl-test-image-samplerlessreads",
-        "srcs": [
-            "test_conformance/images/samplerlessReads/*.cpp"
-        ],
-        "image_type": true
-    }
-]

diff --git a/test_common/CMakeLists.txt b/test_common/CMakeLists.txt
index 2d4bc19..e890a67 100644
--- a/test_common/CMakeLists.txt
+++ b/test_common/CMakeLists.txt

@@ -8,14 +8,12 @@
     harness/msvc9.c
     harness/crc32.cpp
     harness/errorHelpers.cpp
-    harness/featureHelpers.cpp
     harness/genericThread.cpp
     harness/imageHelpers.cpp
     harness/kernelHelpers.cpp
     harness/deviceInfo.cpp
     harness/os_helpers.cpp
     harness/parseParameters.cpp
-    harness/propertyHelpers.cpp
     harness/testHarness.cpp
     harness/ThreadPool.cpp
     miniz/miniz.c

diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp
index def78d7..c02a4b1 100644
--- a/test_common/gl/helpers.cpp
+++ b/test_common/gl/helpers.cpp

@@ -14,7 +14,11 @@
 // limitations under the License.
 //
 #include "helpers.h"
-#include "harness/imageHelpers.h"
+#include "../harness/imageHelpers.h"
+
+// convert_float_to_half and convert_half_to_float may be found in test_conformance/images/image_helpers.cpp
+cl_ushort convert_float_to_half( cl_float f );
+cl_float  convert_half_to_float( cl_ushort h );
 
 #if defined( __APPLE__ )
     #include <OpenGL/glu.h>
@@ -1277,7 +1281,9 @@
         case kUInt:
         *((unsigned int*)p) = val*0xffffffff;
           break;
-        case kHalf: *((cl_half *)p) = convert_float_to_half(val); break;
+        case kHalf:
+          *((cl_ushort*)p) = convert_float_to_half(val);
+          break;
         default:
           log_error("Test error: unexpected type enum 0x%x\n",type);
       }
@@ -1539,7 +1545,9 @@
         case kUInt:
           *((unsigned int*)p) = val*0xffffffff;
           break;
-        case kHalf: *((cl_half *)p) = convert_float_to_half(val); break;
+        case kHalf:
+          *((cl_ushort*)p) = convert_float_to_half(val);
+          break;
         default:
           log_error("Test error: unexpected type enum 0x%x\n",type);
       }

diff --git a/test_common/gles/gl_headers.h b/test_common/gles/gl_headers.h
index e0d4632..849da71 100644
--- a/test_common/gles/gl_headers.h
+++ b/test_common/gles/gl_headers.h

@@ -24,12 +24,10 @@
 #include <GLES3/gl3.h>
 #else
 #include <GLES2/gl2.h>
-#define glTexImage3DOES glTexImage3D
-#define glUnmapBufferOES glUnmapBuffer
-#define glMapBufferRangeEXT glMapBufferRange
 #endif
 
 #include <GLES2/gl2ext.h>
+#include <GLES2/gl2extQCOM.h>
 
 // Some macros to minimize the changes in the tests from GL to GLES2
 #define glGenRenderbuffersEXT        glGenRenderbuffers
@@ -42,26 +40,21 @@
 #define glDeleteFramebuffersEXT      glDeleteFramebuffers
 #define glBindFramebufferEXT         glBindFramebuffer
 #define glFramebufferRenderbufferEXT glFramebufferRenderbuffer
-
-#ifndef GL_ES_VERSION_3_0
-#define GL_RGBA32F GL_RGBA32F_EXT
-#define GL_READ_ONLY GL_BUFFER_ACCESS_OES
-#define GL_HALF_FLOAT_ARB GL_HALF_FLOAT_OES
-#define GL_BGRA GL_BGRA_EXT
-#else
-#define GL_HALF_FLOAT_ARB GL_HALF_FLOAT
-#endif
-
+#define glTexImage3D                 glTexImage3DOES
 #define glutGetProcAddress           eglGetProcAddress
 
 #define GL_FRAMEBUFFER_EXT           GL_FRAMEBUFFER
 #define GL_FRAMEBUFFER_COMPLETE_EXT  GL_FRAMEBUFFER_COMPLETE
 #define GL_RENDERBUFFER_INTERNAL_FORMAT_EXT GL_RENDERBUFFER_INTERNAL_FORMAT
 #define GL_RENDERBUFFER_EXT          GL_RENDERBUFFER
+#define GL_COLOR_ATTACHMENT0_EXT     GL_COLOR_ATTACHMENT0
 #define GL_DEPTH_ATTACHMENT_EXT      GL_DEPTH_ATTACHMENT
+#define GL_TEXTURE_3D                GL_TEXTURE_3D_OES
+#define GL_READ_ONLY                 GL_BUFFER_ACCESS_OES
 
+#define GL_HALF_FLOAT_ARB            GL_HALF_FLOAT_OES
+#define GL_BGRA                      GL_BGRA_EXT
 #define GL_RGBA32F_ARB               GL_RGBA
-#define GL_BGRA GL_BGRA_EXT
 
 typedef unsigned short GLhalf;
 

diff --git a/test_common/gles/helpers.cpp b/test_common/gles/helpers.cpp
index 34f40b4..188f903 100644
--- a/test_common/gles/helpers.cpp
+++ b/test_common/gles/helpers.cpp

@@ -16,7 +16,6 @@
 #include "helpers.h"
 
 #include "gl_headers.h"
-#include "CL/cl_half.h"
 
 #define CHECK_ERROR()\
     {GLint __error = glGetError(); if(__error) {log_error( "GL ERROR: %s!\n", gluErrorString( err ));}}
@@ -962,6 +961,49 @@
     }
 }
 
+cl_ushort float2half_rte( float f )  // Converts a float to the bit pattern of a 16-bit half, rounding to nearest even.
+{
+    union{ float f; cl_uint u; } u = {f};  // gives access to the raw bits of f
+    cl_uint sign = (u.u >> 16) & 0x8000;   // float sign (bit 31) moved to the half sign position (bit 15)
+    float x = fabsf(f);
+
+    // NaN: x != x only holds for a NaN
+    if( x != x )
+    {
+        u.u >>= (24-11);    // drop the 13 low mantissa bits that do not fit in a half
+        u.u &= 0x7fff;      // keep exponent and mantissa only; the sign is OR'ed back in below
+        u.u |= 0x0200;      // silence the NaN (sets the top bit of the 10-bit half mantissa)
+        return u.u | sign;
+    }
+
+    // overflow: 0x1.ffep15 (65520) and anything larger becomes infinity (0x7c00)
+    if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
+        return 0x7c00 | sign;
+
+    // underflow: magnitudes up to 0x1.0p-25 become a signed zero
+    if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+
+    // very small: anything left below 0x1.8p-24 becomes the smallest half denormal (0x0001)
+    if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
+        return sign | 1;
+
+    // half denormal: magnitudes below 0x1.0p-14
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);  // scales x into the float denormal range; the raw bits are returned as the half mantissa (presumably relies on round-to-nearest-even FP mode - TODO confirm)
+        return sign | u.u;
+    }
+
+    u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
+    u.u &= 0x7f800000;   // keep only the exponent field, leaving a power of two
+    x += u.f;            // adding then subtracting that power of two discards the low bits of x (rounding presumably follows the current FP mode - TODO confirm)
+    u.f = x - u.f;
+    u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);  // rebias the exponent: float bias 127 minus half bias 15
+
+    return (u.u >> (24-11)) | sign;  // shift into the half layout and restore the sign
+}
+
 void* CreateRandomData( ExplicitType type, size_t count, MTdata d )
 {
     switch(type)
@@ -1058,8 +1100,7 @@
 
             for( size_t i = 0; i < count; i++ )
             {
-                p[i] = cl_half_from_float(get_random_float(0.f, 1.f, d),
-                                          CL_HALF_RTE);
+                p[ i ] = float2half_rte(get_random_float( 0.f, 1.f, d ));
             }
 
             return (void*)p;

diff --git a/test_common/harness/ThreadPool.cpp b/test_common/harness/ThreadPool.cpp
index 31985aa..c329452 100644
--- a/test_common/harness/ThreadPool.cpp
+++ b/test_common/harness/ThreadPool.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -19,10 +19,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
-// or any other POSIX system
+#if  defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 )  // or any other POSIX system
 
-#if defined(_WIN32)
+#if defined( _WIN32 )
 #include <windows.h>
 #if defined(_MSC_VER)
 #include <intrin.h>
@@ -39,89 +38,84 @@
 #endif // !_WIN32
 
 // declarations
-#ifdef _WIN32
-void ThreadPool_WorkerFunc(void *p);
+#ifdef  _WIN32
+void ThreadPool_WorkerFunc( void *p );
 #else
-void *ThreadPool_WorkerFunc(void *p);
+void *ThreadPool_WorkerFunc( void *p );
 #endif
 void ThreadPool_Init(void);
 void ThreadPool_Exit(void);
 
-#if defined(__MINGW32__)
-// Mutex for implementing super heavy atomic operations if you don't have GCC or
-// MSVC
-CRITICAL_SECTION gAtomicLock;
-#elif defined(__GNUC__) || defined(_MSC_VER)
+#if defined (__MINGW32__)
+    // Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC
+    CRITICAL_SECTION     gAtomicLock;
+#elif defined( __GNUC__ ) || defined( _MSC_VER)
 #else
-pthread_mutex_t gAtomicLock;
+    pthread_mutex_t     gAtomicLock;
 #endif
 
-// Atomic add operator with mem barrier.  Mem barrier needed to protect state
-// modified by the worker functions.
-cl_int ThreadPool_AtomicAdd(volatile cl_int *a, cl_int b)
+// Atomically adds b to *a and returns the value *a held before the add.  Mem barrier needed to protect state modified by the worker functions.
+cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
 {
-#if defined(__MINGW32__)
+#if defined (__MINGW32__)
     // No atomics on Mingw32
     EnterCriticalSection(&gAtomicLock);
     cl_int old = *a;
     *a = old + b;
     LeaveCriticalSection(&gAtomicLock);
     return old;
-#elif defined(__GNUC__)
-    // GCC extension:
-    // http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
-    return __sync_fetch_and_add(a, b);
-    // do we need __sync_synchronize() here, too?  GCC docs are unclear whether
-    // __sync_fetch_and_add does a synchronize
-#elif defined(_MSC_VER)
-    return (cl_int)_InterlockedExchangeAdd((volatile LONG *)a, (LONG)b);
+#elif defined( __GNUC__ )
+    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
+    return __sync_fetch_and_add( a, b );
+    // NOTE(review): the GCC manual describes the __sync builtins as a full barrier in most cases, so no extra __sync_synchronize() appears to be needed - confirm for the target toolchain
+#elif defined( _MSC_VER )
+    return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b );
 #else
-#warning Please add a atomic add implementation here, with memory barrier.  Fallback code is slow.
-    if (pthread_mutex_lock(&gAtomicLock))
-        log_error("Atomic operation failed. pthread_mutex_lock(&gAtomicLock) "
-                  "returned an error\n");
+    #warning  Please add a atomic add implementation here, with memory barrier.  Fallback code is slow.
+    if( pthread_mutex_lock(&gAtomicLock) )
+        log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
     cl_int old = *a;
     *a = old + b;
-    if (pthread_mutex_unlock(&gAtomicLock))
-        log_error("Failed to release gAtomicLock. Further atomic operations "
-                  "may deadlock!\n");
+    if( pthread_mutex_unlock(&gAtomicLock) )
+        log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n");
     return old;
 #endif
 }
 
-#if defined(_WIN32)
+#if defined( _WIN32 )
 // Uncomment the following line if Windows XP support is not required.
 // #define HAS_INIT_ONCE_EXECUTE_ONCE 1
 
 #if defined(HAS_INIT_ONCE_EXECUTE_ONCE)
-#define _INIT_ONCE INIT_ONCE
-#define _PINIT_ONCE PINIT_ONCE
+#define _INIT_ONCE           INIT_ONCE
+#define _PINIT_ONCE          PINIT_ONCE
 #define _InitOnceExecuteOnce InitOnceExecuteOnce
 #else // !HAS_INIT_ONCE_EXECUTE_ONCE
 
 typedef volatile LONG _INIT_ONCE;
 typedef _INIT_ONCE *_PINIT_ONCE;
-typedef BOOL(CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *);
+typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *);
 
 #define _INIT_ONCE_UNINITIALIZED 0
-#define _INIT_ONCE_IN_PROGRESS 1
-#define _INIT_ONCE_DONE 2
+#define _INIT_ONCE_IN_PROGRESS   1
+#define _INIT_ONCE_DONE          2
 
-static BOOL _InitOnceExecuteOnce(_PINIT_ONCE InitOnce, _PINIT_ONCE_FN InitFn,
-                                 PVOID Parameter, LPVOID *Context)
+static BOOL _InitOnceExecuteOnce(  // Fallback one-time initializer: one caller runs InitFn, the others poll until it is done. Always returns TRUE.
+  _PINIT_ONCE InitOnce,            // state word: _INIT_ONCE_UNINITIALIZED -> _INIT_ONCE_IN_PROGRESS -> _INIT_ONCE_DONE
+  _PINIT_ONCE_FN InitFn,           // callback run by the caller that wins the compare-exchange
+  PVOID Parameter,                 // passed through to InitFn
+  LPVOID *Context                  // passed through to InitFn
+)
 {
-    while (*InitOnce != _INIT_ONCE_DONE)
+    while ( *InitOnce != _INIT_ONCE_DONE )
     {
-        if (*InitOnce != _INIT_ONCE_IN_PROGRESS
-            && _InterlockedCompareExchange(InitOnce, _INIT_ONCE_IN_PROGRESS,
-                                           _INIT_ONCE_UNINITIALIZED)
-                == _INIT_ONCE_UNINITIALIZED)
+        if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED )  // only the caller that flips UNINITIALIZED to IN_PROGRESS gets in
         {
-            InitFn(InitOnce, Parameter, Context);
+            InitFn( InitOnce, Parameter, Context );  // NOTE(review): the BOOL result of InitFn is ignored
             *InitOnce = _INIT_ONCE_DONE;
             return TRUE;
         }
-        Sleep(1);
+        Sleep( 1 );  // initialization is not finished yet: wait 1 ms and re-check
     }
     return TRUE;
 }
@@ -131,352 +125,312 @@
 // #define HAS_CONDITION_VARIABLE 1
 
 #if defined(HAS_CONDITION_VARIABLE)
-#define _CONDITION_VARIABLE CONDITION_VARIABLE
+#define _CONDITION_VARIABLE          CONDITION_VARIABLE
 #define _InitializeConditionVariable InitializeConditionVariable
-#define _SleepConditionVariableCS SleepConditionVariableCS
-#define _WakeAllConditionVariable WakeAllConditionVariable
+#define _SleepConditionVariableCS    SleepConditionVariableCS
+#define _WakeAllConditionVariable    WakeAllConditionVariable
 #else // !HAS_CONDITION_VARIABLE
 typedef struct
 {
-    HANDLE mEvent; // Used to park the thread.
-    // Used to protect mWaiters, mGeneration and mReleaseCount:
-    CRITICAL_SECTION mLock[1];
-    volatile cl_int mWaiters; // Number of threads waiting on this cond var.
-    volatile cl_int mGeneration; // Wait generation count.
-    volatile cl_int mReleaseCount; // Number of releases to execute before
-                                   // reseting the event.
+    HANDLE           mEvent; // Event the waiting threads block on.
+    CRITICAL_SECTION mLock[1]; // Protects mWaiters, mGeneration and mReleaseCount.
+    volatile cl_int  mWaiters; // Number of threads waiting on this cond var.
+    volatile cl_int  mGeneration; // Wait generation count; incremented by each wake-all that finds waiters.
+    volatile cl_int  mReleaseCount; // Number of waiters still to be released before the event is reset.
 } _CONDITION_VARIABLE;
 
 typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE;
 
-static void _InitializeConditionVariable(_PCONDITION_VARIABLE cond_var)
+static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var )  // Sets up the emulated condition variable: event, lock and counters.
 {
-    cond_var->mEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
-    InitializeCriticalSection(cond_var->mLock);
+    cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL );  // manual-reset event, initially non-signaled, unnamed
+    InitializeCriticalSection( cond_var->mLock );
     cond_var->mWaiters = 0;
     cond_var->mGeneration = 0;
-#if !defined(NDEBUG)
+#if !defined ( NDEBUG )  // NOTE(review): mReleaseCount is zeroed only when NDEBUG is not defined, yet the wait/wake code reads it unconditionally - confirm it is zero-initialized elsewhere in NDEBUG builds
     cond_var->mReleaseCount = 0;
 #endif // !NDEBUG
 }
 
-static void _SleepConditionVariableCS(_PCONDITION_VARIABLE cond_var,
-                                      PCRITICAL_SECTION cond_lock,
-                                      DWORD ignored)
+static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored)  // Releases cond_lock, blocks until a later wake-all, then re-acquires cond_lock. The third argument is not used.
 {
-    EnterCriticalSection(cond_var->mLock);
+    EnterCriticalSection( cond_var->mLock );
     cl_int generation = cond_var->mGeneration;
     ++cond_var->mWaiters;
-    LeaveCriticalSection(cond_var->mLock);
-    LeaveCriticalSection(cond_lock);
+    LeaveCriticalSection( cond_var->mLock );
+    LeaveCriticalSection( cond_lock );  // give up the caller's lock while waiting
 
-    while (TRUE)
+    while ( TRUE )
     {
-        WaitForSingleObject(cond_var->mEvent, INFINITE);
-        EnterCriticalSection(cond_var->mLock);
-        BOOL done =
-            cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation;
-        LeaveCriticalSection(cond_var->mLock);
-        if (done)
+        WaitForSingleObject( cond_var->mEvent, INFINITE );
+        EnterCriticalSection( cond_var->mLock );
+        BOOL done = cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation;  // leave only for a wake-all issued after this thread started waiting, and only while releases remain
+        LeaveCriticalSection( cond_var->mLock );
+        if ( done )
         {
             break;
         }
     }
 
-    EnterCriticalSection(cond_lock);
-    EnterCriticalSection(cond_var->mLock);
-    if (--cond_var->mReleaseCount == 0)
+    EnterCriticalSection( cond_lock );  // re-acquire the caller's lock before returning
+    EnterCriticalSection( cond_var->mLock );
+    if ( --cond_var->mReleaseCount == 0 )  // the last released waiter resets the event
     {
-        ResetEvent(cond_var->mEvent);
+        ResetEvent( cond_var->mEvent );
     }
     --cond_var->mWaiters;
-    LeaveCriticalSection(cond_var->mLock);
+    LeaveCriticalSection( cond_var->mLock );
 }
 
-static void _WakeAllConditionVariable(_PCONDITION_VARIABLE cond_var)
+static void _WakeAllConditionVariable( _PCONDITION_VARIABLE cond_var )  // Releases every thread currently waiting on cond_var; does nothing when there are no waiters.
 {
-    EnterCriticalSection(cond_var->mLock);
-    if (cond_var->mWaiters > 0)
+    EnterCriticalSection( cond_var->mLock );
+    if (cond_var->mWaiters > 0 )
     {
         ++cond_var->mGeneration;
         cond_var->mReleaseCount = cond_var->mWaiters;
-        SetEvent(cond_var->mEvent);
+        SetEvent( cond_var->mEvent );  // signal the event the waiters are blocked on
     }
-    LeaveCriticalSection(cond_var->mLock);
+    LeaveCriticalSection( cond_var->mLock );
 }
 #endif // !HAS_CONDITION_VARIABLE
 #endif // _WIN32
 
-#define MAX_COUNT (1 << 29)
+#define MAX_COUNT   (1<<29)
 
-// Global state to coordinate whether the threads have been launched
-// successfully or not
-#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
+// Global state to coordinate whether the threads have been launched successfully or not
+#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600)
 static _INIT_ONCE threadpool_init_control;
-#elif defined(_WIN32) // MingW of XP
+#elif defined (_WIN32)  // MingW of XP
 static int threadpool_init_control;
 #else // Posix platforms
 pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT;
 #endif
-cl_int threadPoolInitErr = -1; // set to CL_SUCCESS on successful thread launch
+cl_int threadPoolInitErr = -1;          // set to CL_SUCCESS on successful thread launch
 
-// critical region lock around ThreadPool_Do.  We can only run one ThreadPool_Do
-// at a time, because we are too lazy to set up a queue here, and don't expect
-// to need one.
-#if defined(_WIN32)
-CRITICAL_SECTION gThreadPoolLock[1];
+// critical region lock around ThreadPool_Do.  We can only run one ThreadPool_Do at a time,
+// because we are too lazy to set up a queue here, and don't expect to need one.
+#if defined( _WIN32 )
+CRITICAL_SECTION    gThreadPoolLock[1];
 #else // !_WIN32
-pthread_mutex_t gThreadPoolLock;
+pthread_mutex_t     gThreadPoolLock;
 #endif // !_WIN32
 
 // Condition variable to park ThreadPool threads when not working
-#if defined(_WIN32)
-CRITICAL_SECTION cond_lock[1];
+#if defined( _WIN32 )
+CRITICAL_SECTION    cond_lock[1];
 _CONDITION_VARIABLE cond_var[1];
 #else // !_WIN32
-pthread_mutex_t cond_lock;
-pthread_cond_t cond_var;
+pthread_mutex_t     cond_lock;
+pthread_cond_t      cond_var;
 #endif // !_WIN32
-
-// Condition variable state. How many iterations on the function left to run,
-// set to CL_INT_MAX to cause worker threads to exit. Note: this value might
-// go negative.
-volatile cl_int gRunCount = 0;
+volatile cl_int     gRunCount = 0;              // Condition variable state. How many iterations on the function left to run.
+                                                // set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative.
 
 // State that only changes when the threadpool is not working.
-volatile TPFuncPtr gFunc_ptr = NULL;
-volatile void *gUserInfo = NULL;
-volatile cl_int gJobCount = 0;
+volatile TPFuncPtr  gFunc_ptr = NULL;
+volatile void       *gUserInfo = NULL;
+volatile cl_int     gJobCount = 0;
 
 // State that may change while the thread pool is working
-volatile cl_int jobError = CL_SUCCESS; // err code return for the job as a whole
+volatile cl_int     jobError = CL_SUCCESS;      // err code return for the job as a whole
 
 // Condition variable to park caller while waiting
-#if defined(_WIN32)
-HANDLE caller_event;
+#if defined( _WIN32 )
+HANDLE              caller_event;
 #else // !_WIN32
-pthread_mutex_t caller_cond_lock;
-pthread_cond_t caller_cond_var;
+pthread_mutex_t     caller_cond_lock;
+pthread_cond_t      caller_cond_var;
 #endif // !_WIN32
-
-// # of threads intended to be running. Running threads will decrement this
-// as they discover they've run out of work to do.
-volatile cl_int gRunning = 0;
+volatile cl_int     gRunning = 0;       // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do.
 
 // The total number of threads launched.
-volatile cl_int gThreadCount = 0;
+volatile cl_int     gThreadCount = 0;
 #ifdef _WIN32
-void ThreadPool_WorkerFunc(void *p)
+void ThreadPool_WorkerFunc( void *p )  // Worker thread entry point: claims item ids from gRunCount and runs gFunc_ptr on each one.
 #else
-void *ThreadPool_WorkerFunc(void *p)
+void *ThreadPool_WorkerFunc( void *p )  // Worker thread entry point: claims item ids from gRunCount and runs gFunc_ptr on each one. Returns NULL.
 #endif
 {
-    cl_uint threadID = ThreadPool_AtomicAdd((volatile cl_int *)p, 1);
-    cl_int item = ThreadPool_AtomicAdd(&gRunCount, -1);
-    // log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
+    cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );  // p points at a counter; its previous value becomes this worker's id
+    cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );  // claim an item: the value of gRunCount before the decrement
+//    log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
 
-    while (MAX_COUNT > item)
+    while( MAX_COUNT > item )  // an item value of MAX_COUNT or more means the worker should exit
     {
         cl_int err;
 
         // check for more work to do
-        if (0 >= item)
+        if( 0 >= item )
         {
-            // log_info("Thread %d has run out of work.\n", threadID);
+//            log_info( "Thread %d has run out of work.\n", threadID );
 
             // No work to do. Attempt to block waiting for work
-#if defined(_WIN32)
-            EnterCriticalSection(cond_lock);
+#if defined( _WIN32 )
+            EnterCriticalSection( cond_lock );
 #else // !_WIN32
-            if ((err = pthread_mutex_lock(&cond_lock)))
+            if((err = pthread_mutex_lock( &cond_lock) ))
             {
-                log_error(
-                    "Error %d from pthread_mutex_lock. Worker %d unable to "
-                    "block waiting for work. ThreadPool_WorkerFunc failed.\n",
-                    err, threadID);
+                log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID );
                 goto exit;
             }
 #endif // !_WIN32
 
-            cl_int remaining = ThreadPool_AtomicAdd(&gRunning, -1);
-            // log_info("ThreadPool_WorkerFunc: gRunning = %d\n",
-            //          remaining - 1);
-            if (1 == remaining)
+            cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 );  // gRunning as it was before this worker checked out
+//            log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 );
+            if( 1 == remaining )
             { // last thread out signal the main thread to wake up
-#if defined(_WIN32)
-                SetEvent(caller_event);
+#if defined( _WIN32 )
+                SetEvent( caller_event );
 #else // !_WIN32
-                if ((err = pthread_mutex_lock(&caller_cond_lock)))
+                if((err = pthread_mutex_lock( &caller_cond_lock) ))
                 {
-                    log_error("Error %d from pthread_mutex_lock. Unable to "
-                              "wake caller.\n",
-                              err);
+                    log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
                     goto exit;
                 }
-                if ((err = pthread_cond_broadcast(&caller_cond_var)))
+                if( (err = pthread_cond_broadcast( &caller_cond_var )))
                 {
-                    log_error(
-                        "Error %d from pthread_cond_broadcast. Unable to wake "
-                        "up main thread. ThreadPool_WorkerFunc failed.\n",
-                        err);
+                    log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err );
                     goto exit;
                 }
-                if ((err = pthread_mutex_unlock(&caller_cond_lock)))
+                if((err = pthread_mutex_unlock( &caller_cond_lock) ))
                 {
-                    log_error("Error %d from pthread_mutex_lock. Unable to "
-                              "wake caller.\n",
-                              err);
+                    log_error("Error %d from pthread_mutex_unlock. Unable to wake caller.\n", err );
                     goto exit;
                 }
 #endif // !_WIN32
             }
 
-            // loop in case we are woken only to discover that some other thread
-            // already did all the work
-            while (0 >= item)
+            // loop in case we are woken only to discover that some other thread already did all the work
+            while( 0 >= item )
             {
-#if defined(_WIN32)
-                _SleepConditionVariableCS(cond_var, cond_lock, INFINITE);
+#if defined( _WIN32 )
+                _SleepConditionVariableCS( cond_var, cond_lock, INFINITE );
 #else // !_WIN32
-                if ((err = pthread_cond_wait(&cond_var, &cond_lock)))
+                if((err = pthread_cond_wait( &cond_var, &cond_lock) ))
                 {
-                    log_error(
-                        "Error %d from pthread_cond_wait. Unable to block for "
-                        "waiting for work. ThreadPool_WorkerFunc failed.\n",
-                        err);
-                    pthread_mutex_unlock(&cond_lock);
+                    log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
+                    pthread_mutex_unlock( &cond_lock);
                     goto exit;
                 }
 #endif // !_WIN32
 
                 // try again to get a valid item id
-                item = ThreadPool_AtomicAdd(&gRunCount, -1);
-                if (MAX_COUNT <= item) // exit if we are done
+                item = ThreadPool_AtomicAdd( &gRunCount, -1 );
+                if( MAX_COUNT <= item )  // exit if we are done
                 {
-#if defined(_WIN32)
-                    LeaveCriticalSection(cond_lock);
+#if defined( _WIN32 )
+                    LeaveCriticalSection( cond_lock );
 #else // !_WIN32
-                    pthread_mutex_unlock(&cond_lock);
+                    pthread_mutex_unlock( &cond_lock);
 #endif // !_WIN32
                     goto exit;
                 }
             }
 
-            ThreadPool_AtomicAdd(&gRunning, 1);
-            // log_info("Thread %d has found work.\n", threadID);
+            ThreadPool_AtomicAdd( &gRunning, 1 );  // this worker counts as running again
+//            log_info( "Thread %d has found work.\n", threadID);
 
-#if defined(_WIN32)
-            LeaveCriticalSection(cond_lock);
+#if defined( _WIN32 )
+            LeaveCriticalSection( cond_lock );
 #else // !_WIN32
-            if ((err = pthread_mutex_unlock(&cond_lock)))
+            if((err = pthread_mutex_unlock( &cond_lock) ))
             {
-                log_error(
-                    "Error %d from pthread_mutex_unlock. Unable to block for "
-                    "waiting for work. ThreadPool_WorkerFunc failed.\n",
-                    err);
+                log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
                 goto exit;
             }
 #endif // !_WIN32
+
         }
 
         // we have a valid item, so do the work
-        // but only if we haven't already encountered an error
-        if (CL_SUCCESS == jobError)
+        if( CL_SUCCESS == jobError )  // but only if we haven't already encountered an error
         {
-            // log_info("Thread %d doing job %d\n", threadID, item - 1);
+//            log_info( "Thread %d doing job %d\n", threadID, item - 1);
 
 #if defined(__APPLE__) && defined(__arm__)
-            // On most platforms which support denorm, default is FTZ off.
-            // However, on some hardware where the reference is computed,
-            // default might be flush denorms to zero e.g. arm. This creates
-            // issues in result verification. Since spec allows the
-            // implementation to either flush or not flush denorms to zero, an
-            // implementation may choose not be flush i.e. return denorm result
-            // whereas reference result may be zero (flushed denorm). Hence we
-            // need to disable denorm flushing on host side where reference is
-            // being computed to make sure we get non-flushed reference result.
-            // If implementation returns flushed result, we correctly take care
-            // of that in verification code.
+            // On most platforms which support denorm, default is FTZ off. However,
+            // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+            // This creates issues in result verification. Since spec allows the implementation to either flush or
+            // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
+            // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+            // where reference is being computed to make sure we get non-flushed reference result. If implementation
+            // returns flushed result, we correctly take care of that in verification code.
             FPU_mode_type oldMode;
-            DisableFTZ(&oldMode);
+            DisableFTZ( &oldMode );
 #endif
 
             // Call the user's function with this item ID
-            err = gFunc_ptr(item - 1, threadID, (void *)gUserInfo);
+            err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo );  // the job id handed to the callback is item - 1
 #if defined(__APPLE__) && defined(__arm__)
             // Restore FP state
-            RestoreFPState(&oldMode);
+            RestoreFPState( &oldMode );
 #endif
 
-            if (err)
+            if( err )
             {
 #if (__MINGW32__)
                 EnterCriticalSection(&gAtomicLock);
-                if (jobError == CL_SUCCESS) jobError = err;
+                if( jobError == CL_SUCCESS )  // keep the first error only; no ';' here, or the assignment below runs unconditionally
+                    jobError = err;
                 gRunCount = 0;
                 LeaveCriticalSection(&gAtomicLock);
-#elif defined(__GNUC__)
-                // GCC extension:
-                // http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
+#elif defined( __GNUC__ )
+                // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
                 // set the new error if we are the first one there.
-                __sync_val_compare_and_swap(&jobError, CL_SUCCESS, err);
+                __sync_val_compare_and_swap( &jobError, CL_SUCCESS, err );
 
                 // drop run count to 0
                 gRunCount = 0;
                 __sync_synchronize();
-#elif defined(_MSC_VER)
+#elif defined( _MSC_VER )
                 // set the new error if we are the first one there.
-                _InterlockedCompareExchange((volatile LONG *)&jobError, err,
-                                            CL_SUCCESS);
+                _InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS );
 
                 // drop run count to 0
                 gRunCount = 0;
                 _mm_mfence();
 #else
-                if (pthread_mutex_lock(&gAtomicLock))
-                    log_error(
-                        "Atomic operation failed. "
-                        "pthread_mutex_lock(&gAtomicLock) returned an error\n");
-                if (jobError == CL_SUCCESS) jobError = err;
+                if( pthread_mutex_lock(&gAtomicLock) )
+                    log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
+                if( jobError == CL_SUCCESS )  // keep the first error only; no ';' here, or the assignment below runs unconditionally
+                    jobError = err;
                 gRunCount = 0;
-                if (pthread_mutex_unlock(&gAtomicLock))
-                    log_error("Failed to release gAtomicLock. Further atomic "
-                              "operations may deadlock\n");
+                if( pthread_mutex_unlock(&gAtomicLock) )
+                    log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock\n");
 #endif
             }
         }
 
         // get the next item
-        item = ThreadPool_AtomicAdd(&gRunCount, -1);
+        item = ThreadPool_AtomicAdd( &gRunCount, -1 );
     }
 
 exit:
-    log_info("ThreadPool: thread %d exiting.\n", threadID);
-    ThreadPool_AtomicAdd(&gThreadCount, -1);
+    log_info( "ThreadPool: thread %d exiting.\n", threadID );
+    ThreadPool_AtomicAdd( &gThreadCount, -1 );  // one fewer worker thread is alive
 #if !defined(_WIN32)
     return NULL;
 #endif
 }
 
 // SetThreadCount() may be used to artifically set the number of worker threads
-// If the value is 0 (the default) the number of threads will be determined
-// based on the number of CPU cores.  If it is a unicore machine, then 2 will be
-// used, so that we still get some testing for thread safety.
+// If the value is 0 (the default) the number of threads will be determined based on
+// the number of CPU cores.  If it is a unicore machine, then 2 will be used, so
+// that we still get some testing for thread safety.
 //
-// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then
-// the code will run single threaded, but will report an error to indicate that
-// the test is invalid.  This option is intended for debugging purposes only. It
-// is suggested as a convention that test apps set the thread count to 1 in
-// response to the -m flag.
+// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
+// code will run single threaded, but will report an error to indicate that the test
+// is invalid.  This option is intended for debugging purposes only. It is suggested
+// as a convention that test apps set the thread count to 1 in response to the -m flag.
 //
-// SetThreadCount() must be called before the first call to GetThreadCount() or
-// ThreadPool_Do(), otherwise the behavior is indefined.
-void SetThreadCount(int count)
+// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
+// otherwise the behavior is undefined.
+void        SetThreadCount( int count )
 {
-    if (threadPoolInitErr == CL_SUCCESS)
+    if( threadPoolInitErr == CL_SUCCESS )
     {
-        log_error("Error: It is illegal to set the thread count after the "
-                  "first call to ThreadPool_Do or GetThreadCount\n");
+        log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" );
         abort();
     }
 
@@ -489,42 +443,35 @@
     int err;
     volatile cl_uint threadID = 0;
 
-    // Check for manual override of multithreading code. We add this for better
-    // debuggability.
-    if (getenv("CL_TEST_SINGLE_THREADED"))
+    // Check for manual override of multithreading code. We add this for better debuggability.
+    if( getenv( "CL_TEST_SINGLE_THREADED" ) )
     {
-        log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. "
-                  "Running single threaded.\n*** TEST IS INVALID! ***\n");
+        log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n");
         gThreadCount = 1;
         return;
     }
 
-    // Figure out how many threads to run -- check first for non-zero to give
-    // the implementation the chance
-    if (0 == gThreadCount)
+    // Figure out how many threads to run -- check first for non-zero to give the implementation the chance
+    if( 0 == gThreadCount )
     {
-#if defined(_MSC_VER) || defined(__MINGW64__)
+#if defined(_MSC_VER) || defined (__MINGW64__)
         PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
         DWORD length = 0;
 
-        GetLogicalProcessorInformation(NULL, &length);
-        buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(length);
-        if (buffer != NULL)
+        GetLogicalProcessorInformation( NULL, &length );
+        buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
+        if( buffer != NULL )
         {
-            if (GetLogicalProcessorInformation(buffer, &length) == TRUE)
+            if ( GetLogicalProcessorInformation( buffer, &length ) == TRUE )
             {
                 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
-                while (
-                    ptr
-                    < &buffer[length
-                              / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)])
+                while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
                 {
-                    if (ptr->Relationship == RelationProcessorCore)
+                    if( ptr->Relationship == RelationProcessorCore )
                     {
-                        // Count the number of bits in ProcessorMask (number of
-                        // logical cores)
+                        // Count the number of bits in ProcessorMask (number of logical cores)
                         ULONG mask = ptr->ProcessorMask;
-                        while (mask)
+                        while( mask )
                         {
                             ++gThreadCount;
                             mask &= mask - 1; // Remove 1 bit at a time
@@ -535,71 +482,66 @@
             }
             free(buffer);
         }
-#elif defined(__MINGW32__)
+#elif defined (__MINGW32__)
         {
-#warning How about this, instead of hard coding it to 2?
+            #warning  How about this, instead of hard coding it to 2?
             SYSTEM_INFO sysinfo;
-            GetSystemInfo(&sysinfo);
+            GetSystemInfo( &sysinfo );
             gThreadCount = sysinfo.dwNumberOfProcessors;
         }
-#elif defined(__linux__) && !defined(__ANDROID__)
-        cpu_set_t affinity;
-        if (0 == sched_getaffinity(0, sizeof(cpu_set_t), &affinity))
+#elif defined (__linux__) && !defined(__ANDROID__)
+        cpu_set_t    affinity;
+        if ( 0 == sched_getaffinity(0, sizeof(cpu_set_t), &affinity) )
         {
 #if !(defined(CPU_COUNT))
-            gThreadCount = 1;
+        gThreadCount = 1;
 #else
             gThreadCount = CPU_COUNT(&affinity);
 #endif
         }
         else
         {
-            // Hopefully your system returns logical cpus here, as does MacOS X
-            gThreadCount = (cl_int)sysconf(_SC_NPROCESSORS_CONF);
+            gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF);       // Hopefully your system returns logical cpus here, as does MacOS X
         }
-#else /* !_WIN32 */
-        // Hopefully your system returns logical cpus here, as does MacOS X
-        gThreadCount = (cl_int)sysconf(_SC_NPROCESSORS_CONF);
+#else // !_WIN32
+        gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF);       // Hopefully your system returns logical cpus here, as does MacOS X
 #endif // !_WIN32
 
-        // Multithreaded tests are required to run multithreaded even on unicore
-        // systems so as to test thread safety
-        if (1 == gThreadCount) gThreadCount = 2;
+        // Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety
+        if( 1 == gThreadCount )
+            gThreadCount = 2;
     }
 
-// When working in 32 bit limit the thread number to 12
-// This fix was made due to memory issues in integer_ops test
-// When running integer_ops, the test opens as many threads as the
-// machine has and each thread allocates a fixed amount of memory
-// When running this test on dual socket machine in 32-bit, the
-// process memory is not sufficient and the test fails
-#if defined(_WIN32) && !defined(_M_X64)
-    if (gThreadCount > 12)
-    {
-        gThreadCount = 12;
-    }
-#endif
+    // When working in 32 bit limit the thread number to 12
+    // This fix was made due to memory issues in integer_ops test
+    // When running integer_ops, the test opens as many threads as the
+    // machine has and each thread allocates a fixed amount of memory
+    // When running this test on dual socket machine in 32-bit, the
+    // process memory is not sufficient and the test fails
+    #if defined(_WIN32) && !defined(_M_X64)
+        if (gThreadCount > 12) {
+            gThreadCount = 12;
+        }
+    #endif
 
-    // Allow the app to set thread count to <0 for debugging purposes.
-    // This will cause the test to run single threaded.
-    if (gThreadCount < 2)
+    //Allow the app to set thread count to <0 for debugging purposes.  This will cause the test to run single threaded.
+    if( gThreadCount < 2 )
     {
-        log_error("ERROR: Running single threaded because thread count < 2. "
-                  "\n*** TEST IS INVALID! ***\n");
+        log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n");
         gThreadCount = 1;
         return;
     }
 
-#if defined(_WIN32)
-    InitializeCriticalSection(gThreadPoolLock);
-    InitializeCriticalSection(cond_lock);
-    _InitializeConditionVariable(cond_var);
-    caller_event = CreateEvent(NULL, FALSE, FALSE, NULL);
-#elif defined(__GNUC__)
-    // Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since
-    // it might cause problem with some flavors of gcc compilers.
+#if defined( _WIN32 )
+    InitializeCriticalSection( gThreadPoolLock );
+    InitializeCriticalSection( cond_lock );
+    _InitializeConditionVariable( cond_var );
+    caller_event = CreateEvent( NULL, FALSE, FALSE, NULL );
+#elif defined (__GNUC__)
+    // Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since it might cause problem
+    // with some flavors of gcc compilers.
     pthread_cond_init(&cond_var, NULL);
-    pthread_mutex_init(&cond_lock, NULL);
+    pthread_mutex_init(&cond_lock ,NULL);
     pthread_cond_init(&caller_cond_var, NULL);
     pthread_mutex_init(&caller_cond_lock, NULL);
     pthread_mutex_init(&gThreadPoolLock, NULL);
@@ -607,18 +549,15 @@
 
 #if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__))
     pthread_mutex_initialize(gAtomicLock);
-#elif defined(__MINGW32__)
+#elif defined (__MINGW32__)
     InitializeCriticalSection(&gAtomicLock);
 #endif
-    // Make sure the last thread done in the work pool doesn't signal us to wake
-    // before we get to the point where we are supposed to wait
+    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
     //  That would cause a deadlock.
-#if !defined(_WIN32)
-    if ((err = pthread_mutex_lock(&caller_cond_lock)))
+#if !defined( _WIN32 )
+    if((err = pthread_mutex_lock( &caller_cond_lock) ))
     {
-        log_error("Error %d from pthread_mutex_lock. Unable to block for work "
-                  "to finish. ThreadPool_Init failed.\n",
-                  err);
+        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
         gThreadCount = 1;
         return;
     }
@@ -626,50 +565,45 @@
 
     gRunning = gThreadCount;
     // init threads
-    for (i = 0; i < gThreadCount; i++)
+    for( i = 0; i < gThreadCount; i++ )
     {
-#if defined(_WIN32)
-        uintptr_t handle =
-            _beginthread(ThreadPool_WorkerFunc, 0, (void *)&threadID);
-        err = (handle == 0);
+#if defined( _WIN32 )
+        uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID);
+        err = ( handle == 0 );
 #else // !_WIN32
         pthread_t tid = 0;
-        err = pthread_create(&tid, NULL, ThreadPool_WorkerFunc,
-                             (void *)&threadID);
+        err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID );
 #endif // !_WIN32
-        if (err)
+        if( err )
         {
-            log_error("Error %d launching thread %d\n", err, i);
+            log_error( "Error %d launching thread %d\n", err, i );
             threadPoolInitErr = err;
             gThreadCount = i;
             break;
         }
     }
 
-    atexit(ThreadPool_Exit);
+    atexit( ThreadPool_Exit );
 
-    // block until they are done launching.
+// block until they are done launching.
     do
     {
-#if defined(_WIN32)
-        WaitForSingleObject(caller_event, INFINITE);
+#if defined( _WIN32 )
+        WaitForSingleObject( caller_event, INFINITE );
 #else // !_WIN32
-        if ((err = pthread_cond_wait(&caller_cond_var, &caller_cond_lock)))
+        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
         {
-            log_error("Error %d from pthread_cond_wait. Unable to block for "
-                      "work to finish. ThreadPool_Init failed.\n",
-                      err);
-            pthread_mutex_unlock(&caller_cond_lock);
+            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
+            pthread_mutex_unlock( &caller_cond_lock);
             return;
         }
 #endif // !_WIN32
-    } while (gRunCount != -gThreadCount);
-#if !defined(_WIN32)
-    if ((err = pthread_mutex_unlock(&caller_cond_lock)))
+    }
+    while( gRunCount != -gThreadCount );
+#if !defined( _WIN32 )
+    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
     {
-        log_error("Error %d from pthread_mutex_unlock. Unable to block for "
-                  "work to finish. ThreadPool_Init failed.\n",
-                  err);
+        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
         return;
     }
 #endif // !_WIN32
@@ -678,8 +612,7 @@
 }
 
 #if defined(_MSC_VER)
-static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter,
-                                      PVOID *lpContex)
+static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex)
 {
     ThreadPool_Init();
     return TRUE;
@@ -691,40 +624,35 @@
     int err, count;
     gRunCount = CL_INT_MAX;
 
-#if defined(__GNUC__)
-    // GCC extension:
-    // http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
+#if defined( __GNUC__ )
+    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
     __sync_synchronize();
-#elif defined(_MSC_VER)
+#elif defined( _MSC_VER )
     _mm_mfence();
 #else
-#warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
+    #warning   If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
 #endif
 
     // spin waiting for threads to die
     for (count = 0; 0 != gThreadCount && count < 1000; count++)
     {
-#if defined(_WIN32)
-        _WakeAllConditionVariable(cond_var);
+#if defined( _WIN32 )
+        _WakeAllConditionVariable( cond_var );
         Sleep(1);
 #else // !_WIN32
-        if ((err = pthread_cond_broadcast(&cond_var)))
+        if( (err = pthread_cond_broadcast( &cond_var )))
         {
-            log_error("Error %d from pthread_cond_broadcast. Unable to wake up "
-                      "work threads. ThreadPool_Exit failed.\n",
-                      err);
+            log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err );
             break;
         }
         usleep(1000);
 #endif // !_WIN32
     }
 
-    if (gThreadCount)
-        log_error("Error: Thread pool timed out after 1 second with %d threads "
-                  "still active.\n",
-                  gThreadCount);
+    if( gThreadCount )
+        log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount );
     else
-        log_info("Thread pool exited in a orderly fashion.\n");
+        log_info( "Thread pool exited in a orderly fashion.\n" );
 }
 
 
@@ -736,123 +664,108 @@
 // can be running at a time. It is not intended for general purpose use.
 // If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were
 // all available then it would make more sense to use those features.
-cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo)
+cl_int ThreadPool_Do( TPFuncPtr func_ptr,
+                      cl_uint count,
+                      void *userInfo )
 {
     cl_int newErr;
     cl_int err = 0;
     // Lazily set up our threads
 #if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
-    err = !_InitOnceExecuteOnce(&threadpool_init_control, _ThreadPool_Init,
-                                NULL, NULL);
-#elif defined(_WIN32)
-    if (threadpool_init_control == 0)
-    {
-#warning This is buggy and race prone.  Find a better way.
+    err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
+#elif defined (_WIN32)
+    if (threadpool_init_control == 0) {
+    #warning  This is buggy and race prone.  Find a better way.
         ThreadPool_Init();
         threadpool_init_control = 1;
     }
-#else // posix platform
-    err = pthread_once(&threadpool_init_control, ThreadPool_Init);
-    if (err)
+#else //posix platform
+    err = pthread_once( &threadpool_init_control, ThreadPool_Init );
+    if( err )
     {
-        log_error("Error %d from pthread_once. Unable to init threads. "
-                  "ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
         return err;
     }
 #endif
-    // Single threaded code to handle case where threadpool wasn't allocated or
-    // was disabled by environment variable
-    if (threadPoolInitErr)
+    // Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable
+    if( threadPoolInitErr )
     {
         cl_uint currentJob = 0;
-        cl_int result = CL_SUCCESS;
+        cl_int  result = CL_SUCCESS;
 
 #if defined(__APPLE__) && defined(__arm__)
         // On most platforms which support denorm, default is FTZ off. However,
-        // on some hardware where the reference is computed, default might be
-        // flush denorms to zero e.g. arm. This creates issues in result
-        // verification. Since spec allows the implementation to either flush or
-        // not flush denorms to zero, an implementation may choose not be flush
-        // i.e. return denorm result whereas reference result may be zero
-        // (flushed denorm). Hence we need to disable denorm flushing on host
-        // side where reference is being computed to make sure we get
-        // non-flushed reference result. If implementation returns flushed
-        // result, we correctly take care of that in verification code.
+        // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+        // This creates issues in result verification. Since spec allows the implementation to either flush or
+        // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
+        // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+        // where reference is being computed to make sure we get non-flushed reference result. If implementation
+        // returns flushed result, we correctly take care of that in verification code.
         FPU_mode_type oldMode;
-        DisableFTZ(&oldMode);
+        DisableFTZ( &oldMode );
 #endif
-        for (currentJob = 0; currentJob < count; currentJob++)
-            if ((result = func_ptr(currentJob, 0, userInfo)))
+        for( currentJob = 0; currentJob < count; currentJob++ )
+            if((result = func_ptr( currentJob, 0, userInfo )))
             {
 #if defined(__APPLE__) && defined(__arm__)
                 // Restore FP state before leaving
-                RestoreFPState(&oldMode);
+                RestoreFPState( &oldMode );
 #endif
                 return result;
             }
 
 #if defined(__APPLE__) && defined(__arm__)
         // Restore FP state before leaving
-        RestoreFPState(&oldMode);
+        RestoreFPState( &oldMode );
 #endif
 
         return CL_SUCCESS;
     }
 
-    if (count >= MAX_COUNT)
+    if( count >= MAX_COUNT )
     {
-        log_error(
-            "Error: ThreadPool_Do count %d >= max threadpool count of %d\n",
-            count, MAX_COUNT);
+        log_error("Error: ThreadPool_Do count %d >= max threadpool count of %d\n", count, MAX_COUNT );
         return -1;
     }
 
     // Enter critical region
-#if defined(_WIN32)
-    EnterCriticalSection(gThreadPoolLock);
+#if defined( _WIN32 )
+    EnterCriticalSection( gThreadPoolLock );
 #else // !_WIN32
-    if ((err = pthread_mutex_lock(&gThreadPoolLock)))
+    if( (err = pthread_mutex_lock( &gThreadPoolLock )))
     {
         switch (err)
         {
             case EDEADLK:
-                log_error(
-                    "Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do "
-                    "is not designed to work recursively!\n");
+                log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" );
                 break;
             case EINVAL:
-                log_error("Error EINVAL returned in ThreadPool_Do(). How did "
-                          "we end up with an invalid gThreadPoolLock?\n");
+                log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" );
                 break;
-            default: break;
+            default:
+                break;
         }
         return err;
     }
 #endif // !_WIN32
 
     // Start modifying the job state observable by worker threads
-#if defined(_WIN32)
-    EnterCriticalSection(cond_lock);
+#if defined( _WIN32 )
+    EnterCriticalSection( cond_lock );
 #else // !_WIN32
-    if ((err = pthread_mutex_lock(&cond_lock)))
+    if((err = pthread_mutex_lock( &cond_lock) ))
     {
-        log_error("Error %d from pthread_mutex_lock. Unable to wake up work "
-                  "threads. ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
         goto exit;
     }
 #endif // !_WIN32
 
-    // Make sure the last thread done in the work pool doesn't signal us to wake
-    // before we get to the point where we are supposed to wait
+    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
     //  That would cause a deadlock.
-#if !defined(_WIN32)
-    if ((err = pthread_mutex_lock(&caller_cond_lock)))
+#if !defined( _WIN32 )
+    if((err = pthread_mutex_lock( &caller_cond_lock) ))
     {
-        log_error("Error %d from pthread_mutex_lock. Unable to block for work "
-                  "to finish. ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
         goto exit;
     }
 #endif // !_WIN32
@@ -863,50 +776,42 @@
     gFunc_ptr = func_ptr;
     gUserInfo = userInfo;
 
-#if defined(_WIN32)
+#if defined( _WIN32 )
     ResetEvent(caller_event);
-    _WakeAllConditionVariable(cond_var);
-    LeaveCriticalSection(cond_lock);
+    _WakeAllConditionVariable( cond_var );
+    LeaveCriticalSection( cond_lock );
 #else // !_WIN32
-    if ((err = pthread_cond_broadcast(&cond_var)))
+    if( (err = pthread_cond_broadcast( &cond_var )))
     {
-        log_error("Error %d from pthread_cond_broadcast. Unable to wake up "
-                  "work threads. ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
         goto exit;
     }
-    if ((err = pthread_mutex_unlock(&cond_lock)))
+    if((err = pthread_mutex_unlock( &cond_lock) ))
     {
-        log_error("Error %d from pthread_mutex_unlock. Unable to wake up work "
-                  "threads. ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
         goto exit;
     }
 #endif // !_WIN32
 
-    // block until they are done.  It would be slightly more efficient to do
-    // some of the work here though.
+// block until they are done.  It would be slightly more efficient to do some of the work here though.
     do
     {
-#if defined(_WIN32)
-        WaitForSingleObject(caller_event, INFINITE);
+#if defined( _WIN32 )
+        WaitForSingleObject( caller_event, INFINITE );
 #else // !_WIN32
-        if ((err = pthread_cond_wait(&caller_cond_var, &caller_cond_lock)))
+        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
         {
-            log_error("Error %d from pthread_cond_wait. Unable to block for "
-                      "work to finish. ThreadPool_Do failed.\n",
-                      err);
-            pthread_mutex_unlock(&caller_cond_lock);
+            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
+            pthread_mutex_unlock( &caller_cond_lock);
             goto exit;
         }
 #endif // !_WIN32
-    } while (gRunning);
+    }
+    while( gRunning );
 #if !defined(_WIN32)
-    if ((err = pthread_mutex_unlock(&caller_cond_lock)))
+    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
     {
-        log_error("Error %d from pthread_mutex_unlock. Unable to block for "
-                  "work to finish. ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
         goto exit;
     }
 #endif // !_WIN32
@@ -915,15 +820,13 @@
 
 exit:
     // exit critical region
-#if defined(_WIN32)
-    LeaveCriticalSection(gThreadPoolLock);
+#if defined( _WIN32 )
+    LeaveCriticalSection( gThreadPoolLock );
 #else // !_WIN32
-    newErr = pthread_mutex_unlock(&gThreadPoolLock);
-    if (newErr)
+    newErr = pthread_mutex_unlock( &gThreadPoolLock );
+    if( newErr)
     {
-        log_error("Error %d from pthread_mutex_unlock. Unable to exit critical "
-                  "region. ThreadPool_Do failed.\n",
-                  newErr);
+        log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. ThreadPool_Do failed.\n", newErr );
         return err;
     }
 #endif // !_WIN32
@@ -931,31 +834,28 @@
     return err;
 }
 
-cl_uint GetThreadCount(void)
+cl_uint GetThreadCount( void )
 {
     // Lazily set up our threads
 #if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
-    cl_int err = !_InitOnceExecuteOnce(&threadpool_init_control,
-                                       _ThreadPool_Init, NULL, NULL);
-#elif defined(_WIN32)
-    if (threadpool_init_control == 0)
-    {
-#warning This is buggy and race prone.  Find a better way.
+    cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
+#elif defined (_WIN32)
+    if (threadpool_init_control == 0) {
+    #warning  This is buggy and race prone.  Find a better way.
         ThreadPool_Init();
         threadpool_init_control = 1;
     }
 #else
-    cl_int err = pthread_once(&threadpool_init_control, ThreadPool_Init);
-    if (err)
+    cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init );
+    if( err )
     {
-        log_error("Error %d from pthread_once. Unable to init threads. "
-                  "ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
         return err;
     }
 #endif // !_WIN32
 
-    if (gThreadCount < 1) return 1;
+    if( gThreadCount < 1 )
+        return 1;
 
     return gThreadCount;
 }
@@ -963,26 +863,24 @@
 #else
 
 #ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
-#error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
+    #error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
 #endif
 //
-// We require multithreading in parts of the test as a means of simultaneously
-// testing reentrancy requirements of OpenCL API, while also checking
+// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements
+// of OpenCL API, while also checking
 //
-// A sample single threaded implementation follows, for documentation /
-// bootstrapping purposes. It is not okay to use this for conformance testing!!!
+// A sample single threaded implementation follows, for documentation / bootstrapping purposes.
+// It is not okay to use this for conformance testing!!!
 //
-// Exception:  If your operating system does not support multithreaded execution
-// of any kind, then you may use this code.
+// Exception:  If your operating system does not support multithreaded execution of any kind, then you may use this code.
 //
 
-cl_int ThreadPool_AtomicAdd(volatile cl_int *a, cl_int b)
+cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
 {
     cl_uint r = *a;
 
-    // since this fallback code path is not multithreaded, we just do a regular
-    // add here. If your operating system supports memory-barrier-atomics, use
-    // those here.
+    // since this fallback code path is not multithreaded, we just do a regular add here
+    // If your operating system supports memory-barrier-atomics, use those here
     *a = r + b;
 
     return r;
@@ -991,38 +889,44 @@
 // Blocking API that farms out count jobs to a thread pool.
 // It may return with some work undone if func_ptr() returns a non-zero
 // result.
-cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo)
+cl_int ThreadPool_Do(   TPFuncPtr func_ptr,
+                        cl_uint count,
+                        void *userInfo )
 {
     cl_uint currentJob = 0;
-    cl_int result = CL_SUCCESS;
+    cl_int  result = CL_SUCCESS;
 
 #ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
     // THIS FUNCTION IS NOT INTENDED FOR USE!!
-    log_error("ERROR:  Test must be multithreaded!\n");
+    log_error( "ERROR:  Test must be multithreaded!\n" );
     exit(-1);
 #else
     static int spewCount = 0;
 
-    if (0 == spewCount)
+    if( 0 == spewCount )
     {
-        log_info("\nWARNING:  The operating system is claimed not to support "
-                 "threads of any sort. Running single threaded.\n");
+        log_info( "\nWARNING:  The operating system is claimed not to support threads of any sort. Running single threaded.\n" );
         spewCount = 1;
     }
 #endif
 
-    // The multithreaded code should mimic this behavior:
-    for (currentJob = 0; currentJob < count; currentJob++)
-        if ((result = func_ptr(currentJob, 0, userInfo))) return result;
+// The multithreaded code should mimic this behavior:
+    for( currentJob = 0; currentJob < count; currentJob++ )
+        if((result = func_ptr( currentJob, 0, userInfo )))
+            return result;
 
     return CL_SUCCESS;
 }
 
-cl_uint GetThreadCount(void) { return 1; }
-
-void SetThreadCount(int count)
+cl_uint GetThreadCount( void )
 {
-    if (count > 1) log_info("WARNING: SetThreadCount(%d) ignored\n", count);
+    return 1;
+}
+
+void SetThreadCount( int count )
+{
+    if( count > 1 )
+        log_info( "WARNING: SetThreadCount(%d) ignored\n", count );
 }
 
 #endif

diff --git a/test_common/harness/ThreadPool.h b/test_common/harness/ThreadPool.h
index 2ef07b4..2fa9c7b 100644
--- a/test_common/harness/ThreadPool.h
+++ b/test_common/harness/ThreadPool.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,54 +16,53 @@
 #ifndef THREAD_POOL_H
 #define THREAD_POOL_H
 
-#if defined(__APPLE__)
-#include <OpenCL/opencl.h>
+#if defined( __APPLE__ )
+    #include <OpenCL/opencl.h>
 #else
-#include <CL/cl.h>
+    #include <CL/cl.h>
 #endif
 
 //
 // An atomic add operator
-cl_int ThreadPool_AtomicAdd(volatile cl_int *a, cl_int b); // returns old value
+cl_int     ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b );    // returns old value
 
 // Your function prototype
 //
-// A function pointer to the function you want to execute in a multithreaded
-// context.  No synchronization primitives are provided, other than the atomic
-// add above. You may not call ThreadPool_Do from your function.
-// ThreadPool_AtomicAdd() and GetThreadCount() should work, however.
+// A function pointer to the function you want to execute in a multithreaded context.  No
+// synchronization primitives are provided, other than the atomic add above. You may not
+// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should
+// work, however.
 //
-// job ids and thread ids are 0 based.  If number of jobs or threads was 8, they
-// will numbered be 0 through 7. Note that while every job will be run, it is
-// not guaranteed that every thread will wake up before the work is done.
-typedef cl_int (*TPFuncPtr)(cl_uint /*job_id*/, cl_uint /* thread_id */,
-                            void *userInfo);
+// job ids and thread ids are 0 based.  If number of jobs or threads was 8, they will numbered be 0 through 7.
+// Note that while every job will be run, it is not guaranteed that every thread will wake up before
+// the work is done.
+typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo );
 
 // returns first non-zero result from func_ptr, or CL_SUCCESS if all are zero.
 // Some workitems may not run if a non-zero result is returned from func_ptr().
 // This function may not be called from a TPFuncPtr.
-cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo);
+cl_int      ThreadPool_Do(  TPFuncPtr func_ptr,
+                            cl_uint count,
+                            void *userInfo );
 
-// Returns the number of worker threads that underlie the threadpool.  The value
-// passed as the TPFuncPtrs thread_id will be between 0 and this value less one,
-// inclusive. This is safe to call from a TPFuncPtr.
-cl_uint GetThreadCount(void);
+// Returns the number of worker threads that underlie the threadpool.  The value passed
+// as the TPFuncPtrs thread_id will be between 0 and this value less one, inclusive.
+// This is safe to call from a TPFuncPtr.
+cl_uint     GetThreadCount( void );
 
 // SetThreadCount() may be used to artifically set the number of worker threads
-// If the value is 0 (the default) the number of threads will be determined
-// based on the number of CPU cores.  If it is a unicore machine, then 2 will be
-// used, so that we still get some testing for thread safety.
+// If the value is 0 (the default) the number of threads will be determined based on
+// the number of CPU cores.  If it is a unicore machine, then 2 will be used, so
+// that we still get some testing for thread safety.
 //
-// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then
-// the code will run single threaded, but will report an error to indicate that
-// the test is invalid.  This option is intended for debugging purposes only. It
-// is suggested as a convention that test apps set the thread count to 1 in
-// response to the -m flag.
+// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
+// code will run single threaded, but will report an error to indicate that the test
+// is invalid.  This option is intended for debugging purposes only. It is suggested
+// as a convention that test apps set the thread count to 1 in response to the -m flag.
 //
-// SetThreadCount() must be called before the first call to GetThreadCount() or
-// ThreadPool_Do(), otherwise the behavior is indefined. It may not be called
-// from a TPFuncPtr.
-void SetThreadCount(int count);
+// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
+// otherwise the behavior is indefined. It may not be called from a TPFuncPtr.
+void        SetThreadCount( int count );
 
 
-#endif /* THREAD_POOL_H  */
+#endif  /* THREAD_POOL_H  */

diff --git a/test_common/harness/alloc.h b/test_common/harness/alloc.h
index 653dde0..33e6bd8 100644
--- a/test_common/harness/alloc.h
+++ b/test_common/harness/alloc.h

@@ -17,7 +17,7 @@
 #ifndef HARNESS_ALLOC_H_
 #define HARNESS_ALLOC_H_
 
-#if defined(__linux__) || defined(linux) || defined(__APPLE__)
+#if defined(__linux__) || defined (linux) || defined(__APPLE__)
 #if defined(__ANDROID__)
 #include <malloc.h>
 #else
@@ -29,41 +29,43 @@
 #include "mingw_compat.h"
 #endif
 
-static void* align_malloc(size_t size, size_t alignment)
+static void * align_malloc(size_t size, size_t alignment)
 {
 #if defined(_WIN32) && defined(_MSC_VER)
     return _aligned_malloc(size, alignment);
-#elif defined(__linux__) || defined(linux) || defined(__APPLE__)
-    void* ptr = NULL;
+#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
+    void * ptr = NULL;
 #if defined(__ANDROID__)
     ptr = memalign(alignment, size);
-    if (ptr) return ptr;
+    if ( ptr )
+        return ptr;
 #else
-    if (alignment < sizeof(void*))
-    {
+    if (alignment < sizeof(void*)) {
         alignment = sizeof(void*);
     }
-    if (0 == posix_memalign(&ptr, alignment, size)) return ptr;
+    if (0 == posix_memalign(&ptr, alignment, size))
+        return ptr;
 #endif
     return NULL;
 #elif defined(__MINGW32__)
     return __mingw_aligned_malloc(size, alignment);
 #else
-#error "Please add support OS for aligned malloc"
+    #error "Please add support OS for aligned malloc"
 #endif
 }
 
-static void align_free(void* ptr)
+static void align_free(void * ptr)
 {
 #if defined(_WIN32) && defined(_MSC_VER)
     _aligned_free(ptr);
-#elif defined(__linux__) || defined(linux) || defined(__APPLE__)
-    return free(ptr);
+#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
+    return  free(ptr);
 #elif defined(__MINGW32__)
     return __mingw_aligned_free(ptr);
 #else
-#error "Please add support OS for aligned free"
+    #error "Please add support OS for aligned free"
 #endif
 }
 
 #endif // #ifndef HARNESS_ALLOC_H_
+

diff --git a/test_common/harness/clImageHelper.h b/test_common/harness/clImageHelper.h
index 3019ff3..dfa05ca 100644
--- a/test_common/harness/clImageHelper.h
+++ b/test_common/harness/clImageHelper.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -26,254 +26,257 @@
 #include "errorHelpers.h"
 
 
-// helper function to replace clCreateImage2D , to make the existing code use
-// the functions of version 1.2 and veriosn 1.1  respectively
+  // helper function to replace clCreateImage2D, to make the existing code use
+  // the functions of version 1.2 and version 1.1 respectively
 
-static inline cl_mem create_image_2d(cl_context context, cl_mem_flags flags,
-                                     const cl_image_format *image_format,
-                                     size_t image_width, size_t image_height,
-                                     size_t image_row_pitch, void *host_ptr,
-                                     cl_int *errcode_ret)
-{
+  static inline cl_mem create_image_2d  (cl_context context,
+                           cl_mem_flags flags,
+                           const cl_image_format *image_format,
+                           size_t image_width,
+                           size_t image_height,
+                           size_t image_row_pitch,
+                           void *host_ptr,
+                           cl_int *errcode_ret)
+  {
     cl_mem mImage = NULL;
 
-    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
-    {
-        host_ptr = NULL;
-    }
-
 #ifdef CL_VERSION_1_2
     cl_image_desc image_desc_dest;
-    image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;
+    image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;;
     image_desc_dest.image_width = image_width;
     image_desc_dest.image_height = image_height;
-    image_desc_dest.image_depth = 0; // not usedfor 2d
-    image_desc_dest.image_array_size = 0; // not used for 2d
+    image_desc_dest.image_depth= 0;// not usedfor 2d
+    image_desc_dest.image_array_size = 0;// not used for 2d
     image_desc_dest.image_row_pitch = image_row_pitch;
     image_desc_dest.image_slice_pitch = 0;
     image_desc_dest.num_mip_levels = 0;
     image_desc_dest.num_samples = 0;
-    image_desc_dest.mem_object =
-        NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in
-              // CL_VERSION_1_1, so always is NULL
-    mImage = clCreateImage(context, flags, image_format, &image_desc_dest,
-                           host_ptr, errcode_ret);
-    if (errcode_ret && (*errcode_ret))
-    {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage failed (%d)\n", *errcode_ret);
+    image_desc_dest.mem_object = NULL;// no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
+    mImage = clCreateImage( context, flags, image_format, &image_desc_dest, host_ptr, errcode_ret );
+    if (errcode_ret && (*errcode_ret)) {
+      // Log an info message and rely on the calling function to produce an error
+      // if necessary.
+      log_info("clCreateImage failed (%d)\n", *errcode_ret);
     }
 
 #else
-    mImage =
-        clCreateImage2D(context, flags, image_format, image_width, image_height,
-                        image_row_pitch, host_ptr, errcode_ret);
-    if (errcode_ret && (*errcode_ret))
-    {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
+    mImage = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret );
+    if (errcode_ret && (*errcode_ret)) {
+      // Log an info message and rely on the calling function to produce an error
+      // if necessary.
+      log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
     }
 #endif
 
     return mImage;
-}
+  }
 
-// helper function to replace clCreateImage2D , to make the existing code use
-// the functions of version 1.2 and veriosn 1.1  respectively
+    // helper function to replace clCreateImage2D, to make the existing code use
+    // the functions of version 1.2 and version 1.1 respectively
 
-static inline cl_mem
-create_image_2d_buffer(cl_context context, cl_mem_flags flags,
-                       const cl_image_format *image_format, size_t image_width,
-                       size_t image_height, size_t image_row_pitch,
-                       cl_mem buffer, cl_int *errcode_ret)
-{
-    cl_mem mImage = NULL;
-
-    cl_image_desc image_desc_dest;
-    image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;
-    image_desc_dest.image_width = image_width;
-    image_desc_dest.image_height = image_height;
-    image_desc_dest.image_depth = 0; // not usedfor 2d
-    image_desc_dest.image_array_size = 0; // not used for 2d
-    image_desc_dest.image_row_pitch = image_row_pitch;
-    image_desc_dest.image_slice_pitch = 0;
-    image_desc_dest.num_mip_levels = 0;
-    image_desc_dest.num_samples = 0;
-    image_desc_dest.mem_object = buffer;
-    mImage = clCreateImage(context, flags, image_format, &image_desc_dest, NULL,
-                           errcode_ret);
-    if (errcode_ret && (*errcode_ret))
+    static inline cl_mem create_image_2d_buffer  (cl_context context,
+                                    cl_mem_flags flags,
+                                    const cl_image_format *image_format,
+                                    size_t image_width,
+                                    size_t image_height,
+                                    size_t image_row_pitch,
+                                    cl_mem buffer,
+                                    cl_int *errcode_ret)
     {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        cl_mem mImage = NULL;
+
+        cl_image_desc image_desc_dest;
+        image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;;
+        image_desc_dest.image_width = image_width;
+        image_desc_dest.image_height = image_height;
+        image_desc_dest.image_depth= 0;// not usedfor 2d
+        image_desc_dest.image_array_size = 0;// not used for 2d
+        image_desc_dest.image_row_pitch = image_row_pitch;
+        image_desc_dest.image_slice_pitch = 0;
+        image_desc_dest.num_mip_levels = 0;
+        image_desc_dest.num_samples = 0;
+        image_desc_dest.mem_object = buffer;
+        mImage = clCreateImage( context, flags, image_format, &image_desc_dest, NULL, errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+
+        return mImage;
     }
 
-    return mImage;
-}
 
 
-static inline cl_mem create_image_3d(cl_context context, cl_mem_flags flags,
-                                     const cl_image_format *image_format,
-                                     size_t image_width, size_t image_height,
-                                     size_t image_depth, size_t image_row_pitch,
-                                     size_t image_slice_pitch, void *host_ptr,
-                                     cl_int *errcode_ret)
-{
+  static inline cl_mem create_image_3d (cl_context context,
+                          cl_mem_flags flags,
+                          const cl_image_format *image_format,
+                          size_t image_width,
+                          size_t image_height,
+                          size_t image_depth,
+                          size_t image_row_pitch,
+                          size_t image_slice_pitch,
+                          void *host_ptr,
+                          cl_int *errcode_ret)
+  {
     cl_mem mImage;
 
-    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
-    {
-        host_ptr = NULL;
-    }
-
 #ifdef CL_VERSION_1_2
     cl_image_desc image_desc;
     image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
     image_desc.image_width = image_width;
     image_desc.image_height = image_height;
     image_desc.image_depth = image_depth;
-    image_desc.image_array_size = 0; // not used for one image
+    image_desc.image_array_size = 0;// not used for one image
     image_desc.image_row_pitch = image_row_pitch;
     image_desc.image_slice_pitch = image_slice_pitch;
     image_desc.num_mip_levels = 0;
     image_desc.num_samples = 0;
-    image_desc.mem_object =
-        NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in
-              // CL_VERSION_1_1, so always is NULL
-    mImage = clCreateImage(context, flags, image_format, &image_desc, host_ptr,
-                           errcode_ret);
-    if (errcode_ret && (*errcode_ret))
-    {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage failed (%d)\n", *errcode_ret);
+    image_desc.mem_object = NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
+    mImage = clCreateImage( context,
+                           flags,
+                           image_format,
+                           &image_desc,
+                           host_ptr,
+                           errcode_ret );
+    if (errcode_ret && (*errcode_ret)) {
+      // Log an info message and rely on the calling function to produce an error
+      // if necessary.
+      log_info("clCreateImage failed (%d)\n", *errcode_ret);
     }
 
 #else
-    mImage = clCreateImage3D(context, flags, image_format, image_width,
-                             image_height, image_depth, image_row_pitch,
-                             image_slice_pitch, host_ptr, errcode_ret);
-    if (errcode_ret && (*errcode_ret))
-    {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
+    mImage = clCreateImage3D( context,
+                             flags, image_format,
+                             image_width,
+                             image_height,
+                             image_depth,
+                             image_row_pitch,
+                             image_slice_pitch,
+                             host_ptr,
+                             errcode_ret );
+    if (errcode_ret && (*errcode_ret)) {
+      // Log an info message and rely on the calling function to produce an error
+      // if necessary.
+      log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
     }
 #endif
 
     return mImage;
-}
+  }
 
-static inline cl_mem
-create_image_2d_array(cl_context context, cl_mem_flags flags,
-                      const cl_image_format *image_format, size_t image_width,
-                      size_t image_height, size_t image_array_size,
-                      size_t image_row_pitch, size_t image_slice_pitch,
-                      void *host_ptr, cl_int *errcode_ret)
-{
-    cl_mem mImage;
-
-    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
+    static inline cl_mem create_image_2d_array (cl_context context,
+                                   cl_mem_flags flags,
+                                   const cl_image_format *image_format,
+                                   size_t image_width,
+                                   size_t image_height,
+                                   size_t image_array_size,
+                                   size_t image_row_pitch,
+                                   size_t image_slice_pitch,
+                                   void *host_ptr,
+                                   cl_int *errcode_ret)
     {
-        host_ptr = NULL;
+        cl_mem mImage;
+
+        cl_image_desc image_desc;
+        image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+        image_desc.image_width = image_width;
+        image_desc.image_height = image_height;
+        image_desc.image_depth = 1;
+        image_desc.image_array_size = image_array_size;
+        image_desc.image_row_pitch = image_row_pitch;
+        image_desc.image_slice_pitch = image_slice_pitch;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = NULL;
+        mImage = clCreateImage( context,
+                               flags,
+                               image_format,
+                               &image_desc,
+                               host_ptr,
+                               errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+
+        return mImage;
     }
 
-    cl_image_desc image_desc;
-    image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
-    image_desc.image_width = image_width;
-    image_desc.image_height = image_height;
-    image_desc.image_depth = 1;
-    image_desc.image_array_size = image_array_size;
-    image_desc.image_row_pitch = image_row_pitch;
-    image_desc.image_slice_pitch = image_slice_pitch;
-    image_desc.num_mip_levels = 0;
-    image_desc.num_samples = 0;
-    image_desc.mem_object = NULL;
-    mImage = clCreateImage(context, flags, image_format, &image_desc, host_ptr,
-                           errcode_ret);
-    if (errcode_ret && (*errcode_ret))
+    static inline cl_mem create_image_1d_array (cl_context context,
+                                         cl_mem_flags flags,
+                                         const cl_image_format *image_format,
+                                         size_t image_width,
+                                         size_t image_array_size,
+                                         size_t image_row_pitch,
+                                         size_t image_slice_pitch,
+                                         void *host_ptr,
+                                         cl_int *errcode_ret)
     {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        cl_mem mImage;
+
+        cl_image_desc image_desc;
+        image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+        image_desc.image_width = image_width;
+        image_desc.image_height = 1;
+        image_desc.image_depth = 1;
+        image_desc.image_array_size = image_array_size;
+        image_desc.image_row_pitch = image_row_pitch;
+        image_desc.image_slice_pitch = image_slice_pitch;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = NULL;
+        mImage = clCreateImage( context,
+                               flags,
+                               image_format,
+                               &image_desc,
+                               host_ptr,
+                               errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+
+        return mImage;
     }
 
-    return mImage;
-}
-
-static inline cl_mem create_image_1d_array(
-    cl_context context, cl_mem_flags flags, const cl_image_format *image_format,
-    size_t image_width, size_t image_array_size, size_t image_row_pitch,
-    size_t image_slice_pitch, void *host_ptr, cl_int *errcode_ret)
-{
-    cl_mem mImage;
-
-    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
+    static inline cl_mem create_image_1d (cl_context context,
+                                   cl_mem_flags flags,
+                                   const cl_image_format *image_format,
+                                   size_t image_width,
+                                   size_t image_row_pitch,
+                                   void *host_ptr,
+                                   cl_mem buffer,
+                                   cl_int *errcode_ret)
     {
-        host_ptr = NULL;
+        cl_mem mImage;
+
+        cl_image_desc image_desc;
+        image_desc.image_type = buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D;
+        image_desc.image_width = image_width;
+        image_desc.image_height = 1;
+        image_desc.image_depth = 1;
+        image_desc.image_row_pitch = image_row_pitch;
+        image_desc.image_slice_pitch = 0;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = buffer;
+        mImage = clCreateImage( context,
+                               flags,
+                               image_format,
+                               &image_desc,
+                               host_ptr,
+                               errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+
+        return mImage;
     }
 
-    cl_image_desc image_desc;
-    image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
-    image_desc.image_width = image_width;
-    image_desc.image_height = 1;
-    image_desc.image_depth = 1;
-    image_desc.image_array_size = image_array_size;
-    image_desc.image_row_pitch = image_row_pitch;
-    image_desc.image_slice_pitch = image_slice_pitch;
-    image_desc.num_mip_levels = 0;
-    image_desc.num_samples = 0;
-    image_desc.mem_object = NULL;
-    mImage = clCreateImage(context, flags, image_format, &image_desc, host_ptr,
-                           errcode_ret);
-    if (errcode_ret && (*errcode_ret))
-    {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage failed (%d)\n", *errcode_ret);
-    }
-
-    return mImage;
-}
-
-static inline cl_mem create_image_1d(cl_context context, cl_mem_flags flags,
-                                     const cl_image_format *image_format,
-                                     size_t image_width, size_t image_row_pitch,
-                                     void *host_ptr, cl_mem buffer,
-                                     cl_int *errcode_ret)
-{
-    cl_mem mImage;
-
-    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
-    {
-        host_ptr = NULL;
-    }
-
-    cl_image_desc image_desc;
-    image_desc.image_type =
-        buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER : CL_MEM_OBJECT_IMAGE1D;
-    image_desc.image_width = image_width;
-    image_desc.image_height = 1;
-    image_desc.image_depth = 1;
-    image_desc.image_row_pitch = image_row_pitch;
-    image_desc.image_slice_pitch = 0;
-    image_desc.num_mip_levels = 0;
-    image_desc.num_samples = 0;
-    image_desc.mem_object = buffer;
-    mImage = clCreateImage(context, flags, image_format, &image_desc, host_ptr,
-                           errcode_ret);
-    if (errcode_ret && (*errcode_ret))
-    {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage failed (%d)\n", *errcode_ret);
-    }
-
-    return mImage;
-}
-
 
 #endif

diff --git a/test_common/harness/cl_offline_compiler-interface.txt b/test_common/harness/cl_offline_compiler-interface.txt
index fd6997d..30ab182 100644
--- a/test_common/harness/cl_offline_compiler-interface.txt
+++ b/test_common/harness/cl_offline_compiler-interface.txt

@@ -23,5 +23,3 @@
    CL_DEVICE_EXTENSIONS="<space separated list of CL extensions>"
    CL_DEVICE_IL_VERSION="<space separated list of IL versions>"
    CL_DEVICE_VERSION="OpenCL <version> <vendor info>"
-   CL_DEVICE_IMAGE_SUPPORT=<0|1>
-   CL_DEVICE_NAME="device name"

diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h
index 7aad15a..3858a7c 100644
--- a/test_common/harness/compat.h
+++ b/test_common/harness/compat.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,14 +16,14 @@
 #ifndef _COMPAT_H_
 #define _COMPAT_H_
 
-#if defined(_WIN32) && defined(_MSC_VER)
+#if defined(_WIN32) && defined (_MSC_VER)
 #include <Windows.h>
 #endif
 
 #ifdef __cplusplus
-#define EXTERN_C extern "C"
+    #define EXTERN_C extern "C"
 #else
-#define EXTERN_C
+    #define EXTERN_C
 #endif
 
 
@@ -31,11 +31,11 @@
 // stdlib.h
 //
 
-#include <stdlib.h> // On Windows, _MAX_PATH defined there.
+#include <stdlib.h>     // On Windows, _MAX_PATH defined there.
 
 // llabs appeared in MS C v16 (VS 10/2010).
-#if defined(_MSC_VER) && _MSC_VER <= 1500
-EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
+#if defined( _MSC_VER ) && _MSC_VER <= 1500
+    EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
 #endif
 
 
@@ -44,15 +44,16 @@
 //
 
 // stdbool.h appeared in MS C v18 (VS 12/2013).
-#if defined(_MSC_VER) && MSC_VER <= 1700
+#if defined( _MSC_VER ) && MSC_VER <= 1700
 #if !defined(__cplusplus)
 typedef char bool;
-#define true 1
-#define false 0
-#endif
+        #define true  1
+        #define false 0
+    #endif
 #else
-#include <stdbool.h>
-#endif // defined(_MSC_VER) && MSC_VER <= 1700
+    #include <stdbool.h>
+#endif
+
 
 
 //
@@ -60,25 +61,24 @@
 //
 
 // stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
-#if defined(_MSC_VER)                                                          \
-    && (!defined(__INTEL_COMPILER) && _MSC_VER <= 1500                         \
-        || defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1200)
-typedef unsigned char uint8_t;
-typedef char int8_t;
-typedef unsigned short uint16_t;
-typedef short int16_t;
-typedef unsigned int uint32_t;
-typedef int int32_t;
-typedef unsigned long long uint64_t;
-typedef long long int64_t;
+#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 )
+typedef unsigned char       uint8_t;
+typedef char                int8_t;
+typedef unsigned short      uint16_t;
+typedef short               int16_t;
+typedef unsigned int        uint32_t;
+typedef int                 int32_t;
+typedef unsigned long long  uint64_t;
+typedef long long           int64_t;
 #else
 #ifndef __STDC_LIMIT_MACROS
 #define __STDC_LIMIT_MACROS
 #endif
-#include <stdint.h>
+    #include <stdint.h>
 #endif
 
 
+
 //
 // float.h
 //
@@ -86,23 +86,24 @@
 #include <float.h>
 
 
+
 //
 // fenv.h
 //
 
 // fenv.h appeared in MS C v18 (VS 12/2013).
-#if defined(_MSC_VER) && _MSC_VER <= 1700 && !defined(__INTEL_COMPILER)
-// reimplement fenv.h because windows doesn't have it
-#define FE_INEXACT 0x0020
-#define FE_UNDERFLOW 0x0010
-#define FE_OVERFLOW 0x0008
-#define FE_DIVBYZERO 0x0004
-#define FE_INVALID 0x0001
-#define FE_ALL_EXCEPT 0x003D
-int fetestexcept(int excepts);
-int feclearexcept(int excepts);
+#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER )
+    // reimplement fenv.h because windows doesn't have it
+    #define FE_INEXACT          0x0020
+    #define FE_UNDERFLOW        0x0010
+    #define FE_OVERFLOW         0x0008
+    #define FE_DIVBYZERO        0x0004
+    #define FE_INVALID          0x0001
+    #define FE_ALL_EXCEPT       0x003D
+    int fetestexcept(int excepts);
+    int feclearexcept(int excepts);
 #else
-#include <fenv.h>
+    #include <fenv.h>
 #endif
 
 
@@ -110,137 +111,138 @@
 // math.h
 //
 
-#if defined(__INTEL_COMPILER)
-#include <mathimf.h>
+#if defined( __INTEL_COMPILER )
+    #include <mathimf.h>
 #else
-#include <math.h>
+    #include <math.h>
 #endif
 
 #ifndef M_PI
-#define M_PI 3.14159265358979323846264338327950288
+    #define M_PI 3.14159265358979323846264338327950288
 #endif
 
-#if defined(_MSC_VER)
+#if defined( _MSC_VER )
 
-#ifdef __cplusplus
-extern "C" {
+    #ifdef __cplusplus
+        extern "C" {
+    #endif
+
+    #ifndef NAN
+        #define NAN  (INFINITY - INFINITY)
+    #endif
+
+    #ifndef HUGE_VALF
+        #define HUGE_VALF (float)HUGE_VAL
+    #endif
+
+    #ifndef INFINITY
+        #define INFINITY    (FLT_MAX + FLT_MAX)
+    #endif
+
+    #ifndef isfinite
+        #define isfinite(x) _finite(x)
+    #endif
+
+    #ifndef isnan
+        #define    isnan( x )       ((x) != (x))
+    #endif
+
+    #ifndef isinf
+        #define     isinf( _x)      ((_x) == INFINITY || (_x) == -INFINITY)
+    #endif
+        
+    #if _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
+
+        double rint( double x);
+        float  rintf( float x);
+        long double rintl( long double x);
+
+        float cbrtf( float );
+        double cbrt( double );
+
+        int    ilogb( double x);
+        int    ilogbf (float x);
+        int    ilogbl(long double x);
+
+        double fmax(double x, double y);
+        double fmin(double x, double y);
+        float  fmaxf( float x, float y );
+        float  fminf(float x, float y);
+
+        double      log2(double x);
+        long double log2l(long double x);
+
+        double      exp2(double x);
+        long double exp2l(long double x);
+
+        double      fdim(double x, double y);
+        float       fdimf(float x, float y);
+        long double fdiml(long double x, long double y);
+
+        double      remquo( double x, double y, int *quo);
+        float       remquof( float x, float y, int *quo);
+        long double remquol( long double x, long double y, int *quo);
+
+        long double scalblnl(long double x, long n);
+
+        float hypotf(float x, float y);
+        long double hypotl(long double x, long double y) ;
+        double lgamma(double x);
+        float  lgammaf(float x);
+
+        double trunc(double x);
+        float  truncf(float x);
+
+        double log1p(double x);
+        float  log1pf(float x);
+        long double log1pl(long double x);
+
+        double copysign(double x, double y);
+        float  copysignf(float x, float y);
+        long double copysignl(long double x, long double y);
+
+        long lround(double x);
+        long lroundf(float x);
+        //long lroundl(long double x)
+
+        double round(double x);
+        float  roundf(float x);
+        long double roundl(long double x);
+
+        int cf_signbit(double x);
+        int cf_signbitf(float x);
+
+        // Added in _MSC_VER == 1800 (Visual Studio 2013)
+        #if _MSC_VER < 1800
+                static int signbit(double x) { return  cf_signbit(x); }
+        #endif
+        static int signbitf(float x) { return cf_signbitf(x); }
+
+        long int lrint (double flt);
+        long int lrintf (float flt);
+
+        float   int2float (int32_t ix);
+        int32_t float2int (float   fx);
+
+    #endif // _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
+
+    #if _MSC_VER < 1900 && ( ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 )
+        // These functions appeared in Intel C v13 and Visual Studio 2015
+        float  nanf( const char* str);
+        double nan( const char* str);
+        long double nanl( const char* str);
+    #endif
+
+    #ifdef __cplusplus
+        }
+    #endif
+
 #endif
 
-#ifndef NAN
-#define NAN (INFINITY - INFINITY)
+#if defined( __ANDROID__ )
+    #define log2(X)  (log(X)/log(2))
 #endif
 
-#ifndef HUGE_VALF
-#define HUGE_VALF (float)HUGE_VAL
-#endif
-
-#ifndef INFINITY
-#define INFINITY (FLT_MAX + FLT_MAX)
-#endif
-
-#ifndef isfinite
-#define isfinite(x) _finite(x)
-#endif
-
-#ifndef isnan
-#define isnan(x) ((x) != (x))
-#endif
-
-#ifndef isinf
-#define isinf(_x) ((_x) == INFINITY || (_x) == -INFINITY)
-#endif
-
-#if _MSC_VER < 1900 && !defined(__INTEL_COMPILER)
-
-double rint(double x);
-float rintf(float x);
-long double rintl(long double x);
-
-float cbrtf(float);
-double cbrt(double);
-
-int ilogb(double x);
-int ilogbf(float x);
-int ilogbl(long double x);
-
-double fmax(double x, double y);
-double fmin(double x, double y);
-float fmaxf(float x, float y);
-float fminf(float x, float y);
-
-double log2(double x);
-long double log2l(long double x);
-
-double exp2(double x);
-long double exp2l(long double x);
-
-double fdim(double x, double y);
-float fdimf(float x, float y);
-long double fdiml(long double x, long double y);
-
-double remquo(double x, double y, int* quo);
-float remquof(float x, float y, int* quo);
-long double remquol(long double x, long double y, int* quo);
-
-long double scalblnl(long double x, long n);
-
-float hypotf(float x, float y);
-long double hypotl(long double x, long double y);
-double lgamma(double x);
-float lgammaf(float x);
-
-double trunc(double x);
-float truncf(float x);
-
-double log1p(double x);
-float log1pf(float x);
-long double log1pl(long double x);
-
-double copysign(double x, double y);
-float copysignf(float x, float y);
-long double copysignl(long double x, long double y);
-
-long lround(double x);
-long lroundf(float x);
-// long lroundl(long double x)
-
-double round(double x);
-float roundf(float x);
-long double roundl(long double x);
-
-int cf_signbit(double x);
-int cf_signbitf(float x);
-
-// Added in _MSC_VER == 1800 (Visual Studio 2013)
-#if _MSC_VER < 1800
-static int signbit(double x) { return cf_signbit(x); }
-#endif
-static int signbitf(float x) { return cf_signbitf(x); }
-
-long int lrint(double flt);
-long int lrintf(float flt);
-
-float int2float(int32_t ix);
-int32_t float2int(float fx);
-
-#endif // _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
-
-#if _MSC_VER < 1900 && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER < 1300)
-// These functions appeared in Intel C v13 and Visual Studio 2015
-float nanf(const char* str);
-double nan(const char* str);
-long double nanl(const char* str);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // defined(_MSC_VER)
-
-#if defined(__ANDROID__)
-#define log2(X) (log(X) / log(2))
-#endif
 
 
 //
@@ -248,11 +250,12 @@
 //
 
 #if defined(_MSC_VER)
-// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
-#if _MSC_VER < 1900
-#define snprintf sprintf_s
+	// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
+	#if _MSC_VER < 1900
+		#define snprintf   sprintf_s
+	#endif
 #endif
-#endif // defined(_MSC_VER)
+
 
 
 //
@@ -260,32 +263,35 @@
 //
 
 #if defined(_MSC_VER)
-#define strtok_r strtok_s
+    #define strtok_r strtok_s
 #endif
 
 
+
 //
 // unistd.h
 //
 
-#if defined(_MSC_VER)
-EXTERN_C unsigned int sleep(unsigned int sec);
-EXTERN_C int usleep(int usec);
+#if defined( _MSC_VER )
+    EXTERN_C unsigned int sleep( unsigned int sec );
+    EXTERN_C int usleep( int usec );
 #endif
 
 
+
 //
 // syscall.h
 //
 
-#if defined(__ANDROID__)
-// Android bionic's isn't providing SYS_sysctl wrappers.
-#define SYS__sysctl __NR__sysctl
+#if defined( __ANDROID__ )
+    // Android bionic's isn't providing SYS_sysctl wrappers.
+    #define SYS__sysctl  __NR__sysctl
 #endif
 
 
+
 // Some tests use _malloca which defined in malloc.h.
-#if !defined(__APPLE__)
+#if !defined (__APPLE__)
 #include <malloc.h>
 #endif
 
@@ -294,115 +300,104 @@
 // ???
 //
 
-#if defined(_MSC_VER)
+#if defined( _MSC_VER )
 
-#define MAXPATHLEN _MAX_PATH
+    #define MAXPATHLEN _MAX_PATH
 
-EXTERN_C uint64_t ReadTime(void);
-EXTERN_C double SubtractTime(uint64_t endTime, uint64_t startTime);
+    EXTERN_C uint64_t ReadTime( void );
+    EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime );
 
 /** Returns the number of leading 0-bits in x,
     starting at the most significant bit position.
     If x is 0, the result is undefined.
 */
-EXTERN_C int __builtin_clz(unsigned int pattern);
+    EXTERN_C int __builtin_clz(unsigned int pattern);
 
 #endif
 
 #ifndef MIN
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+    #define MIN(x,y) (((x)<(y))?(x):(y))
 #endif
 #ifndef MAX
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+    #define MAX(x,y) (((x)>(y))?(x):(y))
 #endif
 
 
-/*-----------------------------------------------------------------------------
-   WARNING: DO NOT USE THESE MACROS:
-        MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
+/*
+    ------------------------------------------------------------------------------------------------
+    WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
 
-   This is a typical usage of the macros:
+    This is a typical usage of the macros:
 
-     double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
+        double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
 
-   (taken from math_brute_force/reference_math.c). There are two problems:
+     (taken from math_brute_force/reference_math.c). There are two problems:
 
-     1. There is an error here. On Windows in will produce incorrect result
-        `0x1.5555555555555p+50'.
-        To have a correct result it should be written as:
-           MAKE_HEX_DOUBLE(0x1.5555555555555p-2, 0x15555555555555LL, -54)
-        A proper value of the third argument is not obvious -- sometimes it
-        should be the same as exponent of the first argument, but sometimes
-        not.
+        1.  There is an error here. On Windows in will produce incorrect result
+            `0x1.5555555555555p+50'. To have a correct result it should be written as
+            `MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the
+            third argument is not obvious -- sometimes it should be the same as exponent of the
+            first argument, but sometimes not.
 
-     2. Information is duplicated. It is easy to make a mistake.
+        2.  Information is duplicated. It is easy to make a mistake.
 
-   Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead
-   (see them in the bottom of the file).
------------------------------------------------------------------------------*/
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+    Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file).
+    ------------------------------------------------------------------------------------------------
+*/
+#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
 
-#define MAKE_HEX_FLOAT(x, y, z) ((float)ldexp((float)(y), z))
-#define MAKE_HEX_DOUBLE(x, y, z) ldexp((double)(y), z)
-#define MAKE_HEX_LONG(x, y, z) ((long double)ldexp((long double)(y), z))
+    #define MAKE_HEX_FLOAT(x,y,z)  ((float)ldexp( (float)(y), z))
+    #define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
+    #define MAKE_HEX_LONG(x,y,z)   ((long double) ldexp( (long double)(y), z))
 
 #else
 
 // Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
-#define MAKE_HEX_FLOAT(x, y, z) x
-#define MAKE_HEX_DOUBLE(x, y, z) x
-#define MAKE_HEX_LONG(x, y, z) x
+#define MAKE_HEX_FLOAT(x,y,z) x
+#define MAKE_HEX_DOUBLE(x,y,z) x
+#define MAKE_HEX_LONG(x,y,z) x
 
 #endif
 
 
-/*-----------------------------------------------------------------------------
-   HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type
-   float, double, long double respectively. Arguments:
+/*
+    ------------------------------------------------------------------------------------------------
+    HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long
+    double respectively. Arguments:
 
-      sm    -- sign of number,
-      int   -- integer part of mantissa (without `0x' prefix),
-      fract -- fractional part of mantissa (without decimal point and `L' or
-            `LL' suffixes),
-      se    -- sign of exponent,
-      exp   -- absolute value of (binary) exponent.
+        sm    -- sign of number,
+        int   -- integer part of mantissa (without `0x' prefix),
+        fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes),
+        se    -- sign of exponent,
+        exp   -- absolute value of (binary) exponent.
 
-   Example:
+    Example:
 
-      double yhi = HEX_DBL(+, 1, 5555555555555, -, 2); // 0x1.5555555555555p-2
+        double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2
 
-   Note:
+    Note:
 
-      We have to pass signs as separate arguments because gcc pass negative
-   integer values (e. g. `-2') into a macro as two separate tokens, so
-   `HEX_FLT(1, 0, -2)' produces result `0x1.0p- 2' (note a space between minus
-   and two) which is not a correct floating point literal.
------------------------------------------------------------------------------*/
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-// If compiler does not support hex floating point literals:
-#define HEX_FLT(sm, int, fract, se, exp)                                       \
-    sm ldexpf((float)(0x##int##fract##UL),                                     \
-              se exp + ilogbf((float)0x##int)                                  \
-                  - ilogbf((float)(0x##int##fract##UL)))
-#define HEX_DBL(sm, int, fract, se, exp)                                       \
-    sm ldexp((double)(0x##int##fract##ULL),                                    \
-             se exp + ilogb((double)0x##int)                                   \
-                 - ilogb((double)(0x##int##fract##ULL)))
-#define HEX_LDBL(sm, int, fract, se, exp)                                      \
-    sm ldexpl((long double)(0x##int##fract##ULL),                              \
-              se exp + ilogbl((long double)0x##int)                            \
-                  - ilogbl((long double)(0x##int##fract##ULL)))
+        We have to pass signs as separate arguments because gcc pass negative integer values
+        (e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result
+        `0x1.0p- 2' (note a space between minus and two) which is not a correct floating point
+        literal.
+    ------------------------------------------------------------------------------------------------
+*/
+#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
+    // If compiler does not support hex floating point literals:
+    #define HEX_FLT(  sm, int, fract, se, exp ) sm ldexpf(       (float)( 0x ## int ## fract ## UL  ), se exp + ilogbf(       (float) 0x ## int ) - ilogbf(       ( float )( 0x ## int ## fract ## UL  ) ) )
+    #define HEX_DBL(  sm, int, fract, se, exp ) sm ldexp(       (double)( 0x ## int ## fract ## ULL ), se exp + ilogb(       (double) 0x ## int ) - ilogb(       ( double )( 0x ## int ## fract ## ULL ) ) )
+    #define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) )
 #else
-// If compiler supports hex floating point literals: just concatenate all the
-// parts into a literal.
-#define HEX_FLT(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp##F
-#define HEX_DBL(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp
-#define HEX_LDBL(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp##L
+    // If compiler supports hex floating point literals: just concatenate all the parts into a literal.
+    #define HEX_FLT(  sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F
+    #define HEX_DBL(  sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp
+    #define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L
 #endif
 
 #if defined(__MINGW32__)
-#include <Windows.h>
-#define sleep(sec) Sleep((sec)*1000)
+    #include <Windows.h>
+    #define sleep(sec) Sleep((sec) * 1000)
 #endif
 
 #endif // _COMPAT_H_

diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp
index fc3317c..72fd8cb 100644
--- a/test_common/harness/conversions.cpp
+++ b/test_common/harness/conversions.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -20,101 +20,123 @@
 #include "mt19937.h"
 #include "compat.h"
 
-#if defined(__SSE__) || defined(_MSC_VER)
-#include <xmmintrin.h>
+#if defined( __SSE__ ) || defined (_MSC_VER)
+    #include <xmmintrin.h>
 #endif
-#if defined(__SSE2__) || defined(_MSC_VER)
-#include <emmintrin.h>
+#if defined( __SSE2__ ) || defined (_MSC_VER)
+    #include <emmintrin.h>
 #endif
 
-void print_type_to_string(ExplicitType type, void *data, char *string)
-{
-    switch (type)
-    {
-        case kBool:
-            if (*(char *)data)
-                sprintf(string, "true");
-            else
-                sprintf(string, "false");
+void print_type_to_string(ExplicitType type, void *data, char* string) {
+     switch (type) {
+       case kBool:
+      if (*(char*)data)
+        sprintf(string, "true");
+      else
+        sprintf(string, "false");
             return;
-        case kChar: sprintf(string, "%d", (int)*((cl_char *)data)); return;
-        case kUChar:
-        case kUnsignedChar:
-            sprintf(string, "%u", (int)*((cl_uchar *)data));
-            return;
-        case kShort: sprintf(string, "%d", (int)*((cl_short *)data)); return;
-        case kUShort:
-        case kUnsignedShort:
-            sprintf(string, "%u", (int)*((cl_ushort *)data));
-            return;
-        case kInt: sprintf(string, "%d", *((cl_int *)data)); return;
-        case kUInt:
-        case kUnsignedInt: sprintf(string, "%u", *((cl_uint *)data)); return;
-        case kLong: sprintf(string, "%lld", *((cl_long *)data)); return;
-        case kULong:
-        case kUnsignedLong:
-            sprintf(string, "%llu", *((cl_ulong *)data));
-            return;
-        case kFloat: sprintf(string, "%f", *((cl_float *)data)); return;
-        case kHalf: sprintf(string, "half"); return;
-        case kDouble: sprintf(string, "%g", *((cl_double *)data)); return;
-        default: sprintf(string, "INVALID"); return;
-    }
+    case kChar:
+      sprintf(string, "%d", (int)*((cl_char*)data));
+      return;
+    case kUChar:
+    case kUnsignedChar:
+      sprintf(string, "%u", (int)*((cl_uchar*)data));
+      return;
+    case kShort:
+      sprintf(string, "%d", (int)*((cl_short*)data));
+      return;
+    case kUShort:
+    case kUnsignedShort:
+      sprintf(string, "%u", (int)*((cl_ushort*)data));
+      return;
+    case kInt:
+      sprintf(string, "%d", *((cl_int*)data));
+      return;
+    case kUInt:
+    case kUnsignedInt:
+      sprintf(string, "%u", *((cl_uint*)data));
+      return;
+    case kLong:
+      sprintf(string, "%lld", *((cl_long*)data));
+      return;
+    case kULong:
+    case kUnsignedLong:
+      sprintf(string, "%llu", *((cl_ulong*)data));
+      return;
+    case kFloat:
+      sprintf(string, "%f", *((cl_float*)data));
+      return;
+    case kHalf:
+      sprintf(string, "half");
+      return;
+    case kDouble:
+      sprintf(string, "%g", *((cl_double*)data));
+      return;
+    default:
+      sprintf(string, "INVALID");
+      return;
+  }
+
 }
 
-size_t get_explicit_type_size(ExplicitType type)
+size_t get_explicit_type_size( ExplicitType type )
 {
-    /* Quick method to avoid branching: make sure the following array matches
-     * the Enum order */
-    static size_t sExplicitTypeSizes[] = {
-        sizeof(cl_bool),   sizeof(cl_char),  sizeof(cl_uchar),
-        sizeof(cl_uchar),  sizeof(cl_short), sizeof(cl_ushort),
-        sizeof(cl_ushort), sizeof(cl_int),   sizeof(cl_uint),
-        sizeof(cl_uint),   sizeof(cl_long),  sizeof(cl_ulong),
-        sizeof(cl_ulong),  sizeof(cl_float), sizeof(cl_half),
-        sizeof(cl_double)
-    };
+    /* Quick method to avoid branching: make sure the following array matches the Enum order */
+    static size_t    sExplicitTypeSizes[] = {
+            sizeof( cl_bool ),
+            sizeof( cl_char ),
+            sizeof( cl_uchar ),
+            sizeof( cl_uchar ),
+            sizeof( cl_short ),
+            sizeof( cl_ushort ),
+            sizeof( cl_ushort ),
+            sizeof( cl_int ),
+            sizeof( cl_uint ),
+            sizeof( cl_uint ),
+            sizeof( cl_long ),
+            sizeof( cl_ulong ),
+            sizeof( cl_ulong ),
+            sizeof( cl_float ),
+            sizeof( cl_half ),
+            sizeof( cl_double )
+        };
 
-    return sExplicitTypeSizes[type];
+    return sExplicitTypeSizes[ type ];
 }
 
-const char *get_explicit_type_name(ExplicitType type)
+const char * get_explicit_type_name( ExplicitType type )
 {
-    /* Quick method to avoid branching: make sure the following array matches
-     * the Enum order */
-    static const char *sExplicitTypeNames[] = {
-        "bool",           "char",  "uchar", "unsigned char", "short", "ushort",
-        "unsigned short", "int",   "uint",  "unsigned int",  "long",  "ulong",
-        "unsigned long",  "float", "half",  "double"
-    };
+    /* Quick method to avoid branching: make sure the following array matches the Enum order */
+    static const char *sExplicitTypeNames[] = { "bool", "char", "uchar", "unsigned char", "short", "ushort", "unsigned short", "int",
+                            "uint", "unsigned int", "long", "ulong", "unsigned long", "float", "half", "double" };
 
-    return sExplicitTypeNames[type];
+    return sExplicitTypeNames[ type ];
 }
 
-static long lrintf_clamped(float f);
-static long lrintf_clamped(float f)
+static long lrintf_clamped( float f );
+static long lrintf_clamped( float f )
 {
-    static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23),
-                                    -MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) };
+    static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) };
 
-    if (f >= -(float)LONG_MIN) return LONG_MAX;
+    if( f >= -(float) LONG_MIN )
+        return LONG_MAX;
 
-    if (f <= (float)LONG_MIN) return LONG_MIN;
+    if( f <= (float) LONG_MIN )
+        return LONG_MIN;
 
     // Round fractional values to integer in round towards nearest mode
-    if (fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23))
+    if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) )
     {
         volatile float x = f;
-        float magicVal = magic[f < 0];
+        float magicVal = magic[ f < 0 ];
 
-#if defined(__SSE__) || defined(_WIN32)
-        // Defeat x87 based arithmetic, which cant do FTZ, and will round this
-        // incorrectly
-        __m128 v = _mm_set_ss(x);
-        __m128 m = _mm_set_ss(magicVal);
-        v = _mm_add_ss(v, m);
-        v = _mm_sub_ss(v, m);
-        _mm_store_ss((float *)&x, v);
+#if defined( __SSE__ ) || defined (_WIN32)
+        // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
+        __m128 v = _mm_set_ss( x );
+        __m128 m = _mm_set_ss( magicVal );
+        v = _mm_add_ss( v, m );
+        v = _mm_sub_ss( v, m );
+        _mm_store_ss( (float*) &x, v );
 #else
         x += magicVal;
         x -= magicVal;
@@ -122,39 +144,40 @@
         f = x;
     }
 
-    return (long)f;
+    return (long) f;
 }
 
-static long lrint_clamped(double f);
-static long lrint_clamped(double f)
+static long lrint_clamped( double f );
+static long lrint_clamped( double f )
 {
-    static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52),
-                                     MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
+    static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
 
-    if (sizeof(long) > 4)
+    if( sizeof( long ) > 4 )
     {
-        if (f >= -(double)LONG_MIN) return LONG_MAX;
+        if( f >= -(double) LONG_MIN )
+            return LONG_MAX;
     }
     else
     {
-        if (f >= LONG_MAX) return LONG_MAX;
+        if( f >= LONG_MAX )
+            return LONG_MAX;
     }
 
-    if (f <= (double)LONG_MIN) return LONG_MIN;
+    if( f <= (double) LONG_MIN )
+        return LONG_MIN;
 
     // Round fractional values to integer in round towards nearest mode
-    if (fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52))
+    if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) )
     {
         volatile double x = f;
-        double magicVal = magic[f < 0];
-#if defined(__SSE2__) || (defined(_MSC_VER))
-        // Defeat x87 based arithmetic, which cant do FTZ, and will round this
-        // incorrectly
-        __m128d v = _mm_set_sd(x);
-        __m128d m = _mm_set_sd(magicVal);
-        v = _mm_add_sd(v, m);
-        v = _mm_sub_sd(v, m);
-        _mm_store_sd((double *)&x, v);
+        double magicVal = magic[ f < 0 ];
+#if defined( __SSE2__ ) || (defined (_MSC_VER))
+        // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
+        __m128d v = _mm_set_sd( x );
+        __m128d m = _mm_set_sd( magicVal );
+        v = _mm_add_sd( v, m );
+        v = _mm_sub_sd( v, m );
+        _mm_store_sd( (double*) &x, v );
 #else
         x += magicVal;
         x -= magicVal;
@@ -162,236 +185,195 @@
         f = x;
     }
 
-    return (long)f;
+    return (long) f;
 }
 
 
 typedef cl_long Long;
 typedef cl_ulong ULong;
 
-static ULong sUpperLimits[kNumExplicitTypes] = {
-    0,
-    127,
-    255,
-    255,
-    32767,
-    65535,
-    65535,
-    0x7fffffffLL,
-    0xffffffffLL,
-    0xffffffffLL,
-    0x7fffffffffffffffLL,
-    0xffffffffffffffffLL,
-    0xffffffffffffffffLL,
-    0,
-    0
-}; // Last two values aren't stored here
+static ULong sUpperLimits[ kNumExplicitTypes ] =
+    {
+        0,
+        127, 255, 255,
+        32767, 65535, 65535,
+        0x7fffffffLL, 0xffffffffLL, 0xffffffffLL,
+        0x7fffffffffffffffLL, 0xffffffffffffffffLL, 0xffffffffffffffffLL,
+        0, 0 };    // Last two values aren't stored here
 
-static Long sLowerLimits[kNumExplicitTypes] = {
-    -1,
-    -128,
-    0,
-    0,
-    -32768,
-    0,
-    0,
-    (Long)0xffffffff80000000LL,
-    0,
-    0,
-    (Long)0x8000000000000000LL,
-    0,
-    0,
-    0,
-    0
-}; // Last two values aren't stored here
+static Long sLowerLimits[ kNumExplicitTypes ] =
+    {
+        -1,
+        -128, 0, 0,
+        -32768, 0, 0,
+        0xffffffff80000000LL, 0, 0,
+        0x8000000000000000LL, 0, 0,
+        0, 0 };    // Last two values aren't stored here
 
-#define BOOL_CASE(inType)                                                      \
-    case kBool:                                                                \
-        boolPtr = (bool *)outRaw;                                              \
-        *boolPtr = (*inType##Ptr) != 0 ? true : false;                         \
+#define BOOL_CASE(inType) \
+        case kBool:    \
+            boolPtr = (bool *)outRaw; \
+            *boolPtr = ( *inType##Ptr ) != 0 ? true : false; \
+            break;
+
+#define SIMPLE_CAST_CASE(inType,outEnum,outType) \
+        case outEnum:                                \
+            outType##Ptr = (outType *)outRaw;        \
+            *outType##Ptr = (outType)(*inType##Ptr);    \
+            break;
+
+// Sadly, the ULong downcasting cases need a separate #define to get rid of signed/unsigned comparison warnings
+#define DOWN_CAST_CASE(inType,outEnum,outType,sat) \
+        case outEnum:                                \
+            outType##Ptr = (outType *)outRaw;        \
+            if( sat )                                \
+            {                                        \
+                if( ( sLowerLimits[outEnum] < 0 && *inType##Ptr > (Long)sUpperLimits[outEnum] ) || ( sLowerLimits[outEnum] == 0 && (ULong)*inType##Ptr > sUpperLimits[outEnum] ) )\
+                    *outType##Ptr = (outType)sUpperLimits[outEnum];\
+                else if( *inType##Ptr < sLowerLimits[outEnum] )\
+                    *outType##Ptr = (outType)sLowerLimits[outEnum]; \
+                else                                            \
+                    *outType##Ptr = (outType)*inType##Ptr;    \
+            } else {                                \
+                *outType##Ptr = (outType)( *inType##Ptr & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \
+            }                                        \
+            break;
+
+#define U_DOWN_CAST_CASE(inType,outEnum,outType,sat) \
+        case outEnum:                                \
+            outType##Ptr = (outType *)outRaw;        \
+            if( sat )                                \
+            {                                        \
+                if( (ULong)*inType##Ptr > sUpperLimits[outEnum] )\
+                    *outType##Ptr = (outType)sUpperLimits[outEnum];\
+                else                                            \
+                    *outType##Ptr = (outType)*inType##Ptr;    \
+            } else {                                \
+                *outType##Ptr = (outType)( *inType##Ptr & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \
+            }                                        \
+            break;
+
+#define TO_FLOAT_CASE(inType)                \
+        case kFloat:                        \
+            floatPtr = (float *)outRaw;        \
+            *floatPtr = (float)(*inType##Ptr);    \
+            break;
+#define TO_DOUBLE_CASE(inType)                \
+        case kDouble:                        \
+            doublePtr = (double *)outRaw;        \
+            *doublePtr = (double)(*inType##Ptr);    \
+            break;
+
+
+/* Note: we use lrintf here to force the rounding instead of whatever the processor's current rounding mode is */
+#define FLOAT_ROUND_TO_NEAREST_CASE(outEnum,outType)    \
+        case outEnum:                                    \
+            outType##Ptr = (outType *)outRaw;            \
+            *outType##Ptr = (outType)lrintf_clamped( *floatPtr );    \
+            break;
+
+#define FLOAT_ROUND_CASE(outEnum,outType,rounding,sat)    \
+        case outEnum:                                    \
+        {                                                \
+            outType##Ptr = (outType *)outRaw;            \
+            /* Get the tens digit */                    \
+            Long wholeValue = (Long)*floatPtr;\
+            float largeRemainder = ( *floatPtr - (float)wholeValue ) * 10.f; \
+            /* What do we do based on that? */                \
+            if( rounding == kRoundToEven )                    \
+            {                                                \
+                if( wholeValue & 1LL )    /*between 1 and 1.99 */    \
+                    wholeValue += 1LL;    /* round up to even */  \
+            }                                                \
+            else if( rounding == kRoundToZero )                \
+            {                                                \
+                /* Nothing to do, round-to-zero is what C casting does */                            \
+            }                                                \
+            else if( rounding == kRoundToPosInf )            \
+            {                                                \
+                /* Only positive numbers are wrong */        \
+                if( largeRemainder != 0.f && wholeValue >= 0 )    \
+                    wholeValue++;                            \
+            }                                                \
+            else if( rounding == kRoundToNegInf )            \
+            {                                                \
+                /* Only negative numbers are off */            \
+                if( largeRemainder != 0.f && wholeValue < 0 ) \
+                    wholeValue--;                            \
+            }                                                \
+            else                                            \
+            {   /* Default is round-to-nearest */            \
+                wholeValue = (Long)lrintf_clamped( *floatPtr );    \
+            }                                                \
+            /* Now apply saturation rules */                \
+            if( sat )                                \
+            {                                        \
+                if( ( sLowerLimits[outEnum] < 0 && wholeValue > (Long)sUpperLimits[outEnum] ) || ( sLowerLimits[outEnum] == 0 && (ULong)wholeValue > sUpperLimits[outEnum] ) )\
+                    *outType##Ptr = (outType)sUpperLimits[outEnum];\
+                else if( wholeValue < sLowerLimits[outEnum] )\
+                    *outType##Ptr = (outType)sLowerLimits[outEnum]; \
+                else                                            \
+                    *outType##Ptr = (outType)wholeValue;    \
+            } else {                                \
+                *outType##Ptr = (outType)( wholeValue & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \
+            }                                        \
+        }                \
         break;
 
-#define SIMPLE_CAST_CASE(inType, outEnum, outType)                             \
-    case outEnum:                                                              \
-        outType##Ptr = (outType *)outRaw;                                      \
-        *outType##Ptr = (outType)(*inType##Ptr);                               \
+#define DOUBLE_ROUND_CASE(outEnum,outType,rounding,sat)    \
+        case outEnum:                                    \
+        {                                                \
+            outType##Ptr = (outType *)outRaw;            \
+            /* Get the tens digit */                    \
+            Long wholeValue = (Long)*doublePtr;\
+            double largeRemainder = ( *doublePtr - (double)wholeValue ) * 10.0; \
+            /* What do we do based on that? */                \
+            if( rounding == kRoundToEven )                    \
+            {                                                \
+                if( wholeValue & 1LL )    /*between 1 and 1.99 */    \
+                    wholeValue += 1LL;    /* round up to even */  \
+            }                                                \
+            else if( rounding == kRoundToZero )                \
+            {                                                \
+                /* Nothing to do, round-to-zero is what C casting does */                            \
+            }                                                \
+            else if( rounding == kRoundToPosInf )            \
+            {                                                \
+                /* Only positive numbers are wrong */        \
+                if( largeRemainder != 0.0 && wholeValue >= 0 )    \
+                    wholeValue++;                            \
+            }                                                \
+            else if( rounding == kRoundToNegInf )            \
+            {                                                \
+                /* Only negative numbers are off */            \
+                if( largeRemainder != 0.0 && wholeValue < 0 ) \
+                    wholeValue--;                            \
+            }                                                \
+            else                                            \
+            {   /* Default is round-to-nearest */            \
+                wholeValue = (Long)lrint_clamped( *doublePtr );    \
+            }                                                \
+            /* Now apply saturation rules */                \
+            if( sat )                                \
+            {                                        \
+                if( ( sLowerLimits[outEnum] < 0 && wholeValue > (Long)sUpperLimits[outEnum] ) || ( sLowerLimits[outEnum] == 0 && (ULong)wholeValue > sUpperLimits[outEnum] ) )\
+                    *outType##Ptr = (outType)sUpperLimits[outEnum];\
+                else if( wholeValue < sLowerLimits[outEnum] )\
+                    *outType##Ptr = (outType)sLowerLimits[outEnum]; \
+                else                                            \
+                    *outType##Ptr = (outType)wholeValue;    \
+            } else {                                \
+                *outType##Ptr = (outType)( wholeValue & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \
+            }                                        \
+        }                \
         break;
 
-// Sadly, the ULong downcasting cases need a separate #define to get rid of
-// signed/unsigned comparison warnings
-#define DOWN_CAST_CASE(inType, outEnum, outType, sat)                          \
-    case outEnum:                                                              \
-        outType##Ptr = (outType *)outRaw;                                      \
-        if (sat)                                                               \
-        {                                                                      \
-            if ((sLowerLimits[outEnum] < 0                                     \
-                 && *inType##Ptr > (Long)sUpperLimits[outEnum])                \
-                || (sLowerLimits[outEnum] == 0                                 \
-                    && (ULong)*inType##Ptr > sUpperLimits[outEnum]))           \
-                *outType##Ptr = (outType)sUpperLimits[outEnum];                \
-            else if (*inType##Ptr < sLowerLimits[outEnum])                     \
-                *outType##Ptr = (outType)sLowerLimits[outEnum];                \
-            else                                                               \
-                *outType##Ptr = (outType)*inType##Ptr;                         \
-        }                                                                      \
-        else                                                                   \
-        {                                                                      \
-            *outType##Ptr = (outType)(                                         \
-                *inType##Ptr                                                   \
-                & (0xffffffffffffffffLL >> (64 - (sizeof(outType) * 8))));     \
-        }                                                                      \
-        break;
-
-#define U_DOWN_CAST_CASE(inType, outEnum, outType, sat)                        \
-    case outEnum:                                                              \
-        outType##Ptr = (outType *)outRaw;                                      \
-        if (sat)                                                               \
-        {                                                                      \
-            if ((ULong)*inType##Ptr > sUpperLimits[outEnum])                   \
-                *outType##Ptr = (outType)sUpperLimits[outEnum];                \
-            else                                                               \
-                *outType##Ptr = (outType)*inType##Ptr;                         \
-        }                                                                      \
-        else                                                                   \
-        {                                                                      \
-            *outType##Ptr = (outType)(                                         \
-                *inType##Ptr                                                   \
-                & (0xffffffffffffffffLL >> (64 - (sizeof(outType) * 8))));     \
-        }                                                                      \
-        break;
-
-#define TO_FLOAT_CASE(inType)                                                  \
-    case kFloat:                                                               \
-        floatPtr = (float *)outRaw;                                            \
-        *floatPtr = (float)(*inType##Ptr);                                     \
-        break;
-#define TO_DOUBLE_CASE(inType)                                                 \
-    case kDouble:                                                              \
-        doublePtr = (double *)outRaw;                                          \
-        *doublePtr = (double)(*inType##Ptr);                                   \
-        break;
-
-
-/* Note: we use lrintf here to force the rounding instead of whatever the
- * processor's current rounding mode is */
-#define FLOAT_ROUND_TO_NEAREST_CASE(outEnum, outType)                          \
-    case outEnum:                                                              \
-        outType##Ptr = (outType *)outRaw;                                      \
-        *outType##Ptr = (outType)lrintf_clamped(*floatPtr);                    \
-        break;
-
-#define FLOAT_ROUND_CASE(outEnum, outType, rounding, sat)                      \
-    case outEnum: {                                                            \
-        outType##Ptr = (outType *)outRaw;                                      \
-        /* Get the tens digit */                                               \
-        Long wholeValue = (Long)*floatPtr;                                     \
-        float largeRemainder = (*floatPtr - (float)wholeValue) * 10.f;         \
-        /* What do we do based on that? */                                     \
-        if (rounding == kRoundToEven)                                          \
-        {                                                                      \
-            if (wholeValue & 1LL) /*between 1 and 1.99 */                      \
-                wholeValue += 1LL; /* round up to even */                      \
-        }                                                                      \
-        else if (rounding == kRoundToZero)                                     \
-        {                                                                      \
-            /* Nothing to do, round-to-zero is what C casting does */          \
-        }                                                                      \
-        else if (rounding == kRoundToPosInf)                                   \
-        {                                                                      \
-            /* Only positive numbers are wrong */                              \
-            if (largeRemainder != 0.f && wholeValue >= 0) wholeValue++;        \
-        }                                                                      \
-        else if (rounding == kRoundToNegInf)                                   \
-        {                                                                      \
-            /* Only negative numbers are off */                                \
-            if (largeRemainder != 0.f && wholeValue < 0) wholeValue--;         \
-        }                                                                      \
-        else                                                                   \
-        { /* Default is round-to-nearest */                                    \
-            wholeValue = (Long)lrintf_clamped(*floatPtr);                      \
-        }                                                                      \
-        /* Now apply saturation rules */                                       \
-        if (sat)                                                               \
-        {                                                                      \
-            if ((sLowerLimits[outEnum] < 0                                     \
-                 && wholeValue > (Long)sUpperLimits[outEnum])                  \
-                || (sLowerLimits[outEnum] == 0                                 \
-                    && (ULong)wholeValue > sUpperLimits[outEnum]))             \
-                *outType##Ptr = (outType)sUpperLimits[outEnum];                \
-            else if (wholeValue < sLowerLimits[outEnum])                       \
-                *outType##Ptr = (outType)sLowerLimits[outEnum];                \
-            else                                                               \
-                *outType##Ptr = (outType)wholeValue;                           \
-        }                                                                      \
-        else                                                                   \
-        {                                                                      \
-            *outType##Ptr = (outType)(                                         \
-                wholeValue                                                     \
-                & (0xffffffffffffffffLL >> (64 - (sizeof(outType) * 8))));     \
-        }                                                                      \
-    }                                                                          \
-    break;
-
-#define DOUBLE_ROUND_CASE(outEnum, outType, rounding, sat)                     \
-    case outEnum: {                                                            \
-        outType##Ptr = (outType *)outRaw;                                      \
-        /* Get the tens digit */                                               \
-        Long wholeValue = (Long)*doublePtr;                                    \
-        double largeRemainder = (*doublePtr - (double)wholeValue) * 10.0;      \
-        /* What do we do based on that? */                                     \
-        if (rounding == kRoundToEven)                                          \
-        {                                                                      \
-            if (wholeValue & 1LL) /*between 1 and 1.99 */                      \
-                wholeValue += 1LL; /* round up to even */                      \
-        }                                                                      \
-        else if (rounding == kRoundToZero)                                     \
-        {                                                                      \
-            /* Nothing to do, round-to-zero is what C casting does */          \
-        }                                                                      \
-        else if (rounding == kRoundToPosInf)                                   \
-        {                                                                      \
-            /* Only positive numbers are wrong */                              \
-            if (largeRemainder != 0.0 && wholeValue >= 0) wholeValue++;        \
-        }                                                                      \
-        else if (rounding == kRoundToNegInf)                                   \
-        {                                                                      \
-            /* Only negative numbers are off */                                \
-            if (largeRemainder != 0.0 && wholeValue < 0) wholeValue--;         \
-        }                                                                      \
-        else                                                                   \
-        { /* Default is round-to-nearest */                                    \
-            wholeValue = (Long)lrint_clamped(*doublePtr);                      \
-        }                                                                      \
-        /* Now apply saturation rules */                                       \
-        if (sat)                                                               \
-        {                                                                      \
-            if ((sLowerLimits[outEnum] < 0                                     \
-                 && wholeValue > (Long)sUpperLimits[outEnum])                  \
-                || (sLowerLimits[outEnum] == 0                                 \
-                    && (ULong)wholeValue > sUpperLimits[outEnum]))             \
-                *outType##Ptr = (outType)sUpperLimits[outEnum];                \
-            else if (wholeValue < sLowerLimits[outEnum])                       \
-                *outType##Ptr = (outType)sLowerLimits[outEnum];                \
-            else                                                               \
-                *outType##Ptr = (outType)wholeValue;                           \
-        }                                                                      \
-        else                                                                   \
-        {                                                                      \
-            *outType##Ptr = (outType)(                                         \
-                wholeValue                                                     \
-                & (0xffffffffffffffffLL >> (64 - (sizeof(outType) * 8))));     \
-        }                                                                      \
-    }                                                                          \
-    break;
-
 typedef unsigned char uchar;
 typedef unsigned short ushort;
 typedef unsigned int uint;
 typedef unsigned long ulong;
 
-void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
-                            bool saturate, RoundingType roundType,
-                            ExplicitType outType)
+void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType )
 {
     bool *boolPtr;
     char *charPtr;
@@ -406,14 +388,14 @@
     double *doublePtr;
 
 
-    switch (inType)
+    switch( inType )
     {
         case kBool:
             boolPtr = (bool *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 case kBool:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
                 case kChar:
@@ -428,482 +410,464 @@
                 case kLong:
                 case kULong:
                 case kUnsignedLong:
-                    memset(outRaw, *boolPtr ? 0xff : 0,
-                           get_explicit_type_size(outType));
+                    memset( outRaw, *boolPtr ? 0xff : 0, get_explicit_type_size( outType ) );
                     break;
 
                 case kFloat:
                     floatPtr = (float *)outRaw;
-                    *floatPtr = (*boolPtr) ? -1.f : 0.f;
+                    *floatPtr = ( *boolPtr ) ? -1.f : 0.f;
                     break;
                 case kDouble:
                     doublePtr = (double *)outRaw;
-                    *doublePtr = (*boolPtr) ? -1.0 : 0.0;
+                    *doublePtr = ( *boolPtr ) ? -1.0 : 0.0;
                     break;
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kChar:
             charPtr = (char *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(char)
 
                 case kChar:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    DOWN_CAST_CASE(char, kUChar, uchar, saturate)
-                    SIMPLE_CAST_CASE(char, kUnsignedChar, uchar)
-                    SIMPLE_CAST_CASE(char, kShort, short)
-                    SIMPLE_CAST_CASE(char, kUShort, ushort)
-                    SIMPLE_CAST_CASE(char, kUnsignedShort, ushort)
-                    SIMPLE_CAST_CASE(char, kInt, int)
-                    SIMPLE_CAST_CASE(char, kUInt, uint)
-                    SIMPLE_CAST_CASE(char, kUnsignedInt, uint)
-                    SIMPLE_CAST_CASE(char, kLong, Long)
-                    SIMPLE_CAST_CASE(char, kULong, ULong)
-                    SIMPLE_CAST_CASE(char, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(char,kUChar,uchar,saturate)
+                SIMPLE_CAST_CASE(char,kUnsignedChar,uchar)
+                SIMPLE_CAST_CASE(char,kShort,short)
+                SIMPLE_CAST_CASE(char,kUShort,ushort)
+                SIMPLE_CAST_CASE(char,kUnsignedShort,ushort)
+                SIMPLE_CAST_CASE(char,kInt,int)
+                SIMPLE_CAST_CASE(char,kUInt,uint)
+                SIMPLE_CAST_CASE(char,kUnsignedInt,uint)
+                SIMPLE_CAST_CASE(char,kLong,Long)
+                SIMPLE_CAST_CASE(char,kULong,ULong)
+                SIMPLE_CAST_CASE(char,kUnsignedLong,ULong)
 
-                    TO_FLOAT_CASE(char)
-                    TO_DOUBLE_CASE(char)
+                TO_FLOAT_CASE(char)
+                TO_DOUBLE_CASE(char)
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kUChar:
             ucharPtr = (uchar *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(uchar)
 
                 case kUChar:
                 case kUnsignedChar:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    DOWN_CAST_CASE(uchar, kChar, char, saturate)
-                    SIMPLE_CAST_CASE(uchar, kShort, short)
-                    SIMPLE_CAST_CASE(uchar, kUShort, ushort)
-                    SIMPLE_CAST_CASE(uchar, kUnsignedShort, ushort)
-                    SIMPLE_CAST_CASE(uchar, kInt, int)
-                    SIMPLE_CAST_CASE(uchar, kUInt, uint)
-                    SIMPLE_CAST_CASE(uchar, kUnsignedInt, uint)
-                    SIMPLE_CAST_CASE(uchar, kLong, Long)
-                    SIMPLE_CAST_CASE(uchar, kULong, ULong)
-                    SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(uchar,kChar,char,saturate)
+                SIMPLE_CAST_CASE(uchar,kShort,short)
+                SIMPLE_CAST_CASE(uchar,kUShort,ushort)
+                SIMPLE_CAST_CASE(uchar,kUnsignedShort,ushort)
+                SIMPLE_CAST_CASE(uchar,kInt,int)
+                SIMPLE_CAST_CASE(uchar,kUInt,uint)
+                SIMPLE_CAST_CASE(uchar,kUnsignedInt,uint)
+                SIMPLE_CAST_CASE(uchar,kLong,Long)
+                SIMPLE_CAST_CASE(uchar,kULong,ULong)
+                SIMPLE_CAST_CASE(uchar,kUnsignedLong,ULong)
 
-                    TO_FLOAT_CASE(uchar)
-                    TO_DOUBLE_CASE(uchar)
+                TO_FLOAT_CASE(uchar)
+                TO_DOUBLE_CASE(uchar)
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kUnsignedChar:
             ucharPtr = (uchar *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(uchar)
 
                 case kUChar:
                 case kUnsignedChar:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    DOWN_CAST_CASE(uchar, kChar, char, saturate)
-                    SIMPLE_CAST_CASE(uchar, kShort, short)
-                    SIMPLE_CAST_CASE(uchar, kUShort, ushort)
-                    SIMPLE_CAST_CASE(uchar, kUnsignedShort, ushort)
-                    SIMPLE_CAST_CASE(uchar, kInt, int)
-                    SIMPLE_CAST_CASE(uchar, kUInt, uint)
-                    SIMPLE_CAST_CASE(uchar, kUnsignedInt, uint)
-                    SIMPLE_CAST_CASE(uchar, kLong, Long)
-                    SIMPLE_CAST_CASE(uchar, kULong, ULong)
-                    SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(uchar,kChar,char,saturate)
+                SIMPLE_CAST_CASE(uchar,kShort,short)
+                SIMPLE_CAST_CASE(uchar,kUShort,ushort)
+                SIMPLE_CAST_CASE(uchar,kUnsignedShort,ushort)
+                SIMPLE_CAST_CASE(uchar,kInt,int)
+                SIMPLE_CAST_CASE(uchar,kUInt,uint)
+                SIMPLE_CAST_CASE(uchar,kUnsignedInt,uint)
+                SIMPLE_CAST_CASE(uchar,kLong,Long)
+                SIMPLE_CAST_CASE(uchar,kULong,ULong)
+                SIMPLE_CAST_CASE(uchar,kUnsignedLong,ULong)
 
-                    TO_FLOAT_CASE(uchar)
-                    TO_DOUBLE_CASE(uchar)
+                TO_FLOAT_CASE(uchar)
+                TO_DOUBLE_CASE(uchar)
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kShort:
             shortPtr = (short *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(short)
 
                 case kShort:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    DOWN_CAST_CASE(short, kChar, char, saturate)
-                    DOWN_CAST_CASE(short, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(short, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(short, kUShort, ushort, saturate)
-                    DOWN_CAST_CASE(short, kUnsignedShort, ushort, saturate)
-                    SIMPLE_CAST_CASE(short, kInt, int)
-                    SIMPLE_CAST_CASE(short, kUInt, uint)
-                    SIMPLE_CAST_CASE(short, kUnsignedInt, uint)
-                    SIMPLE_CAST_CASE(short, kLong, Long)
-                    SIMPLE_CAST_CASE(short, kULong, ULong)
-                    SIMPLE_CAST_CASE(short, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(short,kChar,char,saturate)
+                DOWN_CAST_CASE(short,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(short,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(short,kUShort,ushort,saturate)
+                DOWN_CAST_CASE(short,kUnsignedShort,ushort,saturate)
+                SIMPLE_CAST_CASE(short,kInt,int)
+                SIMPLE_CAST_CASE(short,kUInt,uint)
+                SIMPLE_CAST_CASE(short,kUnsignedInt,uint)
+                SIMPLE_CAST_CASE(short,kLong,Long)
+                SIMPLE_CAST_CASE(short,kULong,ULong)
+                SIMPLE_CAST_CASE(short,kUnsignedLong,ULong)
 
-                    TO_FLOAT_CASE(short)
-                    TO_DOUBLE_CASE(short)
+                TO_FLOAT_CASE(short)
+                TO_DOUBLE_CASE(short)
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kUShort:
             ushortPtr = (ushort *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(ushort)
 
                 case kUShort:
                 case kUnsignedShort:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    DOWN_CAST_CASE(ushort, kChar, char, saturate)
-                    DOWN_CAST_CASE(ushort, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(ushort, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(ushort, kShort, short, saturate)
-                    SIMPLE_CAST_CASE(ushort, kInt, int)
-                    SIMPLE_CAST_CASE(ushort, kUInt, uint)
-                    SIMPLE_CAST_CASE(ushort, kUnsignedInt, uint)
-                    SIMPLE_CAST_CASE(ushort, kLong, Long)
-                    SIMPLE_CAST_CASE(ushort, kULong, ULong)
-                    SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(ushort,kChar,char,saturate)
+                DOWN_CAST_CASE(ushort,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(ushort,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(ushort,kShort,short,saturate)
+                SIMPLE_CAST_CASE(ushort,kInt,int)
+                SIMPLE_CAST_CASE(ushort,kUInt,uint)
+                SIMPLE_CAST_CASE(ushort,kUnsignedInt,uint)
+                SIMPLE_CAST_CASE(ushort,kLong,Long)
+                SIMPLE_CAST_CASE(ushort,kULong,ULong)
+                SIMPLE_CAST_CASE(ushort,kUnsignedLong,ULong)
 
-                    TO_FLOAT_CASE(ushort)
-                    TO_DOUBLE_CASE(ushort)
+                TO_FLOAT_CASE(ushort)
+                TO_DOUBLE_CASE(ushort)
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kUnsignedShort:
             ushortPtr = (ushort *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(ushort)
 
                 case kUShort:
                 case kUnsignedShort:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    DOWN_CAST_CASE(ushort, kChar, char, saturate)
-                    DOWN_CAST_CASE(ushort, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(ushort, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(ushort, kShort, short, saturate)
-                    SIMPLE_CAST_CASE(ushort, kInt, int)
-                    SIMPLE_CAST_CASE(ushort, kUInt, uint)
-                    SIMPLE_CAST_CASE(ushort, kUnsignedInt, uint)
-                    SIMPLE_CAST_CASE(ushort, kLong, Long)
-                    SIMPLE_CAST_CASE(ushort, kULong, ULong)
-                    SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(ushort,kChar,char,saturate)
+                DOWN_CAST_CASE(ushort,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(ushort,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(ushort,kShort,short,saturate)
+                SIMPLE_CAST_CASE(ushort,kInt,int)
+                SIMPLE_CAST_CASE(ushort,kUInt,uint)
+                SIMPLE_CAST_CASE(ushort,kUnsignedInt,uint)
+                SIMPLE_CAST_CASE(ushort,kLong,Long)
+                SIMPLE_CAST_CASE(ushort,kULong,ULong)
+                SIMPLE_CAST_CASE(ushort,kUnsignedLong,ULong)
 
-                    TO_FLOAT_CASE(ushort)
-                    TO_DOUBLE_CASE(ushort)
+                TO_FLOAT_CASE(ushort)
+                TO_DOUBLE_CASE(ushort)
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kInt:
             intPtr = (int *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(int)
 
                 case kInt:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    DOWN_CAST_CASE(int, kChar, char, saturate)
-                    DOWN_CAST_CASE(int, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(int, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(int, kShort, short, saturate)
-                    DOWN_CAST_CASE(int, kUShort, ushort, saturate)
-                    DOWN_CAST_CASE(int, kUnsignedShort, ushort, saturate)
-                    DOWN_CAST_CASE(int, kUInt, uint, saturate)
-                    DOWN_CAST_CASE(int, kUnsignedInt, uint, saturate)
-                    SIMPLE_CAST_CASE(int, kLong, Long)
-                    SIMPLE_CAST_CASE(int, kULong, ULong)
-                    SIMPLE_CAST_CASE(int, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(int,kChar,char,saturate)
+                DOWN_CAST_CASE(int,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(int,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(int,kShort,short,saturate)
+                DOWN_CAST_CASE(int,kUShort,ushort,saturate)
+                DOWN_CAST_CASE(int,kUnsignedShort,ushort,saturate)
+                DOWN_CAST_CASE(int,kUInt,uint,saturate)
+                DOWN_CAST_CASE(int,kUnsignedInt,uint,saturate)
+                SIMPLE_CAST_CASE(int,kLong,Long)
+                SIMPLE_CAST_CASE(int,kULong,ULong)
+                SIMPLE_CAST_CASE(int,kUnsignedLong,ULong)
 
-                    TO_FLOAT_CASE(int)
-                    TO_DOUBLE_CASE(int)
+                TO_FLOAT_CASE(int)
+                TO_DOUBLE_CASE(int)
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kUInt:
             uintPtr = (uint *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(uint)
 
                 case kUInt:
                 case kUnsignedInt:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    DOWN_CAST_CASE(uint, kChar, char, saturate)
-                    DOWN_CAST_CASE(uint, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(uint, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(uint, kShort, short, saturate)
-                    DOWN_CAST_CASE(uint, kUShort, ushort, saturate)
-                    DOWN_CAST_CASE(uint, kUnsignedShort, ushort, saturate)
-                    DOWN_CAST_CASE(uint, kInt, int, saturate)
-                    SIMPLE_CAST_CASE(uint, kLong, Long)
-                    SIMPLE_CAST_CASE(uint, kULong, ULong)
-                    SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(uint,kChar,char,saturate)
+                DOWN_CAST_CASE(uint,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(uint,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(uint,kShort,short,saturate)
+                DOWN_CAST_CASE(uint,kUShort,ushort,saturate)
+                DOWN_CAST_CASE(uint,kUnsignedShort,ushort,saturate)
+                DOWN_CAST_CASE(uint,kInt,int,saturate)
+                SIMPLE_CAST_CASE(uint,kLong,Long)
+                SIMPLE_CAST_CASE(uint,kULong,ULong)
+                SIMPLE_CAST_CASE(uint,kUnsignedLong,ULong)
 
-                    TO_FLOAT_CASE(uint)
-                    TO_DOUBLE_CASE(uint)
+                TO_FLOAT_CASE(uint)
+                TO_DOUBLE_CASE(uint)
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kUnsignedInt:
             uintPtr = (uint *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(uint)
 
                 case kUInt:
                 case kUnsignedInt:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    DOWN_CAST_CASE(uint, kChar, char, saturate)
-                    DOWN_CAST_CASE(uint, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(uint, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(uint, kShort, short, saturate)
-                    DOWN_CAST_CASE(uint, kUShort, ushort, saturate)
-                    DOWN_CAST_CASE(uint, kUnsignedShort, ushort, saturate)
-                    DOWN_CAST_CASE(uint, kInt, int, saturate)
-                    SIMPLE_CAST_CASE(uint, kLong, Long)
-                    SIMPLE_CAST_CASE(uint, kULong, ULong)
-                    SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(uint,kChar,char,saturate)
+                DOWN_CAST_CASE(uint,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(uint,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(uint,kShort,short,saturate)
+                DOWN_CAST_CASE(uint,kUShort,ushort,saturate)
+                DOWN_CAST_CASE(uint,kUnsignedShort,ushort,saturate)
+                DOWN_CAST_CASE(uint,kInt,int,saturate)
+                SIMPLE_CAST_CASE(uint,kLong,Long)
+                SIMPLE_CAST_CASE(uint,kULong,ULong)
+                SIMPLE_CAST_CASE(uint,kUnsignedLong,ULong)
 
-                    TO_FLOAT_CASE(uint)
-                    TO_DOUBLE_CASE(uint)
+                TO_FLOAT_CASE(uint)
+                TO_DOUBLE_CASE(uint)
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kLong:
             LongPtr = (Long *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(Long)
 
                 case kLong:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    DOWN_CAST_CASE(Long, kChar, char, saturate)
-                    DOWN_CAST_CASE(Long, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(Long, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(Long, kShort, short, saturate)
-                    DOWN_CAST_CASE(Long, kUShort, ushort, saturate)
-                    DOWN_CAST_CASE(Long, kUnsignedShort, ushort, saturate)
-                    DOWN_CAST_CASE(Long, kInt, int, saturate)
-                    DOWN_CAST_CASE(Long, kUInt, uint, saturate)
-                    DOWN_CAST_CASE(Long, kUnsignedInt, uint, saturate)
-                    DOWN_CAST_CASE(Long, kULong, ULong, saturate)
-                    DOWN_CAST_CASE(Long, kUnsignedLong, ULong, saturate)
+                DOWN_CAST_CASE(Long,kChar,char,saturate)
+                DOWN_CAST_CASE(Long,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(Long,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(Long,kShort,short,saturate)
+                DOWN_CAST_CASE(Long,kUShort,ushort,saturate)
+                DOWN_CAST_CASE(Long,kUnsignedShort,ushort,saturate)
+                DOWN_CAST_CASE(Long,kInt,int,saturate)
+                DOWN_CAST_CASE(Long,kUInt,uint,saturate)
+                DOWN_CAST_CASE(Long,kUnsignedInt,uint,saturate)
+                DOWN_CAST_CASE(Long,kULong,ULong,saturate)
+                DOWN_CAST_CASE(Long,kUnsignedLong,ULong,saturate)
 
-                    TO_FLOAT_CASE(Long)
-                    TO_DOUBLE_CASE(Long)
+                TO_FLOAT_CASE(Long)
+                TO_DOUBLE_CASE(Long)
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kULong:
             ULongPtr = (ULong *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(ULong)
 
                 case kUnsignedLong:
                 case kULong:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    U_DOWN_CAST_CASE(ULong, kChar, char, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUChar, uchar, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUnsignedChar, uchar, saturate)
-                    U_DOWN_CAST_CASE(ULong, kShort, short, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUShort, ushort, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUnsignedShort, ushort, saturate)
-                    U_DOWN_CAST_CASE(ULong, kInt, int, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUInt, uint, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate)
-                    U_DOWN_CAST_CASE(ULong, kLong, Long, saturate)
+                U_DOWN_CAST_CASE(ULong,kChar,char,saturate)
+                U_DOWN_CAST_CASE(ULong,kUChar,uchar,saturate)
+                U_DOWN_CAST_CASE(ULong,kUnsignedChar,uchar,saturate)
+                U_DOWN_CAST_CASE(ULong,kShort,short,saturate)
+                U_DOWN_CAST_CASE(ULong,kUShort,ushort,saturate)
+                U_DOWN_CAST_CASE(ULong,kUnsignedShort,ushort,saturate)
+                U_DOWN_CAST_CASE(ULong,kInt,int,saturate)
+                U_DOWN_CAST_CASE(ULong,kUInt,uint,saturate)
+                U_DOWN_CAST_CASE(ULong,kUnsignedInt,uint,saturate)
+                U_DOWN_CAST_CASE(ULong,kLong,Long,saturate)
 
-                    TO_FLOAT_CASE(ULong)
-                    TO_DOUBLE_CASE(ULong)
+                TO_FLOAT_CASE(ULong)
+                TO_DOUBLE_CASE(ULong)
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kUnsignedLong:
             ULongPtr = (ULong *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(ULong)
 
                 case kULong:
                 case kUnsignedLong:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    U_DOWN_CAST_CASE(ULong, kChar, char, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUChar, uchar, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUnsignedChar, uchar, saturate)
-                    U_DOWN_CAST_CASE(ULong, kShort, short, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUShort, ushort, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUnsignedShort, ushort, saturate)
-                    U_DOWN_CAST_CASE(ULong, kInt, int, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUInt, uint, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate)
-                    U_DOWN_CAST_CASE(ULong, kLong, Long, saturate)
+                U_DOWN_CAST_CASE(ULong,kChar,char,saturate)
+                U_DOWN_CAST_CASE(ULong,kUChar,uchar,saturate)
+                U_DOWN_CAST_CASE(ULong,kUnsignedChar,uchar,saturate)
+                U_DOWN_CAST_CASE(ULong,kShort,short,saturate)
+                U_DOWN_CAST_CASE(ULong,kUShort,ushort,saturate)
+                U_DOWN_CAST_CASE(ULong,kUnsignedShort,ushort,saturate)
+                U_DOWN_CAST_CASE(ULong,kInt,int,saturate)
+                U_DOWN_CAST_CASE(ULong,kUInt,uint,saturate)
+                U_DOWN_CAST_CASE(ULong,kUnsignedInt,uint,saturate)
+                U_DOWN_CAST_CASE(ULong,kLong,Long,saturate)
 
-                    TO_FLOAT_CASE(ULong)
-                    TO_DOUBLE_CASE(ULong)
+                TO_FLOAT_CASE(ULong)
+                TO_DOUBLE_CASE(ULong)
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kFloat:
             floatPtr = (float *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(float)
 
-                FLOAT_ROUND_CASE(kChar, char, roundType, saturate)
-                FLOAT_ROUND_CASE(kUChar, uchar, roundType, saturate)
-                FLOAT_ROUND_CASE(kUnsignedChar, uchar, roundType, saturate)
-                FLOAT_ROUND_CASE(kShort, short, roundType, saturate)
-                FLOAT_ROUND_CASE(kUShort, ushort, roundType, saturate)
-                FLOAT_ROUND_CASE(kUnsignedShort, ushort, roundType, saturate)
-                FLOAT_ROUND_CASE(kInt, int, roundType, saturate)
-                FLOAT_ROUND_CASE(kUInt, uint, roundType, saturate)
-                FLOAT_ROUND_CASE(kUnsignedInt, uint, roundType, saturate)
-                FLOAT_ROUND_CASE(kLong, Long, roundType, saturate)
-                FLOAT_ROUND_CASE(kULong, ULong, roundType, saturate)
-                FLOAT_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate)
+                FLOAT_ROUND_CASE(kChar,char,roundType,saturate)
+                FLOAT_ROUND_CASE(kUChar,uchar,roundType,saturate)
+                FLOAT_ROUND_CASE(kUnsignedChar,uchar,roundType,saturate)
+                FLOAT_ROUND_CASE(kShort,short,roundType,saturate)
+                FLOAT_ROUND_CASE(kUShort,ushort,roundType,saturate)
+                FLOAT_ROUND_CASE(kUnsignedShort,ushort,roundType,saturate)
+                FLOAT_ROUND_CASE(kInt,int,roundType,saturate)
+                FLOAT_ROUND_CASE(kUInt,uint,roundType,saturate)
+                FLOAT_ROUND_CASE(kUnsignedInt,uint,roundType,saturate)
+                FLOAT_ROUND_CASE(kLong,Long,roundType,saturate)
+                FLOAT_ROUND_CASE(kULong,ULong,roundType,saturate)
+                FLOAT_ROUND_CASE(kUnsignedLong,ULong,roundType,saturate)
 
                 case kFloat:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
-                    TO_DOUBLE_CASE(float);
+                TO_DOUBLE_CASE(float);
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         case kDouble:
             doublePtr = (double *)inRaw;
-            switch (outType)
+            switch( outType )
             {
                 BOOL_CASE(double)
 
-                DOUBLE_ROUND_CASE(kChar, char, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUChar, uchar, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUnsignedChar, uchar, roundType, saturate)
-                DOUBLE_ROUND_CASE(kShort, short, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUShort, ushort, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUnsignedShort, ushort, roundType, saturate)
-                DOUBLE_ROUND_CASE(kInt, int, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUInt, uint, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUnsignedInt, uint, roundType, saturate)
-                DOUBLE_ROUND_CASE(kLong, Long, roundType, saturate)
-                DOUBLE_ROUND_CASE(kULong, ULong, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate)
+                DOUBLE_ROUND_CASE(kChar,char,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUChar,uchar,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUnsignedChar,uchar,roundType,saturate)
+                DOUBLE_ROUND_CASE(kShort,short,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUShort,ushort,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUnsignedShort,ushort,roundType,saturate)
+                DOUBLE_ROUND_CASE(kInt,int,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUInt,uint,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUnsignedInt,uint,roundType,saturate)
+                DOUBLE_ROUND_CASE(kLong,Long,roundType,saturate)
+                DOUBLE_ROUND_CASE(kULong,ULong,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUnsignedLong,ULong,roundType,saturate)
 
                 TO_FLOAT_CASE(double);
 
                 case kDouble:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                     break;
 
                 default:
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
                     break;
             }
             break;
 
         default:
-            log_error(
-                "ERROR: Invalid type given to convert_explicit_value!!\n");
+            log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
             break;
     }
 }
 
-void generate_random_data(ExplicitType type, size_t count, MTdata d,
-                          void *outData)
+void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData )
 {
     bool *boolPtr;
     cl_char *charPtr;
@@ -916,93 +880,88 @@
     cl_ulong *ulongPtr;
     cl_float *floatPtr;
     cl_double *doublePtr;
-    cl_half *halfPtr;
+    cl_ushort *halfPtr;
     size_t i;
     cl_uint bits = genrand_int32(d);
     cl_uint bitsLeft = 32;
 
-    switch (type)
+    switch( type )
     {
         case kBool:
             boolPtr = (bool *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
             {
-                if (0 == bitsLeft)
+                if( 0 == bitsLeft)
                 {
                     bits = genrand_int32(d);
                     bitsLeft = 32;
                 }
-                boolPtr[i] = (bits & 1) ? true : false;
-                bits >>= 1;
-                bitsLeft -= 1;
+                boolPtr[i] = ( bits & 1 ) ? true : false;
+                bits >>= 1; bitsLeft -= 1;
             }
             break;
 
         case kChar:
             charPtr = (cl_char *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
             {
-                if (0 == bitsLeft)
+                if( 0 == bitsLeft)
                 {
                     bits = genrand_int32(d);
                     bitsLeft = 32;
                 }
-                charPtr[i] = (cl_char)((cl_int)(bits & 255) - 127);
-                bits >>= 8;
-                bitsLeft -= 8;
+                charPtr[i] = (cl_char)( (cl_int)(bits & 255 ) - 127 );
+                bits >>= 8; bitsLeft -= 8;
             }
             break;
 
         case kUChar:
         case kUnsignedChar:
             ucharPtr = (cl_uchar *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
             {
-                if (0 == bitsLeft)
+                if( 0 == bitsLeft)
                 {
                     bits = genrand_int32(d);
                     bitsLeft = 32;
                 }
-                ucharPtr[i] = (cl_uchar)(bits & 255);
-                bits >>= 8;
-                bitsLeft -= 8;
+                ucharPtr[i] = (cl_uchar)( bits & 255 );
+                bits >>= 8; bitsLeft -= 8;
             }
             break;
 
         case kShort:
             shortPtr = (cl_short *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
             {
-                if (0 == bitsLeft)
+                if( 0 == bitsLeft)
                 {
                     bits = genrand_int32(d);
                     bitsLeft = 32;
                 }
-                shortPtr[i] = (cl_short)((cl_int)(bits & 65535) - 32767);
-                bits >>= 16;
-                bitsLeft -= 16;
+                shortPtr[i] = (cl_short)( (cl_int)( bits & 65535 ) - 32767 );
+                bits >>= 16; bitsLeft -= 16;
             }
             break;
 
         case kUShort:
         case kUnsignedShort:
             ushortPtr = (cl_ushort *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
             {
-                if (0 == bitsLeft)
+                if( 0 == bitsLeft)
                 {
                     bits = genrand_int32(d);
                     bitsLeft = 32;
                 }
-                ushortPtr[i] = (cl_ushort)((cl_int)(bits & 65535));
-                bits >>= 16;
-                bitsLeft -= 16;
+                ushortPtr[i] = (cl_ushort)( (cl_int)( bits & 65535 ) );
+                bits >>= 16; bitsLeft -= 16;
             }
             break;
 
         case kInt:
             intPtr = (cl_int *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
             {
                 intPtr[i] = (cl_int)genrand_int32(d);
             }
@@ -1011,7 +970,7 @@
         case kUInt:
         case kUnsignedInt:
             uintPtr = (cl_uint *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
             {
                 uintPtr[i] = (unsigned int)genrand_int32(d);
             }
@@ -1019,136 +978,157 @@
 
         case kLong:
             longPtr = (cl_long *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
             {
-                longPtr[i] = (cl_long)genrand_int32(d)
-                    | ((cl_long)genrand_int32(d) << 32);
+                longPtr[i] = (cl_long)genrand_int32(d) | ( (cl_long)genrand_int32(d) << 32 );
             }
             break;
 
         case kULong:
         case kUnsignedLong:
             ulongPtr = (cl_ulong *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
             {
-                ulongPtr[i] = (cl_ulong)genrand_int32(d)
-                    | ((cl_ulong)genrand_int32(d) << 32);
+                ulongPtr[i] = (cl_ulong)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 );
             }
             break;
 
         case kFloat:
             floatPtr = (cl_float *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
             {
                 // [ -(double) 0x7fffffff, (double) 0x7fffffff ]
                 double t = genrand_real1(d);
-                floatPtr[i] = (float)((1.0 - t) * -(double)0x7fffffff
-                                      + t * (double)0x7fffffff);
+                floatPtr[i] = (float) ((1.0 - t) * -(double) 0x7fffffff + t * (double) 0x7fffffff);
             }
             break;
 
         case kDouble:
             doublePtr = (cl_double *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
             {
-                cl_long u = (cl_long)genrand_int32(d)
-                    | ((cl_long)genrand_int32(d) << 32);
-                double t = (double)u;
-                // scale [-2**63, 2**63] to [-2**31, 2**31]
-                t *= MAKE_HEX_DOUBLE(0x1.0p-32, 0x1, -32);
+                cl_long u = (cl_long)genrand_int32(d) | ( (cl_long)genrand_int32(d) << 32 );
+                double t = (double) u;
+                t *= MAKE_HEX_DOUBLE( 0x1.0p-32, 0x1, -32 );        // scale [-2**63, 2**63] to [-2**31, 2**31]
                 doublePtr[i] = t;
             }
             break;
 
         case kHalf:
             halfPtr = (ushort *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
             {
-                if (0 == bitsLeft)
+                if( 0 == bitsLeft)
                 {
                     bits = genrand_int32(d);
                     bitsLeft = 32;
                 }
-                halfPtr[i] =
-                    bits & 65535; /* Kindly generates random bits for us */
-                bits >>= 16;
-                bitsLeft -= 16;
+                halfPtr[i] = bits & 65535;     /* Kindly generates random bits for us */
+                bits >>= 16; bitsLeft -= 16;
             }
             break;
 
         default:
-            log_error(
-                "ERROR: Invalid type passed in to generate_random_data!\n");
+            log_error( "ERROR: Invalid type passed in to generate_random_data!\n" );
             break;
     }
 }
 
-void *create_random_data(ExplicitType type, MTdata d, size_t count)
+void * create_random_data( ExplicitType type, MTdata d, size_t count )
 {
-    void *data = malloc(get_explicit_type_size(type) * count);
-    generate_random_data(type, count, d, data);
+    void *data = malloc( get_explicit_type_size( type ) * count );
+    generate_random_data( type, count, d, data );
     return data;
 }
 
-cl_long read_upscale_signed(void *inRaw, ExplicitType inType)
+cl_long read_upscale_signed( void *inRaw, ExplicitType inType )
 {
-    switch (inType)
+    switch( inType )
     {
-        case kChar: return (cl_long)(*((cl_char *)inRaw));
+        case kChar:
+            return (cl_long)( *( (cl_char *)inRaw ) );
         case kUChar:
-        case kUnsignedChar: return (cl_long)(*((cl_uchar *)inRaw));
-        case kShort: return (cl_long)(*((cl_short *)inRaw));
+        case kUnsignedChar:
+            return (cl_long)( *( (cl_uchar *)inRaw ) );
+        case kShort:
+            return (cl_long)( *( (cl_short *)inRaw ) );
         case kUShort:
-        case kUnsignedShort: return (cl_long)(*((cl_ushort *)inRaw));
-        case kInt: return (cl_long)(*((cl_int *)inRaw));
+        case kUnsignedShort:
+            return (cl_long)( *( (cl_ushort *)inRaw ) );
+        case kInt:
+            return (cl_long)( *( (cl_int *)inRaw ) );
         case kUInt:
-        case kUnsignedInt: return (cl_long)(*((cl_uint *)inRaw));
-        case kLong: return (cl_long)(*((cl_long *)inRaw));
+        case kUnsignedInt:
+            return (cl_long)( *( (cl_uint *)inRaw ) );
+        case kLong:
+            return (cl_long)( *( (cl_long *)inRaw ) );
         case kULong:
-        case kUnsignedLong: return (cl_long)(*((cl_ulong *)inRaw));
-        default: return 0;
+        case kUnsignedLong:
+            return (cl_long)( *( (cl_ulong *)inRaw ) );
+        default:
+            return 0;
     }
 }
 
-cl_ulong read_upscale_unsigned(void *inRaw, ExplicitType inType)
+cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType )
 {
-    switch (inType)
+    switch( inType )
     {
-        case kChar: return (cl_ulong)(*((cl_char *)inRaw));
+        case kChar:
+            return (cl_ulong)( *( (cl_char *)inRaw ) );
         case kUChar:
-        case kUnsignedChar: return (cl_ulong)(*((cl_uchar *)inRaw));
-        case kShort: return (cl_ulong)(*((cl_short *)inRaw));
+        case kUnsignedChar:
+            return (cl_ulong)( *( (cl_uchar *)inRaw ) );
+        case kShort:
+            return (cl_ulong)( *( (cl_short *)inRaw ) );
         case kUShort:
-        case kUnsignedShort: return (cl_ulong)(*((cl_ushort *)inRaw));
-        case kInt: return (cl_ulong)(*((cl_int *)inRaw));
+        case kUnsignedShort:
+            return (cl_ulong)( *( (cl_ushort *)inRaw ) );
+        case kInt:
+            return (cl_ulong)( *( (cl_int *)inRaw ) );
         case kUInt:
-        case kUnsignedInt: return (cl_ulong)(*((cl_uint *)inRaw));
-        case kLong: return (cl_ulong)(*((cl_long *)inRaw));
+        case kUnsignedInt:
+            return (cl_ulong)( *( (cl_uint *)inRaw ) );
+        case kLong:
+            return (cl_ulong)( *( (cl_long *)inRaw ) );
         case kULong:
-        case kUnsignedLong: return (cl_ulong)(*((cl_ulong *)inRaw));
-        default: return 0;
+        case kUnsignedLong:
+            return (cl_ulong)( *( (cl_ulong *)inRaw ) );
+        default:
+            return 0;
     }
 }
 
-float read_as_float(void *inRaw, ExplicitType inType)
+float read_as_float( void *inRaw, ExplicitType inType )
 {
-    switch (inType)
+    switch( inType )
     {
-        case kChar: return (float)(*((cl_char *)inRaw));
+        case kChar:
+            return (float)( *( (cl_char *)inRaw ) );
         case kUChar:
-        case kUnsignedChar: return (float)(*((cl_char *)inRaw));
-        case kShort: return (float)(*((cl_short *)inRaw));
+        case kUnsignedChar:
+            return (float)( *( (cl_char *)inRaw ) );
+        case kShort:
+            return (float)( *( (cl_short *)inRaw ) );
         case kUShort:
-        case kUnsignedShort: return (float)(*((cl_ushort *)inRaw));
-        case kInt: return (float)(*((cl_int *)inRaw));
+        case kUnsignedShort:
+            return (float)( *( (cl_ushort *)inRaw ) );
+        case kInt:
+            return (float)( *( (cl_int *)inRaw ) );
         case kUInt:
-        case kUnsignedInt: return (float)(*((cl_uint *)inRaw));
-        case kLong: return (float)(*((cl_long *)inRaw));
+        case kUnsignedInt:
+            return (float)( *( (cl_uint *)inRaw ) );
+        case kLong:
+            return (float)( *( (cl_long *)inRaw ) );
         case kULong:
-        case kUnsignedLong: return (float)(*((cl_ulong *)inRaw));
-        case kFloat: return *((float *)inRaw);
-        case kDouble: return (float)*((double *)inRaw);
-        default: return 0;
+        case kUnsignedLong:
+            return (float)( *( (cl_ulong *)inRaw ) );
+        case kFloat:
+            return *( (float *)inRaw );
+        case kDouble:
+            return (float) *( (double*)inRaw );
+        default:
+            return 0;
     }
 }
 
@@ -1160,60 +1140,59 @@
 
 double get_random_double(double low, double high, MTdata d)
 {
-    cl_ulong u =
-        (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32);
-    double t = (double)u * MAKE_HEX_DOUBLE(0x1.0p-64, 0x1, -64);
+    cl_ulong u = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32 );
+    double t = (double) u * MAKE_HEX_DOUBLE( 0x1.0p-64, 0x1, -64);
     return (1.0f - t) * low + t * high;
 }
 
-float any_float(MTdata d)
+float  any_float( MTdata d )
 {
-    union {
-        float f;
+    union
+    {
+        float   f;
         cl_uint u;
-    } u;
+    }u;
 
     u.u = genrand_int32(d);
     return u.f;
 }
 
 
-double any_double(MTdata d)
+double  any_double( MTdata d )
 {
-    union {
-        double f;
+    union
+    {
+        double   f;
         cl_ulong u;
-    } u;
+    }u;
 
-    u.u = (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32);
+    u.u = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
     return u.f;
 }
 
-int random_in_range(int minV, int maxV, MTdata d)
+int          random_in_range( int minV, int maxV, MTdata d )
 {
-    cl_ulong r = ((cl_ulong)genrand_int32(d)) * (maxV - minV + 1);
+    cl_ulong r = ((cl_ulong) genrand_int32(d) ) * (maxV - minV + 1);
     return (cl_uint)(r >> 32) + minV;
 }
 
 size_t get_random_size_t(size_t low, size_t high, MTdata d)
 {
-    enum
-    {
-        N = sizeof(size_t) / sizeof(int)
-    };
+  enum { N = sizeof(size_t)/sizeof(int) };
 
-    union {
-        int word[N];
-        size_t size;
-    } u;
+  union {
+    int word[N];
+    size_t size;
+  } u;
 
-    for (unsigned i = 0; i != N; ++i)
-    {
-        u.word[i] = genrand_int32(d);
-    }
+  for (unsigned i=0; i != N; ++i) {
+    u.word[i] = genrand_int32(d);
+  }
 
-    assert(low <= high && "Invalid random number range specified");
-    size_t range = high - low;
+  assert(low <= high && "Invalid random number range specified");
+  size_t range = high - low;
 
-    return (range) ? low + ((u.size - low) % range) : low;
+  return (range) ? low + ((u.size - low) % range) : low;
 }
+
+

diff --git a/test_common/harness/conversions.h b/test_common/harness/conversions.h
index e6880e0..50f2838 100644
--- a/test_common/harness/conversions.h
+++ b/test_common/harness/conversions.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -29,7 +29,7 @@
 
 enum ExplicitTypes
 {
-    kBool = 0,
+    kBool        = 0,
     kChar,
     kUChar,
     kUnsignedChar,
@@ -48,7 +48,7 @@
     kNumExplicitTypes
 };
 
-typedef enum ExplicitTypes ExplicitType;
+typedef enum ExplicitTypes    ExplicitType;
 
 enum RoundingTypes
 {
@@ -63,72 +63,61 @@
     kDefaultRoundingType = kRoundToNearest
 };
 
-typedef enum RoundingTypes RoundingType;
+typedef enum RoundingTypes    RoundingType;
 
-extern void print_type_to_string(ExplicitType type, void *data, char *string);
-extern size_t get_explicit_type_size(ExplicitType type);
-extern const char *get_explicit_type_name(ExplicitType type);
-extern void convert_explicit_value(void *inRaw, void *outRaw,
-                                   ExplicitType inType, bool saturate,
-                                   RoundingType roundType,
-                                   ExplicitType outType);
+extern void             print_type_to_string(ExplicitType type, void *data, char* string);
+extern size_t           get_explicit_type_size( ExplicitType type );
+extern const char *     get_explicit_type_name( ExplicitType type );
+extern void             convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
 
-extern void generate_random_data(ExplicitType type, size_t count, MTdata d,
-                                 void *outData);
-extern void *create_random_data(ExplicitType type, MTdata d, size_t count);
+extern void             generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
+extern void    *         create_random_data( ExplicitType type, MTdata d, size_t count );
 
-extern cl_long read_upscale_signed(void *inRaw, ExplicitType inType);
-extern cl_ulong read_upscale_unsigned(void *inRaw, ExplicitType inType);
-extern float read_as_float(void *inRaw, ExplicitType inType);
+extern cl_long          read_upscale_signed( void *inRaw, ExplicitType inType );
+extern cl_ulong         read_upscale_unsigned( void *inRaw, ExplicitType inType );
+extern float            read_as_float( void *inRaw, ExplicitType inType );
 
-extern float get_random_float(float low, float high, MTdata d);
-extern double get_random_double(double low, double high, MTdata d);
-extern float any_float(MTdata d);
-extern double any_double(MTdata d);
+extern float            get_random_float(float low, float high, MTdata d);
+extern double           get_random_double(double low, double high, MTdata d);
+extern float            any_float( MTdata d );
+extern double           any_double( MTdata d );
 
-extern int random_in_range(int minV, int maxV, MTdata d);
+extern int              random_in_range( int minV, int maxV, MTdata d );
 
 size_t get_random_size_t(size_t low, size_t high, MTdata d);
 
 // Note: though this takes a double, this is for use with single precision tests
-static inline int IsFloatSubnormal(float x)
+static inline int IsFloatSubnormal( float x )
 {
 #if 2 == FLT_RADIX
     // Do this in integer to avoid problems with FTZ behavior
-    union {
-        float d;
-        uint32_t u;
-    } u;
+    union{ float d; uint32_t u;}u;
     u.d = fabsf(x);
-    return (u.u - 1) < 0x007fffffU;
+    return (u.u-1) < 0x007fffffU;
 #else
-    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware --
-    // will fail if you flush subnormals to zero
-    return fabs(x) < (double)FLT_MIN && x != 0.0;
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) FLT_MIN && x != 0.0;
 #endif
 }
 
-static inline int IsDoubleSubnormal(double x)
+static inline int IsDoubleSubnormal( double x )
 {
 #if 2 == FLT_RADIX
     // Do this in integer to avoid problems with FTZ behavior
-    union {
-        double d;
-        uint64_t u;
-    } u;
-    u.d = fabs(x);
-    return (u.u - 1) < 0x000fffffffffffffULL;
+    union{ double d; uint64_t u;}u;
+    u.d = fabs( x);
+    return (u.u-1) < 0x000fffffffffffffULL;
 #else
-    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware --
-    // will fail if you flush subnormals to zero
-    return fabs(x) < (double)DBL_MIN && x != 0.0;
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) DBL_MIN && x != 0.0;
 #endif
 }
 
-static inline int IsHalfSubnormal(cl_half x)
-{
-    // this relies on interger overflow to exclude 0 as a subnormal
-    return ((x & 0x7fffU) - 1U) < 0x03ffU;
+static inline int IsHalfSubnormal( cl_half x )
+{ 
+    return ( ( x & 0x7fffU ) - 1U ) < 0x03ffU; 
 }
 
 #endif // _conversions_h
+
+

diff --git a/test_common/harness/crc32.h b/test_common/harness/crc32.h
index 65ca15e..1913063 100644
--- a/test_common/harness/crc32.h
+++ b/test_common/harness/crc32.h

@@ -18,7 +18,7 @@
 #ifndef _CRC32_H_
 #define _CRC32_H_
 
-#include <stdint.h>
+#include <stdint.h> 
 #include <stddef.h>
 
 uint32_t crc32(const void *buf, size_t size);

diff --git a/test_common/harness/deviceInfo.cpp b/test_common/harness/deviceInfo.cpp
index 287a142..a5b0a58 100644
--- a/test_common/harness/deviceInfo.cpp
+++ b/test_common/harness/deviceInfo.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017-2019 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -24,14 +24,14 @@
 
 /* Helper to return a string containing device information for the specified
  * device info parameter. */
-std::string get_device_info_string(cl_device_id device,
-                                   cl_device_info param_name)
+static std::string get_device_info_string(cl_device_id device,
+                                          cl_device_info param_name)
 {
     size_t size = 0;
     int err;
 
     if ((err = clGetDeviceInfo(device, param_name, 0, NULL, &size))
-            != CL_SUCCESS
+        != CL_SUCCESS
         || size == 0)
     {
         throw std::runtime_error("clGetDeviceInfo failed\n");
@@ -45,8 +45,7 @@
         throw std::runtime_error("clGetDeviceInfo failed\n");
     }
 
-    /* The returned string does not include the null terminator. */
-    return std::string(info.data(), size - 1);
+    return std::string(info.begin(), info.end());
 }
 
 /* Determines if an extension is supported by a device. */
@@ -80,21 +79,3 @@
 {
     return get_device_info_string(device, CL_DEVICE_VERSION);
 }
-
-/* Returns a string containing the device name. */
-std::string get_device_name(cl_device_id device)
-{
-    return get_device_info_string(device, CL_DEVICE_NAME);
-}
-
-size_t get_max_param_size(cl_device_id device)
-{
-    size_t ret(0);
-    if (clGetDeviceInfo(device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(ret), &ret,
-                        nullptr)
-        != CL_SUCCESS)
-    {
-        throw std::runtime_error("clGetDeviceInfo failed\n");
-    }
-    return ret;
-}

diff --git a/test_common/harness/deviceInfo.h b/test_common/harness/deviceInfo.h
index f8c5580..d4432ea 100644
--- a/test_common/harness/deviceInfo.h
+++ b/test_common/harness/deviceInfo.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017-2019 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -23,11 +23,6 @@
 
 #include <CL/opencl.h>
 
-/* Helper to return a string containing device information for the specified
- * device info parameter. */
-std::string get_device_info_string(cl_device_id device,
-                                   cl_device_info param_name);
-
 /* Determines if an extension is supported by a device. */
 int is_extension_available(cl_device_id device, const char *extensionName);
 
@@ -40,10 +35,4 @@
 /* Returns a string containing the supported OpenCL version for a device. */
 std::string get_device_version_string(cl_device_id device);
 
-/* Returns a string containing the device name. */
-std::string get_device_name(cl_device_id device);
-
-// Returns the maximum size in bytes for Kernel Parameters
-size_t get_max_param_size(cl_device_id device);
-
 #endif // _deviceInfo_h

diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp
index 22a2677..343be3f 100644
--- a/test_common/harness/errorHelpers.cpp
+++ b/test_common/harness/errorHelpers.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -22,105 +22,87 @@
 
 #include "parseParameters.h"
 
-#include <CL/cl_half.h>
-
-const char *IGetErrorString(int clErrorCode)
+const char    *IGetErrorString( int clErrorCode )
 {
-    switch (clErrorCode)
+    switch( clErrorCode )
     {
-        case CL_SUCCESS: return "CL_SUCCESS";
-        case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
-        case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
-        case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
-        case CL_MEM_OBJECT_ALLOCATION_FAILURE:
-            return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
-        case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
-        case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
-        case CL_PROFILING_INFO_NOT_AVAILABLE:
-            return "CL_PROFILING_INFO_NOT_AVAILABLE";
-        case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
-        case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
-        case CL_IMAGE_FORMAT_NOT_SUPPORTED:
-            return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
+        case CL_SUCCESS:                return "CL_SUCCESS";
+        case CL_DEVICE_NOT_FOUND:        return "CL_DEVICE_NOT_FOUND";
+        case CL_DEVICE_NOT_AVAILABLE:    return "CL_DEVICE_NOT_AVAILABLE";
+        case CL_COMPILER_NOT_AVAILABLE:    return "CL_COMPILER_NOT_AVAILABLE";
+        case CL_MEM_OBJECT_ALLOCATION_FAILURE:    return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
+        case CL_OUT_OF_RESOURCES:        return "CL_OUT_OF_RESOURCES";
+        case CL_OUT_OF_HOST_MEMORY:        return "CL_OUT_OF_HOST_MEMORY";
+        case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
+        case CL_MEM_COPY_OVERLAP:        return "CL_MEM_COPY_OVERLAP";
+        case CL_IMAGE_FORMAT_MISMATCH:    return "CL_IMAGE_FORMAT_MISMATCH";
+        case CL_IMAGE_FORMAT_NOT_SUPPORTED:    return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
         case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
-        case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
-        case CL_MISALIGNED_SUB_BUFFER_OFFSET:
-            return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
-        case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
-            return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
+        case CL_MAP_FAILURE:            return "CL_MAP_FAILURE";
+        case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
+        case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
         case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE";
         case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE";
         case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
         case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
-        case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
-            return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
-        case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
+        case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
+        case CL_INVALID_VALUE:            return "CL_INVALID_VALUE";
         case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
-        case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
-        case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
-        case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
-        case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
-        case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
-        case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
-        case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
-            return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
-        case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
-        case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
-        case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
-        case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
-        case CL_INVALID_PLATFORM: return "CL_INVALID_PLATFORM";
-        case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
-        case CL_INVALID_PROGRAM_EXECUTABLE:
-            return "CL_INVALID_PROGRAM_EXECUTABLE";
-        case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
-        case CL_INVALID_KERNEL_DEFINITION:
-            return "CL_INVALID_KERNEL_DEFINITION";
-        case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
-        case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
-        case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
-        case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
-        case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS";
-        case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION";
-        case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE";
-        case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE";
-        case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET";
-        case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
-        case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
-        case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
-        case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT";
-        case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
-        case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL";
+        case CL_INVALID_DEVICE:            return "CL_INVALID_DEVICE";
+        case CL_INVALID_CONTEXT:        return "CL_INVALID_CONTEXT";
+        case CL_INVALID_QUEUE_PROPERTIES:    return "CL_INVALID_QUEUE_PROPERTIES";
+        case CL_INVALID_COMMAND_QUEUE:    return "CL_INVALID_COMMAND_QUEUE";
+        case CL_INVALID_HOST_PTR:    return "CL_INVALID_HOST_PTR";
+        case CL_INVALID_MEM_OBJECT:        return "CL_INVALID_MEM_OBJECT";
+        case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:        return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
+        case CL_INVALID_IMAGE_SIZE:        return "CL_INVALID_IMAGE_SIZE";
+        case CL_INVALID_SAMPLER:        return "CL_INVALID_SAMPLER";
+        case CL_INVALID_BINARY:        return "CL_INVALID_BINARY";
+        case CL_INVALID_BUILD_OPTIONS:        return "CL_INVALID_BUILD_OPTIONS";
+        case CL_INVALID_PROGRAM:        return "CL_INVALID_PROGRAM";
+        case CL_INVALID_PROGRAM_EXECUTABLE:        return "CL_INVALID_PROGRAM_EXECUTABLE";
+        case CL_INVALID_KERNEL_NAME:    return "CL_INVALID_KERNEL_NAME";
+        case CL_INVALID_KERNEL_DEFINITION:    return "CL_INVALID_KERNEL_DEFINITION";
+        case CL_INVALID_KERNEL:            return "CL_INVALID_KERNEL";
+        case CL_INVALID_ARG_INDEX:        return "CL_INVALID_ARG_INDEX";
+        case CL_INVALID_ARG_VALUE:        return "CL_INVALID_ARG_VALUE";
+        case CL_INVALID_ARG_SIZE:        return "CL_INVALID_ARG_SIZE";
+        case CL_INVALID_KERNEL_ARGS:    return "CL_INVALID_KERNEL_ARGS";
+        case CL_INVALID_WORK_DIMENSION:        return "CL_INVALID_WORK_DIMENSION";
+        case CL_INVALID_WORK_GROUP_SIZE:    return "CL_INVALID_WORK_GROUP_SIZE";
+        case CL_INVALID_WORK_ITEM_SIZE:    return "CL_INVALID_WORK_ITEM_SIZE";
+        case CL_INVALID_GLOBAL_OFFSET:        return "CL_INVALID_GLOBAL_OFFSET";
+        case CL_INVALID_EVENT_WAIT_LIST:    return "CL_INVALID_EVENT_WAIT_LIST";
+        case CL_INVALID_EVENT:            return "CL_INVALID_EVENT";
+        case CL_INVALID_OPERATION:        return "CL_INVALID_OPERATION";
+        case CL_INVALID_GL_OBJECT:        return "CL_INVALID_GL_OBJECT";
+        case CL_INVALID_BUFFER_SIZE:    return "CL_INVALID_BUFFER_SIZE";
+        case CL_INVALID_MIP_LEVEL:      return "CL_INVALID_MIP_LEVEL";
         case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE";
         case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY";
         case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR";
         case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS";
         case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS";
-        case CL_INVALID_DEVICE_PARTITION_COUNT:
-            return "CL_INVALID_DEVICE_PARTITION_COUNT";
-        case CL_INVALID_PIPE_SIZE: return "CL_INVALID_PIPE_SIZE";
-        case CL_INVALID_DEVICE_QUEUE: return "CL_INVALID_DEVICE_QUEUE";
-        case CL_INVALID_SPEC_ID: return "CL_INVALID_SPEC_ID";
-        case CL_MAX_SIZE_RESTRICTION_EXCEEDED:
-            return "CL_MAX_SIZE_RESTRICTION_EXCEEDED";
+        case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT";
         default: return "(unknown)";
     }
 }
 
-const char *GetChannelOrderName(cl_channel_order order)
+const char *GetChannelOrderName( cl_channel_order order )
 {
-    switch (order)
+    switch( order )
     {
-        case CL_R: return "CL_R";
-        case CL_A: return "CL_A";
-        case CL_Rx: return "CL_Rx";
-        case CL_RG: return "CL_RG";
-        case CL_RA: return "CL_RA";
-        case CL_RGx: return "CL_RGx";
-        case CL_RGB: return "CL_RGB";
-        case CL_RGBx: return "CL_RGBx";
-        case CL_RGBA: return "CL_RGBA";
-        case CL_ARGB: return "CL_ARGB";
-        case CL_BGRA: return "CL_BGRA";
+        case CL_R:      return "CL_R";
+        case CL_A:      return "CL_A";
+        case CL_Rx:     return "CL_Rx";
+        case CL_RG:     return "CL_RG";
+        case CL_RA:     return "CL_RA";
+        case CL_RGx:    return "CL_RGx";
+        case CL_RGB:    return "CL_RGB";
+        case CL_RGBx:   return "CL_RGBx";
+        case CL_RGBA:      return "CL_RGBA";
+        case CL_ARGB:      return "CL_ARGB";
+        case CL_BGRA:      return "CL_BGRA";
         case CL_INTENSITY: return "CL_INTENSITY";
         case CL_LUMINANCE: return "CL_LUMINANCE";
 #if defined CL_1RGB_APPLE
@@ -143,9 +125,9 @@
     }
 }
 
-int IsChannelOrderSupported(cl_channel_order order)
+int IsChannelOrderSupported( cl_channel_order order )
 {
-    switch (order)
+    switch( order )
     {
         case CL_R:
         case CL_A:
@@ -165,47 +147,51 @@
         case CL_sRGBx:
         case CL_sBGRA:
         case CL_sRGBA:
-        case CL_DEPTH: return 1;
+        case CL_DEPTH:
+            return 1;
 #if defined CL_1RGB_APPLE
-        case CL_1RGB_APPLE: return 1;
+        case CL_1RGB_APPLE:
+            return 1;
 #endif
 #if defined CL_BGR1_APPLE
-        case CL_BGR1_APPLE: return 1;
+        case CL_BGR1_APPLE:
+            return 1;
 #endif
-        default: return 0;
+        default:
+            return 0;
     }
 }
 
-const char *GetChannelTypeName(cl_channel_type type)
+const char *GetChannelTypeName( cl_channel_type type )
 {
-    switch (type)
+    switch( type )
     {
-        case CL_SNORM_INT8: return "CL_SNORM_INT8";
-        case CL_SNORM_INT16: return "CL_SNORM_INT16";
-        case CL_UNORM_INT8: return "CL_UNORM_INT8";
-        case CL_UNORM_INT16: return "CL_UNORM_INT16";
-        case CL_UNORM_SHORT_565: return "CL_UNORM_SHORT_565";
-        case CL_UNORM_SHORT_555: return "CL_UNORM_SHORT_555";
-        case CL_UNORM_INT_101010: return "CL_UNORM_INT_101010";
-        case CL_SIGNED_INT8: return "CL_SIGNED_INT8";
-        case CL_SIGNED_INT16: return "CL_SIGNED_INT16";
-        case CL_SIGNED_INT32: return "CL_SIGNED_INT32";
-        case CL_UNSIGNED_INT8: return "CL_UNSIGNED_INT8";
-        case CL_UNSIGNED_INT16: return "CL_UNSIGNED_INT16";
-        case CL_UNSIGNED_INT32: return "CL_UNSIGNED_INT32";
-        case CL_HALF_FLOAT: return "CL_HALF_FLOAT";
-        case CL_FLOAT: return "CL_FLOAT";
+        case CL_SNORM_INT8:         return "CL_SNORM_INT8";
+        case CL_SNORM_INT16:        return "CL_SNORM_INT16";
+        case CL_UNORM_INT8:         return "CL_UNORM_INT8";
+        case CL_UNORM_INT16:        return "CL_UNORM_INT16";
+        case CL_UNORM_SHORT_565:    return "CL_UNORM_SHORT_565";
+        case CL_UNORM_SHORT_555:    return "CL_UNORM_SHORT_555";
+        case CL_UNORM_INT_101010:   return "CL_UNORM_INT_101010";
+        case CL_SIGNED_INT8:        return "CL_SIGNED_INT8";
+        case CL_SIGNED_INT16:       return "CL_SIGNED_INT16";
+        case CL_SIGNED_INT32:       return "CL_SIGNED_INT32";
+        case CL_UNSIGNED_INT8:      return "CL_UNSIGNED_INT8";
+        case CL_UNSIGNED_INT16:     return "CL_UNSIGNED_INT16";
+        case CL_UNSIGNED_INT32:     return "CL_UNSIGNED_INT32";
+        case CL_HALF_FLOAT:         return "CL_HALF_FLOAT";
+        case CL_FLOAT:              return "CL_FLOAT";
 #ifdef CL_SFIXED14_APPLE
-        case CL_SFIXED14_APPLE: return "CL_SFIXED14_APPLE";
+        case CL_SFIXED14_APPLE:     return "CL_SFIXED14_APPLE";
 #endif
-        case CL_UNORM_INT24: return "CL_UNORM_INT24";
-        default: return NULL;
+        case CL_UNORM_INT24:        return "CL_UNORM_INT24";
+        default:                    return NULL;
     }
 }
 
-int IsChannelTypeSupported(cl_channel_type type)
+int IsChannelTypeSupported( cl_channel_type type )
 {
-    switch (type)
+    switch( type )
     {
         case CL_SNORM_INT8:
         case CL_SNORM_INT16:
@@ -222,408 +208,440 @@
         case CL_UNSIGNED_INT16:
         case CL_UNSIGNED_INT32:
         case CL_HALF_FLOAT:
-        case CL_FLOAT: return 1;
+        case CL_FLOAT:
+            return 1;
 #ifdef CL_SFIXED14_APPLE
-        case CL_SFIXED14_APPLE: return 1;
+        case CL_SFIXED14_APPLE:
+            return 1;
 #endif
-        default: return 0;
+        default:
+            return 0;
     }
 }
 
-const char *GetAddressModeName(cl_addressing_mode mode)
+const char *GetAddressModeName( cl_addressing_mode mode )
 {
-    switch (mode)
+    switch( mode )
     {
-        case CL_ADDRESS_NONE: return "CL_ADDRESS_NONE";
-        case CL_ADDRESS_CLAMP_TO_EDGE: return "CL_ADDRESS_CLAMP_TO_EDGE";
-        case CL_ADDRESS_CLAMP: return "CL_ADDRESS_CLAMP";
-        case CL_ADDRESS_REPEAT: return "CL_ADDRESS_REPEAT";
-        case CL_ADDRESS_MIRRORED_REPEAT: return "CL_ADDRESS_MIRRORED_REPEAT";
-        default: return NULL;
+        case CL_ADDRESS_NONE:                return "CL_ADDRESS_NONE";
+        case CL_ADDRESS_CLAMP_TO_EDGE:        return "CL_ADDRESS_CLAMP_TO_EDGE";
+        case CL_ADDRESS_CLAMP:                return "CL_ADDRESS_CLAMP";
+        case CL_ADDRESS_REPEAT:                return "CL_ADDRESS_REPEAT";
+        case CL_ADDRESS_MIRRORED_REPEAT:    return "CL_ADDRESS_MIRRORED_REPEAT";
+        default:                            return NULL;
     }
 }
 
-const char *GetDeviceTypeName(cl_device_type type)
+const char *GetDeviceTypeName( cl_device_type type )
 {
-    switch (type)
+    switch( type )
     {
-        case CL_DEVICE_TYPE_GPU: return "CL_DEVICE_TYPE_GPU";
-        case CL_DEVICE_TYPE_CPU: return "CL_DEVICE_TYPE_CPU";
-        case CL_DEVICE_TYPE_ACCELERATOR: return "CL_DEVICE_TYPE_ACCELERATOR";
-        case CL_DEVICE_TYPE_ALL: return "CL_DEVICE_TYPE_ALL";
-        default: return NULL;
+        case CL_DEVICE_TYPE_GPU:    return "CL_DEVICE_TYPE_GPU";
+        case CL_DEVICE_TYPE_CPU:    return "CL_DEVICE_TYPE_CPU";
+        case CL_DEVICE_TYPE_ACCELERATOR:    return "CL_DEVICE_TYPE_ACCELERATOR";
+        case CL_DEVICE_TYPE_ALL:    return "CL_DEVICE_TYPE_ALL";
+        default:                    return NULL;
     }
 }
 
-const char *GetDataVectorString(void *dataBuffer, size_t typeSize,
-                                size_t vecSize, char *buffer)
+const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer )
 {
-    static char scratch[1024];
+    static char scratch[ 1024 ];
     size_t i, j;
 
-    if (buffer == NULL) buffer = scratch;
+    if( buffer == NULL )
+        buffer = scratch;
 
     unsigned char *p = (unsigned char *)dataBuffer;
     char *bPtr;
 
-    buffer[0] = 0;
+    buffer[ 0 ] = 0;
     bPtr = buffer;
-    for (i = 0; i < vecSize; i++)
+    for( i = 0; i < vecSize; i++ )
     {
-        if (i > 0)
+        if( i > 0 )
         {
-            bPtr[0] = ' ';
+            bPtr[ 0 ] = ' ';
             bPtr++;
         }
-        for (j = 0; j < typeSize; j++)
+        for( j = 0; j < typeSize; j++ )
         {
-            sprintf(bPtr, "%02x", (unsigned int)p[typeSize - j - 1]);
+            sprintf( bPtr, "%02x", (unsigned int)p[ typeSize - j - 1 ] );
             bPtr += 2;
         }
         p += typeSize;
     }
-    bPtr[0] = 0;
+    bPtr[ 0 ] = 0;
 
     return buffer;
 }
 
-const char *GetQueuePropertyName(cl_command_queue_properties property)
-{
-    switch (property)
-    {
-        case CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE:
-            return "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE";
-        case CL_QUEUE_PROFILING_ENABLE: return "CL_QUEUE_PROFILING_ENABLE";
-        case CL_QUEUE_ON_DEVICE: return "CL_QUEUE_ON_DEVICE";
-        case CL_QUEUE_ON_DEVICE_DEFAULT: return "CL_QUEUE_ON_DEVICE_DEFAULT";
-        default: return "(unknown)";
-    }
-}
-
 #ifndef MAX
-#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
+#define MAX( _a, _b )       ((_a) > (_b) ? (_a) : (_b))
 #endif
 
-#if defined(_MSC_VER)
-#define scalbnf(_a, _i) ldexpf(_a, _i)
-#define scalbn(_a, _i) ldexp(_a, _i)
-#define scalbnl(_a, _i) ldexpl(_a, _i)
+#if defined( _MSC_VER )
+#define scalbnf(_a, _i )    ldexpf( _a, _i )
+#define scalbn(_a, _i )     ldexp( _a, _i )
+#define scalbnl(_a, _i )    ldexpl( _a, _i )
 #endif
 
+static float Ulp_Error_Half_Float( float test, double reference );
+static inline float  half2float( cl_ushort half );
+
 // taken from math tests
-#define HALF_MIN_EXP -13
-#define HALF_MANT_DIG 11
-static float Ulp_Error_Half_Float(float test, double reference)
+#define HALF_MIN_EXP    -13
+#define HALF_MANT_DIG    11
+static float Ulp_Error_Half_Float( float test, double reference )
 {
-    union {
-        double d;
-        uint64_t u;
-    } u;
-    u.d = reference;
+    union{ double d; uint64_t u; }u;     u.d = reference;
 
-    // Note: This function presumes that someone has already tested whether the
-    // result is correctly, rounded before calling this function.  That test:
+    // Note: This function presumes that someone has already tested whether the result is correctly,
+    // rounded before calling this function.  That test:
     //
     //    if( (float) reference == test )
     //        return 0.0f;
     //
-    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
-    // before we get here. Otherwise, we'll return inf ulp error here, for what
-    // are otherwise correctly rounded results.
+    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+    // results.
 
     double testVal = test;
-
-    if (isinf(reference))
-    {
-        if (testVal == reference) return 0.0f;
-
-        return (float)(testVal - reference);
-    }
-
-    if (isinf(testVal))
-    {
-        // Allow overflow within the limit of the allowed ulp error. Towards
-        // that end we pretend the test value is actually 2**16, the next value
-        // that would appear in the number line if half had sufficient range.
-        testVal = copysign(65536.0, testVal);
-    }
-
-
-    if (u.u & 0x000fffffffffffffULL)
+    if( u.u & 0x000fffffffffffffULL )
     { // Non-power of two and NaN
-        if (isnan(reference) && isnan(test))
-            return 0.0f; // if we are expecting a NaN, any NaN is fine
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
 
         // The unbiased exponent of the ulp unit place
-        int ulp_exp =
-            HALF_MANT_DIG - 1 - MAX(ilogb(reference), HALF_MIN_EXP - 1);
+        int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
 
         // Scale the exponent of the error
-        return (float)scalbn(testVal - reference, ulp_exp);
+        return (float) scalbn( testVal - reference, ulp_exp );
+    }
+
+    if( isinf( reference ) )
+    {
+        if( (double) test == reference )
+            return 0.0f;
+
+        return (float) (testVal - reference );
     }
 
     // reference is a normal power of two or a zero
-    int ulp_exp =
-        HALF_MANT_DIG - 1 - MAX(ilogb(reference) - 1, HALF_MIN_EXP - 1);
+    int ulp_exp =  HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
 
     // Scale the exponent of the error
-    return (float)scalbn(testVal - reference, ulp_exp);
+    return (float) scalbn( testVal - reference, ulp_exp );
 }
 
-float Ulp_Error_Half(cl_half test, float reference)
+// Taken from vLoadHalf test
+static inline float half2float( cl_ushort us )
 {
-    return Ulp_Error_Half_Float(cl_half_to_float(test), reference);
+    uint32_t u = us;
+    uint32_t sign = (u << 16) & 0x80000000;
+    int32_t exponent = (u & 0x7c00) >> 10;
+    uint32_t mantissa = (u & 0x03ff) << 13;
+    union{ unsigned int u; float f;}uu;
+
+    if( exponent == 0 )
+    {
+        if( mantissa == 0 )
+            return sign ? -0.0f : 0.0f;
+
+        int shift = __builtin_clz( mantissa ) - 8;
+        exponent -= shift-1;
+        mantissa <<= shift;
+        mantissa &= 0x007fffff;
+    }
+    else
+        if( exponent == 31)
+        {
+            uu.u = mantissa | sign;
+            if( mantissa )
+                uu.u |= 0x7fc00000;
+            else
+                uu.u |= 0x7f800000;
+
+            return uu.f;
+        }
+
+    exponent += 127 - 15;
+    exponent <<= 23;
+
+    exponent |= mantissa;
+    uu.u = exponent | sign;
+
+    return uu.f;
+}
+
+float Ulp_Error_Half( cl_ushort test, float reference )
+{
+    return Ulp_Error_Half_Float( half2float(test), reference );
 }
 
 
-float Ulp_Error(float test, double reference)
+float Ulp_Error( float test, double reference )
 {
-    union {
-        double d;
-        uint64_t u;
-    } u;
-    u.d = reference;
+    union{ double d; uint64_t u; }u;     u.d = reference;
     double testVal = test;
 
-    // Note: This function presumes that someone has already tested whether the
-    // result is correctly, rounded before calling this function.  That test:
+    // Note: This function presumes that someone has already tested whether the result is correctly,
+    // rounded before calling this function.  That test:
     //
     //    if( (float) reference == test )
     //        return 0.0f;
     //
-    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
-    // before we get here. Otherwise, we'll return inf ulp error here, for what
-    // are otherwise correctly rounded results.
+    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+    // results.
 
 
-    if (isinf(reference))
+    if( isinf( reference ) )
     {
-        if (testVal == reference) return 0.0f;
+        if( testVal == reference )
+            return 0.0f;
 
-        return (float)(testVal - reference);
+        return (float) (testVal - reference );
     }
 
-    if (isinf(testVal))
-    { // infinite test value, but finite (but possibly overflowing in float)
-      // reference.
+    if( isinf( testVal) )
+    { // infinite test value, but finite (but possibly overflowing in float) reference.
       //
-      // The function probably overflowed prematurely here. Formally, the spec
-      // says this is an infinite ulp error and should not be tolerated.
-      // Unfortunately, this would mean that the internal precision of some
-      // half_pow implementations would have to be 29+ bits at half_powr(
-      // 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
-      // is not exactly 128.0. You might represent this for example as 4*(32 -
-      // ~2**-24), which after rounding to single is 4*32 = 128, which will
-      // ultimately result in premature overflow, even though a good faith
-      // representation would be correct to within 2**-29 interally.
+      // The function probably overflowed prematurely here. Formally, the spec says this is
+      // an infinite ulp error and should not be tolerated. Unfortunately, this would mean
+      // that the internal precision of some half_pow implementations would have to be 29+ bits
+      // at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
+      // is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which
+      // after rounding to single is 4*32 = 128, which will ultimately result in premature
+      // overflow, even though a good faith representation would be correct to within 2**-29
+      // interally.
 
-        // In the interest of not requiring the implementation go to
-        // extraordinary lengths to deliver a half precision function, we allow
-        // premature overflow within the limit of the allowed ulp error.
-        // Towards, that end, we "pretend" the test value is actually 2**128,
-        // the next value that would appear in the number line if float had
-        // sufficient range.
-        testVal = copysign(MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal);
+        // In the interest of not requiring the implementation go to extraordinary lengths to
+        // deliver a half precision function, we allow premature overflow within the limit
+        // of the allowed ulp error. Towards, that end, we "pretend" the test value is actually
+        // 2**128, the next value that would appear in the number line if float had sufficient range.
+        testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal );
 
-        // Note that the same hack may not work in long double, which is not
-        // guaranteed to have more range than double.  It is not clear that
-        // premature overflow should be tolerated for double.
+        // Note that the same hack may not work in long double, which is not guaranteed to have
+        // more range than double.  It is not clear that premature overflow should be tolerated for
+        // double.
     }
 
-    if (u.u & 0x000fffffffffffffULL)
+    if( u.u & 0x000fffffffffffffULL )
     { // Non-power of two and NaN
-        if (isnan(reference) && isnan(test))
-            return 0.0f; // if we are expecting a NaN, any NaN is fine
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
 
         // The unbiased exponent of the ulp unit place
-        int ulp_exp = FLT_MANT_DIG - 1 - MAX(ilogb(reference), FLT_MIN_EXP - 1);
+        int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 );
 
         // Scale the exponent of the error
-        return (float)scalbn(testVal - reference, ulp_exp);
+        return (float) scalbn( testVal - reference, ulp_exp );
     }
 
     // reference is a normal power of two or a zero
     // The unbiased exponent of the ulp unit place
-    int ulp_exp = FLT_MANT_DIG - 1 - MAX(ilogb(reference) - 1, FLT_MIN_EXP - 1);
+    int ulp_exp =  FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 );
 
     // Scale the exponent of the error
-    return (float)scalbn(testVal - reference, ulp_exp);
+    return (float) scalbn( testVal - reference, ulp_exp );
 }
 
-float Ulp_Error_Double(double test, long double reference)
+float Ulp_Error_Double( double test, long double reference )
 {
-    // Deal with long double = double
-    // On most systems long double is a higher precision type than double. They
-    // provide either a 80-bit or greater floating point type, or they provide a
-    // head-tail double double format. That is sufficient to represent the
-    // accuracy of a floating point result to many more bits than double and we
-    // can calculate sub-ulp errors. This is the standard system for which this
-    // test suite is designed.
-    //
-    // On some systems double and long double are the same thing. Then we run
-    // into a problem, because our representation of the infinitely precise
-    // result (passed in as reference above) can be off by as much as a half
-    // double precision ulp itself.  In this case, we inflate the reported error
-    // by half an ulp to take this into account.  A more correct and permanent
-    // fix would be to undertake refactoring the reference code to return
-    // results in this format:
-    //
-    //    typedef struct DoubleReference
-    //    {
-    //        // true value = correctlyRoundedResult + ulps *
-    //        //    ulp(correctlyRoundedResult)  (infinitely precise)
-    //        // as best we can:
-    //        double correctlyRoundedResult;
-    //        // plus a fractional amount to account for the difference
-    //        // between infinitely precise result and correctlyRoundedResult,
-    //        // in units of ulps:
-    //        double ulps;
-    //    } DoubleReference;
-    //
-    // This would provide a useful higher-than-double precision format for
-    // everyone that we can use, and would solve a few problems with
-    // representing absolute errors below DBL_MIN and over DBL_MAX for systems
-    // that use a head to tail double double for long double.
+  // Deal with long double = double
+  // On most systems long double is a higher precision type than double. They provide either
+  // a 80-bit or greater floating point type, or they provide a head-tail double double format.
+  // That is sufficient to represent the accuracy of a floating point result to many more bits
+  // than double and we can calculate sub-ulp errors. This is the standard system for which this
+  // test suite is designed.
+  //
+  // On some systems double and long double are the same thing. Then we run into a problem,
+  // because our representation of the infinitely precise result (passed in as reference above)
+  // can be off by as much as a half double precision ulp itself.  In this case, we inflate the
+  // reported error by half an ulp to take this into account.  A more correct and permanent fix
+  // would be to undertake refactoring the reference code to return results in this format:
+  //
+  //    typedef struct DoubleReference
+  //    { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult)        (infinitely precise)
+  //        double  correctlyRoundedResult;     // as best we can
+  //        double  ulps;                       // plus a fractional amount to account for the difference
+  //    }DoubleReference;                       //     between infinitely precise result and correctlyRoundedResult, in units of ulps.
+  //
+  // This would provide a useful higher-than-double precision format for everyone that we can use,
+  // and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
+  // that use a head to tail double double for long double.
 
-    // Note: This function presumes that someone has already tested whether the
-    // result is correctly, rounded before calling this function.  That test:
+    // Note: This function presumes that someone has already tested whether the result is correctly,
+    // rounded before calling this function.  That test:
     //
     //    if( (float) reference == test )
     //        return 0.0f;
     //
-    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
-    // before we get here. Otherwise, we'll return inf ulp error here, for what
-    // are otherwise correctly rounded results.
+    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+    // results.
 
 
     int x;
     long double testVal = test;
-    if (0.5L != frexpl(reference, &x))
+    if( 0.5L != frexpl( reference, &x) )
     { // Non-power of two and NaN
-        if (isinf(reference))
+        if( isinf( reference ) )
         {
-            if (testVal == reference) return 0.0f;
+            if( testVal == reference )
+                return 0.0f;
 
-            return (float)(testVal - reference);
+            return (float) ( testVal - reference );
         }
 
-        if (isnan(reference) && isnan(test))
-            return 0.0f; // if we are expecting a NaN, any NaN is fine
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
 
         // The unbiased exponent of the ulp unit place
-        int ulp_exp =
-            DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1);
+        int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
 
         // Scale the exponent of the error
-        float result = (float)scalbnl(testVal - reference, ulp_exp);
+        float result = (float) scalbnl( testVal - reference, ulp_exp );
 
-        // account for rounding error in reference result on systems that do not
-        // have a higher precision floating point type (see above)
-        if (sizeof(long double) == sizeof(double))
-            result += copysignf(0.5f, result);
+        // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
+        if( sizeof(long double) == sizeof( double ) )
+            result += copysignf( 0.5f, result);
 
         return result;
+
     }
 
     // reference is a normal power of two or a zero
     // The unbiased exponent of the ulp unit place
-    int ulp_exp =
-        DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
+    int ulp_exp =  DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
 
     // Scale the exponent of the error
-    float result = (float)scalbnl(testVal - reference, ulp_exp);
+    float result = (float) scalbnl( testVal - reference, ulp_exp );
 
-    // account for rounding error in reference result on systems that do not
-    // have a higher precision floating point type (see above)
-    if (sizeof(long double) == sizeof(double))
-        result += copysignf(0.5f, result);
+    // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
+    if( sizeof(long double) == sizeof( double ) )
+        result += copysignf( 0.5f, result);
 
     return result;
 }
 
-cl_int OutputBuildLogs(cl_program program, cl_uint num_devices,
-                       cl_device_id *device_list)
+cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list)
 {
-    int error;
-    size_t size_ret;
+  int error;
+  size_t size_ret;
 
-    // Does the program object exist?
-    if (program != NULL)
-    {
+  // Does the program object exist?
+  if (program != NULL) {
 
-        // Was the number of devices given
-        if (num_devices == 0)
-        {
+    // Was the number of devices given
+    if (num_devices == 0) {
 
-            // If zero devices were specified then allocate and query the device
-            // list from the context
-            cl_context context;
-            error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT,
-                                     sizeof(context), &context, NULL);
-            test_error(error, "Unable to query program's context");
-            error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
-                                     &size_ret);
-            test_error(error, "Unable to query context's device size");
-            num_devices = size_ret / sizeof(cl_device_id);
-            device_list = (cl_device_id *)malloc(size_ret);
-            if (device_list == NULL)
-            {
-                print_error(error, "malloc failed");
-                return CL_OUT_OF_HOST_MEMORY;
-            }
-            error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret,
-                                     device_list, NULL);
-            test_error(error, "Unable to query context's devices");
-        }
+      // If zero devices were specified then allocate and query the device list from the context
+      cl_context context;
+      error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(context), &context, NULL);
+      test_error( error, "Unable to query program's context" );
+      error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret);
+      test_error( error, "Unable to query context's device size" );
+      num_devices = size_ret / sizeof(cl_device_id);
+      device_list = (cl_device_id *) malloc(size_ret);
+      if (device_list == NULL) {
+          print_error( error, "malloc failed" );
+          return CL_OUT_OF_HOST_MEMORY;
+      }
+      error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret, device_list, NULL);
+      test_error( error, "Unable to query context's devices" );
 
-        // For each device in the device_list
-        unsigned int i;
-        for (i = 0; i < num_devices; i++)
-        {
-
-            // Get the build status
-            cl_build_status build_status;
-            error = clGetProgramBuildInfo(
-                program, device_list[i], CL_PROGRAM_BUILD_STATUS,
-                sizeof(build_status), &build_status, &size_ret);
-            test_error(error, "Unable to query build status");
-
-            // If the build failed then log the status, and allocate the build
-            // log, log it and free it
-            if (build_status != CL_BUILD_SUCCESS)
-            {
-
-                log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n",
-                          (int)build_status);
-                error = clGetProgramBuildInfo(program, device_list[i],
-                                              CL_PROGRAM_BUILD_LOG, 0, NULL,
-                                              &size_ret);
-                test_error(error, "Unable to query build log size");
-                char *build_log = (char *)malloc(size_ret);
-                error = clGetProgramBuildInfo(program, device_list[i],
-                                              CL_PROGRAM_BUILD_LOG, size_ret,
-                                              build_log, &size_ret);
-                test_error(error, "Unable to query build log");
-                log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
-                free(build_log);
-            }
-        }
-
-        // Was the number of devices given
-        if (num_devices == 0)
-        {
-
-            // If zero devices were specified then free the device list
-            free(device_list);
-        }
     }
 
-    return CL_SUCCESS;
+    // For each device in the device_list
+    unsigned int i;
+    for (i = 0; i < num_devices; i++) {
+
+      // Get the build status
+      cl_build_status build_status;
+      error = clGetProgramBuildInfo(program,
+                                    device_list[i],
+                                    CL_PROGRAM_BUILD_STATUS,
+                                    sizeof(build_status),
+                                    &build_status,
+                                    &size_ret);
+      test_error( error, "Unable to query build status" );
+
+      // If the build failed then log the status, and allocate the build log, log it and free it
+      if (build_status != CL_BUILD_SUCCESS) {
+
+        log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);
+        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
+        test_error( error, "Unable to query build log size" );
+        char *build_log = (char *) malloc(size_ret);
+        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret);
+        test_error( error, "Unable to query build log" );
+        log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
+        free(build_log);
+
+      }
+
+    }
+
+    // Was the number of devices given
+    if (num_devices == 0) {
+
+      // If zero devices were specified then free the device list
+      free(device_list);
+
+    }
+
+  }
+
+  return CL_SUCCESS;
 }
 
-const char *subtests_to_skip_with_offline_compiler[] = {
-    "get_kernel_arg_info",
-    "binary_create",
+const char * subtests_requiring_opencl_1_2[] = {
+            "device_partition_equally",
+            "device_partition_by_counts",
+            "device_partition_by_affinity_domain_numa",
+            "device_partition_by_affinity_domain_l4_cache",
+            "device_partition_by_affinity_domain_l3_cache",
+            "device_partition_by_affinity_domain_l2_cache",
+            "device_partition_by_affinity_domain_l1_cache",
+            "device_partition_by_affinity_domain_next_partitionable",
+            "device_partition_all",
+    "buffer_fill_int",
+    "buffer_fill_uint",
+    "buffer_fill_short",
+    "buffer_fill_ushort",
+    "buffer_fill_char",
+    "buffer_fill_uchar",
+    "buffer_fill_long",
+    "buffer_fill_ulong",
+    "buffer_fill_float",
+    "buffer_fill_struct",
+  "test_mem_host_write_only_buffer",
+  "test_mem_host_write_only_subbuffer",
+  "test_mem_host_no_access_buffer",
+  "test_mem_host_no_access_subbuffer",
+  "test_mem_host_read_only_image",
+  "test_mem_host_write_only_image",
+  "test_mem_host_no_access_image",
+  // CL_MEM_HOST_{READ|WRITE}_ONLY api/
+    "get_buffer_info",
+    "get_image1d_info",
+    "get_image1d_array_info",
+    "get_image2d_array_info",
+  // gl/
+  "images_read_1D",
+  "images_write_1D",
+  "images_1D_getinfo",
+  "images_read_1Darray",
+  "images_write_1Darray",
+  "images_1Darray_getinfo",
+  "images_read_2Darray",
+  "images_write_2Darray",
+  "images_2Darray_getinfo",
+    "buffer_migrate",
+    "image_migrate",
+   // compiler/
     "load_program_source",
     "load_multistring_source",
     "load_two_kernel_source",
@@ -633,6 +651,9 @@
     "load_discreet_length_source",
     "get_program_source",
     "get_program_build_info",
+    "get_program_info",
+    "large_compile",
+    "async_build",
     "options_build_optimizations",
     "options_build_macro",
     "options_build_macro_existence",
@@ -644,16 +665,32 @@
     "preprocessor_pragma",
     "compiler_defines_for_extensions",
     "image_macro",
+    "simple_compile_only",
+    "simple_static_compile_only",
     "simple_extern_compile_only",
+    "simple_compile_with_callback",
     "simple_embedded_header_compile",
+    "simple_link_only",
     "two_file_regular_variable_access",
     "two_file_regular_struct_access",
     "two_file_regular_function_access",
+    "simple_link_with_callback",
     "simple_embedded_header_link",
+    "execute_after_simple_compile_and_link",
+    "execute_after_simple_compile_and_link_no_device_info",
     "execute_after_simple_compile_and_link_with_defines",
     "execute_after_simple_compile_and_link_with_callbacks",
+    "execute_after_simple_library_with_link",
+    "execute_after_two_file_link",
+    "execute_after_two_file_link",
     "execute_after_embedded_header_link",
     "execute_after_included_header_link",
+    "execute_after_serialize_reload_object",
+    "execute_after_serialize_reload_library",
+    "simple_library_only",
+    "simple_library_with_callback",
+    "simple_library_with_link",
+    "two_file_link",
     "multi_file_libraries",
     "multiple_files",
     "multiple_libraries",
@@ -661,47 +698,66 @@
     "multiple_embedded_headers",
     "program_binary_type",
     "compile_and_link_status_options_log",
-    "kernel_preprocessor_macros",
-    "execute_after_serialize_reload_library",
-    "execute_after_serialize_reload_object",
-    "execute_after_simple_compile_and_link",
-    "execute_after_simple_compile_and_link_no_device_info",
-    "execute_after_simple_library_with_link",
-    "execute_after_two_file_link",
-    "simple_compile_only",
-    "simple_compile_with_callback",
-    "simple_library_only",
-    "simple_library_with_callback",
-    "simple_library_with_link",
-    "simple_link_only",
-    "simple_link_with_callback",
-    "simple_static_compile_only",
-    "two_file_link",
-    "async_build",
-    "unload_repeated",
-    "unload_compile_unload_link",
-    "unload_build_unload_create_kernel",
-    "unload_link_different",
-    "unload_build_threaded",
-    "unload_build_info",
-    "unload_program_binaries",
-    "features_macro",
-    "progvar_prog_scope_misc",
-    "library_function"
+    // CL_PROGRAM_NUM_KERNELS, in api/
+    "get_kernel_arg_info",
+    "create_kernels_in_program",
+    // clEnqueue..WithWaitList, in events/
+    "event_enqueue_marker_with_event_list",
+    "event_enqueue_barrier_with_event_list",
+    "popcount"
 };
 
-int check_functions_for_offline_compiler(const char *subtestname,
-                                         cl_device_id device)
+const char * subtests_to_skip_with_offline_compiler[] = {
+            "get_kernel_arg_info",
+            "binary_create",
+            "load_program_source",
+            "load_multistring_source",
+            "load_two_kernel_source",
+            "load_null_terminated_source",
+            "load_null_terminated_multi_line_source",
+            "load_null_terminated_partial_multi_line_source",
+            "load_discreet_length_source",
+            "get_program_source",
+            "get_program_build_info",
+            "options_build_optimizations",
+            "options_build_macro",
+            "options_build_macro_existence",
+            "options_include_directory",
+            "options_denorm_cache",
+            "preprocessor_define_udef",
+            "preprocessor_include",
+            "preprocessor_line_error",
+            "preprocessor_pragma",
+            "compiler_defines_for_extensions",
+            "image_macro",
+            "simple_extern_compile_only",
+            "simple_embedded_header_compile",
+            "two_file_regular_variable_access",
+            "two_file_regular_struct_access",
+            "two_file_regular_function_access",
+            "simple_embedded_header_link",
+            "execute_after_simple_compile_and_link_with_defines",
+            "execute_after_simple_compile_and_link_with_callbacks",
+            "execute_after_embedded_header_link",
+            "execute_after_included_header_link",
+            "multi_file_libraries",
+            "multiple_files",
+            "multiple_libraries",
+            "multiple_files_multiple_libraries",
+            "multiple_embedded_headers",
+            "program_binary_type",
+            "compile_and_link_status_options_log",
+            "kernel_preprocessor_macros",
+};
+
+int check_functions_for_offline_compiler(const char *subtestname, cl_device_id device)
 {
     if (gCompilationMode != kOnline)
     {
-        int nNotRequiredWithOfflineCompiler =
-            sizeof(subtests_to_skip_with_offline_compiler) / sizeof(char *);
+        int nNotRequiredWithOfflineCompiler = sizeof(subtests_to_skip_with_offline_compiler)/sizeof(char *);
         size_t i;
-        for (i = 0; i < nNotRequiredWithOfflineCompiler; ++i)
-        {
-            if (!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i]))
-            {
+        for(i=0; i < nNotRequiredWithOfflineCompiler; ++i) {
+            if(!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i])) {
                 return 1;
             }
         }

diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h
index 1944601..727c213 100644
--- a/test_common/harness/errorHelpers.h
+++ b/test_common/harness/errorHelpers.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -24,179 +24,91 @@
 #include <CL/opencl.h>
 #endif
 #include <stdlib.h>
-#define LOWER_IS_BETTER 0
-#define HIGHER_IS_BETTER 1
+#define LOWER_IS_BETTER     0
+#define HIGHER_IS_BETTER    1
 
 #include <stdio.h>
 #define test_start()
 #define log_info printf
 #define log_error printf
 #define log_missing_feature printf
-#define log_perf(_number, _higherBetter, _numType, _format, ...)               \
-    printf("Performance Number " _format " (in %s, %s): %g\n", ##__VA_ARGS__,  \
-           _numType, _higherBetter ? "higher is better" : "lower is better",   \
-           _number)
-#define vlog_perf(_number, _higherBetter, _numType, _format, ...)              \
-    printf("Performance Number " _format " (in %s, %s): %g\n", ##__VA_ARGS__,  \
-           _numType, _higherBetter ? "higher is better" : "lower is better",   \
-           _number)
+#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType,        \
+                    _higherBetter?"higher is better":"lower is better", _number )
+#define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType,    \
+                    _higherBetter?"higher is better":"lower is better" , _number)
 #ifdef _WIN32
-#ifdef __MINGW32__
-// Use __mingw_printf since it supports "%a" format specifier
-#define vlog __mingw_printf
-#define vlog_error __mingw_printf
+    #ifdef __MINGW32__
+        // Use __mingw_printf since it supports "%a" format specifier
+        #define vlog __mingw_printf
+        #define vlog_error __mingw_printf
+    #else
+        // Use home-baked function that treats "%a" as "%f"
+    static int vlog_win32(const char *format, ...);
+    #define vlog vlog_win32
+    #define vlog_error vlog_win32
+    #endif
 #else
-// Use home-baked function that treats "%a" as "%f"
-static int vlog_win32(const char *format, ...);
-#define vlog vlog_win32
-#define vlog_error vlog_win32
-#endif
-#else
-#define vlog_error printf
-#define vlog printf
+    #define vlog_error printf
+    #define vlog printf
 #endif
 
-#define ct_assert(b) ct_assert_i(b, __LINE__)
-#define ct_assert_i(b, line) ct_assert_ii(b, line)
-#define ct_assert_ii(b, line)                                                  \
-    int _compile_time_assertion_on_line_##line[b ? 1 : -1];
+#define ct_assert(b)          ct_assert_i(b, __LINE__)
+#define ct_assert_i(b, line)  ct_assert_ii(b, line)
+#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
 
-#define test_fail(msg, ...)                                                    \
-    {                                                                          \
-        log_error(msg, ##__VA_ARGS__);                                         \
-        return TEST_FAIL;                                                      \
-    }
-#define test_error(errCode, msg) test_error_ret(errCode, msg, errCode)
-#define test_error_ret(errCode, msg, retValue)                                 \
-    {                                                                          \
-        auto errCodeResult = errCode;                                          \
-        if (errCodeResult != CL_SUCCESS)                                       \
-        {                                                                      \
-            print_error(errCodeResult, msg);                                   \
-            return retValue;                                                   \
-        }                                                                      \
-    }
-#define print_error(errCode, msg)                                              \
-    log_error("ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString(errCode),   \
-              __FILE__, __LINE__);
+#define test_error(errCode,msg)    test_error_ret(errCode,msg,errCode)
+#define test_error_ret(errCode,msg,retValue)    { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
+#define print_error(errCode,msg)    log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
 
-#define test_missing_feature(errCode, msg)                                     \
-    test_missing_feature_ret(errCode, msg, errCode)
-// this macro should always return CL_SUCCESS, but print the missing feature
-// message
-#define test_missing_feature_ret(errCode, msg, retValue)                       \
-    {                                                                          \
-        if (errCode != CL_SUCCESS)                                             \
-        {                                                                      \
-            print_missing_feature(errCode, msg);                               \
-            return CL_SUCCESS;                                                 \
-        }                                                                      \
-    }
-#define print_missing_feature(errCode, msg)                                    \
-    log_missing_feature("ERROR: Subtest %s tests a feature not supported by "  \
-                        "the device version! (from %s:%d)\n",                  \
-                        msg, __FILE__, __LINE__);
+#define test_missing_feature(errCode, msg) test_missing_feature_ret(errCode, msg, errCode)
+// this macro should always return CL_SUCCESS, but print the missing feature message
+#define test_missing_feature_ret(errCode,msg,retValue)    { if( errCode != CL_SUCCESS ) { print_missing_feature( errCode, msg ); return CL_SUCCESS ; } }
+#define print_missing_feature(errCode, msg) log_missing_feature("ERROR: Subtest %s tests a feature not supported by the device version! (from %s:%d)\n", msg, __FILE__, __LINE__ );
 
-#define test_missing_support_offline_cmpiler(errCode, msg)                     \
-    test_missing_support_offline_cmpiler_ret(errCode, msg, errCode)
-// this macro should always return CL_SUCCESS, but print the skip message on
-// test not supported with offline compiler
-#define test_missing_support_offline_cmpiler_ret(errCode, msg, retValue)       \
-    {                                                                          \
-        if (errCode != CL_SUCCESS)                                             \
-        {                                                                      \
-            log_info("INFO: Subtest %s tests is not supported in offline "     \
-                     "compiler execution path! (from %s:%d)\n",                \
-                     msg, __FILE__, __LINE__);                                 \
-            return TEST_SKIP;                                                  \
-        }                                                                      \
-    }
+#define test_missing_support_offline_cmpiler(errCode, msg) test_missing_support_offline_cmpiler_ret(errCode, msg, errCode)
+// this macro should always return CL_SUCCESS, but print the skip message on test not supported with offline compiler
+#define test_missing_support_offline_cmpiler_ret(errCode,msg,retValue)    { if( errCode != CL_SUCCESS ) { log_info( "INFO: Subtest %s tests is not supported in offline compiler execution path! (from %s:%d)\n", msg, __FILE__, __LINE__ ); return TEST_SKIP ; } }
 
 // expected error code vs. what we got
-#define test_failure_error(errCode, expectedErrCode, msg)                      \
-    test_failure_error_ret(errCode, expectedErrCode, msg,                      \
-                           errCode != expectedErrCode)
-#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue)        \
-    {                                                                          \
-        if (errCode != expectedErrCode)                                        \
-        {                                                                      \
-            print_failure_error(errCode, expectedErrCode, msg);                \
-            return retValue;                                                   \
-        }                                                                      \
-    }
-#define print_failure_error(errCode, expectedErrCode, msg)                     \
-    log_error("ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg,            \
-              IGetErrorString(errCode), IGetErrorString(expectedErrCode),      \
-              __FILE__, __LINE__);
-#define test_failure_warning(errCode, expectedErrCode, msg)                    \
-    test_failure_warning_ret(errCode, expectedErrCode, msg,                    \
-                             errCode != expectedErrCode)
-#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue)      \
-    {                                                                          \
-        if (errCode != expectedErrCode)                                        \
-        {                                                                      \
-            print_failure_warning(errCode, expectedErrCode, msg);              \
-            warnings++;                                                        \
-        }                                                                      \
-    }
-#define print_failure_warning(errCode, expectedErrCode, msg)                   \
-    log_error("WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg,          \
-              IGetErrorString(errCode), IGetErrorString(expectedErrCode),      \
-              __FILE__, __LINE__);
+#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
+#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } }
+#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
+#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
+#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
+#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
 
-// generate an error when an assertion is false (not error code related)
-#define test_assert_error(condition, msg)                                      \
-    test_assert_error_ret(condition, msg, TEST_FAIL)
-#define test_assert_error_ret(condition, msg, retValue)                        \
-    {                                                                          \
-        if (!(condition))                                                      \
-        {                                                                      \
-            print_assertion_error(condition, msg);                             \
-            return retValue;                                                   \
-        }                                                                      \
-    }
-#define print_assertion_error(condition, msg)                                  \
-    log_error("ERROR: %s! (!(%s) from %s:%d)\n", msg, #condition, __FILE__,    \
-              __LINE__);
-
-#define ASSERT_SUCCESS(expr, msg)                                              \
-    do                                                                         \
-    {                                                                          \
-        cl_int _temp_retval = (expr);                                          \
-        if (_temp_retval != CL_SUCCESS)                                        \
-        {                                                                      \
-            std::stringstream ss;                                              \
-            ss << "ERROR: " << msg << "=" << IGetErrorString(_temp_retval)     \
-               << " at " << __FILE__ << ":" << __LINE__ << "\n";               \
-            throw std::runtime_error(ss.str());                                \
-        }                                                                      \
+#define ASSERT_SUCCESS(expr, msg)                                                                  \
+    do                                                                                             \
+    {                                                                                              \
+        cl_int _temp_retval = (expr);                                                              \
+        if (_temp_retval != CL_SUCCESS)                                                            \
+        {                                                                                          \
+            std::stringstream ss;                                                                  \
+            ss << "ERROR: " << msg << "=" << IGetErrorString(_temp_retval)                         \
+               << " at " << __FILE__ << ":" << __LINE__ << "\n";                                   \
+            throw std::runtime_error(ss.str());                                                    \
+        }                                                                                          \
     } while (0)
 
-extern const char *IGetErrorString(int clErrorCode);
+extern const char    *IGetErrorString( int clErrorCode );
 
-extern float Ulp_Error_Half(cl_half test, float reference);
-extern float Ulp_Error(float test, double reference);
-extern float Ulp_Error_Double(double test, long double reference);
+extern float Ulp_Error_Half( cl_ushort test, float reference );
+extern float Ulp_Error( float test, double reference );
+extern float Ulp_Error_Double( double test, long double reference );
 
-extern const char *GetChannelTypeName(cl_channel_type type);
-extern int IsChannelTypeSupported(cl_channel_type type);
-extern const char *GetChannelOrderName(cl_channel_order order);
-extern int IsChannelOrderSupported(cl_channel_order order);
-extern const char *GetAddressModeName(cl_addressing_mode mode);
-extern const char *GetQueuePropertyName(cl_command_queue_properties properties);
+extern const char *GetChannelTypeName( cl_channel_type type );
+extern int IsChannelTypeSupported( cl_channel_type type );
+extern const char *GetChannelOrderName( cl_channel_order order );
+extern int IsChannelOrderSupported( cl_channel_order order );
+extern const char *GetAddressModeName( cl_addressing_mode mode );
 
-extern const char *GetDeviceTypeName(cl_device_type type);
-int check_functions_for_offline_compiler(const char *subtestname,
-                                         cl_device_id device);
-cl_int OutputBuildLogs(cl_program program, cl_uint num_devices,
-                       cl_device_id *device_list);
+extern const char *GetDeviceTypeName( cl_device_type type );
+int check_functions_for_offline_compiler(const char *subtestname, cl_device_id device);
 
-// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static
-// storage, but it's not reentrant then!)
-extern const char *GetDataVectorString(void *dataBuffer, size_t typeSize,
-                                       size_t vecSize, char *buffer);
+// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!)
+extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer );
 
-#if defined(_WIN32) && !defined(__MINGW32__)
+#if defined (_WIN32) && !defined(__MINGW32__)
 #include <stdarg.h>
 #include <stdio.h>
 #include <string.h>
@@ -204,21 +116,17 @@
 {
     const char *new_format = format;
 
-    if (strstr(format, "%a"))
-    {
+    if (strstr(format, "%a")) {
         char *temp;
-        if ((temp = strdup(format)) == NULL)
-        {
+        if ((temp = strdup(format)) == NULL) {
             printf("vlog_win32: Failed to allocate memory for strdup\n");
             return -1;
         }
         new_format = temp;
-        while (*temp)
-        {
+        while (*temp) {
             // replace %a with %f
-            if ((*temp == '%') && (*(temp + 1) == 'a'))
-            {
-                *(temp + 1) = 'f';
+            if ((*temp == '%') && (*(temp+1) == 'a')) {
+                *(temp+1) = 'f';
             }
             temp++;
         }
@@ -229,9 +137,8 @@
     vprintf(new_format, args);
     va_end(args);
 
-    if (new_format != format)
-    {
-        free((void *)new_format);
+    if (new_format != format) {
+        free((void*)new_format);
     }
 
     return 0;
@@ -240,3 +147,5 @@
 
 
 #endif // _errorHelpers_h
+
+

diff --git a/test_common/harness/featureHelpers.cpp b/test_common/harness/featureHelpers.cpp
deleted file mode 100644
index 07435c3..0000000
--- a/test_common/harness/featureHelpers.cpp
+++ /dev/null

@@ -1,75 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "featureHelpers.h"
-#include "errorHelpers.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include <vector>
-
-int get_device_cl_c_features(cl_device_id device, OpenCLCFeatures& features)
-{
-    // Initially, all features are unsupported.
-    features = { 0 };
-
-    // The CL_DEVICE_OPENCL_C_FEATURES query does not exist pre-3.0.
-    const Version version = get_device_cl_version(device);
-    if (version < Version(3, 0))
-    {
-        return TEST_PASS;
-    }
-
-    cl_int error = CL_SUCCESS;
-
-    size_t sz = 0;
-    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_FEATURES, 0, NULL, &sz);
-    test_error(error, "Unable to query CL_DEVICE_OPENCL_C_FEATURES size");
-
-    std::vector<cl_name_version> clc_features(sz / sizeof(cl_name_version));
-    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_FEATURES, sz,
-                            clc_features.data(), NULL);
-    test_error(error, "Unable to query CL_DEVICE_OPENCL_C_FEATURES");
-
-#define CHECK_OPENCL_C_FEATURE(_feature)                                       \
-    if (strcmp(clc_feature.name, #_feature) == 0)                              \
-    {                                                                          \
-        features.supports##_feature = true;                                    \
-    }
-
-    for (const auto& clc_feature : clc_features)
-    {
-        CHECK_OPENCL_C_FEATURE(__opencl_c_3d_image_writes);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_atomic_order_acq_rel);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_atomic_order_seq_cst);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_atomic_scope_device);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_atomic_scope_all_devices);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_device_enqueue);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_generic_address_space);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_fp64);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_images);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_int64);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_pipes);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_program_scope_global_variables);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_read_write_images);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_subgroups);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_work_group_collective_functions);
-    }
-
-#undef CHECK_OPENCL_C_FEATURE
-
-    return TEST_PASS;
-}

diff --git a/test_common/harness/featureHelpers.h b/test_common/harness/featureHelpers.h
deleted file mode 100644
index 3f77b76..0000000
--- a/test_common/harness/featureHelpers.h
+++ /dev/null

@@ -1,43 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef _featureHelpers_h
-#define _featureHelpers_h
-
-#include "compat.h"
-#include "testHarness.h"
-
-struct OpenCLCFeatures
-{
-    bool supports__opencl_c_3d_image_writes;
-    bool supports__opencl_c_atomic_order_acq_rel;
-    bool supports__opencl_c_atomic_order_seq_cst;
-    bool supports__opencl_c_atomic_scope_device;
-    bool supports__opencl_c_atomic_scope_all_devices;
-    bool supports__opencl_c_device_enqueue;
-    bool supports__opencl_c_generic_address_space;
-    bool supports__opencl_c_fp64;
-    bool supports__opencl_c_images;
-    bool supports__opencl_c_int64;
-    bool supports__opencl_c_pipes;
-    bool supports__opencl_c_program_scope_global_variables;
-    bool supports__opencl_c_read_write_images;
-    bool supports__opencl_c_subgroups;
-    bool supports__opencl_c_work_group_collective_functions;
-};
-
-int get_device_cl_c_features(cl_device_id device, OpenCLCFeatures& features);
-
-#endif // _featureHelpers_h

diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h
index 40826c5..4835db4 100644
--- a/test_common/harness/fpcontrol.h
+++ b/test_common/harness/fpcontrol.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,99 +16,89 @@
 #ifndef _fpcontrol_h
 #define _fpcontrol_h
 
-// In order to get tests for correctly rounded operations (e.g. multiply) to
-// work properly we need to be able to set the reference hardware to FTZ mode if
-// the device hardware is running in that mode.  We have explored all other
-// options short of writing correctly rounded operations in integer code, and
-// have found this is the only way to correctly verify operation.
+// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
+// to FTZ mode if the device hardware is running in that mode.  We have explored all other options short of writing correctly rounded operations
+// in integer code, and have found this is the only way to correctly verify operation.
 //
-// Non-Apple implementations will need to provide their own implentation for
-// these features.  If the reference hardware and device are both running in the
-// same state (either FTZ or IEEE compliant modes) then these functions may be
-// empty.  If the device is running in non-default rounding mode (e.g. round
-// toward zero), then these functions should also set the reference device into
-// that rounding mode.
-#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__)              \
-    || defined(__MINGW32__)
-typedef int FPU_mode_type;
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
-    || defined(__MINGW32__)
-#include <xmmintrin.h>
-#elif defined(__PPC__)
-#include <fpu_control.h>
-extern __thread fpu_control_t fpu_control;
+// Non-Apple implementations will need to provide their own implentation for these features.  If the reference hardware and device are both
+// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty.  If the device is running in non-default
+// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
+#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
+    typedef int     FPU_mode_type;
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
+    #include <xmmintrin.h>
+#elif defined( __PPC__ )
+    #include <fpu_control.h>
+    extern __thread fpu_control_t fpu_control;
 #endif
-// Set the reference hardware floating point unit to FTZ mode
-static inline void ForceFTZ(FPU_mode_type *mode)
-{
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
-    || defined(__MINGW32__)
-    *mode = _mm_getcsr();
-    _mm_setcsr(*mode | 0x8040);
-#elif defined(__PPC__)
-    *mode = fpu_control;
-    fpu_control |= _FPU_MASK_NI;
-#elif defined(__arm__)
-    unsigned fpscr;
-    __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
-    *mode = fpscr;
-    __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
-    // Add 64 bit support
-#elif defined(__aarch64__)
-    unsigned fpscr;
-    __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
-    *mode = fpscr;
-    __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
+    // Set the reference hardware floating point unit to FTZ mode
+    static inline void ForceFTZ( FPU_mode_type *mode )
+    {
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
+        *mode = _mm_getcsr();
+        _mm_setcsr( *mode | 0x8040);
+#elif defined( __PPC__ )
+        *mode = fpu_control;
+        fpu_control |= _FPU_MASK_NI;
+#elif defined ( __arm__ )
+        unsigned fpscr;
+        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
+        // Add 64 bit support
+#elif defined (__aarch64__)
+        unsigned fpscr;
+        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24)));
 #else
-#error ForceFTZ needs an implentation
+        #error ForceFTZ needs an implentation
 #endif
-}
+    }
 
-// Disable the denorm flush to zero
-static inline void DisableFTZ(FPU_mode_type *mode)
-{
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
-    || defined(__MINGW32__)
-    *mode = _mm_getcsr();
-    _mm_setcsr(*mode & ~0x8040);
-#elif defined(__PPC__)
-    *mode = fpu_control;
-    fpu_control &= ~_FPU_MASK_NI;
-#elif defined(__arm__)
-    unsigned fpscr;
-    __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
-    *mode = fpscr;
-    __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
-    // Add 64 bit support
-#elif defined(__aarch64__)
-    unsigned fpscr;
-    __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
-    *mode = fpscr;
-    __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
+    // Disable the denorm flush to zero
+    static inline void DisableFTZ( FPU_mode_type *mode )
+    {
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
+        *mode = _mm_getcsr();
+        _mm_setcsr( *mode & ~0x8040);
+#elif defined( __PPC__ )
+        *mode = fpu_control;
+        fpu_control &= ~_FPU_MASK_NI;
+#elif defined ( __arm__ )
+        unsigned fpscr;
+        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
+        // Add 64 bit support
+#elif defined (__aarch64__)
+        unsigned fpscr;
+        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24)));
 #else
-#error DisableFTZ needs an implentation
+    #error DisableFTZ needs an implentation
 #endif
-}
+    }
 
-// Restore the reference hardware to floating point state indicated by *mode
-static inline void RestoreFPState(FPU_mode_type *mode)
-{
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
-    || defined(__MINGW32__)
-    _mm_setcsr(*mode);
-#elif defined(__PPC__)
-    fpu_control = *mode;
-#elif defined(__arm__)
-    __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
-    // Add 64 bit support
-#elif defined(__aarch64__)
-    __asm__ volatile("msr fpcr, %0" ::"r"(*mode));
+    // Restore the reference hardware to floating point state indicated by *mode
+    static inline void RestoreFPState( FPU_mode_type *mode )
+    {
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
+        _mm_setcsr( *mode );
+#elif defined( __PPC__)
+        fpu_control = *mode;
+#elif defined (__arm__)
+        __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
+        // Add 64 bit support
+#elif defined (__aarch64__)
+        __asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
 #else
-#error RestoreFPState needs an implementation
+        #error RestoreFPState needs an implementation
 #endif
-}
+    }
 #else
-#error ForceFTZ and RestoreFPState need implentations
+        #error ForceFTZ and RestoreFPState need implentations
 #endif
 
 #endif

diff --git a/test_common/harness/genericThread.cpp b/test_common/harness/genericThread.cpp
index f50ee6e..2b742fa 100644
--- a/test_common/harness/genericThread.cpp
+++ b/test_common/harness/genericThread.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -21,34 +21,33 @@
 #include <pthread.h>
 #endif
 
-void *genericThread::IStaticReflector(void *data)
+void * genericThread::IStaticReflector( void * data )
 {
     genericThread *t = (genericThread *)data;
     return t->IRun();
 }
 
-bool genericThread::Start(void)
+bool genericThread::Start( void )
 {
 #if defined(_WIN32)
-    mHandle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)IStaticReflector,
-                           this, 0, NULL);
-    return (mHandle != NULL);
+    mHandle = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE) IStaticReflector, this, 0, NULL );
+    return ( mHandle != NULL );
 #else // !_WIN32
-    int error = pthread_create((pthread_t *)&mHandle, NULL, IStaticReflector,
-                               (void *)this);
-    return (error == 0);
+    int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this );
+    return ( error == 0 );
 #endif // !_WIN32
 }
 
-void *genericThread::Join(void)
+void * genericThread::Join( void )
 {
 #if defined(_WIN32)
-    WaitForSingleObject((HANDLE)mHandle, INFINITE);
+    WaitForSingleObject( (HANDLE)mHandle, INFINITE );
     return NULL;
 #else // !_WIN32
-    void *retVal;
-    int error = pthread_join((pthread_t)mHandle, &retVal);
-    if (error != 0) retVal = NULL;
+    void * retVal;
+    int error = pthread_join( (pthread_t)mHandle, &retVal );
+    if( error != 0 )
+        retVal = NULL;
     return retVal;
 #endif // !_WIN32
 }

diff --git a/test_common/harness/genericThread.h b/test_common/harness/genericThread.h
index cc7c010..168b740 100644
--- a/test_common/harness/genericThread.h
+++ b/test_common/harness/genericThread.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -18,20 +18,25 @@
 
 #include <stdio.h>
 
-class genericThread {
-public:
-    virtual ~genericThread() {}
+class genericThread
+{
+    public:
 
-    bool Start(void);
-    void* Join(void);
+        virtual ~genericThread() {}
 
-protected:
-    virtual void* IRun(void) = 0;
+        bool    Start( void );
+        void *    Join( void );
 
-private:
-    void* mHandle;
+    protected:
 
-    static void* IStaticReflector(void* data);
+        virtual void *    IRun( void ) = 0;
+
+    private:
+
+        void* mHandle;
+
+        static void * IStaticReflector( void * data );
 };
 
 #endif // _genericThread_h
+

diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp
index 72a2f0c..f36c153 100644
--- a/test_common/harness/imageHelpers.cpp
+++ b/test_common/harness/imageHelpers.cpp

@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017,2021 The Khronos Group Inc.
-//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,30 +16,36 @@
 #include "imageHelpers.h"
 #include <limits.h>
 #include <assert.h>
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
 #include <sys/mman.h>
 #endif
-#if !defined(_WIN32) && !defined(__APPLE__)
+#if !defined (_WIN32) && !defined(__APPLE__)
 #include <malloc.h>
 #endif
 #include <algorithm>
 #include <iterator>
-#if !defined(_WIN32)
+#if !defined (_WIN32)
 #include <cmath>
 #endif
 
 RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode;
 
+static cl_ushort float2half_rte( float f );
+static cl_ushort float2half_rtz( float f );
+
 cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
 bool gTestRounding = false;
-double sRGBmap(float fc)
+double
+sRGBmap(float fc)
 {
     double c = (double)fc;
 
-#if !defined(_WIN32)
-    if (std::isnan(c)) c = 0.0;
+#if !defined (_WIN32)
+    if (std::isnan(c))
+        c = 0.0;
 #else
-    if (_isnan(c)) c = 0.0;
+    if (_isnan(c))
+        c = 0.0;
 #endif
 
     if (c > 1.0)
@@ -49,12 +55,13 @@
     else if (c < 0.0031308)
         c = 12.92 * c;
     else
-        c = (1055.0 / 1000.0) * pow(c, 5.0 / 12.0) - (55.0 / 1000.0);
+        c = (1055.0/1000.0) * pow(c, 5.0/12.0) - (55.0/1000.0);
 
     return c * 255.0;
 }
 
-double sRGBunmap(float fc)
+double
+sRGBunmap(float fc)
 {
     double c = (double)fc;
     double result;
@@ -68,19 +75,20 @@
 }
 
 
-uint32_t get_format_type_size(const cl_image_format *format)
+size_t get_format_type_size( const cl_image_format *format )
 {
-    return get_channel_data_type_size(format->image_channel_data_type);
+    return get_channel_data_type_size( format->image_channel_data_type );
 }
 
-uint32_t get_channel_data_type_size(cl_channel_type channelType)
+size_t get_channel_data_type_size( cl_channel_type channelType )
 {
-    switch (channelType)
+    switch( channelType )
     {
         case CL_SNORM_INT8:
         case CL_UNORM_INT8:
         case CL_SIGNED_INT8:
-        case CL_UNSIGNED_INT8: return 1;
+        case CL_UNSIGNED_INT8:
+            return 1;
 
         case CL_SNORM_INT16:
         case CL_UNORM_INT16:
@@ -90,10 +98,11 @@
 #ifdef CL_SFIXED14_APPLE
         case CL_SFIXED14_APPLE:
 #endif
-            return sizeof(cl_short);
+            return sizeof( cl_short );
 
         case CL_SIGNED_INT32:
-        case CL_UNSIGNED_INT32: return sizeof(cl_int);
+        case CL_UNSIGNED_INT32:
+            return sizeof( cl_int );
 
         case CL_UNORM_SHORT_565:
         case CL_UNORM_SHORT_555:
@@ -105,7 +114,8 @@
 
 #ifdef OBSOLETE_FORAMT
         case CL_UNORM_INT_8888:
-        case CL_UNORM_INT_8888_REV: return 4;
+        case CL_UNORM_INT_8888_REV:
+            return 4;
 #endif
 
         case CL_UNORM_INT_101010:
@@ -114,20 +124,22 @@
 #endif
             return 4;
 
-        case CL_FLOAT: return sizeof(cl_float);
+        case CL_FLOAT:
+            return sizeof( cl_float );
 
-        default: return 0;
+        default:
+            return 0;
     }
 }
 
-uint32_t get_format_channel_count(const cl_image_format *format)
+size_t get_format_channel_count( const cl_image_format *format )
 {
-    return get_channel_order_channel_count(format->image_channel_order);
+    return get_channel_order_channel_count( format->image_channel_order );
 }
 
-uint32_t get_channel_order_channel_count(cl_channel_order order)
+size_t get_channel_order_channel_count( cl_channel_order order )
 {
-    switch (order)
+    switch( order )
     {
         case CL_R:
         case CL_A:
@@ -135,16 +147,19 @@
         case CL_INTENSITY:
         case CL_LUMINANCE:
         case CL_DEPTH:
-        case CL_DEPTH_STENCIL: return 1;
+        case CL_DEPTH_STENCIL:
+            return 1;
 
         case CL_RG:
         case CL_RA:
-        case CL_RGx: return 2;
+        case CL_RGx:
+            return 2;
 
         case CL_RGB:
         case CL_RGBx:
         case CL_sRGB:
-        case CL_sRGBx: return 3;
+        case CL_sRGBx:
+            return 3;
 
         case CL_RGBA:
         case CL_ARGB:
@@ -161,56 +176,56 @@
 #ifdef CL_ABGR_APPLE
         case CL_ABGR_APPLE:
 #endif
-            return 4;
+          return 4;
 
         default:
-            log_error("%s does not support 0x%x\n", __FUNCTION__, order);
-            return 0;
+          log_error("%s does not support 0x%x\n",__FUNCTION__,order);
+          return 0;
     }
 }
 
-cl_channel_type get_channel_type_from_name(const char *name)
+cl_channel_type  get_channel_type_from_name( const char *name )
 {
-    struct
-    {
+    struct {
         cl_channel_type type;
         const char *name;
-    } typeNames[] = { { CL_SNORM_INT8, "CL_SNORM_INT8" },
-                      { CL_SNORM_INT16, "CL_SNORM_INT16" },
-                      { CL_UNORM_INT8, "CL_UNORM_INT8" },
-                      { CL_UNORM_INT16, "CL_UNORM_INT16" },
-                      { CL_UNORM_INT24, "CL_UNORM_INT24" },
-                      { CL_UNORM_SHORT_565, "CL_UNORM_SHORT_565" },
-                      { CL_UNORM_SHORT_555, "CL_UNORM_SHORT_555" },
-                      { CL_UNORM_INT_101010, "CL_UNORM_INT_101010" },
-                      { CL_SIGNED_INT8, "CL_SIGNED_INT8" },
-                      { CL_SIGNED_INT16, "CL_SIGNED_INT16" },
-                      { CL_SIGNED_INT32, "CL_SIGNED_INT32" },
-                      { CL_UNSIGNED_INT8, "CL_UNSIGNED_INT8" },
-                      { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" },
-                      { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" },
-                      { CL_HALF_FLOAT, "CL_HALF_FLOAT" },
-                      { CL_FLOAT, "CL_FLOAT" },
+    } typeNames[] = {
+        { CL_SNORM_INT8, "CL_SNORM_INT8" },
+        { CL_SNORM_INT16, "CL_SNORM_INT16" },
+        { CL_UNORM_INT8, "CL_UNORM_INT8" },
+        { CL_UNORM_INT16, "CL_UNORM_INT16" },
+        { CL_UNORM_INT24, "CL_UNORM_INT24" },
+        { CL_UNORM_SHORT_565, "CL_UNORM_SHORT_565" },
+        { CL_UNORM_SHORT_555, "CL_UNORM_SHORT_555" },
+        { CL_UNORM_INT_101010, "CL_UNORM_INT_101010" },
+        { CL_SIGNED_INT8, "CL_SIGNED_INT8" },
+        { CL_SIGNED_INT16, "CL_SIGNED_INT16" },
+        { CL_SIGNED_INT32, "CL_SIGNED_INT32" },
+        { CL_UNSIGNED_INT8, "CL_UNSIGNED_INT8" },
+        { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" },
+        { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" },
+        { CL_HALF_FLOAT, "CL_HALF_FLOAT" },
+        { CL_FLOAT, "CL_FLOAT" },
 #ifdef CL_SFIXED14_APPLE
-                      { CL_SFIXED14_APPLE, "CL_SFIXED14_APPLE" }
+        { CL_SFIXED14_APPLE, "CL_SFIXED14_APPLE" }
 #endif
     };
-    for (size_t i = 0; i < sizeof(typeNames) / sizeof(typeNames[0]); i++)
+    for( size_t i = 0; i < sizeof( typeNames ) / sizeof( typeNames[ 0 ] ); i++ )
     {
-        if (strcmp(typeNames[i].name, name) == 0
-            || strcmp(typeNames[i].name + 3, name) == 0)
-            return typeNames[i].type;
+        if( strcmp( typeNames[ i ].name, name ) == 0 || strcmp( typeNames[ i ].name + 3, name ) == 0 )
+            return typeNames[ i ].type;
     }
     return (cl_channel_type)-1;
 }
 
-cl_channel_order get_channel_order_from_name(const char *name)
+cl_channel_order  get_channel_order_from_name( const char *name )
 {
     const struct
     {
-        cl_channel_order order;
-        const char *name;
-    } orderNames[] = {
+        cl_channel_order    order;
+        const char          *name;
+    }orderNames[] =
+    {
         { CL_R, "CL_R" },
         { CL_A, "CL_A" },
         { CL_Rx, "CL_Rx" },
@@ -222,8 +237,8 @@
         { CL_RGBA, "CL_RGBA" },
         { CL_BGRA, "CL_BGRA" },
         { CL_ARGB, "CL_ARGB" },
-        { CL_INTENSITY, "CL_INTENSITY" },
-        { CL_LUMINANCE, "CL_LUMINANCE" },
+        { CL_INTENSITY, "CL_INTENSITY"},
+        { CL_LUMINANCE, "CL_LUMINANCE"},
         { CL_DEPTH, "CL_DEPTH" },
         { CL_DEPTH_STENCIL, "CL_DEPTH_STENCIL" },
         { CL_sRGB, "CL_sRGB" },
@@ -239,19 +254,18 @@
 #endif
     };
 
-    for (size_t i = 0; i < sizeof(orderNames) / sizeof(orderNames[0]); i++)
+    for( size_t i = 0; i < sizeof( orderNames ) / sizeof( orderNames[ 0 ] ); i++ )
     {
-        if (strcmp(orderNames[i].name, name) == 0
-            || strcmp(orderNames[i].name + 3, name) == 0)
-            return orderNames[i].order;
+        if( strcmp( orderNames[ i ].name, name ) == 0 || strcmp( orderNames[ i ].name + 3, name ) == 0 )
+            return orderNames[ i ].order;
     }
     return (cl_channel_order)-1;
 }
 
 
-int is_format_signed(const cl_image_format *format)
+int is_format_signed( const cl_image_format *format )
 {
-    switch (format->image_channel_data_type)
+    switch( format->image_channel_data_type )
     {
         case CL_SNORM_INT8:
         case CL_SIGNED_INT8:
@@ -265,269 +279,163 @@
 #endif
             return 1;
 
-        default: return 0;
+        default:
+            return 0;
     }
 }
 
-uint32_t get_pixel_size(const cl_image_format *format)
+size_t get_pixel_size( cl_image_format *format )
 {
-    switch (format->image_channel_data_type)
-    {
-        case CL_SNORM_INT8:
-        case CL_UNORM_INT8:
-        case CL_SIGNED_INT8:
-        case CL_UNSIGNED_INT8: return get_format_channel_count(format);
+  switch( format->image_channel_data_type )
+  {
+    case CL_SNORM_INT8:
+    case CL_UNORM_INT8:
+    case CL_SIGNED_INT8:
+    case CL_UNSIGNED_INT8:
+      return get_format_channel_count( format );
 
-        case CL_SNORM_INT16:
-        case CL_UNORM_INT16:
-        case CL_SIGNED_INT16:
-        case CL_UNSIGNED_INT16:
-        case CL_HALF_FLOAT:
-#ifdef CL_SFIXED14_APPLE
+    case CL_SNORM_INT16:
+    case CL_UNORM_INT16:
+    case CL_SIGNED_INT16:
+    case CL_UNSIGNED_INT16:
+    case CL_HALF_FLOAT:
+#ifdef  CL_SFIXED14_APPLE
         case CL_SFIXED14_APPLE:
 #endif
-            return get_format_channel_count(format) * sizeof(cl_ushort);
+      return get_format_channel_count( format ) * sizeof( cl_ushort );
 
-        case CL_SIGNED_INT32:
-        case CL_UNSIGNED_INT32:
-            return get_format_channel_count(format) * sizeof(cl_int);
+    case CL_SIGNED_INT32:
+    case CL_UNSIGNED_INT32:
+      return get_format_channel_count( format ) * sizeof( cl_int );
 
-        case CL_UNORM_SHORT_565:
-        case CL_UNORM_SHORT_555:
+    case CL_UNORM_SHORT_565:
+    case CL_UNORM_SHORT_555:
 #ifdef OBSOLETE_FORAMT
-        case CL_UNORM_SHORT_565_REV:
-        case CL_UNORM_SHORT_555_REV:
+    case CL_UNORM_SHORT_565_REV:
+    case CL_UNORM_SHORT_555_REV:
 #endif
-            return 2;
+      return 2;
 
 #ifdef OBSOLETE_FORAMT
-        case CL_UNORM_INT_8888:
-        case CL_UNORM_INT_8888_REV: return 4;
+    case CL_UNORM_INT_8888:
+    case CL_UNORM_INT_8888_REV:
+      return 4;
 #endif
 
-        case CL_UNORM_INT_101010:
+    case CL_UNORM_INT_101010:
 #ifdef OBSOLETE_FORAMT
-        case CL_UNORM_INT_101010_REV:
+    case CL_UNORM_INT_101010_REV:
 #endif
-            return 4;
+      return 4;
 
-        case CL_FLOAT:
-            return get_format_channel_count(format) * sizeof(cl_float);
+    case CL_FLOAT:
+      return get_format_channel_count( format ) * sizeof( cl_float );
 
-        default: return 0;
-    }
+    default:
+      return 0;
+  }
 }
 
-uint32_t next_power_of_two(uint32_t v)
+int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
 {
-    v--;
-    v |= v >> 1;
-    v |= v >> 2;
-    v |= v >> 4;
-    v |= v >> 8;
-    v |= v >> 16;
-    v++;
-    return v;
-}
-
-uint32_t get_pixel_alignment(const cl_image_format *format)
-{
-    return next_power_of_two(get_pixel_size(format));
-}
-
-int get_8_bit_image_format(cl_context context, cl_mem_object_type objType,
-                           cl_mem_flags flags, size_t channelCount,
-                           cl_image_format *outFormat)
-{
-    cl_image_format formatList[128];
+    cl_image_format formatList[ 128 ];
     unsigned int outFormatCount, i;
     int error;
 
 
     /* Make sure each image format is supported */
-    if ((error = clGetSupportedImageFormats(context, flags, objType, 128,
-                                            formatList, &outFormatCount)))
-        return error;
+    if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount )))
+    return error;
 
 
     /* Look for one that is an 8-bit format */
-    for (i = 0; i < outFormatCount; i++)
+    for( i = 0; i < outFormatCount; i++ )
     {
-        if (formatList[i].image_channel_data_type == CL_SNORM_INT8
-            || formatList[i].image_channel_data_type == CL_UNORM_INT8
-            || formatList[i].image_channel_data_type == CL_SIGNED_INT8
-            || formatList[i].image_channel_data_type == CL_UNSIGNED_INT8)
+        if( formatList[ i ].image_channel_data_type == CL_SNORM_INT8 ||
+       formatList[ i ].image_channel_data_type == CL_UNORM_INT8 ||
+           formatList[ i ].image_channel_data_type == CL_SIGNED_INT8 ||
+           formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT8 )
         {
-            if (!channelCount
-                || (channelCount
-                    && (get_format_channel_count(&formatList[i])
-                        == channelCount)))
-            {
-                *outFormat = formatList[i];
-                return 0;
-            }
+      if ( !channelCount || ( channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) )
+      {
+        *outFormat = formatList[ i ];
+        return 0;
+      }
         }
     }
 
     return -1;
 }
 
-int get_32_bit_image_format(cl_context context, cl_mem_object_type objType,
-                            cl_mem_flags flags, size_t channelCount,
-                            cl_image_format *outFormat)
+int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
 {
-    cl_image_format formatList[128];
+    cl_image_format formatList[ 128 ];
     unsigned int outFormatCount, i;
     int error;
 
 
-    /* Make sure each image format is supported */
-    if ((error = clGetSupportedImageFormats(context, flags, objType, 128,
-                                            formatList, &outFormatCount)))
-        return error;
+  /* Make sure each image format is supported */
+  if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount )))
+    return error;
 
-    /* Look for one that is an 8-bit format */
-    for (i = 0; i < outFormatCount; i++)
+  /* Look for one that is an 8-bit format */
+  for( i = 0; i < outFormatCount; i++ )
+  {
+        if( formatList[ i ].image_channel_data_type == CL_UNORM_INT_101010 ||
+            formatList[ i ].image_channel_data_type == CL_FLOAT ||
+            formatList[ i ].image_channel_data_type == CL_SIGNED_INT32 ||
+            formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT32 )
     {
-        if (formatList[i].image_channel_data_type == CL_UNORM_INT_101010
-            || formatList[i].image_channel_data_type == CL_FLOAT
-            || formatList[i].image_channel_data_type == CL_SIGNED_INT32
-            || formatList[i].image_channel_data_type == CL_UNSIGNED_INT32)
-        {
-            if (!channelCount
-                || (channelCount
-                    && (get_format_channel_count(&formatList[i])
-                        == channelCount)))
-            {
-                *outFormat = formatList[i];
-                return 0;
-            }
-        }
+      if ( !channelCount || ( channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) )
+      {
+        *outFormat = formatList[ i ];
+        return 0;
+      }
+    }
     }
 
     return -1;
 }
 
-void print_first_pixel_difference_error(size_t where, const char *sourcePixel,
-                                        const char *destPixel,
-                                        image_descriptor *imageInfo, size_t y,
-                                        size_t thirdDim)
+int random_log_in_range( int minV, int maxV, MTdata d  )
 {
-    size_t pixel_size = get_pixel_size(imageInfo->format);
-
-    log_error("ERROR: Scanline %d did not verify for image size %d,%d,%d "
-              "pitch %d (extra %d bytes)\n",
-              (int)y, (int)imageInfo->width, (int)imageInfo->height,
-              (int)thirdDim, (int)imageInfo->rowPitch,
-              (int)imageInfo->rowPitch
-                  - (int)imageInfo->width * (int)pixel_size);
-    log_error("Failed at column: %ld   ", where);
-
-    switch (pixel_size)
-    {
-        case 1:
-            log_error("*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar *)sourcePixel)[0],
-                      ((cl_uchar *)destPixel)[0]);
-            break;
-        case 2:
-            log_error("*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort *)sourcePixel)[0],
-                      ((cl_ushort *)destPixel)[0]);
-            break;
-        case 3:
-            log_error("*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. "
-                      "{0x%2.2x, 0x%2.2x, 0x%2.2x}\n",
-                      ((cl_uchar *)sourcePixel)[0],
-                      ((cl_uchar *)sourcePixel)[1],
-                      ((cl_uchar *)sourcePixel)[2], ((cl_uchar *)destPixel)[0],
-                      ((cl_uchar *)destPixel)[1], ((cl_uchar *)destPixel)[2]);
-            break;
-        case 4:
-            log_error("*0x%8.8x vs. 0x%8.8x\n", ((cl_uint *)sourcePixel)[0],
-                      ((cl_uint *)destPixel)[0]);
-            break;
-        case 6:
-            log_error(
-                "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. "
-                "{0x%4.4x, 0x%4.4x, 0x%4.4x}\n",
-                ((cl_ushort *)sourcePixel)[0], ((cl_ushort *)sourcePixel)[1],
-                ((cl_ushort *)sourcePixel)[2], ((cl_ushort *)destPixel)[0],
-                ((cl_ushort *)destPixel)[1], ((cl_ushort *)destPixel)[2]);
-            break;
-        case 8:
-            log_error("*0x%16.16llx vs. 0x%16.16llx\n",
-                      ((cl_ulong *)sourcePixel)[0], ((cl_ulong *)destPixel)[0]);
-            break;
-        case 12:
-            log_error("*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. "
-                      "{0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
-                      ((cl_uint *)sourcePixel)[0], ((cl_uint *)sourcePixel)[1],
-                      ((cl_uint *)sourcePixel)[2], ((cl_uint *)destPixel)[0],
-                      ((cl_uint *)destPixel)[1], ((cl_uint *)destPixel)[2]);
-            break;
-        case 16:
-            log_error("*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. "
-                      "{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
-                      ((cl_uint *)sourcePixel)[0], ((cl_uint *)sourcePixel)[1],
-                      ((cl_uint *)sourcePixel)[2], ((cl_uint *)sourcePixel)[3],
-                      ((cl_uint *)destPixel)[0], ((cl_uint *)destPixel)[1],
-                      ((cl_uint *)destPixel)[2], ((cl_uint *)destPixel)[3]);
-            break;
-        default:
-            log_error("Don't know how to print pixel size of %ld\n",
-                      pixel_size);
-            break;
-    }
-}
-
-int random_log_in_range(int minV, int maxV, MTdata d)
-{
-    double v = log2(((double)genrand_int32(d) / (double)0xffffffff) + 1);
-    int iv = (int)((float)(maxV - minV) * v);
+    double v = log2( ( (double)genrand_int32(d) / (double)0xffffffff ) + 1 );
+    int iv = (int)( (float)( maxV - minV ) * v );
     return iv + minV;
 }
 
 
 // Define the addressing functions
-typedef int (*AddressFn)(int value, size_t maxValue);
+typedef int (*AddressFn)( int value, size_t maxValue );
 
-int NoAddressFn(int value, size_t maxValue) { return value; }
-int RepeatAddressFn(int value, size_t maxValue)
+int         NoAddressFn( int value, size_t maxValue )               { return value; }
+int         RepeatAddressFn( int value, size_t maxValue )
 {
-    if (value < 0)
+    if( value < 0 )
         value += (int)maxValue;
-    else if (value >= (int)maxValue)
+    else if( value >= (int)maxValue )
         value -= (int)maxValue;
     return value;
 }
-int MirroredRepeatAddressFn(int value, size_t maxValue)
+int         MirroredRepeatAddressFn( int value, size_t maxValue )
 {
-    if (value < 0)
-        value = 0;
-    else if ((size_t)value >= maxValue)
-        value = (int)(maxValue - 1);
+    if( value < 0 )
+        value  = 0;
+    else if( (size_t) value >= maxValue )
+        value = (int) (maxValue - 1);
     return value;
 }
-int ClampAddressFn(int value, size_t maxValue)
-{
-    return (value < -1) ? -1
-                        : ((value > (cl_long)maxValue) ? (int)maxValue : value);
-}
-int ClampToEdgeNearestFn(int value, size_t maxValue)
-{
-    return (value < 0)
-        ? 0
-        : (((size_t)value > maxValue - 1) ? (int)maxValue - 1 : value);
-}
-AddressFn ClampToEdgeLinearFn = ClampToEdgeNearestFn;
+int         ClampAddressFn( int value, size_t maxValue )            { return ( value < -1 ) ? -1 : ( ( value > (cl_long) maxValue ) ? (int)maxValue : value ); }
+int         ClampToEdgeNearestFn( int value, size_t maxValue )  { return ( value < 0 ) ? 0 : ( ( (size_t)value > maxValue - 1 ) ? (int)maxValue - 1 : value ); }
+AddressFn   ClampToEdgeLinearFn                                                 = ClampToEdgeNearestFn;
 
-// Note: normalized coords get repeated in normalized space, not unnormalized
-// space! hence the special case here
+// Note: normalized coords get repeated in normalized space, not unnormalized space! hence the special case here
 volatile float gFloatHome;
-float RepeatNormalizedAddressFn(float fValue, size_t maxValue)
+float           RepeatNormalizedAddressFn( float fValue, size_t maxValue )
 {
 #ifndef _MSC_VER // Use original if not the VS compiler.
     // General computation for repeat
-    return (fValue - floorf(fValue)) * (float)maxValue; // Reduce to [0, 1.f]
+    return (fValue - floorf( fValue )) * (float) maxValue; // Reduce to [0, 1.f]
 #else // Otherwise, use this instead:
     // Home the subtraction to a float to break up the sequence of x87
     // instructions emitted by the VS compiler.
@@ -536,94 +444,91 @@
 #endif
 }
 
-float MirroredRepeatNormalizedAddressFn(float fValue, size_t maxValue)
+float           MirroredRepeatNormalizedAddressFn( float fValue, size_t maxValue )
 {
-    // Round to nearest multiple of two.
-    // Note halfway values flip flop here due to rte, but they both end up
-    // pointing the same place at the end of the day.
-    float s_prime = 2.0f * rintf(fValue * 0.5f);
+    // Round to nearest multiple of two
+    float s_prime = 2.0f * rintf( fValue * 0.5f );        // Note halfway values flip flop here due to rte, but they both end up pointing the same place at the end of the day
 
     // Reduce to [-1, 1], Apply mirroring -> [0, 1]
-    s_prime = fabsf(fValue - s_prime);
+    s_prime = fabsf( fValue - s_prime );
 
     // un-normalize
-    return s_prime * (float)maxValue;
+    return s_prime * (float) maxValue;
 }
 
 struct AddressingTable
 {
     AddressingTable()
     {
-        ct_assert((CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6));
-        ct_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2);
+        ct_assert( ( CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6 ) );
+        ct_assert( CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2 );
 
-        mTable[CL_ADDRESS_NONE - CL_ADDRESS_NONE]
-              [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = NoAddressFn;
-        mTable[CL_ADDRESS_NONE - CL_ADDRESS_NONE]
-              [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = NoAddressFn;
-        mTable[CL_ADDRESS_REPEAT - CL_ADDRESS_NONE]
-              [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = RepeatAddressFn;
-        mTable[CL_ADDRESS_REPEAT - CL_ADDRESS_NONE]
-              [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = RepeatAddressFn;
-        mTable[CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE]
-              [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = ClampToEdgeNearestFn;
-        mTable[CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE]
-              [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = ClampToEdgeLinearFn;
-        mTable[CL_ADDRESS_CLAMP - CL_ADDRESS_NONE]
-              [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = ClampAddressFn;
-        mTable[CL_ADDRESS_CLAMP - CL_ADDRESS_NONE]
-              [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = ClampAddressFn;
-        mTable[CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE]
-              [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = MirroredRepeatAddressFn;
-        mTable[CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE]
-              [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = MirroredRepeatAddressFn;
+        mTable[ CL_ADDRESS_NONE - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ]            = NoAddressFn;
+        mTable[ CL_ADDRESS_NONE - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]             = NoAddressFn;
+        mTable[ CL_ADDRESS_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ]          = RepeatAddressFn;
+        mTable[ CL_ADDRESS_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]           = RepeatAddressFn;
+        mTable[ CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ]   = ClampToEdgeNearestFn;
+        mTable[ CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]    = ClampToEdgeLinearFn;
+        mTable[ CL_ADDRESS_CLAMP - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ]           = ClampAddressFn;
+        mTable[ CL_ADDRESS_CLAMP - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]            = ClampAddressFn;
+        mTable[ CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ] = MirroredRepeatAddressFn;
+        mTable[ CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]  = MirroredRepeatAddressFn;
     }
 
-    AddressFn operator[](image_sampler_data *sampler)
+    AddressFn operator[]( image_sampler_data *sampler )
     {
-        return mTable[(int)sampler->addressing_mode - CL_ADDRESS_NONE]
-                     [(int)sampler->filter_mode - CL_FILTER_NEAREST];
+        return mTable[ (int)sampler->addressing_mode - CL_ADDRESS_NONE ][ (int)sampler->filter_mode - CL_FILTER_NEAREST ];
     }
 
-    AddressFn mTable[6][2];
+    AddressFn mTable[ 6 ][ 2 ];
 };
 
-static AddressingTable sAddressingTable;
+static AddressingTable  sAddressingTable;
 
-bool is_sRGBA_order(cl_channel_order image_channel_order)
-{
-    switch (image_channel_order)
-    {
+bool is_sRGBA_order(cl_channel_order image_channel_order){
+    switch (image_channel_order) {
         case CL_sRGB:
         case CL_sRGBx:
         case CL_sRGBA:
-        case CL_sBGRA: return true;
-        default: return false;
+        case CL_sBGRA:
+            return true;
+        default:
+            return false;
     }
 }
 
 // Format helpers
 
-int has_alpha(const cl_image_format *format)
-{
-    switch (format->image_channel_order)
-    {
-        case CL_R: return 0;
-        case CL_A: return 1;
-        case CL_Rx: return 0;
-        case CL_RG: return 0;
-        case CL_RA: return 1;
-        case CL_RGx: return 0;
+int has_alpha(cl_image_format *format) {
+    switch (format->image_channel_order) {
+        case CL_R:
+            return 0;
+        case CL_A:
+            return 1;
+        case CL_Rx:
+            return 0;
+        case CL_RG:
+            return 0;
+        case CL_RA:
+            return 1;
+        case CL_RGx:
+            return 0;
         case CL_RGB:
-        case CL_sRGB: return 0;
+        case CL_sRGB:
+            return 0;
         case CL_RGBx:
-        case CL_sRGBx: return 0;
-        case CL_RGBA: return 1;
-        case CL_BGRA: return 1;
-        case CL_ARGB: return 1;
-        case CL_ABGR: return 1;
-        case CL_INTENSITY: return 1;
-        case CL_LUMINANCE: return 0;
+        case CL_sRGBx:
+            return 0;
+        case CL_RGBA:
+            return 1;
+        case CL_BGRA:
+            return 1;
+        case CL_ARGB:
+            return 1;
+        case CL_INTENSITY:
+            return 1;
+        case CL_LUMINANCE:
+            return 0;
 #ifdef CL_BGR1_APPLE
         case CL_BGR1_APPLE: return 1;
 #endif
@@ -631,278 +536,235 @@
         case CL_1RGB_APPLE: return 1;
 #endif
         case CL_sRGBA:
-        case CL_sBGRA: return 1;
-        case CL_DEPTH: return 0;
+        case CL_sBGRA:
+            return 1;
+        case CL_DEPTH:
+            return 0;
         default:
-            log_error("Invalid image channel order: %d\n",
-                      format->image_channel_order);
+            log_error("Invalid image channel order: %d\n", format->image_channel_order);
             return 0;
     }
+
 }
 
 #define PRINT_MAX_SIZE_LOGIC 0
 
-#define SWAP(_a, _b)                                                           \
-    do                                                                         \
-    {                                                                          \
-        _a ^= _b;                                                              \
-        _b ^= _a;                                                              \
-        _a ^= _b;                                                              \
-    } while (0)
+#define SWAP( _a, _b )      do{ _a ^= _b; _b ^= _a; _a ^= _b; }while(0)
 #ifndef MAX
-#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
+    #define MAX( _a, _b )   ((_a) > (_b) ? (_a) : (_b))
 #endif
 
-void get_max_sizes(
-    size_t *numberOfSizes, const int maxNumberOfSizes, size_t sizes[][3],
-    size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
-    const cl_ulong maxIndividualAllocSize, // CL_DEVICE_MAX_MEM_ALLOC_SIZE
-    const cl_ulong maxTotalAllocSize, // CL_DEVICE_GLOBAL_MEM_SIZE
-    cl_mem_object_type image_type, const cl_image_format *format,
-    int usingMaxPixelSizeBuffer)
-{
+void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
+                   size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
+                   const cl_ulong maxIndividualAllocSize,       // CL_DEVICE_MAX_MEM_ALLOC_SIZE
+                   const cl_ulong maxTotalAllocSize,            // CL_DEVICE_GLOBAL_MEM_SIZE
+                   cl_mem_object_type image_type, cl_image_format *format, int usingMaxPixelSizeBuffer) {
 
     bool is3D = (image_type == CL_MEM_OBJECT_IMAGE3D);
-    bool isArray = (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY
-                    || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY);
+    bool isArray = (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY);
 
     // Validate we have a reasonable max depth for 3D
-    if (is3D && maxDepth < 2)
-    {
-        log_error("ERROR: Requesting max image sizes for 3D images when max "
-                  "depth is < 2.\n");
+    if (is3D && maxDepth < 2) {
+        log_error("ERROR: Requesting max image sizes for 3D images when max depth is < 2.\n");
         *numberOfSizes = 0;
         return;
     }
     // Validate we have a reasonable max array size for 1D & 2D image arrays
-    if (isArray && maxArraySize < 2)
-    {
-        log_error("ERROR: Requesting max image sizes for an image array when "
-                  "max array size is < 1.\n");
+    if (isArray && maxArraySize < 2) {
+        log_error("ERROR: Requesting max image sizes for an image array when max array size is < 1.\n");
         *numberOfSizes = 0;
         return;
     }
 
-    // Reduce the maximum because we are trying to test the max image
-    // dimensions, not the memory allocation
+    // Reduce the maximum because we are trying to test the max image dimensions, not the memory allocation
     cl_ulong adjustedMaxTotalAllocSize = maxTotalAllocSize / 4;
     cl_ulong adjustedMaxIndividualAllocSize = maxIndividualAllocSize / 4;
-    log_info("Note: max individual allocation adjusted down from %gMB to %gMB "
-             "and max total allocation adjusted down from %gMB to %gMB.\n",
-             maxIndividualAllocSize / (1024.0 * 1024.0),
-             adjustedMaxIndividualAllocSize / (1024.0 * 1024.0),
-             maxTotalAllocSize / (1024.0 * 1024.0),
-             adjustedMaxTotalAllocSize / (1024.0 * 1024.0));
+    log_info("Note: max individual allocation adjusted down from %gMB to %gMB and max total allocation adjusted down from %gMB to %gMB.\n",
+             maxIndividualAllocSize/(1024.0*1024.0), adjustedMaxIndividualAllocSize/(1024.0*1024.0),
+             maxTotalAllocSize/(1024.0*1024.0), adjustedMaxTotalAllocSize/(1024.0*1024.0));
 
     // Cap our max allocation to 1.0GB.
-    // FIXME -- why?  In the interest of not taking a long time?  We should
-    // still test this stuff...
-    if (adjustedMaxTotalAllocSize > (cl_ulong)1024 * 1024 * 1024)
-    {
-        adjustedMaxTotalAllocSize = (cl_ulong)1024 * 1024 * 1024;
-        log_info("Limiting max total allocation size to %gMB (down from %gMB) "
-                 "for test.\n",
-                 adjustedMaxTotalAllocSize / (1024.0 * 1024.0),
-                 maxTotalAllocSize / (1024.0 * 1024.0));
+    // FIXME -- why?  In the interest of not taking a long time?  We should still test this stuff...
+    if (adjustedMaxTotalAllocSize > (cl_ulong)1024*1024*1024) {
+      adjustedMaxTotalAllocSize = (cl_ulong)1024*1024*1024;
+      log_info("Limiting max total allocation size to %gMB (down from %gMB) for test.\n",
+        adjustedMaxTotalAllocSize/(1024.0*1024.0), maxTotalAllocSize/(1024.0*1024.0));
     }
 
     cl_ulong maxAllocSize = adjustedMaxIndividualAllocSize;
-    if (adjustedMaxTotalAllocSize < adjustedMaxIndividualAllocSize * 2)
-        maxAllocSize = adjustedMaxTotalAllocSize / 2;
+    if (adjustedMaxTotalAllocSize < adjustedMaxIndividualAllocSize*2)
+        maxAllocSize = adjustedMaxTotalAllocSize/2;
 
     size_t raw_pixel_size = get_pixel_size(format);
-    // If the test will be creating input (src) buffer of type int4 or float4,
-    // number of pixels will be governed by sizeof(int4 or float4) and not
-    // sizeof(dest fomat) Also if pixel size is 12 bytes i.e. RGB or RGBx, we
-    // adjust it to 16 bytes as GPUs has no concept of 3 channel images. GPUs
-    // expand these to four channel RGBA.
-    if (usingMaxPixelSizeBuffer || raw_pixel_size == 12) raw_pixel_size = 16;
+    // If the test will be creating input (src) buffer of type int4 or float4, number of pixels will be
+    // governed by sizeof(int4 or float4) and not sizeof(dest fomat)
+    // Also if pixel size is 12 bytes i.e. RGB or RGBx, we adjust it to 16 bytes as GPUs has no concept
+    // of 3 channel images. GPUs expand these to four channel RGBA.
+    if(usingMaxPixelSizeBuffer || raw_pixel_size == 12)
+      raw_pixel_size = 16;
     size_t max_pixels = (size_t)maxAllocSize / raw_pixel_size;
 
-    log_info("Maximums: [%ld x %ld x %ld], raw pixel size %lu bytes, "
-             "per-allocation limit %gMB.\n",
-             maxWidth, maxHeight, isArray ? maxArraySize : maxDepth,
-             raw_pixel_size, (maxAllocSize / (1024.0 * 1024.0)));
+    log_info("Maximums: [%ld x %ld x %ld], raw pixel size %lu bytes, per-allocation limit %gMB.\n",
+             maxWidth, maxHeight, isArray ? maxArraySize : maxDepth, raw_pixel_size, (maxAllocSize/(1024.0*1024.0)));
 
-    // Keep track of the maximum sizes for each dimension
-    size_t maximum_sizes[] = { maxWidth, maxHeight, maxDepth };
+  // Keep track of the maximum sizes for each dimension
+  size_t maximum_sizes[] = { maxWidth, maxHeight, maxDepth };
 
-    switch (image_type)
-    {
-        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-            maximum_sizes[1] = maxArraySize;
-            maximum_sizes[2] = 1;
-            break;
-        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-            maximum_sizes[2] = maxArraySize;
-            break;
-    }
+  switch (image_type) {
+    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+      maximum_sizes[1] = maxArraySize;
+      maximum_sizes[2] = 1;
+      break;
+    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+      maximum_sizes[2] = maxArraySize;
+      break;
+  }
 
 
-        // Given one fixed sized dimension, this code finds one or two other
-        // dimensions, both with very small size, such that the size does not
-        // exceed the maximum passed to this function
+  // Given one fixed sized dimension, this code finds one or two other dimensions,
+  // both with very small size, such that the size does not exceed the maximum
+  // passed to this function
 
-#if defined(__x86_64) || defined(__arm64__) || defined(__ppc64__)
-    size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 10, 11, 13, 15 };
+#if defined(__x86_64) || defined (__arm64__) || defined (__ppc64__)
+  size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 10, 11, 13, 15};
 #else
-    size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 11, 13 };
+  size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 11, 13};
 #endif
 
-    static size_t other_size = 0;
-    enum
-    {
-        num_other_sizes = sizeof(other_sizes) / sizeof(size_t)
-    };
+  static size_t other_size = 0;
+  enum { num_other_sizes = sizeof(other_sizes)/sizeof(size_t) };
 
-    (*numberOfSizes) = 0;
+  (*numberOfSizes) = 0;
 
-    if (image_type == CL_MEM_OBJECT_IMAGE1D)
-    {
+  if (image_type == CL_MEM_OBJECT_IMAGE1D) {
 
-        double M = maximum_sizes[0];
+    double M = maximum_sizes[0];
 
-        // Store the size
-        sizes[(*numberOfSizes)][0] = (size_t)M;
-        sizes[(*numberOfSizes)][1] = 1;
-        sizes[(*numberOfSizes)][2] = 1;
-        ++(*numberOfSizes);
+    // Store the size
+    sizes[(*numberOfSizes)][0] = (size_t)M;
+    sizes[(*numberOfSizes)][1] = 1;
+    sizes[(*numberOfSizes)][2] = 1;
+    ++(*numberOfSizes);
+  }
+
+  else if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D) {
+
+    for (int fixed_dim=0;fixed_dim<2;++fixed_dim) {
+
+      // Determine the size of the fixed dimension
+      double M = maximum_sizes[fixed_dim];
+      double A = max_pixels;
+
+      int x0_dim = !fixed_dim;
+      double x0  = fmin(fmin(other_sizes[(other_size++)%num_other_sizes],A/M), maximum_sizes[x0_dim]);
+
+      // Store the size
+      sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
+      sizes[(*numberOfSizes)][x0_dim]    = (size_t)x0;
+      sizes[(*numberOfSizes)][2]         = 1;
+      ++(*numberOfSizes);
     }
+  }
 
-    else if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY
-             || image_type == CL_MEM_OBJECT_IMAGE2D)
-    {
+  else if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE3D) {
 
-        for (int fixed_dim = 0; fixed_dim < 2; ++fixed_dim)
-        {
+    // Iterate over dimensions, finding sizes for the non-fixed dimension
+    for (int fixed_dim=0;fixed_dim<3;++fixed_dim) {
 
-            // Determine the size of the fixed dimension
-            double M = maximum_sizes[fixed_dim];
-            double A = max_pixels;
+      // Determine the size of the fixed dimension
+      double M = maximum_sizes[fixed_dim];
+      double A = max_pixels;
 
-            int x0_dim = !fixed_dim;
-            double x0 =
-                fmin(fmin(other_sizes[(other_size++) % num_other_sizes], A / M),
-                     maximum_sizes[x0_dim]);
+      // Find two other dimensions, x0 and x1
+      int x0_dim = (fixed_dim == 0) ? 1 : 0;
+      int x1_dim = (fixed_dim == 2) ? 1 : 2;
 
-            // Store the size
-            sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
-            sizes[(*numberOfSizes)][x0_dim] = (size_t)x0;
-            sizes[(*numberOfSizes)][2] = 1;
-            ++(*numberOfSizes);
-        }
+      // Choose two other sizes for these dimensions
+      double x0 = fmin(fmin(A/M,maximum_sizes[x0_dim]),other_sizes[(other_size++)%num_other_sizes]);
+      // GPUs have certain restrictions on minimum width (row alignment) of images which has given us issues
+      // testing small widths in this test (say we set width to 3 for testing, and compute size based on this width and decide
+      // it fits within vram ... but GPU driver decides that, due to row alignment requirements, it has to use
+      // width of 16 which doesnt fit in vram). For this purpose we are not testing width < 16 for this test.
+      if(x0_dim == 0 && x0 < 16)
+        x0 = 16;
+      double x1 = fmin(fmin(A/M/x0,maximum_sizes[x1_dim]),other_sizes[(other_size++)%num_other_sizes]);
+
+      // Valid image sizes cannot be below 1. Due to the workaround for the xo_dim where x0 is overidden to 16
+      // there might not be enough space left for x1 dimension. This could be a fractional 0.x size that when cast to
+      // integer would result in a value 0. In these cases we clamp the size to a minimum of 1.
+      if ( x1 < 1 )
+        x1 = 1;
+
+      // M and x0 cannot be '0' as they derive from clDeviceInfo calls
+      assert(x0 > 0 && M > 0);
+
+      // Store the size
+      sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
+      sizes[(*numberOfSizes)][x0_dim]    = (size_t)x0;
+      sizes[(*numberOfSizes)][x1_dim]    = (size_t)x1;
+      ++(*numberOfSizes);
     }
+  }
 
-    else if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY
-             || image_type == CL_MEM_OBJECT_IMAGE3D)
-    {
-
-        // Iterate over dimensions, finding sizes for the non-fixed dimension
-        for (int fixed_dim = 0; fixed_dim < 3; ++fixed_dim)
-        {
-
-            // Determine the size of the fixed dimension
-            double M = maximum_sizes[fixed_dim];
-            double A = max_pixels;
-
-            // Find two other dimensions, x0 and x1
-            int x0_dim = (fixed_dim == 0) ? 1 : 0;
-            int x1_dim = (fixed_dim == 2) ? 1 : 2;
-
-            // Choose two other sizes for these dimensions
-            double x0 = fmin(fmin(A / M, maximum_sizes[x0_dim]),
-                             other_sizes[(other_size++) % num_other_sizes]);
-            // GPUs have certain restrictions on minimum width (row alignment)
-            // of images which has given us issues testing small widths in this
-            // test (say we set width to 3 for testing, and compute size based
-            // on this width and decide it fits within vram ... but GPU driver
-            // decides that, due to row alignment requirements, it has to use
-            // width of 16 which doesnt fit in vram). For this purpose we are
-            // not testing width < 16 for this test.
-            if (x0_dim == 0 && x0 < 16) x0 = 16;
-            double x1 = fmin(fmin(A / M / x0, maximum_sizes[x1_dim]),
-                             other_sizes[(other_size++) % num_other_sizes]);
-
-            // Valid image sizes cannot be below 1. Due to the workaround for
-            // the xo_dim where x0 is overidden to 16 there might not be enough
-            // space left for x1 dimension. This could be a fractional 0.x size
-            // that when cast to integer would result in a value 0. In these
-            // cases we clamp the size to a minimum of 1.
-            if (x1 < 1) x1 = 1;
-
-            // M and x0 cannot be '0' as they derive from clDeviceInfo calls
-            assert(x0 > 0 && M > 0);
-
-            // Store the size
-            sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
-            sizes[(*numberOfSizes)][x0_dim] = (size_t)x0;
-            sizes[(*numberOfSizes)][x1_dim] = (size_t)x1;
-            ++(*numberOfSizes);
-        }
+  // Log the results
+  for (int j=0; j<(int)(*numberOfSizes); j++) {
+    switch (image_type) {
+      case CL_MEM_OBJECT_IMAGE1D:
+        log_info(" size[%d] = [%ld] (%g MB image)\n",
+                 j, sizes[j][0], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0));
+        break;
+      case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+      case CL_MEM_OBJECT_IMAGE2D:
+        log_info(" size[%d] = [%ld %ld] (%g MB image)\n",
+                 j, sizes[j][0], sizes[j][1], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0));
+        break;
+      case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+      case CL_MEM_OBJECT_IMAGE3D:
+        log_info(" size[%d] = [%ld %ld %ld] (%g MB image)\n",
+                 j, sizes[j][0], sizes[j][1], sizes[j][2], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0));
+        break;
     }
+  }
+}
 
-    // Log the results
-    for (int j = 0; j < (int)(*numberOfSizes); j++)
-    {
-        switch (image_type)
-        {
-            case CL_MEM_OBJECT_IMAGE1D:
-                log_info(" size[%d] = [%ld] (%g MB image)\n", j, sizes[j][0],
-                         raw_pixel_size * sizes[j][0] * sizes[j][1]
-                             * sizes[j][2] / (1024.0 * 1024.0));
-                break;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE2D:
-                log_info(" size[%d] = [%ld %ld] (%g MB image)\n", j,
-                         sizes[j][0], sizes[j][1],
-                         raw_pixel_size * sizes[j][0] * sizes[j][1]
-                             * sizes[j][2] / (1024.0 * 1024.0));
-                break;
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE3D:
-                log_info(" size[%d] = [%ld %ld %ld] (%g MB image)\n", j,
-                         sizes[j][0], sizes[j][1], sizes[j][2],
-                         raw_pixel_size * sizes[j][0] * sizes[j][1]
-                             * sizes[j][2] / (1024.0 * 1024.0));
-                break;
-        }
+float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) {
+    if (sampler->filter_mode == CL_FILTER_NEAREST)
+        return 0.0f;
+
+    switch (format->image_channel_data_type) {
+        case CL_SNORM_INT8:
+            return 1.0f/127.0f;
+        case CL_UNORM_INT8:
+            return 1.0f/255.0f;
+        case CL_UNORM_INT16:
+            return 1.0f/65535.0f;
+        case CL_SNORM_INT16:
+            return 1.0f/32767.0f;
+        case CL_FLOAT:
+            return CL_FLT_MIN;
+#ifdef  CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:
+            return 0x1.0p-14f;
+#endif
+        default:
+            return 0.0f;
     }
 }
 
-float get_max_absolute_error(const cl_image_format *format,
-                             image_sampler_data *sampler)
-{
-    if (sampler->filter_mode == CL_FILTER_NEAREST) return 0.0f;
-
-    switch (format->image_channel_data_type)
-    {
-        case CL_SNORM_INT8: return 1.0f / 127.0f;
-        case CL_UNORM_INT8: return 1.0f / 255.0f;
-        case CL_UNORM_INT16: return 1.0f / 65535.0f;
-        case CL_SNORM_INT16: return 1.0f / 32767.0f;
-        case CL_FLOAT: return CL_FLT_MIN;
-#ifdef CL_SFIXED14_APPLE
-        case CL_SFIXED14_APPLE: return 0x1.0p-14f;
-#endif
-        default: return 0.0f;
-    }
-}
-
-float get_max_relative_error(const cl_image_format *format,
-                             image_sampler_data *sampler, int is3D,
-                             int isLinearFilter)
+float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter )
 {
     float maxError = 0.0f;
     float sampleCount = 1.0f;
-    if (isLinearFilter) sampleCount = is3D ? 8.0f : 4.0f;
+    if( isLinearFilter )
+        sampleCount =  is3D ? 8.0f : 4.0f;
 
-    // Note that the ULP is defined here as the unit in the last place of the
-    // maximum magnitude sample used for filtering.
+    // Note that the ULP is defined here as the unit in the last place of the maximum
+    // magnitude sample used for filtering.
 
     // Section 8.3
-    switch (format->image_channel_data_type)
+    switch( format->image_channel_data_type )
     {
-        // The spec allows 2 ulps of error for normalized formats
+            // The spec allows 2 ulps of error for normalized formats
         case CL_SNORM_INT8:
         case CL_UNORM_INT8:
         case CL_SNORM_INT16:
@@ -910,42 +772,34 @@
         case CL_UNORM_SHORT_565:
         case CL_UNORM_SHORT_555:
         case CL_UNORM_INT_101010:
-            // Maximum sampling error for round to zero normalization based on
-            // multiplication by reciprocal (using reciprocal generated in
-            // round to +inf mode, so that 1.0 matches spec)
-            maxError = 2 * FLT_EPSILON * sampleCount;
+            maxError = 2*FLT_EPSILON*sampleCount;       // Maximum sampling error for round to zero normalization based on multiplication
+            // by reciprocal (using reciprocal generated in round to +inf mode, so that 1.0 matches spec)
             break;
 
-            // If the implementation supports these formats then it will have to
-            // allow rounding error here too, because not all 32-bit ints are
-            // exactly representable in float
+            // If the implementation supports these formats then it will have to allow rounding error here too,
+            // because not all 32-bit ints are exactly representable in float
         case CL_SIGNED_INT32:
-        case CL_UNSIGNED_INT32: maxError = 1 * FLT_EPSILON; break;
+        case CL_UNSIGNED_INT32:
+            maxError = 1*FLT_EPSILON;
+            break;
     }
 
 
     // Section 8.2
-    if (sampler->addressing_mode == CL_ADDRESS_REPEAT
-        || sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT
-        || sampler->filter_mode != CL_FILTER_NEAREST
-        || sampler->normalized_coords)
-#if defined(__APPLE__)
+    if( sampler->addressing_mode == CL_ADDRESS_REPEAT || sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT || sampler->filter_mode != CL_FILTER_NEAREST || sampler->normalized_coords )
+#if defined( __APPLE__ )
     {
-        if (sampler->filter_mode != CL_FILTER_NEAREST)
+        if( sampler->filter_mode != CL_FILTER_NEAREST )
         {
             // The maximum
-            if (gDeviceType == CL_DEVICE_TYPE_GPU)
-                // Some GPUs ain't so accurate
-                maxError += MAKE_HEX_FLOAT(0x1.0p-4f, 0x1L, -4);
+            if( gDeviceType == CL_DEVICE_TYPE_GPU )
+                maxError += MAKE_HEX_FLOAT(0x1.0p-4f, 0x1L, -4);              // Some GPUs ain't so accurate
             else
-                // The standard method of 2d linear filtering delivers 4.0 ulps
-                // of error in round to nearest (8 in rtz).
+                // The standard method of 2d linear filtering delivers 4.0 ulps of error in round to nearest (8 in rtz).
                 maxError += 4.0f * FLT_EPSILON;
         }
         else
-            // normalized coordinates will introduce some error into the
-            // fractional part of the address, affecting results
-            maxError += 4.0f * FLT_EPSILON;
+            maxError += 4.0f * FLT_EPSILON;    // normalized coordinates will introduce some error into the fractional part of the address, affecting results
     }
 #else
     {
@@ -971,570 +825,828 @@
     return maxError;
 }
 
-size_t get_format_max_int(const cl_image_format *format)
+size_t get_format_max_int( cl_image_format *format ) // Upper integer bound tied to the format's channel data type; 0 for any type not listed.
 {
-    switch (format->image_channel_data_type)
+    switch( format->image_channel_data_type )
     {
         case CL_SNORM_INT8:
-        case CL_SIGNED_INT8: return 127;
+        case CL_SIGNED_INT8:
+            return 127; // 2^7 - 1
         case CL_UNORM_INT8:
-        case CL_UNSIGNED_INT8: return 255;
+        case CL_UNSIGNED_INT8:
+            return 255; // 2^8 - 1
 
         case CL_SNORM_INT16:
-        case CL_SIGNED_INT16: return 32767;
+        case CL_SIGNED_INT16:
+            return 32767; // 2^15 - 1
 
         case CL_UNORM_INT16:
-        case CL_UNSIGNED_INT16: return 65535;
+        case CL_UNSIGNED_INT16:
+            return 65535; // 2^16 - 1
 
-        case CL_SIGNED_INT32: return 2147483647L;
+        case CL_SIGNED_INT32:
+            return 2147483647L; // 2^31 - 1
 
-        case CL_UNSIGNED_INT32: return 4294967295LL;
-
-        case CL_UNORM_SHORT_565:
-        case CL_UNORM_SHORT_555: return 31;
-
-        case CL_UNORM_INT_101010: return 1023;
-
-        case CL_HALF_FLOAT: return 1 << 10;
-
-#ifdef CL_SFIXED14_APPLE
-        case CL_SFIXED14_APPLE: return 16384;
-#endif
-        default: return 0;
-    }
-}
-
-int get_format_min_int(const cl_image_format *format)
-{
-    switch (format->image_channel_data_type)
-    {
-        case CL_SNORM_INT8:
-        case CL_SIGNED_INT8: return -128;
-        case CL_UNORM_INT8:
-        case CL_UNSIGNED_INT8: return 0;
-
-        case CL_SNORM_INT16:
-        case CL_SIGNED_INT16: return -32768;
-
-        case CL_UNORM_INT16:
-        case CL_UNSIGNED_INT16: return 0;
-
-        case CL_SIGNED_INT32: return -2147483648LL;
-
-        case CL_UNSIGNED_INT32: return 0;
+        case CL_UNSIGNED_INT32:
+            return 4294967295LL; // 2^32 - 1; NOTE(review): only representable when size_t is wider than 32 bits or exactly 32 bits unsigned
 
         case CL_UNORM_SHORT_565:
         case CL_UNORM_SHORT_555:
-        case CL_UNORM_INT_101010: return 0;
+            return 31; // 2^5 - 1
+
+        case CL_UNORM_INT_101010:
+            return 1023; // 2^10 - 1
+
+        case CL_HALF_FLOAT:
+            return 1<<10; // 1024
+
+#ifdef CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:
+            return 16384; // 2^14
+#endif
+        default:
+            return 0; // data type not handled above
+    }
+}
+
+int get_format_min_int( cl_image_format *format ) // Lower integer bound tied to the format's channel data type; 0 for unsigned and unlisted types.
+{
+    switch( format->image_channel_data_type )
+    {
+        case CL_SNORM_INT8:
+        case CL_SIGNED_INT8:
+            return -128; // -(2^7)
+        case CL_UNORM_INT8:
+        case CL_UNSIGNED_INT8:
+            return 0;
+
+        case CL_SNORM_INT16:
+        case CL_SIGNED_INT16:
+            return -32768; // -(2^15)
+
+        case CL_UNORM_INT16:
+        case CL_UNSIGNED_INT16:
+            return 0;
+
+        case CL_SIGNED_INT32:
+            return -2147483648LL; // -(2^31); the LL suffix keeps the literal from being typed as an unsigned/overflowing int before negation
+
+        case CL_UNSIGNED_INT32:
+            return 0;
+
+        case CL_UNORM_SHORT_565:
+        case CL_UNORM_SHORT_555:
+        case CL_UNORM_INT_101010:
+            return 0; // packed unsigned-normalized types
 
         case CL_HALF_FLOAT: return -(1 << 10);
 
 #ifdef CL_SFIXED14_APPLE
-        case CL_SFIXED14_APPLE: return -16384;
+        case CL_SFIXED14_APPLE:
+            return -16384; // -(2^14)
 #endif
 
-        default: return 0;
+        default:
+            return 0; // data type not handled above
     }
 }
 
-cl_half convert_float_to_half(float f)
+float convert_half_to_float( unsigned short halfValue ) // Expands 16-bit half-precision bits into the equivalent 32-bit float.
 {
-    switch (gFloatToHalfRoundingMode)
+    // We have to take care of a few special cases, but in general, we just extract
+    // the same components from the half that exist in the float and re-stuff them
+    // For a description of the actual half format, see http://en.wikipedia.org/wiki/Half_precision
+    // Note: exponent/mantissa are 32-bit ints (exponent goes negative while renormalizing); sign is unsigned so that shifting it into bit 31 is well defined
+    unsigned int sign = ( halfValue >> 15 ) & 0x0001;
+    int exponent = ( halfValue >> 10 ) & 0x001f;
+    int mantissa = ( halfValue )       & 0x03ff;
+
+    // Note: we use a union here to be able to access the bits of a float directly
+    union
     {
-        case kRoundToNearestEven: return cl_half_from_float(f, CL_HALF_RTE);
-        case kRoundTowardZero: return cl_half_from_float(f, CL_HALF_RTZ);
+        unsigned int bits;
+        float floatValue;
+    } outFloat;
+
+    // Special cases first
+    if( exponent == 0 )
+    {
+        if( mantissa == 0 )
+        {
+            // If both exponent and mantissa are 0, the number is +/- 0
+            outFloat.bits  = sign << 31;
+            return outFloat.floatValue; // Already done!
+        }
+
+        // If exponent is 0, it's a denormalized number, so we renormalize it
+        // Note: this is not terribly efficient, but oh well
+        while( ( mantissa & 0x00000400 ) == 0 )
+        {
+            mantissa <<= 1;
+            exponent--;
+        }
+
+        // The first bit is implicit, so we take it off and inc the exponent accordingly
+        exponent++;
+        mantissa &= ~(0x00000400);
+    }
+    else if( exponent == 31 ) // Special-case "numbers"
+    {
+        // If the exponent is 31, it's a special case number (+/- infinity or NAN).
+        // If the mantissa is 0, it's infinity, else it's NAN, but in either case, the packing
+        // method is the same
+        outFloat.bits = ( sign << 31 ) | 0x7f800000 | ( mantissa << 13 );
+        return outFloat.floatValue;
+    }
+
+    // Plain ol' normalized number, so adjust to the ranges a 32-bit float expects and repack
+    exponent += ( 127 - 15 ); // re-bias: half bias is 15, float bias is 127
+    mantissa <<= 13; // widen the 10-bit mantissa to float's 23 bits
+
+    outFloat.bits = ( sign << 31 ) | ( exponent << 23 ) | mantissa;
+    return outFloat.floatValue;
+}
+
+
+
+cl_ushort convert_float_to_half( float f ) // Converts f to half-precision bits using the rounding mode selected by gFloatToHalfRoundingMode.
+{
+    switch( gFloatToHalfRoundingMode )
+    {
+        case kRoundToNearestEven:
+            return float2half_rte( f );
+        case kRoundTowardZero:
+            return float2half_rtz( f );
         default:
-            log_error("ERROR: Test internal error -- unhandled or unknown "
-                      "float->half rounding mode.\n");
+            log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" ); // any other mode is a harness bug: report and terminate
             exit(-1);
             return 0xffff;
     }
+
 }
 
-cl_ulong get_image_size(image_descriptor const *imageInfo)
+cl_ushort float2half_rte( float f ) // Converts a float to half-precision bits; per its name and tie handling, rounds to nearest with ties to even.
+    {
+    union{ float f; cl_uint u; } u = {f}; // raw-bit view of f
+    cl_uint sign = (u.u >> 16) & 0x8000; // float sign (bit 31) moved to the half sign position (bit 15)
+    float x = fabsf(f);
+    // x holds |f| from here on; sign is OR-ed back into every returned value.
+    // NaN: only a NaN compares unequal to itself
+    if( x != x )
+    {
+        u.u >>= (24-11); // drop the 13 mantissa bits a half cannot hold
+        u.u &= 0x7fff; // discard the (shifted) sign and anything above 15 bits
+        u.u |= 0x0200;      // set the top mantissa bit to silence the NaN
+        return u.u | sign;
+                }
+
+    // overflow: |f| >= 65520 (0x1.ffep15) rounds to infinity (0x7c00)
+    if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
+        return 0x7c00 | sign;
+
+    // underflow: |f| <= 2^-25 becomes signed zero
+    if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+
+    // very small: 2^-25 < |f| < 1.5 * 2^-24 becomes the smallest half denormal (0x0001)
+    if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
+        return sign | 1;
+
+    // half denormal: |f| < 2^-14
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125); // scale so the half-denormal mantissa lands in the low bits of a denormal float; presumably relies on round-to-nearest and no flush-to-zero -- TODO confirm
+        return sign | u.u;
+        }
+    // Normal range: round x to 11 significant bits by adding and subtracting a power of two 2^13 times its own exponent.
+    u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
+    u.u &= 0x7f800000; // keep only the exponent field: u.f is now a positive power of two
+    x += u.f; // the addition discards the low 13 mantissa bits of x (rounded by the FP unit)
+    u.f = x - u.f; // recover the rounded magnitude
+    u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112); // re-bias the exponent from float (127) to half (15)
+
+    return (u.u >> (24-11)) | sign; // shift exponent and mantissa into half position
+    }
+
+cl_ushort float2half_rtz( float f ) // Converts a float to half-precision bits, truncating toward zero.
+    {
+    union{ float f; cl_uint u; } u = {f}; // raw-bit view of f
+    cl_uint sign = (u.u >> 16) & 0x8000; // float sign (bit 31) moved to the half sign position (bit 15)
+    float x = fabsf(f);
+    // x holds |f| from here on; sign is OR-ed back into every returned value.
+    // NaN: only a NaN compares unequal to itself
+    if( x != x )
+        {
+        u.u >>= (24-11); // drop the 13 mantissa bits a half cannot hold
+        u.u &= 0x7fff; // discard the (shifted) sign and anything above 15 bits
+        u.u |= 0x0200;      // set the top mantissa bit to silence the NaN
+        return u.u | sign;
+        }
+
+    // overflow: |f| >= 2^16
+    if( x >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
+        {
+        if( x == INFINITY )
+            return 0x7c00 | sign; // infinity stays infinity
+        // any other too-large value truncates to the largest finite half
+        return 0x7bff | sign;
+        }
+
+    // underflow: |f| < 2^-24, the smallest half denormal
+    if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
+        return sign;    // Anything below the smallest half denormal truncates to signed zero.
+
+    // half denormal: |f| < 2^-14
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24); // express |f| in units of 2^-24
+        return (cl_ushort)((int) x | sign); // the int cast truncates the fraction
+    }
+    // Normal range: truncate the mantissa and re-bias the exponent directly on the bits.
+    u.u &= 0xFFFFE000U; // clear the 13 low mantissa bits
+    u.u -= 0x38000000U; // subtract 112 << 23: exponent bias 127 -> 15
+
+    return (u.u >> (24-11)) | sign; // bits above 15 are dropped by the cl_ushort return type
+}
+
+class TEST // Helper whose only member is a constructor; used to run a float->half self-check.
+{
+public:
+    TEST(); // defined further down in this file
+};
+// File-scope instance: constructing it invokes TEST::TEST() during static initialization.
+static TEST t;
+void  __vstore_half_rte(float f, size_t index, uint16_t *p) // Converts f to half bits and stores them at p[index]; used by TEST::TEST() as the control value.
+{
+    union{ unsigned int u; float f;} u;
+    // NOTE(review): identifiers beginning with a double underscore are reserved in C++.
+    u.f = f;
+    unsigned short r = (u.u >> 16) & 0x8000; // sign moved to the half sign position
+    u.u &= 0x7fffffff; // work on the magnitude only
+    if( u.u >= 0x33000000U ) // |f| >= 2^-25; smaller magnitudes leave r as signed zero
+    {
+        if( u.u >= 0x47800000 ) // |f| >= 2^16, or inf/NaN
+        {
+            if( u.u <= 0x7f800000 ) // finite or infinity -> infinity
+                r |= 0x7c00;
+            else
+            {
+                r |= 0x7e00 | ( (u.u >> 13) & 0x3ff ); // NaN: quiet bit set, top mantissa bits kept
+            }
+        }
+        else
+        {
+            float x = u.f;
+            if( u.u < 0x38800000 ) // |f| < 2^-14: result is a half denormal
+                u.u = 0x3f000000; // 0.5f
+            else
+                u.u += 0x06800000; // add 13 to the exponent field
+            u.u &= 0x7f800000U; // keep only the exponent: u.f is a power of two
+            x += u.f; // adding then subtracting the power of two rounds x to half precision
+            x -= u.f;
+            u.f = x * MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112); // re-bias the exponent for half
+            u.u >>= 13; // move exponent and mantissa into half position
+            r |= (unsigned short) u.u;
+        }
+    }
+
+    ((unsigned short*)p)[index] = r;
+}
+
+TEST::TEST() // Exhaustive float->half self-check; currently disabled by the return on the first line.
+{
+    return; // everything below is unreachable while this statement is present
+    union
+    {
+        float f;
+        uint32_t i;
+    } test;
+    uint16_t control, myval;
+    // Walks every 32-bit pattern, comparing convert_float_to_half against __vstore_half_rte.
+    log_info(" &&&&&&&&&&&&&&&&&&&&&&&&&&&& TESTING HALFS &&&&&&&&&&&&&&&&&&&&\n" );
+    test.i = 0;
+    do
+    {
+        if( ( test.i & 0xffffff ) == 0 ) // progress marker every 2^24 values
+        {
+            if( ( test.i & 0xfffffff ) == 0 ) // '*' every 2^28 values, '.' otherwise
+                log_info( "*" );
+            else
+                log_info( "." );
+            fflush(stdout);
+        }
+        __vstore_half_rte( test.f, 0, &control ); // control value
+        myval = convert_float_to_half( test.f ); // value under test
+        if( myval != control )
+        {
+            log_info( "\n******** ERROR: MyVal %04x control %04x source %12.24f\n", myval, control, test.f );
+            log_info( "         source bits: %08x   %a\n", test.i, test.f );
+            float t, c;
+            c = convert_half_to_float( control );
+            t = convert_half_to_float( myval );
+            log_info( "         converted control: %12.24f myval: %12.24f\n", c, t );
+        }
+        test.i++;
+    } while( test.i != 0 ); // stops when the counter wraps back to 0
+    log_info("\n &&&&&&&&&&&&&&&&&&&&&&&&&&&& TESTING HALFS &&&&&&&&&&&&&&&&&&&&\n" );
+
+}
+
+cl_ulong get_image_size( image_descriptor const *imageInfo ) // Size of the image described by imageInfo, computed from its pitches (or from the mip chain when mipmapped).
 {
     cl_ulong imageSize;
 
     // Assumes rowPitch and slicePitch are always correctly defined
-    if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
+    if ( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
     {
-        imageSize = (size_t)compute_mipmapped_image_size(*imageInfo);
+      imageSize = (size_t) compute_mipmapped_image_size(*imageInfo); // mipmapped images: size of all levels, as computed by the helper
     }
     else
     {
-        switch (imageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE1D: imageSize = imageInfo->rowPitch; break;
-            case CL_MEM_OBJECT_IMAGE2D:
-                imageSize = imageInfo->height * imageInfo->rowPitch;
-                break;
-            case CL_MEM_OBJECT_IMAGE3D:
-                imageSize = imageInfo->depth * imageInfo->slicePitch;
-                break;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                imageSize = imageInfo->arraySize * imageInfo->slicePitch;
-                break;
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                imageSize = imageInfo->arraySize * imageInfo->slicePitch;
-                break;
-            default:
-                log_error("ERROR: Cannot identify image type %x\n",
-                          imageInfo->type);
-                abort();
-        }
+      switch (imageInfo->type)
+      {
+      case CL_MEM_OBJECT_IMAGE1D:
+        imageSize = imageInfo->rowPitch; // a single row
+        break;
+      case CL_MEM_OBJECT_IMAGE2D:
+        imageSize = imageInfo->height * imageInfo->rowPitch; // rows * row pitch
+        break;
+      case CL_MEM_OBJECT_IMAGE3D:
+        imageSize = imageInfo->depth * imageInfo->slicePitch; // slices * slice pitch
+        break;
+      case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+        imageSize = imageInfo->arraySize * imageInfo->slicePitch; // array elements * slice pitch
+        break;
+      case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+        imageSize = imageInfo->arraySize * imageInfo->slicePitch; // array elements * slice pitch
+        break;
+      default:
+        log_error("ERROR: Cannot identify image type %x\n", imageInfo->type); // unknown type: report and abort
+        abort();
+      }
     }
     return imageSize;
 }
 
-// Calculate image size in megabytes (strictly, mebibytes). Result is rounded
-// up.
-cl_ulong get_image_size_mb(image_descriptor const *imageInfo)
+// Calculate image size in megabytes (strictly, mebibytes: units of 1024 * 1024). Any partial unit rounds the result up.
+cl_ulong get_image_size_mb( image_descriptor const *imageInfo )
 {
-    cl_ulong imageSize = get_image_size(imageInfo);
-    cl_ulong mb = imageSize / (1024 * 1024);
-    if (imageSize % (1024 * 1024) > 0)
+    cl_ulong imageSize = get_image_size( imageInfo );
+    cl_ulong mb = imageSize / ( 1024 * 1024 ); // whole units
+    if ( imageSize % ( 1024 * 1024 ) > 0 ) // a remainder means one more unit is needed
     {
         mb += 1;
     }
-    return mb;
+    return  mb;
 }
 
 
 uint64_t gRoundingStartValue = 0;
 
 
-void escape_inf_nan_values(char *data, size_t allocSize)
-{
+void escape_inf_nan_values( char* data, size_t allocSize ) { // Rewrites, in place, every 32-bit and 16-bit word of data whose exponent bits are all ones.
     // filter values with 8 not-quite-highest bits
     unsigned int *intPtr = (unsigned int *)data;
-    for (size_t i = 0; i<allocSize>> 2; i++)
+    for( size_t i = 0; i < allocSize >> 2; i++ ) // allocSize / 4 whole 32-bit words
     {
-        if ((intPtr[i] & 0x7F800000) == 0x7F800000) intPtr[i] ^= 0x40000000;
+        if( ( intPtr[ i ] & 0x7F800000 ) == 0x7F800000 ) // float exponent field all ones (inf/NaN pattern)
+            intPtr[ i ] ^= 0x40000000; // clear the top exponent bit
     }
 
-    // Ditto with half floats (16-bit numbers with the 5 not-quite-highest bits
-    // = 0x7C00 are special)
+    // Ditto with half floats (16-bit numbers with the 5 not-quite-highest bits = 0x7C00 are special)
     unsigned short *shortPtr = (unsigned short *)data;
-    for (size_t i = 0; i<allocSize>> 1; i++)
+    for( size_t i = 0; i < allocSize >> 1; i++ ) // allocSize / 2 whole 16-bit words, over the same buffer
     {
-        if ((shortPtr[i] & 0x7C00) == 0x7C00) shortPtr[i] ^= 0x4000;
+        if( ( shortPtr[ i ] & 0x7C00 ) == 0x7C00 ) // half exponent field all ones
+            shortPtr[ i ] ^= 0x4000; // clear the top exponent bit
     }
 }
 
-char *generate_random_image_data(image_descriptor *imageInfo,
-                                 BufferOwningPtr<char> &P, MTdata d)
+char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d ) // Allocates a buffer for the image, hands ownership to P, fills it with a ramp or random bits, and returns it (0 on allocation failure).
 {
-    size_t allocSize = get_image_size(imageInfo);
-    size_t pixelRowBytes = imageInfo->width * get_pixel_size(imageInfo->format);
+    size_t allocSize = get_image_size( imageInfo );
+    size_t pixelRowBytes = imageInfo->width * get_pixel_size( imageInfo->format ); // bytes of actual pixel data per row, excluding row padding
     size_t i;
 
     if (imageInfo->num_mip_levels > 1)
-        allocSize = compute_mipmapped_image_size(*imageInfo);
+      allocSize = compute_mipmapped_image_size(*imageInfo);
 
-#if defined(__APPLE__)
+#if defined (__APPLE__ )
     char *data = NULL;
-    if (gDeviceType == CL_DEVICE_TYPE_CPU)
-    {
+    if (gDeviceType == CL_DEVICE_TYPE_CPU) { // CPU device: buffer is placed between two PROT_NONE pages, ending right before the trailing one
         size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192;
 
-        void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE,
-                         MAP_ANON | MAP_PRIVATE, 0, 0);
+        void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); // NOTE(review): result is used below without a MAP_FAILED check
         intptr_t data_end = (intptr_t)map + mapSize - 4096;
         data = (char *)(data_end - (intptr_t)allocSize);
 
         mprotect(map, 4096, PROT_NONE);
         mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
-        P.reset(data, map, mapSize, allocSize);
-    }
-    else
-    {
+        P.reset(data, map, mapSize,allocSize);
+    } else {
         data = (char *)malloc(allocSize);
-        P.reset(data, NULL, 0, allocSize);
+        P.reset(data,NULL,0,allocSize);
     }
 #else
-    P.reset(NULL); // Free already allocated memory first, then try to allocate
-                   // new block.
-    char *data =
-        (char *)align_malloc(allocSize, get_pixel_alignment(imageInfo->format));
-    P.reset(data, NULL, 0, allocSize, true);
+    P.reset( NULL ); // Free already allocated memory first, then try to allocate new block.
+    char *data = (char *)align_malloc(allocSize, get_pixel_size(imageInfo->format)); // NOTE(review): alignment is now the pixel size (the removed line used get_pixel_alignment) -- confirm align_malloc accepts every pixel size
+    P.reset(data,NULL,0,allocSize, true);
 #endif
 
-    if (data == NULL)
-    {
-        log_error("ERROR: Unable to malloc %lu bytes for "
-                  "generate_random_image_data\n",
-                  allocSize);
-        return 0;
+    if (data == NULL) {
+      log_error( "ERROR: Unable to malloc %lu bytes for generate_random_image_data\n", allocSize ); // NOTE(review): allocSize is a size_t printed with %lu -- confirm the two match on every target
+      return 0;
     }
 
-    if (gTestRounding)
+    if( gTestRounding )
     {
         // Special case: fill with a ramp from 0 to the size of the type
-        size_t typeSize = get_format_type_size(imageInfo->format);
-        switch (typeSize)
+        size_t typeSize = get_format_type_size( imageInfo->format );
+        switch( typeSize ) // other type sizes leave the buffer unfilled
         {
-            case 1: {
+            case 1:
+            {
                 char *ptr = data;
-                for (i = 0; i < allocSize; i++)
-                    ptr[i] = (cl_char)(i + gRoundingStartValue);
+                for( i = 0; i < allocSize; i++ )
+                    ptr[i] = (cl_char) (i + gRoundingStartValue); // ramp offset by gRoundingStartValue, truncated to 8 bits
             }
-            break;
-            case 2: {
-                cl_short *ptr = (cl_short *)data;
-                for (i = 0; i < allocSize / 2; i++)
-                    ptr[i] = (cl_short)(i + gRoundingStartValue);
+                break;
+            case 2:
+            {
+                cl_short *ptr = (cl_short*) data;
+                for( i = 0; i < allocSize / 2; i++ )
+                    ptr[i] = (cl_short) (i +  gRoundingStartValue); // same ramp, truncated to 16 bits
             }
-            break;
-            case 4: {
-                cl_int *ptr = (cl_int *)data;
-                for (i = 0; i < allocSize / 4; i++)
-                    ptr[i] = (cl_int)(i + gRoundingStartValue);
+                break;
+            case 4:
+            {
+                cl_int *ptr = (cl_int*) data;
+                for( i = 0; i < allocSize / 4; i++ )
+                    ptr[i] = (cl_int) (i +  gRoundingStartValue); // same ramp, truncated to 32 bits
             }
-            break;
+                break;
         }
 
-        // Note: inf or nan float values would cause problems, although we don't
-        // know this will actually be a float, so we just know what to look for
-        escape_inf_nan_values(data, allocSize);
+        // Note: inf or nan float values would cause problems, although we don't know this will
+        // actually be a float, so we just know what to look for
+        escape_inf_nan_values( data, allocSize );
         return data;
     }
 
     // Otherwise, we should be able to just fill with random bits no matter what
-    cl_uint *p = (cl_uint *)data;
-    for (i = 0; i + 4 <= allocSize; i += 4) p[i / 4] = genrand_int32(d);
+    cl_uint *p = (cl_uint*) data;
+    for( i = 0; i + 4 <= allocSize; i += 4 ) // whole 32-bit words first
+        p[ i / 4 ] = genrand_int32(d);
 
-    for (; i < allocSize; i++) data[i] = genrand_int32(d);
+    for( ; i < allocSize; i++ ) // then the remaining 0-3 tail bytes
+        data[i] = genrand_int32(d);
 
-    // Note: inf or nan float values would cause problems, although we don't
-    // know this will actually be a float, so we just know what to look for
-    escape_inf_nan_values(data, allocSize);
+    // Note: inf or nan float values would cause problems, although we don't know this will
+    // actually be a float, so we just know what to look for
+    escape_inf_nan_values( data, allocSize );
 
-    if (/*!gTestMipmaps*/ imageInfo->num_mip_levels < 2)
+    if ( /*!gTestMipmaps*/ imageInfo->num_mip_levels < 2 )
     {
-        // Fill unused edges with -1, NaN for float
-        if (imageInfo->rowPitch > pixelRowBytes)
-        {
-            size_t height = 0;
+      // Fill unused edges with -1, NaN for float
+      if (imageInfo->rowPitch > pixelRowBytes)
+      {
+          size_t height = 0; // number of rows to pad; stays 0 for image types not listed below
 
-            switch (imageInfo->type)
-            {
-                case CL_MEM_OBJECT_IMAGE2D:
-                case CL_MEM_OBJECT_IMAGE3D:
-                case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                    height = imageInfo->height;
-                    break;
-                case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                    height = imageInfo->arraySize;
-                    break;
+          switch (imageInfo->type)
+          {
+              case CL_MEM_OBJECT_IMAGE2D:
+              case CL_MEM_OBJECT_IMAGE3D:
+              case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                  height = imageInfo->height;
+                  break;
+              case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                  height = imageInfo->arraySize; // each array element is one row
+                  break;
             }
 
             // Fill in the row padding regions
-            for (i = 0; i < height; i++)
+            for( i = 0; i < height; i++ )
             {
                 size_t offset = i * imageInfo->rowPitch + pixelRowBytes;
                 size_t length = imageInfo->rowPitch - pixelRowBytes;
-                memset(data + offset, 0xff, length);
+                memset( data + offset, 0xff, length );
             }
-        }
+      }
 
-        // Fill in the slice padding regions, if necessary:
+      // Fill in the slice padding regions, if necessary:
 
-        size_t slice_dimension = imageInfo->height;
-        if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
-        {
-            slice_dimension = imageInfo->arraySize;
-        }
+      size_t slice_dimension = imageInfo->height; // rows of real data per slice
+      if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+          slice_dimension = imageInfo->arraySize;
+      }
 
-        if (imageInfo->slicePitch > slice_dimension * imageInfo->rowPitch)
-        {
-            size_t depth = 0;
-            switch (imageInfo->type)
-            {
-                case CL_MEM_OBJECT_IMAGE2D:
-                case CL_MEM_OBJECT_IMAGE3D: depth = imageInfo->depth; break;
-                case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                    depth = imageInfo->arraySize;
-                    break;
-            }
+      if (imageInfo->slicePitch > slice_dimension*imageInfo->rowPitch)
+      {
+          size_t depth = 0; // number of slices to pad; stays 0 for image types not listed below
+          switch (imageInfo->type)
+          {
+            case CL_MEM_OBJECT_IMAGE2D:
+            case CL_MEM_OBJECT_IMAGE3D:
+                depth = imageInfo->depth;
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                depth = imageInfo->arraySize;
+                break;
+          }
 
-            for (i = 0; i < depth; i++)
-            {
-                size_t offset = i * imageInfo->slicePitch
-                    + slice_dimension * imageInfo->rowPitch;
-                size_t length = imageInfo->slicePitch
-                    - slice_dimension * imageInfo->rowPitch;
-                memset(data + offset, 0xff, length);
-            }
-        }
+          for( i = 0; i < depth; i++ )
+          {
+              size_t offset = i * imageInfo->slicePitch + slice_dimension*imageInfo->rowPitch; // first byte after the slice's last row
+              size_t length = imageInfo->slicePitch - slice_dimension*imageInfo->rowPitch; // padding bytes up to the next slice
+              memset( data + offset, 0xff, length );
+          }
+      }
     }
 
     return data;
 }
 
-#define CLAMP_FLOAT(v) (fmaxf(fminf(v, 1.f), -1.f))
+#define CLAMP_FLOAT( v ) ( fmaxf( fminf( v, 1.f ), -1.f ) ) // limits v to the range [-1.0f, 1.0f]
 
 
-void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, int x,
-                            int y, int z, float *outData, int lod)
+void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                            int x, int y, int z, float *outData, int lod )
 {
-    size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
-           depth_lod = imageInfo->depth;
+    size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth;
     size_t slice_pitch_lod = 0, row_pitch_lod = 0;
 
-    if (imageInfo->num_mip_levels > 1)
+    if ( imageInfo->num_mip_levels > 1 )
     {
-        switch (imageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE3D:
-                depth_lod =
-                    (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-            case CL_MEM_OBJECT_IMAGE2D:
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                height_lod =
-                    (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            default:
-                width_lod =
-                    (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-        }
-        row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
-        if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
-            slice_pitch_lod = row_pitch_lod;
-        else if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D
-                 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
-            slice_pitch_lod = row_pitch_lod * height_lod;
+      switch(imageInfo->type)
+      {
+      case CL_MEM_OBJECT_IMAGE3D :
+        depth_lod = ( imageInfo->depth >> lod ) ? ( imageInfo->depth >> lod ) : 1;
+      case CL_MEM_OBJECT_IMAGE2D :
+      case CL_MEM_OBJECT_IMAGE2D_ARRAY :
+        height_lod = ( imageInfo->height >> lod ) ? ( imageInfo->height >> lod ) : 1;
+      default :
+        width_lod = ( imageInfo->width >> lod ) ? ( imageInfo->width >> lod ) : 1;
+      }
+      row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
+      if ( imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY )
+        slice_pitch_lod = row_pitch_lod;
+      else if ( imageInfo->type == CL_MEM_OBJECT_IMAGE3D || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+        slice_pitch_lod = row_pitch_lod * height_lod;
     }
     else
     {
-        row_pitch_lod = imageInfo->rowPitch;
-        slice_pitch_lod = imageInfo->slicePitch;
+      row_pitch_lod = imageInfo->rowPitch;
+      slice_pitch_lod = imageInfo->slicePitch;
     }
-    if (x < 0 || y < 0 || z < 0 || x >= (int)width_lod
-        || (height_lod != 0 && y >= (int)height_lod)
-        || (depth_lod != 0 && z >= (int)depth_lod)
-        || (imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize))
+    if ( x < 0 || y < 0 || z < 0 || x >= (int)width_lod
+               || ( height_lod != 0 && y >= (int)height_lod )
+               || ( depth_lod != 0 && z >= (int)depth_lod )
+               || ( imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize ) )
     {
-        outData[0] = outData[1] = outData[2] = outData[3] = 0;
-        if (!has_alpha(imageInfo->format)) outData[3] = 1;
+            outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
+            if (!has_alpha(imageInfo->format))
+                outData[3] = 1;
         return;
     }
 
-    const cl_image_format *format = imageInfo->format;
+    cl_image_format *format = imageInfo->format;
 
     unsigned int i;
-    float tempData[4];
+    float tempData[ 4 ];
 
     // Advance to the right spot
     char *ptr = (char *)imageData;
-    size_t pixelSize = get_pixel_size(format);
+    size_t pixelSize = get_pixel_size( format );
 
     ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
 
     // OpenCL only supports reading floats from certain formats
-    size_t channelCount = get_format_channel_count(format);
-    switch (format->image_channel_data_type)
+    size_t channelCount = get_format_channel_count( format );
+    switch( format->image_channel_data_type )
     {
-        case CL_SNORM_INT8: {
+        case CL_SNORM_INT8:
+        {
             cl_char *dPtr = (cl_char *)ptr;
-            for (i = 0; i < channelCount; i++)
-                tempData[i] = CLAMP_FLOAT((float)dPtr[i] / 127.0f);
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = CLAMP_FLOAT( (float)dPtr[ i ] / 127.0f );
             break;
         }
 
-        case CL_UNORM_INT8: {
+        case CL_UNORM_INT8:
+        {
             unsigned char *dPtr = (unsigned char *)ptr;
-            for (i = 0; i < channelCount; i++)
-            {
-                if ((is_sRGBA_order(imageInfo->format->image_channel_order))
-                    && i < 3) // only RGB need to be converted for sRGBA
-                    tempData[i] = (float)sRGBunmap((float)dPtr[i] / 255.0f);
+            for( i = 0; i < channelCount; i++ ) {
+                if((is_sRGBA_order(imageInfo->format->image_channel_order)) && i<3) // only RGB need to be converted for sRGBA
+                    tempData[ i ] = (float)sRGBunmap((float)dPtr[ i ] / 255.0f) ;
                 else
-                    tempData[i] = (float)dPtr[i] / 255.0f;
+                    tempData[ i ] = (float)dPtr[ i ] / 255.0f;
             }
             break;
         }
 
-        case CL_SIGNED_INT8: {
+        case CL_SIGNED_INT8:
+        {
             cl_char *dPtr = (cl_char *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] =  (float)dPtr[ i ];
             break;
         }
 
-        case CL_UNSIGNED_INT8: {
+        case CL_UNSIGNED_INT8:
+        {
             cl_uchar *dPtr = (cl_uchar *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float) dPtr[ i ];
             break;
         }
 
-        case CL_SNORM_INT16: {
+        case CL_SNORM_INT16:
+        {
             cl_short *dPtr = (cl_short *)ptr;
-            for (i = 0; i < channelCount; i++)
-                tempData[i] = CLAMP_FLOAT((float)dPtr[i] / 32767.0f);
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = CLAMP_FLOAT( (float)dPtr[ i ] / 32767.0f );
             break;
         }
 
-        case CL_UNORM_INT16: {
+        case CL_UNORM_INT16:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            for (i = 0; i < channelCount; i++)
-                tempData[i] = (float)dPtr[i] / 65535.0f;
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float)dPtr[ i ] / 65535.0f;
             break;
         }
 
-        case CL_SIGNED_INT16: {
+        case CL_SIGNED_INT16:
+        {
             cl_short *dPtr = (cl_short *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float)dPtr[ i ];
             break;
         }
 
-        case CL_UNSIGNED_INT16: {
+        case CL_UNSIGNED_INT16:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float) dPtr[ i ];
             break;
         }
 
-        case CL_HALF_FLOAT: {
-            cl_half *dPtr = (cl_half *)ptr;
-            for (i = 0; i < channelCount; i++)
-                tempData[i] = cl_half_to_float(dPtr[i]);
+        case CL_HALF_FLOAT:
+        {
+            cl_ushort *dPtr = (cl_ushort *)ptr;
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = convert_half_to_float( dPtr[ i ] );
             break;
         }
 
-        case CL_SIGNED_INT32: {
+        case CL_SIGNED_INT32:
+        {
             cl_int *dPtr = (cl_int *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float)dPtr[ i ];
             break;
         }
 
-        case CL_UNSIGNED_INT32: {
+        case CL_UNSIGNED_INT32:
+        {
             cl_uint *dPtr = (cl_uint *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float)dPtr[ i ];
             break;
         }
 
-        case CL_UNORM_SHORT_565: {
+        case CL_UNORM_SHORT_565:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            tempData[0] = (float)(dPtr[0] >> 11) / (float)31;
-            tempData[1] = (float)((dPtr[0] >> 5) & 63) / (float)63;
-            tempData[2] = (float)(dPtr[0] & 31) / (float)31;
+            tempData[ 0 ] = (float)( dPtr[ 0 ] >> 11 ) / (float)31;
+            tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 5 ) & 63 ) / (float)63;
+            tempData[ 2 ] = (float)( dPtr[ 0 ] & 31 ) / (float)31;
             break;
         }
 
-        case CL_UNORM_SHORT_555: {
+        case CL_UNORM_SHORT_555:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            tempData[0] = (float)((dPtr[0] >> 10) & 31) / (float)31;
-            tempData[1] = (float)((dPtr[0] >> 5) & 31) / (float)31;
-            tempData[2] = (float)(dPtr[0] & 31) / (float)31;
+            tempData[ 0 ] = (float)( ( dPtr[ 0 ] >> 10 ) & 31 ) / (float)31;
+            tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 5 ) & 31 ) / (float)31;
+            tempData[ 2 ] = (float)( dPtr[ 0 ] & 31 ) / (float)31;
             break;
         }
 
-        case CL_UNORM_INT_101010: {
+        case CL_UNORM_INT_101010:
+        {
             cl_uint *dPtr = (cl_uint *)ptr;
-            tempData[0] = (float)((dPtr[0] >> 20) & 0x3ff) / (float)1023;
-            tempData[1] = (float)((dPtr[0] >> 10) & 0x3ff) / (float)1023;
-            tempData[2] = (float)(dPtr[0] & 0x3ff) / (float)1023;
+            tempData[ 0 ] = (float)( ( dPtr[ 0 ] >> 20 ) & 0x3ff ) / (float)1023;
+            tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 10 ) & 0x3ff ) / (float)1023;
+            tempData[ 2 ] = (float)( dPtr[ 0 ] & 0x3ff ) / (float)1023;
             break;
         }
 
-        case CL_FLOAT: {
+        case CL_FLOAT:
+        {
             float *dPtr = (float *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float)dPtr[ i ];
             break;
         }
-#ifdef CL_SFIXED14_APPLE
-        case CL_SFIXED14_APPLE: {
-            cl_ushort *dPtr = (cl_ushort *)ptr;
-            for (i = 0; i < channelCount; i++)
-                tempData[i] = ((int)dPtr[i] - 16384) * 0x1.0p-14f;
+#ifdef  CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:
+        {
+            cl_ushort *dPtr = (cl_ushort*) ptr;
+            for( i = 0; i < channelCount; i++ )
+                tempData[i] = ((int) dPtr[i] - 16384) * 0x1.0p-14f;
             break;
         }
 #endif
     }
 
 
-    outData[0] = outData[1] = outData[2] = 0;
-    outData[3] = 1;
+    outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0;
+    outData[ 3 ] = 1;
 
-    switch (format->image_channel_order)
+    switch( format->image_channel_order )
     {
-        case CL_A: outData[3] = tempData[0]; break;
+        case CL_A:
+            outData[ 3 ] = tempData[ 0 ];
+            break;
         case CL_R:
-        case CL_Rx: outData[0] = tempData[0]; break;
+        case CL_Rx:
+            outData[ 0 ] = tempData[ 0 ];
+            break;
         case CL_RA:
-            outData[0] = tempData[0];
-            outData[3] = tempData[1];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 3 ] = tempData[ 1 ];
             break;
         case CL_RG:
         case CL_RGx:
-            outData[0] = tempData[0];
-            outData[1] = tempData[1];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 1 ] = tempData[ 1 ];
             break;
         case CL_RGB:
         case CL_RGBx:
         case CL_sRGB:
         case CL_sRGBx:
-            outData[0] = tempData[0];
-            outData[1] = tempData[1];
-            outData[2] = tempData[2];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 1 ] = tempData[ 1 ];
+            outData[ 2 ] = tempData[ 2 ];
             break;
         case CL_RGBA:
-            outData[0] = tempData[0];
-            outData[1] = tempData[1];
-            outData[2] = tempData[2];
-            outData[3] = tempData[3];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 1 ] = tempData[ 1 ];
+            outData[ 2 ] = tempData[ 2 ];
+            outData[ 3 ] = tempData[ 3 ];
             break;
         case CL_ARGB:
-            outData[0] = tempData[1];
-            outData[1] = tempData[2];
-            outData[2] = tempData[3];
-            outData[3] = tempData[0];
-            break;
-        case CL_ABGR:
-            outData[0] = tempData[3];
-            outData[1] = tempData[2];
-            outData[2] = tempData[1];
-            outData[3] = tempData[0];
+            outData[ 0 ] = tempData[ 1 ];
+            outData[ 1 ] = tempData[ 2 ];
+            outData[ 2 ] = tempData[ 3 ];
+            outData[ 3 ] = tempData[ 0 ];
             break;
         case CL_BGRA:
         case CL_sBGRA:
-            outData[0] = tempData[2];
-            outData[1] = tempData[1];
-            outData[2] = tempData[0];
-            outData[3] = tempData[3];
+            outData[ 0 ] = tempData[ 2 ];
+            outData[ 1 ] = tempData[ 1 ];
+            outData[ 2 ] = tempData[ 0 ];
+            outData[ 3 ] = tempData[ 3 ];
             break;
         case CL_INTENSITY:
-            outData[0] = tempData[0];
-            outData[1] = tempData[0];
-            outData[2] = tempData[0];
-            outData[3] = tempData[0];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 1 ] = tempData[ 0 ];
+            outData[ 2 ] = tempData[ 0 ];
+            outData[ 3 ] = tempData[ 0 ];
             break;
         case CL_LUMINANCE:
-            outData[0] = tempData[0];
-            outData[1] = tempData[0];
-            outData[2] = tempData[0];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 1 ] = tempData[ 0 ];
+            outData[ 2 ] = tempData[ 0 ];
             break;
 #ifdef CL_1RGB_APPLE
         case CL_1RGB_APPLE:
-            outData[0] = tempData[1];
-            outData[1] = tempData[2];
-            outData[2] = tempData[3];
-            outData[3] = 1.0f;
+            outData[ 0 ] = tempData[ 1 ];
+            outData[ 1 ] = tempData[ 2 ];
+            outData[ 2 ] = tempData[ 3 ];
+            outData[ 3 ] = 1.0f;
             break;
 #endif
 #ifdef CL_BGR1_APPLE
         case CL_BGR1_APPLE:
-            outData[0] = tempData[2];
-            outData[1] = tempData[1];
-            outData[2] = tempData[0];
-            outData[3] = 1.0f;
+            outData[ 0 ] = tempData[ 2 ];
+            outData[ 1 ] = tempData[ 1 ];
+            outData[ 2 ] = tempData[ 0 ];
+            outData[ 3 ] = 1.0f;
             break;
 #endif
         case CL_sRGBA:
-            outData[0] = tempData[0];
-            outData[1] = tempData[1];
-            outData[2] = tempData[2];
-            outData[3] = tempData[3];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 1 ] = tempData[ 1 ];
+            outData[ 2 ] = tempData[ 2 ];
+            outData[ 3 ] = tempData[ 3 ];
             break;
-        case CL_DEPTH: outData[0] = tempData[0]; break;
+        case CL_DEPTH:
+            outData[ 0 ] = tempData[ 0 ];
+            break;
         default:
             log_error("Invalid format:");
             print_header(format, true);
@@ -1542,122 +1654,105 @@
     }
 }
 
-void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, int x,
-                            int y, int z, float *outData)
+void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                            int x, int y, int z, float *outData )
 {
-    read_image_pixel_float(imageData, imageInfo, x, y, z, outData, 0);
+  read_image_pixel_float( imageData, imageInfo, x, y, z, outData, 0 );
 }
 
-bool get_integer_coords(float x, float y, float z, size_t width, size_t height,
-                        size_t depth, image_sampler_data *imageSampler,
-                        image_descriptor *imageInfo, int &outX, int &outY,
-                        int &outZ)
-{
-    return get_integer_coords_offset(x, y, z, 0.0f, 0.0f, 0.0f, width, height,
-                                     depth, imageSampler, imageInfo, outX, outY,
-                                     outZ);
+bool get_integer_coords( float x, float y, float z, size_t width, size_t height, size_t depth, image_sampler_data *imageSampler, image_descriptor *imageInfo, int &outX, int &outY, int &outZ ) {
+    return get_integer_coords_offset(x, y, z, 0.0f, 0.0f, 0.0f, width, height, depth, imageSampler, imageInfo, outX, outY, outZ);
 }
 
-bool get_integer_coords_offset(float x, float y, float z, float xAddressOffset,
-                               float yAddressOffset, float zAddressOffset,
-                               size_t width, size_t height, size_t depth,
-                               image_sampler_data *imageSampler,
-                               image_descriptor *imageInfo, int &outX,
-                               int &outY, int &outZ)
+bool get_integer_coords_offset( float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                               size_t width, size_t height, size_t depth, image_sampler_data *imageSampler, image_descriptor *imageInfo, int &outX, int &outY, int &outZ )
 {
-    AddressFn adFn = sAddressingTable[imageSampler];
+    AddressFn adFn = sAddressingTable[ imageSampler ];
 
-    float refX = floorf(x), refY = floorf(y), refZ = floorf(z);
+    float refX = floorf( x ), refY = floorf( y ), refZ = floorf( z );
 
     // Handle sampler-directed coordinate normalization + clamping.  Note that
     // the array coordinate for image array types is expected to be
     // unnormalized, and is clamped to 0..arraySize-1.
-    if (imageSampler->normalized_coords)
+    if( imageSampler->normalized_coords )
     {
         switch (imageSampler->addressing_mode)
         {
             case CL_ADDRESS_REPEAT:
-                x = RepeatNormalizedAddressFn(x, width);
-                if (height != 0)
-                {
+                x = RepeatNormalizedAddressFn( x, width );
+                if (height != 0) {
                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
-                        y = RepeatNormalizedAddressFn(y, height);
+                        y = RepeatNormalizedAddressFn( y, height );
                 }
-                if (depth != 0)
-                {
+                if (depth != 0) {
                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
-                        z = RepeatNormalizedAddressFn(z, depth);
+                        z = RepeatNormalizedAddressFn( z, depth );
                 }
 
-                if (xAddressOffset != 0.0)
-                {
+                if (xAddressOffset != 0.0) {
                     // Add in the offset
                     x += xAddressOffset;
                     // Handle wrapping
-                    if (x > width) x -= (float)width;
-                    if (x < 0) x += (float)width;
+                    if (x > width)
+                        x -= (float)width;
+                    if (x < 0)
+                        x += (float)width;
                 }
-                if ((yAddressOffset != 0.0)
-                    && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY))
-                {
+                if ( (yAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) ) {
                     // Add in the offset
                     y += yAddressOffset;
                     // Handle wrapping
-                    if (y > height) y -= (float)height;
-                    if (y < 0) y += (float)height;
+                    if (y > height)
+                        y -= (float)height;
+                    if (y < 0)
+                        y += (float)height;
                 }
-                if ((zAddressOffset != 0.0)
-                    && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY))
-                {
+                if ( (zAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) )  {
                     // Add in the offset
                     z += zAddressOffset;
                     // Handle wrapping
-                    if (z > depth) z -= (float)depth;
-                    if (z < 0) z += (float)depth;
+                    if (z > depth)
+                        z -= (float)depth;
+                    if (z < 0)
+                        z += (float)depth;
                 }
                 break;
 
             case CL_ADDRESS_MIRRORED_REPEAT:
-                x = MirroredRepeatNormalizedAddressFn(x, width);
-                if (height != 0)
-                {
+                x = MirroredRepeatNormalizedAddressFn( x, width );
+                if (height != 0) {
                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
-                        y = MirroredRepeatNormalizedAddressFn(y, height);
+                        y = MirroredRepeatNormalizedAddressFn( y, height );
                 }
-                if (depth != 0)
-                {
+                if (depth != 0) {
                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
-                        z = MirroredRepeatNormalizedAddressFn(z, depth);
+                        z = MirroredRepeatNormalizedAddressFn( z, depth );
                 }
 
                 if (xAddressOffset != 0.0)
                 {
                     float temp = x + xAddressOffset;
-                    if (temp > (float)width)
-                        temp = (float)width - (temp - (float)width);
-                    x = fabsf(temp);
+                    if( temp > (float) width )
+                        temp = (float) width - (temp - (float) width );
+                    x = fabsf( temp );
                 }
-                if ((yAddressOffset != 0.0)
-                    && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY))
-                {
+                if ( (yAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) ) {
                     float temp = y + yAddressOffset;
-                    if (temp > (float)height)
-                        temp = (float)height - (temp - (float)height);
-                    y = fabsf(temp);
+                    if( temp > (float) height )
+                        temp = (float) height - (temp - (float) height );
+                    y = fabsf( temp );
                 }
-                if ((zAddressOffset != 0.0)
-                    && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY))
-                {
+                if ( (zAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) )  {
                     float temp = z + zAddressOffset;
-                    if (temp > (float)depth)
-                        temp = (float)depth - (temp - (float)depth);
-                    z = fabsf(temp);
+                    if( temp > (float) depth )
+                        temp = (float) depth - (temp - (float) depth );
+                    z = fabsf( temp );
                 }
                 break;
 
             default:
-                // Also, remultiply to the original coords. This simulates any
-                // truncation in the pass to OpenCL
+                // Also, remultiply to the original coords. This simulates any truncation in
+                // the pass to OpenCL
                 x *= (float)width;
                 x += xAddressOffset;
 
@@ -1678,54 +1773,59 @@
 
     // At this point, we're dealing with non-normalized coordinates.
 
-    outX = adFn(floorf(x), width);
+    outX = adFn( floorf( x ), width );
 
     // 1D and 2D arrays require special care for the index coordinate:
 
-    switch (imageInfo->type)
-    {
+    switch (imageInfo->type) {
         case CL_MEM_OBJECT_IMAGE1D_ARRAY:
             outY = calculate_array_index(y, (float)imageInfo->arraySize - 1.0f);
             outZ = 0.0f; /* don't care! */
             break;
         case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-            outY = adFn(floorf(y), height);
+            outY = adFn( floorf( y ), height );
             outZ = calculate_array_index(z, (float)imageInfo->arraySize - 1.0f);
             break;
         default:
             // legacy path:
-            if (height != 0) outY = adFn(floorf(y), height);
-            if (depth != 0) outZ = adFn(floorf(z), depth);
+            if (height != 0)
+                outY = adFn( floorf( y ), height );
+            if( depth != 0 )
+                outZ = adFn( floorf( z ), depth );
     }
 
-    return !((int)refX == outX && (int)refY == outY && (int)refZ == outZ);
+    return !( (int)refX == outX && (int)refY == outY && (int)refZ == outZ );
 }
 
-static float frac(float a) { return a - floorf(a); }
+static float frac(float a) {
+    return a - floorf(a);
+}
 
-static inline void pixelMax(const float a[4], const float b[4], float *results);
-static inline void pixelMax(const float a[4], const float b[4], float *results)
+static inline void pixelMax( const float a[4], const float b[4], float *results );
+static inline void pixelMax( const float a[4], const float b[4], float *results )
 {
-    for (int i = 0; i < 4; i++) results[i] = errMax(fabsf(a[i]), fabsf(b[i]));
+    for( int i = 0; i < 4; i++ )
+        results[i] = errMax( fabsf(a[i]), fabsf(b[i]) );
 }
 
 // If containsDenorms is NULL, flush denorms to zero
 // if containsDenorms is not NULL, record whether there are any denorms
-static inline void check_for_denorms(float a[4], int *containsDenorms);
-static inline void check_for_denorms(float a[4], int *containsDenorms)
+static inline void  check_for_denorms(float a[4], int *containsDenorms );
+static inline void  check_for_denorms(float a[4], int *containsDenorms )
 {
-    if (NULL == containsDenorms)
+    if( NULL == containsDenorms )
     {
-        for (int i = 0; i < 4; i++)
+        for( int i = 0; i < 4; i++ )
         {
-            if (IsFloatSubnormal(a[i])) a[i] = copysignf(0.0f, a[i]);
+            if( IsFloatSubnormal( a[i] ) )
+                a[i] = copysignf( 0.0f, a[i] );
         }
     }
     else
     {
-        for (int i = 0; i < 4; i++)
+        for( int i = 0; i < 4; i++ )
         {
-            if (IsFloatSubnormal(a[i]))
+            if( IsFloatSubnormal( a[i] ) )
             {
                 *containsDenorms = 1;
                 break;
@@ -1734,14 +1834,13 @@
     }
 }
 
-inline float calculate_array_index(float coord, float extent)
-{
+inline float calculate_array_index( float coord, float extent ) {
     // from Section 8.4 of the 1.2 Spec 'Selecting an Image from an Image Array'
     //
     // given coordinate 'w' that represents an index:
     // layer_index = clamp( rint(w), 0, image_array_size - 1)
 
-    float ret = rintf(coord);
+    float ret = rintf( coord );
     ret = ret > extent ? extent : ret;
     ret = ret < 0.0f ? 0.0f : ret;
 
@@ -1756,60 +1855,49 @@
  * offset   - an addressing offset to be added to the coordinate
  * extent   - the max value for this coordinate (e.g. width for x)
  */
-static float unnormalize_coordinate(const char *name, float coord, float offset,
-                                    float extent,
-                                    cl_addressing_mode addressing_mode,
-                                    int verbose)
+static float unnormalize_coordinate( const char* name, float coord,
+    float offset, float extent, cl_addressing_mode addressing_mode, int verbose )
 {
     float ret = 0.0f;
 
-    switch (addressing_mode)
-    {
+    switch (addressing_mode) {
         case CL_ADDRESS_REPEAT:
-            ret = RepeatNormalizedAddressFn(coord, extent);
+            ret = RepeatNormalizedAddressFn( coord, extent );
 
-            if (verbose)
-            {
-                log_info("\tRepeat filter denormalizes %s (%f) to %f\n", name,
-                         coord, ret);
+            if ( verbose ) {
+                log_info( "\tRepeat filter denormalizes %s (%f) to %f\n",
+                    name, coord, ret );
             }
 
-            if (offset != 0.0)
-            {
+            if (offset != 0.0) {
                 // Add in the offset, and handle wrapping.
                 ret += offset;
                 if (ret > extent) ret -= extent;
                 if (ret < 0.0) ret += extent;
             }
 
-            if (verbose && offset != 0.0f)
-            {
-                log_info("\tAddress offset of %f added to get %f\n", offset,
-                         ret);
+            if (verbose && offset != 0.0f) {
+                log_info( "\tAddress offset of %f added to get %f\n", offset, ret );
             }
             break;
 
         case CL_ADDRESS_MIRRORED_REPEAT:
-            ret = MirroredRepeatNormalizedAddressFn(coord, extent);
+            ret = MirroredRepeatNormalizedAddressFn( coord, extent );
 
-            if (verbose)
-            {
-                log_info(
-                    "\tMirrored repeat filter denormalizes %s (%f) to %f\n",
-                    name, coord, ret);
+            if ( verbose ) {
+                log_info( "\tMirrored repeat filter denormalizes %s (%f) to %f\n",
+                    name, coord, ret );
             }
 
-            if (offset != 0.0)
-            {
+            if (offset != 0.0) {
                 float temp = ret + offset;
-                if (temp > extent) temp = extent - (temp - extent);
-                ret = fabsf(temp);
+                if( temp > extent )
+                    temp = extent - (temp - extent );
+                ret = fabsf( temp );
             }
 
-            if (verbose && offset != 0.0f)
-            {
-                log_info("\tAddress offset of %f added to get %f\n", offset,
-                         ret);
+            if (verbose && offset != 0.0f) {
+                log_info( "\tAddress offset of %f added to get %f\n", offset, ret );
             }
             break;
 
@@ -1817,134 +1905,107 @@
 
             ret = coord * extent;
 
-            if (verbose)
-            {
-                log_info("\tFilter denormalizes %s to %f (%f * %f)\n", name,
-                         ret, coord, extent);
+            if ( verbose ) {
+                log_info( "\tFilter denormalizes %s to %f (%f * %f)\n",
+                    name, ret, coord, extent);
             }
 
             ret += offset;
 
-            if (verbose && offset != 0.0f)
-            {
-                log_info("\tAddress offset of %f added to get %f\n", offset,
-                         ret);
+            if (verbose && offset != 0.0f) {
+                log_info( "\tAddress offset of %f added to get %f\n", offset, ret );
             }
     }
 
     return ret;
 }
 
-FloatPixel
-sample_image_pixel_float(void *imageData, image_descriptor *imageInfo, float x,
-                         float y, float z, image_sampler_data *imageSampler,
-                         float *outData, int verbose, int *containsDenorms)
-{
-    return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f,
-                                           0.0f, 0.0f, imageSampler, outData,
-                                           verbose, containsDenorms);
+FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                    float x, float y, float z,
+                                    image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms ) {
+    return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData, verbose, containsDenorms);
 }
 
 // returns max pixel value of the pixels touched
-FloatPixel sample_image_pixel_float(void *imageData,
-                                    image_descriptor *imageInfo, float x,
-                                    float y, float z,
-                                    image_sampler_data *imageSampler,
-                                    float *outData, int verbose,
-                                    int *containsDenorms, int lod)
-{
-    return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f,
-                                           0.0f, 0.0f, imageSampler, outData,
-                                           verbose, containsDenorms, lod);
+FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                    float x, float y, float z,
+                                    image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms , int lod) {
+    return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData, verbose, containsDenorms, lod);
 }
-FloatPixel sample_image_pixel_float_offset(
-    void *imageData, image_descriptor *imageInfo, float x, float y, float z,
-    float xAddressOffset, float yAddressOffset, float zAddressOffset,
-    image_sampler_data *imageSampler, float *outData, int verbose,
-    int *containsDenorms, int lod)
+FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                           image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms , int lod)
 {
-    AddressFn adFn = sAddressingTable[imageSampler];
+    AddressFn adFn = sAddressingTable[ imageSampler ];
     FloatPixel returnVal;
-    size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
-           depth_lod = imageInfo->depth;
+    size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth;
     size_t slice_pitch_lod = 0, row_pitch_lod = 0;
 
-    if (imageInfo->num_mip_levels > 1)
+    if ( imageInfo->num_mip_levels > 1 )
     {
-        switch (imageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE3D:
-                depth_lod =
-                    (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-            case CL_MEM_OBJECT_IMAGE2D:
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                height_lod =
-                    (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            default:
-                width_lod =
-                    (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-        }
-        row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
-        if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
-            slice_pitch_lod = row_pitch_lod;
-        else if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D
-                 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
-            slice_pitch_lod = row_pitch_lod * height_lod;
+      switch(imageInfo->type)
+      {
+      case CL_MEM_OBJECT_IMAGE3D :
+        depth_lod = ( imageInfo->depth >> lod ) ? ( imageInfo->depth >> lod ) : 1;
+      case CL_MEM_OBJECT_IMAGE2D :
+      case CL_MEM_OBJECT_IMAGE2D_ARRAY :
+        height_lod = ( imageInfo->height >> lod ) ? ( imageInfo->height >> lod ) : 1;
+      default :
+        width_lod = ( imageInfo->width >> lod ) ? ( imageInfo->width >> lod ) : 1;
+      }
+      row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
+      if ( imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY )
+        slice_pitch_lod = row_pitch_lod;
+      else if ( imageInfo->type == CL_MEM_OBJECT_IMAGE3D || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+        slice_pitch_lod = row_pitch_lod * height_lod;
     }
     else
     {
-        slice_pitch_lod = imageInfo->slicePitch;
-        row_pitch_lod = imageInfo->rowPitch;
+      slice_pitch_lod = imageInfo->slicePitch;
+      row_pitch_lod = imageInfo->rowPitch;
     }
 
-    if (containsDenorms) *containsDenorms = 0;
+    if( containsDenorms )
+        *containsDenorms = 0;
 
-    if (imageSampler->normalized_coords)
-    {
+    if( imageSampler->normalized_coords ) {
 
         // We need to unnormalize our coordinates differently depending on
         // the image type, but 'x' is always processed the same way.
 
         x = unnormalize_coordinate("x", x, xAddressOffset, (float)width_lod,
-                                   imageSampler->addressing_mode, verbose);
+            imageSampler->addressing_mode, verbose);
 
-        switch (imageInfo->type)
-        {
+        switch (imageInfo->type) {
 
-                // The image array types require special care:
+            // The image array types require special care:
 
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                 z = 0; // don't care -- unused for 1D arrays
                 break;
 
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                y = unnormalize_coordinate(
-                    "y", y, yAddressOffset, (float)height_lod,
+                y = unnormalize_coordinate("y", y, yAddressOffset, (float)height_lod,
                     imageSampler->addressing_mode, verbose);
                 break;
 
-                // Everybody else:
+            // Everybody else:
 
             default:
-                y = unnormalize_coordinate(
-                    "y", y, yAddressOffset, (float)height_lod,
+                y = unnormalize_coordinate("y", y, yAddressOffset, (float)height_lod,
                     imageSampler->addressing_mode, verbose);
-                z = unnormalize_coordinate(
-                    "z", z, zAddressOffset, (float)depth_lod,
+                z = unnormalize_coordinate("z", z, zAddressOffset, (float)depth_lod,
                     imageSampler->addressing_mode, verbose);
         }
-    }
-    else if (verbose)
-    {
 
-        switch (imageInfo->type)
-        {
+    } else if ( verbose ) {
+
+        switch (imageInfo->type) {
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                 log_info("Starting coordinate: %f, array index %f\n", x, y);
                 break;
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                log_info("Starting coordinate: %f, %f, array index %f\n", x, y,
-                         z);
+                log_info("Starting coordinate: %f, %f, array index %f\n", x, y, z);
                 break;
             case CL_MEM_OBJECT_IMAGE1D:
             case CL_MEM_OBJECT_IMAGE1D_BUFFER:
@@ -1954,13 +2015,14 @@
                 log_info("Starting coordinate: %f, %f\n", x, y);
                 break;
             case CL_MEM_OBJECT_IMAGE3D:
-            default: log_info("Starting coordinate: %f, %f, %f\n", x, y, z);
+            default:
+                log_info("Starting coordinate: %f, %f, %f\n", x, y, z);
         }
     }
 
     // At this point, we have unnormalized coordinates.
 
-    if (imageSampler->filter_mode == CL_FILTER_NEAREST)
+    if( imageSampler->filter_mode == CL_FILTER_NEAREST )
     {
         int ix, iy, iz;
 
@@ -1968,50 +2030,42 @@
         // coordinates.  Note that the array cases again require special
         // care, per section 8.4 in the OpenCL 1.2 Specification.
 
-        ix = adFn(floorf(x), width_lod);
+        ix = adFn( floorf( x ), width_lod );
 
-        switch (imageInfo->type)
-        {
+        switch (imageInfo->type) {
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                iy =
-                    calculate_array_index(y, (float)(imageInfo->arraySize - 1));
+                iy = calculate_array_index( y, (float)(imageInfo->arraySize - 1) );
                 iz = 0;
-                if (verbose)
-                {
-                    log_info("\tArray index %f evaluates to %d\n", y, iy);
+                if( verbose ) {
+                  log_info("\tArray index %f evaluates to %d\n",y, iy );
                 }
                 break;
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                iy = adFn(floorf(y), height_lod);
-                iz =
-                    calculate_array_index(z, (float)(imageInfo->arraySize - 1));
-                if (verbose)
-                {
-                    log_info("\tArray index %f evaluates to %d\n", z, iz);
+                iy = adFn( floorf( y ), height_lod );
+                iz = calculate_array_index( z, (float)(imageInfo->arraySize - 1) );
+                if( verbose ) {
+                    log_info("\tArray index %f evaluates to %d\n",z, iz );
                 }
                 break;
             default:
-                iy = adFn(floorf(y), height_lod);
-                if (depth_lod != 0)
-                    iz = adFn(floorf(z), depth_lod);
+                iy = adFn( floorf( y ), height_lod );
+                if( depth_lod != 0 )
+                    iz = adFn( floorf( z ), depth_lod );
                 else
                     iz = 0;
         }
 
-        if (verbose)
-        {
-            if (iz)
-                log_info(
-                    "\tReference integer coords calculated: { %d, %d, %d }\n",
-                    ix, iy, iz);
+        if( verbose ) {
+            if( iz )
+                log_info( "\tReference integer coords calculated: { %d, %d, %d }\n", ix, iy, iz );
             else
-                log_info("\tReference integer coords calculated: { %d, %d }\n",
-                         ix, iy);
+                log_info( "\tReference integer coords calculated: { %d, %d }\n", ix, iy );
         }
 
-        read_image_pixel_float(imageData, imageInfo, ix, iy, iz, outData, lod);
-        check_for_denorms(outData, containsDenorms);
-        for (int i = 0; i < 4; i++) returnVal.p[i] = fabsf(outData[i]);
+        read_image_pixel_float( imageData, imageInfo, ix, iy, iz, outData, lod );
+        check_for_denorms( outData, containsDenorms );
+        for( int i = 0; i < 4; i++ )
+            returnVal.p[i] = fabsf( outData[i] );
         return returnVal;
     }
     else
@@ -2023,23 +2077,19 @@
         // Image arrays can use 2D filtering, but require us to walk into the
         // image a certain number of slices before reading.
 
-        if (depth == 0 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY
-            || imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+        if( depth == 0 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY ||
+                          imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
         {
             float array_index = 0;
 
             size_t layer_offset = 0;
 
-            if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
-            {
-                array_index =
-                    calculate_array_index(z, (float)(imageInfo->arraySize - 1));
+            if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+                array_index = calculate_array_index(z, (float)(imageInfo->arraySize - 1));
                 layer_offset = slice_pitch_lod * (size_t)array_index;
             }
-            else if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
-            {
-                array_index =
-                    calculate_array_index(y, (float)(imageInfo->arraySize - 1));
+            else if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+                array_index = calculate_array_index(y, (float)(imageInfo->arraySize - 1));
                 layer_offset = slice_pitch_lod * (size_t)array_index;
 
                 // Set up y and height so that the filtering below is correct
@@ -2047,253 +2097,213 @@
                 height = 1;
             }
 
-            int x1 = adFn(floorf(x - 0.5f), width);
+            int x1 = adFn( floorf( x - 0.5f ), width );
             int y1 = 0;
-            int x2 = adFn(floorf(x - 0.5f) + 1, width);
+            int x2 = adFn( floorf( x - 0.5f ) + 1, width );
             int y2 = 0;
-            if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
-                && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
-                && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER))
-            {
-                y1 = adFn(floorf(y - 0.5f), height);
-                y2 = adFn(floorf(y - 0.5f) + 1, height);
-            }
-            else
-            {
-                y = 0.5f;
+            if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D) &&
+                (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
+                (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
+                y1 = adFn( floorf( y - 0.5f ), height );
+                y2 = adFn( floorf( y - 0.5f ) + 1, height );
+            } else {
+              y = 0.5f;
             }
 
-            if (verbose)
-            {
-                log_info("\tActual integer coords used (i = floor(x-.5)): i0:{ "
-                         "%d, %d } and i1:{ %d, %d }\n",
-                         x1, y1, x2, y2);
-                log_info("\tArray coordinate is %f\n", array_index);
+            if( verbose ) {
+                log_info( "\tActual integer coords used (i = floor(x-.5)): i0:{ %d, %d } and i1:{ %d, %d }\n", x1, y1, x2, y2 );
+                log_info( "\tArray coordinate is %f\n", array_index);
             }
 
             // Walk to beginning of the 'correct' slice, if needed.
-            char *imgPtr = ((char *)imageData) + layer_offset;
+            char* imgPtr = ((char*)imageData) + layer_offset;
 
-            float upLeft[4], upRight[4], lowLeft[4], lowRight[4];
+            float upLeft[ 4 ], upRight[ 4 ], lowLeft[ 4 ], lowRight[ 4 ];
             float maxUp[4], maxLow[4];
-            read_image_pixel_float(imgPtr, imageInfo, x1, y1, 0, upLeft, lod);
-            read_image_pixel_float(imgPtr, imageInfo, x2, y1, 0, upRight, lod);
-            check_for_denorms(upLeft, containsDenorms);
-            check_for_denorms(upRight, containsDenorms);
-            pixelMax(upLeft, upRight, maxUp);
-            read_image_pixel_float(imgPtr, imageInfo, x1, y2, 0, lowLeft, lod);
-            read_image_pixel_float(imgPtr, imageInfo, x2, y2, 0, lowRight, lod);
-            check_for_denorms(lowLeft, containsDenorms);
-            check_for_denorms(lowRight, containsDenorms);
-            pixelMax(lowLeft, lowRight, maxLow);
-            pixelMax(maxUp, maxLow, returnVal.p);
+            read_image_pixel_float( imgPtr, imageInfo, x1, y1, 0, upLeft, lod );
+            read_image_pixel_float( imgPtr, imageInfo, x2, y1, 0, upRight, lod );
+            check_for_denorms( upLeft, containsDenorms );
+            check_for_denorms( upRight, containsDenorms );
+            pixelMax( upLeft, upRight, maxUp );
+            read_image_pixel_float( imgPtr, imageInfo, x1, y2, 0, lowLeft, lod );
+            read_image_pixel_float( imgPtr, imageInfo, x2, y2, 0, lowRight, lod );
+            check_for_denorms( lowLeft, containsDenorms );
+            check_for_denorms( lowRight, containsDenorms );
+            pixelMax( lowLeft, lowRight, maxLow );
+            pixelMax( maxUp, maxLow, returnVal.p );
 
-            if (verbose)
+            if( verbose )
             {
-                if (NULL == containsDenorms)
-                    log_info("\tSampled pixels (rgba order, denorms flushed to "
-                             "zero):\n");
+                if( NULL == containsDenorms )
+                    log_info( "\tSampled pixels (rgba order, denorms flushed to zero):\n" );
                 else
-                    log_info("\tSampled pixels (rgba order):\n");
-                log_info("\t\tp00: %f, %f, %f, %f\n", upLeft[0], upLeft[1],
-                         upLeft[2], upLeft[3]);
-                log_info("\t\tp01: %f, %f, %f, %f\n", upRight[0], upRight[1],
-                         upRight[2], upRight[3]);
-                log_info("\t\tp10: %f, %f, %f, %f\n", lowLeft[0], lowLeft[1],
-                         lowLeft[2], lowLeft[3]);
-                log_info("\t\tp11: %f, %f, %f, %f\n", lowRight[0], lowRight[1],
-                         lowRight[2], lowRight[3]);
+                    log_info( "\tSampled pixels (rgba order):\n" );
+                log_info( "\t\tp00: %f, %f, %f, %f\n", upLeft[0], upLeft[1], upLeft[2], upLeft[3] );
+                log_info( "\t\tp01: %f, %f, %f, %f\n", upRight[0], upRight[1], upRight[2], upRight[3] );
+                log_info( "\t\tp10: %f, %f, %f, %f\n", lowLeft[0], lowLeft[1], lowLeft[2], lowLeft[3] );
+                log_info( "\t\tp11: %f, %f, %f, %f\n", lowRight[0], lowRight[1], lowRight[2], lowRight[3] );
             }
 
             bool printMe = false;
-            if (x1 <= 0 || x2 <= 0 || x1 >= (int)width - 1
-                || x2 >= (int)width - 1)
+            if( x1 <= 0 || x2 <= 0 || x1 >= (int)width-1 || x2 >= (int)width-1 )
                 printMe = true;
-            if (y1 <= 0 || y2 <= 0 || y1 >= (int)height - 1
-                || y2 >= (int)height - 1)
+            if( y1 <= 0 || y2 <= 0 || y1 >= (int)height-1 || y2 >= (int)height-1 )
                 printMe = true;
 
-            double weights[2][2];
+            double weights[ 2 ][ 2 ];
 
-            weights[0][0] = weights[0][1] = 1.0 - frac(x - 0.5f);
-            weights[1][0] = weights[1][1] = frac(x - 0.5f);
-            weights[0][0] *= 1.0 - frac(y - 0.5f);
-            weights[1][0] *= 1.0 - frac(y - 0.5f);
-            weights[0][1] *= frac(y - 0.5f);
-            weights[1][1] *= frac(y - 0.5f);
+            weights[ 0 ][ 0 ] = weights[ 0 ][ 1 ] = 1.0 - frac( x - 0.5f );
+            weights[ 1 ][ 0 ] = weights[ 1 ][ 1 ] = frac( x - 0.5f );
+            weights[ 0 ][ 0 ] *= 1.0 - frac( y - 0.5f );
+            weights[ 1 ][ 0 ] *= 1.0 - frac( y - 0.5f );
+            weights[ 0 ][ 1 ] *= frac( y - 0.5f );
+            weights[ 1 ][ 1 ] *= frac( y - 0.5f );
 
-            if (verbose)
-                log_info("\tfrac( x - 0.5f ) = %f,  frac( y - 0.5f ) = %f\n",
-                         frac(x - 0.5f), frac(y - 0.5f));
+            if( verbose )
+                log_info( "\tfrac( x - 0.5f ) = %f,  frac( y - 0.5f ) = %f\n",  frac( x - 0.5f ), frac( y - 0.5f ) );
 
-            for (int i = 0; i < 3; i++)
+            for( int i = 0; i < 3; i++ )
             {
-                outData[i] = (float)((upLeft[i] * weights[0][0])
-                                     + (upRight[i] * weights[1][0])
-                                     + (lowLeft[i] * weights[0][1])
-                                     + (lowRight[i] * weights[1][1]));
+                outData[ i ] = (float)( ( upLeft[ i ] * weights[ 0 ][ 0 ] ) +
+                                        ( upRight[ i ] * weights[ 1 ][ 0 ] ) +
+                                        ( lowLeft[ i ] * weights[ 0 ][ 1 ] ) +
+                                        ( lowRight[ i ] * weights[ 1 ][ 1 ] ));
                 // flush subnormal results to zero if necessary
-                if (NULL == containsDenorms && fabs(outData[i]) < FLT_MIN)
-                    outData[i] = copysignf(0.0f, outData[i]);
+                if( NULL == containsDenorms && fabs(outData[i]) < FLT_MIN )
+                    outData[i] = copysignf( 0.0f, outData[i] );
             }
-            outData[3] = (float)((upLeft[3] * weights[0][0])
-                                 + (upRight[3] * weights[1][0])
-                                 + (lowLeft[3] * weights[0][1])
-                                 + (lowRight[3] * weights[1][1]));
+            outData[ 3 ] = (float)( ( upLeft[ 3 ] * weights[ 0 ][ 0 ] ) +
+                                   ( upRight[ 3 ] * weights[ 1 ][ 0 ] ) +
+                                   ( lowLeft[ 3 ] * weights[ 0 ][ 1 ] ) +
+                                   ( lowRight[ 3 ] * weights[ 1 ][ 1 ] ));
             // flush subnormal results to zero if necessary
-            if (NULL == containsDenorms && fabs(outData[3]) < FLT_MIN)
-                outData[3] = copysignf(0.0f, outData[3]);
+            if( NULL == containsDenorms && fabs(outData[3]) < FLT_MIN )
+                outData[3] = copysignf( 0.0f, outData[3] );
         }
         else
         {
             // 3D linear filtering
-            int x1 = adFn(floorf(x - 0.5f), width_lod);
-            int y1 = adFn(floorf(y - 0.5f), height_lod);
-            int z1 = adFn(floorf(z - 0.5f), depth_lod);
-            int x2 = adFn(floorf(x - 0.5f) + 1, width_lod);
-            int y2 = adFn(floorf(y - 0.5f) + 1, height_lod);
-            int z2 = adFn(floorf(z - 0.5f) + 1, depth_lod);
+            int x1 = adFn( floorf( x - 0.5f ), width_lod );
+            int y1 = adFn( floorf( y - 0.5f ), height_lod );
+            int z1 = adFn( floorf( z - 0.5f ), depth_lod );
+            int x2 = adFn( floorf( x - 0.5f ) + 1, width_lod );
+            int y2 = adFn( floorf( y - 0.5f ) + 1, height_lod );
+            int z2 = adFn( floorf( z - 0.5f ) + 1, depth_lod );
 
-            if (verbose)
-                log_info("\tActual integer coords used (i = floor(x-.5)): "
-                         "i0:{%d, %d, %d} and i1:{%d, %d, %d}\n",
-                         x1, y1, z1, x2, y2, z2);
+            if( verbose )
+                log_info( "\tActual integer coords used (i = floor(x-.5)): i0:{%d, %d, %d} and i1:{%d, %d, %d}\n", x1, y1, z1, x2, y2, z2 );
 
-            float upLeftA[4], upRightA[4], lowLeftA[4], lowRightA[4];
-            float upLeftB[4], upRightB[4], lowLeftB[4], lowRightB[4];
+            float upLeftA[ 4 ], upRightA[ 4 ], lowLeftA[ 4 ], lowRightA[ 4 ];
+            float upLeftB[ 4 ], upRightB[ 4 ], lowLeftB[ 4 ], lowRightB[ 4 ];
             float pixelMaxA[4], pixelMaxB[4];
-            read_image_pixel_float(imageData, imageInfo, x1, y1, z1, upLeftA,
-                                   lod);
-            read_image_pixel_float(imageData, imageInfo, x2, y1, z1, upRightA,
-                                   lod);
-            check_for_denorms(upLeftA, containsDenorms);
-            check_for_denorms(upRightA, containsDenorms);
-            pixelMax(upLeftA, upRightA, pixelMaxA);
-            read_image_pixel_float(imageData, imageInfo, x1, y2, z1, lowLeftA,
-                                   lod);
-            read_image_pixel_float(imageData, imageInfo, x2, y2, z1, lowRightA,
-                                   lod);
-            check_for_denorms(lowLeftA, containsDenorms);
-            check_for_denorms(lowRightA, containsDenorms);
-            pixelMax(lowLeftA, lowRightA, pixelMaxB);
-            pixelMax(pixelMaxA, pixelMaxB, returnVal.p);
-            read_image_pixel_float(imageData, imageInfo, x1, y1, z2, upLeftB,
-                                   lod);
-            read_image_pixel_float(imageData, imageInfo, x2, y1, z2, upRightB,
-                                   lod);
-            check_for_denorms(upLeftB, containsDenorms);
-            check_for_denorms(upRightB, containsDenorms);
-            pixelMax(upLeftB, upRightB, pixelMaxA);
-            read_image_pixel_float(imageData, imageInfo, x1, y2, z2, lowLeftB,
-                                   lod);
-            read_image_pixel_float(imageData, imageInfo, x2, y2, z2, lowRightB,
-                                   lod);
-            check_for_denorms(lowLeftB, containsDenorms);
-            check_for_denorms(lowRightB, containsDenorms);
-            pixelMax(lowLeftB, lowRightB, pixelMaxB);
-            pixelMax(pixelMaxA, pixelMaxB, pixelMaxA);
-            pixelMax(pixelMaxA, returnVal.p, returnVal.p);
+            read_image_pixel_float( imageData, imageInfo, x1, y1, z1, upLeftA, lod );
+            read_image_pixel_float( imageData, imageInfo, x2, y1, z1, upRightA, lod );
+            check_for_denorms( upLeftA, containsDenorms );
+            check_for_denorms( upRightA, containsDenorms );
+            pixelMax( upLeftA, upRightA, pixelMaxA );
+            read_image_pixel_float( imageData, imageInfo, x1, y2, z1, lowLeftA, lod );
+            read_image_pixel_float( imageData, imageInfo, x2, y2, z1, lowRightA, lod );
+            check_for_denorms( lowLeftA, containsDenorms );
+            check_for_denorms( lowRightA, containsDenorms );
+            pixelMax( lowLeftA, lowRightA, pixelMaxB );
+            pixelMax( pixelMaxA, pixelMaxB, returnVal.p);
+            read_image_pixel_float( imageData, imageInfo, x1, y1, z2, upLeftB, lod );
+            read_image_pixel_float( imageData, imageInfo, x2, y1, z2, upRightB, lod );
+            check_for_denorms( upLeftB, containsDenorms );
+            check_for_denorms( upRightB, containsDenorms );
+            pixelMax( upLeftB, upRightB, pixelMaxA );
+            read_image_pixel_float( imageData, imageInfo, x1, y2, z2, lowLeftB, lod );
+            read_image_pixel_float( imageData, imageInfo, x2, y2, z2, lowRightB, lod );
+            check_for_denorms( lowLeftB, containsDenorms );
+            check_for_denorms( lowRightB, containsDenorms );
+            pixelMax( lowLeftB, lowRightB, pixelMaxB );
+            pixelMax( pixelMaxA, pixelMaxB, pixelMaxA);
+            pixelMax( pixelMaxA, returnVal.p, returnVal.p );
 
-            if (verbose)
+            if( verbose )
             {
-                if (NULL == containsDenorms)
-                    log_info("\tSampled pixels (rgba order, denorms flushed to "
-                             "zero):\n");
+                if( NULL == containsDenorms )
+                    log_info( "\tSampled pixels (rgba order, denorms flushed to zero):\n" );
                 else
-                    log_info("\tSampled pixels (rgba order):\n");
-                log_info("\t\tp000: %f, %f, %f, %f\n", upLeftA[0], upLeftA[1],
-                         upLeftA[2], upLeftA[3]);
-                log_info("\t\tp001: %f, %f, %f, %f\n", upRightA[0], upRightA[1],
-                         upRightA[2], upRightA[3]);
-                log_info("\t\tp010: %f, %f, %f, %f\n", lowLeftA[0], lowLeftA[1],
-                         lowLeftA[2], lowLeftA[3]);
-                log_info("\t\tp011: %f, %f, %f, %f\n\n", lowRightA[0],
-                         lowRightA[1], lowRightA[2], lowRightA[3]);
-                log_info("\t\tp100: %f, %f, %f, %f\n", upLeftB[0], upLeftB[1],
-                         upLeftB[2], upLeftB[3]);
-                log_info("\t\tp101: %f, %f, %f, %f\n", upRightB[0], upRightB[1],
-                         upRightB[2], upRightB[3]);
-                log_info("\t\tp110: %f, %f, %f, %f\n", lowLeftB[0], lowLeftB[1],
-                         lowLeftB[2], lowLeftB[3]);
-                log_info("\t\tp111: %f, %f, %f, %f\n", lowRightB[0],
-                         lowRightB[1], lowRightB[2], lowRightB[3]);
+                    log_info( "\tSampled pixels (rgba order):\n" );
+                log_info( "\t\tp000: %f, %f, %f, %f\n", upLeftA[0], upLeftA[1], upLeftA[2], upLeftA[3] );
+                log_info( "\t\tp001: %f, %f, %f, %f\n", upRightA[0], upRightA[1], upRightA[2], upRightA[3] );
+                log_info( "\t\tp010: %f, %f, %f, %f\n", lowLeftA[0], lowLeftA[1], lowLeftA[2], lowLeftA[3] );
+                log_info( "\t\tp011: %f, %f, %f, %f\n\n", lowRightA[0], lowRightA[1], lowRightA[2], lowRightA[3] );
+                log_info( "\t\tp100: %f, %f, %f, %f\n", upLeftB[0], upLeftB[1], upLeftB[2], upLeftB[3] );
+                log_info( "\t\tp101: %f, %f, %f, %f\n", upRightB[0], upRightB[1], upRightB[2], upRightB[3] );
+                log_info( "\t\tp110: %f, %f, %f, %f\n", lowLeftB[0], lowLeftB[1], lowLeftB[2], lowLeftB[3] );
+                log_info( "\t\tp111: %f, %f, %f, %f\n", lowRightB[0], lowRightB[1], lowRightB[2], lowRightB[3] );
             }
 
-            double weights[2][2][2];
+            double weights[ 2 ][ 2 ][ 2 ];
 
-            float a = frac(x - 0.5f), b = frac(y - 0.5f), c = frac(z - 0.5f);
-            weights[0][0][0] = weights[0][1][0] = weights[0][0][1] =
-                weights[0][1][1] = 1.f - a;
-            weights[1][0][0] = weights[1][1][0] = weights[1][0][1] =
-                weights[1][1][1] = a;
-            weights[0][0][0] *= 1.f - b;
-            weights[1][0][0] *= 1.f - b;
-            weights[0][0][1] *= 1.f - b;
-            weights[1][0][1] *= 1.f - b;
-            weights[0][1][0] *= b;
-            weights[1][1][0] *= b;
-            weights[0][1][1] *= b;
-            weights[1][1][1] *= b;
-            weights[0][0][0] *= 1.f - c;
-            weights[0][1][0] *= 1.f - c;
-            weights[1][0][0] *= 1.f - c;
-            weights[1][1][0] *= 1.f - c;
-            weights[0][0][1] *= c;
-            weights[0][1][1] *= c;
-            weights[1][0][1] *= c;
-            weights[1][1][1] *= c;
+            float a = frac( x - 0.5f ), b = frac( y - 0.5f ), c = frac( z - 0.5f );
+            weights[ 0 ][ 0 ][ 0 ] = weights[ 0 ][ 1 ][ 0 ] = weights[ 0 ][ 0 ][ 1 ] = weights[ 0 ][ 1 ][ 1 ] = 1.f - a;
+            weights[ 1 ][ 0 ][ 0 ] = weights[ 1 ][ 1 ][ 0 ] = weights[ 1 ][ 0 ][ 1 ] = weights[ 1 ][ 1 ][ 1 ] = a;
+            weights[ 0 ][ 0 ][ 0 ] *= 1.f - b;
+            weights[ 1 ][ 0 ][ 0 ] *= 1.f - b;
+            weights[ 0 ][ 0 ][ 1 ] *= 1.f - b;
+            weights[ 1 ][ 0 ][ 1 ] *= 1.f - b;
+            weights[ 0 ][ 1 ][ 0 ] *= b;
+            weights[ 1 ][ 1 ][ 0 ] *= b;
+            weights[ 0 ][ 1 ][ 1 ] *= b;
+            weights[ 1 ][ 1 ][ 1 ] *= b;
+            weights[ 0 ][ 0 ][ 0 ] *= 1.f - c;
+            weights[ 0 ][ 1 ][ 0 ] *= 1.f - c;
+            weights[ 1 ][ 0 ][ 0 ] *= 1.f - c;
+            weights[ 1 ][ 1 ][ 0 ] *= 1.f - c;
+            weights[ 0 ][ 0 ][ 1 ] *= c;
+            weights[ 0 ][ 1 ][ 1 ] *= c;
+            weights[ 1 ][ 0 ][ 1 ] *= c;
+            weights[ 1 ][ 1 ][ 1 ] *= c;
 
-            if (verbose)
-                log_info("\tfrac( x - 0.5f ) = %f,  frac( y - 0.5f ) = %f, "
-                         "frac( z - 0.5f ) = %f\n",
-                         frac(x - 0.5f), frac(y - 0.5f), frac(z - 0.5f));
+            if( verbose )
+                log_info( "\tfrac( x - 0.5f ) = %f,  frac( y - 0.5f ) = %f, frac( z - 0.5f ) = %f\n",
+                         frac( x - 0.5f ), frac( y - 0.5f ), frac( z - 0.5f )  );
 
-            for (int i = 0; i < 3; i++)
+            for( int i = 0; i < 3; i++ )
             {
-                outData[i] = (float)((upLeftA[i] * weights[0][0][0])
-                                     + (upRightA[i] * weights[1][0][0])
-                                     + (lowLeftA[i] * weights[0][1][0])
-                                     + (lowRightA[i] * weights[1][1][0])
-                                     + (upLeftB[i] * weights[0][0][1])
-                                     + (upRightB[i] * weights[1][0][1])
-                                     + (lowLeftB[i] * weights[0][1][1])
-                                     + (lowRightB[i] * weights[1][1][1]));
+                outData[ i ] = (float)( ( upLeftA[ i ] * weights[ 0 ][ 0 ][ 0 ] ) +
+                                        ( upRightA[ i ] * weights[ 1 ][ 0 ][ 0 ] ) +
+                                        ( lowLeftA[ i ] * weights[ 0 ][ 1 ][ 0 ] ) +
+                                        ( lowRightA[ i ] * weights[ 1 ][ 1 ][ 0 ] ) +
+                                        ( upLeftB[ i ] * weights[ 0 ][ 0 ][ 1 ] ) +
+                                        ( upRightB[ i ] * weights[ 1 ][ 0 ][ 1 ] ) +
+                                        ( lowLeftB[ i ] * weights[ 0 ][ 1 ][ 1 ] ) +
+                                        ( lowRightB[ i ] * weights[ 1 ][ 1 ][ 1 ] ));
                 // flush subnormal results to zero if necessary
-                if (NULL == containsDenorms && fabs(outData[i]) < FLT_MIN)
-                    outData[i] = copysignf(0.0f, outData[i]);
+                if( NULL == containsDenorms && fabs(outData[i]) < FLT_MIN )
+                    outData[i] = copysignf( 0.0f, outData[i] );
             }
-            outData[3] = (float)((upLeftA[3] * weights[0][0][0])
-                                 + (upRightA[3] * weights[1][0][0])
-                                 + (lowLeftA[3] * weights[0][1][0])
-                                 + (lowRightA[3] * weights[1][1][0])
-                                 + (upLeftB[3] * weights[0][0][1])
-                                 + (upRightB[3] * weights[1][0][1])
-                                 + (lowLeftB[3] * weights[0][1][1])
-                                 + (lowRightB[3] * weights[1][1][1]));
+            outData[ 3 ] = (float)( ( upLeftA[ 3 ] * weights[ 0 ][ 0 ][ 0 ] ) +
+                                   ( upRightA[ 3 ] * weights[ 1 ][ 0 ][ 0 ] ) +
+                                   ( lowLeftA[ 3 ] * weights[ 0 ][ 1 ][ 0 ] ) +
+                                   ( lowRightA[ 3 ] * weights[ 1 ][ 1 ][ 0 ] ) +
+                                   ( upLeftB[ 3 ] * weights[ 0 ][ 0 ][ 1 ] ) +
+                                   ( upRightB[ 3 ] * weights[ 1 ][ 0 ][ 1 ] ) +
+                                   ( lowLeftB[ 3 ] * weights[ 0 ][ 1 ][ 1 ] ) +
+                                   ( lowRightB[ 3 ] * weights[ 1 ][ 1 ][ 1 ] ));
             // flush subnormal results to zero if necessary
-            if (NULL == containsDenorms && fabs(outData[3]) < FLT_MIN)
-                outData[3] = copysignf(0.0f, outData[3]);
+            if( NULL == containsDenorms && fabs(outData[3]) < FLT_MIN )
+                outData[3] = copysignf( 0.0f, outData[3] );
         }
 
         return returnVal;
     }
 }
 
-FloatPixel sample_image_pixel_float_offset(
-    void *imageData, image_descriptor *imageInfo, float x, float y, float z,
-    float xAddressOffset, float yAddressOffset, float zAddressOffset,
-    image_sampler_data *imageSampler, float *outData, int verbose,
-    int *containsDenorms)
+FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                           image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms )
 {
-    return sample_image_pixel_float_offset(
-        imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset,
-        zAddressOffset, imageSampler, outData, verbose, containsDenorms, 0);
+  return sample_image_pixel_float_offset( imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset, zAddressOffset,
+    imageSampler, outData, verbose, containsDenorms, 0);
 }
 
 
-int debug_find_vector_in_image(void *imagePtr, image_descriptor *imageInfo,
-                               void *vectorToFind, size_t vectorSize, int *outX,
-                               int *outY, int *outZ, size_t lod)
+int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo,
+                               void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ, size_t lod )
 {
     int foundCount = 0;
     char *iPtr = (char *)imagePtr;
@@ -2305,159 +2315,151 @@
 
     switch (imageInfo->type)
     {
-        case CL_MEM_OBJECT_IMAGE1D:
-            width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height = 1;
-            depth = 1;
-            break;
-        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-            width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height = 1;
-            depth = imageInfo->arraySize;
-            break;
-        case CL_MEM_OBJECT_IMAGE2D:
-            width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height =
-                (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            depth = 1;
-            break;
-        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-            width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height =
-                (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            depth = imageInfo->arraySize;
-            break;
-        case CL_MEM_OBJECT_IMAGE3D:
-            width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height =
-                (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            depth = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-            break;
+    case CL_MEM_OBJECT_IMAGE1D:
+      width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
+      height = 1;
+      depth = 1;
+      break;
+    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+      width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
+      height = 1;
+      depth = imageInfo->arraySize;
+      break;
+    case CL_MEM_OBJECT_IMAGE2D:
+      width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
+      height = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
+      depth = 1;
+      break;
+    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+      width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
+      height = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
+      depth = imageInfo->arraySize;
+      break;
+    case CL_MEM_OBJECT_IMAGE3D:
+      width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
+      height = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
+      depth = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
+      break;
     }
 
-    row_pitch = width * get_pixel_size(imageInfo->format);
+    row_pitch = width * get_pixel_size( imageInfo->format );
     slice_pitch = row_pitch * height;
 
-    for (size_t z = 0; z < depth; z++)
+    for( size_t z = 0; z < depth; z++ )
     {
-        for (size_t y = 0; y < height; y++)
+        for( size_t y = 0; y < height; y++ )
         {
-            for (size_t x = 0; x < width; x++)
+            for( size_t x = 0; x < width; x++)
             {
-                if (memcmp(iPtr, vectorToFind, vectorSize) == 0)
+                if( memcmp( iPtr, vectorToFind, vectorSize ) == 0 )
                 {
-                    if (foundCount == 0)
+                    if( foundCount == 0 )
                     {
                         *outX = (int)x;
-                        if (outY != NULL) *outY = (int)y;
-                        if (outZ != NULL) *outZ = (int)z;
+                        if (outY != NULL)
+                            *outY = (int)y;
+                        if( outZ != NULL )
+                            *outZ = (int)z;
                     }
                     foundCount++;
                 }
                 iPtr += vectorSize;
             }
-            iPtr += row_pitch - (width * vectorSize);
+            iPtr += row_pitch - ( width * vectorSize );
         }
-        iPtr += slice_pitch - (height * row_pitch);
+        iPtr += slice_pitch - ( height * row_pitch );
     }
     return foundCount;
 }
 
-int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
-                              unsigned int *valuesToFind, int *outX, int *outY,
-                              int *outZ, int lod)
+int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                              unsigned int *valuesToFind, int *outX, int *outY, int *outZ, int lod )
 {
-    char vectorToFind[4 * 4];
-    size_t vectorSize = get_format_channel_count(imageInfo->format);
+    char vectorToFind[ 4 * 4 ];
+    size_t vectorSize = get_format_channel_count( imageInfo->format );
 
 
-    if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT8)
+    if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT8 )
     {
         unsigned char *p = (unsigned char *)vectorToFind;
-        for (unsigned int i = 0; i < vectorSize; i++)
+        for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (unsigned char)valuesToFind[i];
     }
-    else if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT16)
+    else if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT16 )
     {
         unsigned short *p = (unsigned short *)vectorToFind;
-        for (unsigned int i = 0; i < vectorSize; i++)
+        for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (unsigned short)valuesToFind[i];
         vectorSize *= 2;
     }
-    else if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT32)
+    else if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT32 )
     {
         unsigned int *p = (unsigned int *)vectorToFind;
-        for (unsigned int i = 0; i < vectorSize; i++)
+        for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (unsigned int)valuesToFind[i];
         vectorSize *= 4;
     }
     else
     {
-        log_info("WARNING: Unable to search for debug pixel: invalid image "
-                 "format\n");
+        log_info( "WARNING: Unable to search for debug pixel: invalid image format\n" );
         return false;
     }
-    return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
-                                      vectorSize, outX, outY, outZ, lod);
+    return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ, lod );
 }
 
-int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
-                              int *valuesToFind, int *outX, int *outY,
-                              int *outZ, int lod)
+int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                              int *valuesToFind, int *outX, int *outY, int *outZ, int lod )
 {
-    char vectorToFind[4 * 4];
-    size_t vectorSize = get_format_channel_count(imageInfo->format);
+    char vectorToFind[ 4 * 4 ];
+    size_t vectorSize = get_format_channel_count( imageInfo->format );
 
-    if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT8)
+    if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT8 )
     {
         char *p = (char *)vectorToFind;
-        for (unsigned int i = 0; i < vectorSize; i++)
+        for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (char)valuesToFind[i];
     }
-    else if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT16)
+    else if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT16 )
     {
         short *p = (short *)vectorToFind;
-        for (unsigned int i = 0; i < vectorSize; i++)
+        for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (short)valuesToFind[i];
         vectorSize *= 2;
     }
-    else if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT32)
+    else if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT32 )
     {
         int *p = (int *)vectorToFind;
-        for (unsigned int i = 0; i < vectorSize; i++)
+        for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (int)valuesToFind[i];
         vectorSize *= 4;
     }
     else
     {
-        log_info("WARNING: Unable to search for debug pixel: invalid image "
-                 "format\n");
+        log_info( "WARNING: Unable to search for debug pixel: invalid image format\n" );
         return false;
     }
-    return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
-                                      vectorSize, outX, outY, outZ, lod);
+    return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ, lod );
 }
 
-int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
-                              float *valuesToFind, int *outX, int *outY,
-                              int *outZ, int lod)
+int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                              float *valuesToFind, int *outX, int *outY, int *outZ, int lod )
 {
-    char vectorToFind[4 * 4];
+    char vectorToFind[ 4 * 4 ];
     float swizzled[4];
-    memcpy(swizzled, valuesToFind, sizeof(swizzled));
-    size_t vectorSize = get_pixel_size(imageInfo->format);
-    pack_image_pixel(swizzled, imageInfo->format, vectorToFind);
-    return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
-                                      vectorSize, outX, outY, outZ, lod);
+    memcpy( swizzled, valuesToFind, sizeof( swizzled ) );
+    size_t vectorSize = get_pixel_size( imageInfo->format );
+    pack_image_pixel( swizzled, imageInfo->format, vectorToFind );
+    return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ, lod );
 }
 
-template <class T>
-void swizzle_vector_for_image(T *srcVector, const cl_image_format *imageFormat)
+template <class T> void swizzle_vector_for_image( T *srcVector, const cl_image_format *imageFormat )
 {
     T temp;
-    switch (imageFormat->image_channel_order)
+    switch( imageFormat->image_channel_order )
     {
-        case CL_A: srcVector[0] = srcVector[3]; break;
+        case CL_A:
+            srcVector[ 0 ] = srcVector[ 3 ];
+            break;
         case CL_R:
         case CL_Rx:
         case CL_RG:
@@ -2467,475 +2469,461 @@
         case CL_RGBA:
         case CL_sRGB:
         case CL_sRGBx:
-        case CL_sRGBA: break;
-        case CL_RA: srcVector[1] = srcVector[3]; break;
-        case CL_ARGB:
-            temp = srcVector[3];
-            srcVector[3] = srcVector[2];
-            srcVector[2] = srcVector[1];
-            srcVector[1] = srcVector[0];
-            srcVector[0] = temp;
+        case CL_sRGBA:
             break;
-        case CL_ABGR:
-            temp = srcVector[3];
-            srcVector[3] = srcVector[0];
-            srcVector[0] = temp;
-            temp = srcVector[2];
-            srcVector[2] = srcVector[1];
-            srcVector[1] = temp;
+        case CL_RA:
+            srcVector[ 1 ] = srcVector[ 3 ];
+            break;
+        case CL_ARGB:
+            temp = srcVector[ 3 ];
+            srcVector[ 3 ] = srcVector[ 2 ];
+            srcVector[ 2 ] = srcVector[ 1 ];
+            srcVector[ 1 ] = srcVector[ 0 ];
+            srcVector[ 0 ] = temp;
             break;
         case CL_BGRA:
         case CL_sBGRA:
-            temp = srcVector[0];
-            srcVector[0] = srcVector[2];
-            srcVector[2] = temp;
+            temp = srcVector[ 0 ];
+            srcVector[ 0 ] = srcVector[ 2 ];
+            srcVector[ 2 ] = temp;
             break;
         case CL_INTENSITY:
-            srcVector[3] = srcVector[0];
-            srcVector[2] = srcVector[0];
-            srcVector[1] = srcVector[0];
+            srcVector[ 3 ] = srcVector[ 0 ];
+            srcVector[ 2 ] = srcVector[ 0 ];
+            srcVector[ 1 ] = srcVector[ 0 ];
             break;
         case CL_LUMINANCE:
-            srcVector[2] = srcVector[0];
-            srcVector[1] = srcVector[0];
+            srcVector[ 2 ] = srcVector[ 0 ];
+            srcVector[ 1 ] = srcVector[ 0 ];
             break;
 #ifdef CL_1RGB_APPLE
         case CL_1RGB_APPLE:
-            temp = srcVector[3];
-            srcVector[3] = srcVector[2];
-            srcVector[2] = srcVector[1];
-            srcVector[1] = srcVector[0];
-            srcVector[0] = temp;
+            temp = srcVector[ 3 ];
+            srcVector[ 3 ] = srcVector[ 2 ];
+            srcVector[ 2 ] = srcVector[ 1 ];
+            srcVector[ 1 ] = srcVector[ 0 ];
+            srcVector[ 0 ] = temp;
             break;
 #endif
 #ifdef CL_BGR1_APPLE
         case CL_BGR1_APPLE:
-            temp = srcVector[0];
-            srcVector[0] = srcVector[2];
-            srcVector[2] = temp;
+            temp = srcVector[ 0 ];
+            srcVector[ 0 ] = srcVector[ 2 ];
+            srcVector[ 2 ] = temp;
             break;
 #endif
     }
 }
 
-#define SATURATE(v, min, max) (v < min ? min : (v > max ? max : v))
+#define SATURATE( v, min, max ) ( v < min ? min : ( v > max ? max : v ) )
 
-void pack_image_pixel(unsigned int *srcVector,
-                      const cl_image_format *imageFormat, void *outData)
+void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData )
 {
-    swizzle_vector_for_image<unsigned int>(srcVector, imageFormat);
-    size_t channelCount = get_format_channel_count(imageFormat);
+    swizzle_vector_for_image<unsigned int>( srcVector, imageFormat );
+    size_t channelCount = get_format_channel_count( imageFormat );
 
-    switch (imageFormat->image_channel_data_type)
+    switch( imageFormat->image_channel_data_type )
     {
-        case CL_UNSIGNED_INT8: {
+        case CL_UNSIGNED_INT8:
+        {
             unsigned char *ptr = (unsigned char *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (unsigned char)SATURATE(srcVector[i], 0, 255);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (unsigned char)SATURATE( srcVector[ i ], 0, 255 );
             break;
         }
-        case CL_UNSIGNED_INT16: {
+        case CL_UNSIGNED_INT16:
+        {
             unsigned short *ptr = (unsigned short *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (unsigned short)SATURATE(srcVector[i], 0, 65535);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (unsigned short)SATURATE( srcVector[ i ], 0, 65535 );
             break;
         }
-        case CL_UNSIGNED_INT32: {
+        case CL_UNSIGNED_INT32:
+        {
             unsigned int *ptr = (unsigned int *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (unsigned int)srcVector[i];
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (unsigned int)srcVector[ i ];
             break;
         }
-        default: break;
+        default:
+            break;
     }
 }
 
-void pack_image_pixel(int *srcVector, const cl_image_format *imageFormat,
-                      void *outData)
+void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData )
 {
-    swizzle_vector_for_image<int>(srcVector, imageFormat);
-    size_t chanelCount = get_format_channel_count(imageFormat);
+    swizzle_vector_for_image<int>( srcVector, imageFormat );
+    size_t chanelCount = get_format_channel_count( imageFormat );
 
-    switch (imageFormat->image_channel_data_type)
+    switch( imageFormat->image_channel_data_type )
     {
-        case CL_SIGNED_INT8: {
+        case CL_SIGNED_INT8:
+        {
             char *ptr = (char *)outData;
-            for (unsigned int i = 0; i < chanelCount; i++)
-                ptr[i] = (char)SATURATE(srcVector[i], -128, 127);
+            for( unsigned int i = 0; i < chanelCount; i++ )
+                ptr[ i ] = (char)SATURATE( srcVector[ i ], -128, 127 );
             break;
         }
-        case CL_SIGNED_INT16: {
+        case CL_SIGNED_INT16:
+        {
             short *ptr = (short *)outData;
-            for (unsigned int i = 0; i < chanelCount; i++)
-                ptr[i] = (short)SATURATE(srcVector[i], -32768, 32767);
+            for( unsigned int i = 0; i < chanelCount; i++ )
+                ptr[ i ] = (short)SATURATE( srcVector[ i ], -32768, 32767 );
             break;
         }
-        case CL_SIGNED_INT32: {
+        case CL_SIGNED_INT32:
+        {
             int *ptr = (int *)outData;
-            for (unsigned int i = 0; i < chanelCount; i++)
-                ptr[i] = (int)srcVector[i];
+            for( unsigned int i = 0; i < chanelCount; i++ )
+                ptr[ i ] = (int)srcVector[ i ];
             break;
         }
-        default: break;
+        default:
+            break;
     }
 }
 
-int round_to_even(float v)
+int round_to_even( float v )
 {
     // clamp overflow
-    if (v >= -(float)INT_MIN) return INT_MAX;
-    if (v <= (float)INT_MIN) return INT_MIN;
+    if( v >= - (float) INT_MIN )
+        return INT_MAX;
+    if( v <= (float) INT_MIN )
+        return INT_MIN;
 
     // round fractional values to integer value
-    if (fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23))
+    if( fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23) )
     {
-        static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23),
-                                        MAKE_HEX_FLOAT(-0x1.0p23f, -0x1L, 23) };
-        float magicVal = magic[v < 0.0f];
+        static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23), MAKE_HEX_FLOAT(-0x1.0p23f, -0x1L, 23) };
+        float magicVal = magic[ v < 0.0f ];
         v += magicVal;
         v -= magicVal;
     }
 
-    return (int)v;
+    return (int) v;
 }
 
-void pack_image_pixel(float *srcVector, const cl_image_format *imageFormat,
-                      void *outData)
+void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData )
 {
-    swizzle_vector_for_image<float>(srcVector, imageFormat);
-    size_t channelCount = get_format_channel_count(imageFormat);
-    switch (imageFormat->image_channel_data_type)
+    swizzle_vector_for_image<float>( srcVector, imageFormat );
+    size_t channelCount = get_format_channel_count( imageFormat );
+    switch( imageFormat->image_channel_data_type )
     {
-        case CL_HALF_FLOAT: {
-            cl_half *ptr = (cl_half *)outData;
+        case CL_HALF_FLOAT:
+        {
+            cl_ushort *ptr = (cl_ushort *)outData;
 
-            switch (gFloatToHalfRoundingMode)
+            switch( gFloatToHalfRoundingMode )
             {
                 case kRoundToNearestEven:
-                    for (unsigned int i = 0; i < channelCount; i++)
-                        ptr[i] = cl_half_from_float(srcVector[i], CL_HALF_RTE);
-                    break;
+            for( unsigned int i = 0; i < channelCount; i++ )
+                        ptr[ i ] = float2half_rte( srcVector[ i ] );
+            break;
                 case kRoundTowardZero:
-                    for (unsigned int i = 0; i < channelCount; i++)
-                        ptr[i] = cl_half_from_float(srcVector[i], CL_HALF_RTZ);
+                    for( unsigned int i = 0; i < channelCount; i++ )
+                        ptr[ i ] = float2half_rtz( srcVector[ i ] );
                     break;
                 default:
-                    log_error("ERROR: Test internal error -- unhandled or "
-                              "unknown float->half rounding mode.\n");
+                    log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" );
                     exit(-1);
                     break;
-            }
+        }
             break;
         }
 
-        case CL_FLOAT: {
+        case CL_FLOAT:
+        {
             cl_float *ptr = (cl_float *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = srcVector[i];
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = srcVector[ i ];
             break;
         }
 
-        case CL_SNORM_INT8: {
+        case CL_SNORM_INT8:
+        {
             cl_char *ptr = (cl_char *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] =
-                    (cl_char)NORMALIZE_SIGNED(srcVector[i], -127.0f, 127.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (cl_char)NORMALIZE_SIGNED( srcVector[ i ], -127.0f, 127.f );
             break;
         }
-        case CL_SNORM_INT16: {
+        case CL_SNORM_INT16:
+        {
             cl_short *ptr = (cl_short *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] =
-                    (short)NORMALIZE_SIGNED(srcVector[i], -32767.f, 32767.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (short)NORMALIZE_SIGNED( srcVector[ i ], -32767.f, 32767.f  );
             break;
         }
-        case CL_UNORM_INT8: {
+        case CL_UNORM_INT8:
+        {
             cl_uchar *ptr = (cl_uchar *)outData;
-            if (is_sRGBA_order(imageFormat->image_channel_order))
+            if ( is_sRGBA_order(imageFormat->image_channel_order) )
             {
-                ptr[0] = (unsigned char)(sRGBmap(srcVector[0]) + 0.5);
-                ptr[1] = (unsigned char)(sRGBmap(srcVector[1]) + 0.5);
-                ptr[2] = (unsigned char)(sRGBmap(srcVector[2]) + 0.5);
+                ptr[ 0 ] = (unsigned char)( sRGBmap( srcVector[ 0 ] ) + 0.5 );
+                ptr[ 1 ] = (unsigned char)( sRGBmap( srcVector[ 1 ] ) + 0.5 );
+                ptr[ 2 ] = (unsigned char)( sRGBmap( srcVector[ 2 ] ) + 0.5 );
                 if (channelCount == 4)
-                    ptr[3] = (unsigned char)NORMALIZE(srcVector[3], 255.f);
+                    ptr[ 3 ] = (unsigned char)NORMALIZE( srcVector[ 3 ], 255.f );
             }
             else
             {
-                for (unsigned int i = 0; i < channelCount; i++)
-                    ptr[i] = (unsigned char)NORMALIZE(srcVector[i], 255.f);
+                for( unsigned int i = 0; i < channelCount; i++ )
+                    ptr[ i ] = (unsigned char)NORMALIZE( srcVector[ i ], 255.f );
             }
 #ifdef CL_1RGB_APPLE
-            if (imageFormat->image_channel_order == CL_1RGB_APPLE)
+            if( imageFormat->image_channel_order == CL_1RGB_APPLE )
                 ptr[0] = 255.0f;
 #endif
 #ifdef CL_BGR1_APPLE
-            if (imageFormat->image_channel_order == CL_BGR1_APPLE)
+            if( imageFormat->image_channel_order == CL_BGR1_APPLE )
                 ptr[3] = 255.0f;
 #endif
             break;
         }
-        case CL_UNORM_INT16: {
+        case CL_UNORM_INT16:
+        {
             cl_ushort *ptr = (cl_ushort *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (unsigned short)NORMALIZE(srcVector[i], 65535.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (unsigned short)NORMALIZE( srcVector[ i ], 65535.f );
             break;
         }
-        case CL_UNORM_SHORT_555: {
+        case CL_UNORM_SHORT_555:
+        {
             cl_ushort *ptr = (cl_ushort *)outData;
-            ptr[0] =
-                (((unsigned short)NORMALIZE(srcVector[0], 31.f) & 31) << 10)
-                | (((unsigned short)NORMALIZE(srcVector[1], 31.f) & 31) << 5)
-                | (((unsigned short)NORMALIZE(srcVector[2], 31.f) & 31) << 0);
+            ptr[ 0 ] = ( ( (unsigned short)NORMALIZE( srcVector[ 0 ], 31.f ) & 31 ) << 10 ) |
+            ( ( (unsigned short)NORMALIZE( srcVector[ 1 ], 31.f ) & 31 ) << 5 ) |
+            ( ( (unsigned short)NORMALIZE( srcVector[ 2 ], 31.f ) & 31 ) << 0 );
             break;
         }
-        case CL_UNORM_SHORT_565: {
+        case CL_UNORM_SHORT_565:
+        {
             cl_ushort *ptr = (cl_ushort *)outData;
-            ptr[0] =
-                (((unsigned short)NORMALIZE(srcVector[0], 31.f) & 31) << 11)
-                | (((unsigned short)NORMALIZE(srcVector[1], 63.f) & 63) << 5)
-                | (((unsigned short)NORMALIZE(srcVector[2], 31.f) & 31) << 0);
+            ptr[ 0 ] = ( ( (unsigned short)NORMALIZE( srcVector[ 0 ], 31.f ) & 31 ) << 11 ) |
+            ( ( (unsigned short)NORMALIZE( srcVector[ 1 ], 63.f ) & 63 ) << 5 ) |
+            ( ( (unsigned short)NORMALIZE( srcVector[ 2 ], 31.f ) & 31 ) << 0 );
             break;
         }
-        case CL_UNORM_INT_101010: {
+        case CL_UNORM_INT_101010:
+        {
             cl_uint *ptr = (cl_uint *)outData;
-            ptr[0] =
-                (((unsigned int)NORMALIZE(srcVector[0], 1023.f) & 1023) << 20)
-                | (((unsigned int)NORMALIZE(srcVector[1], 1023.f) & 1023) << 10)
-                | (((unsigned int)NORMALIZE(srcVector[2], 1023.f) & 1023) << 0);
+            ptr[ 0 ] = ( ( (unsigned int)NORMALIZE( srcVector[ 0 ], 1023.f ) & 1023 ) << 20 ) |
+            ( ( (unsigned int)NORMALIZE( srcVector[ 1 ], 1023.f ) & 1023 ) << 10 ) |
+            ( ( (unsigned int)NORMALIZE( srcVector[ 2 ], 1023.f ) & 1023 ) << 0 );
             break;
         }
-        case CL_SIGNED_INT8: {
+        case CL_SIGNED_INT8:
+        {
             cl_char *ptr = (cl_char *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] =
-                    (cl_char)CONVERT_INT(srcVector[i], -127.0f, 127.f, 127);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (cl_char)CONVERT_INT( srcVector[ i ], -127.0f, 127.f, 127 );
             break;
         }
-        case CL_SIGNED_INT16: {
+        case CL_SIGNED_INT16:
+        {
             cl_short *ptr = (cl_short *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] =
-                    (short)CONVERT_INT(srcVector[i], -32767.f, 32767.f, 32767);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (short)CONVERT_INT( srcVector[ i ], -32767.f, 32767.f, 32767  );
             break;
         }
-        case CL_SIGNED_INT32: {
+        case CL_SIGNED_INT32:
+        {
             cl_int *ptr = (cl_int *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (int)CONVERT_INT(
-                    srcVector[i], MAKE_HEX_FLOAT(-0x1.0p31f, -1, 31),
-                    MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffe, 30 - 23),
-                    CL_INT_MAX);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (int)CONVERT_INT( srcVector[ i ], MAKE_HEX_FLOAT( -0x1.0p31f, -1, 31), MAKE_HEX_FLOAT( 0x1.fffffep30f, 0x1fffffe, 30-23), CL_INT_MAX  );
             break;
         }
-        case CL_UNSIGNED_INT8: {
+        case CL_UNSIGNED_INT8:
+        {
             cl_uchar *ptr = (cl_uchar *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] =
-                    (cl_uchar)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (cl_uchar)CONVERT_UINT( srcVector[ i ], 255.f, CL_UCHAR_MAX );
             break;
         }
-        case CL_UNSIGNED_INT16: {
+        case CL_UNSIGNED_INT16:
+        {
             cl_ushort *ptr = (cl_ushort *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (cl_ushort)CONVERT_UINT(srcVector[i], 32767.f,
-                                                 CL_USHRT_MAX);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (cl_ushort)CONVERT_UINT( srcVector[ i ], 32767.f, CL_USHRT_MAX );
             break;
         }
-        case CL_UNSIGNED_INT32: {
+        case CL_UNSIGNED_INT32:
+        {
             cl_uint *ptr = (cl_uint *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (cl_uint)CONVERT_UINT(
-                    srcVector[i],
-                    MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffe, 31 - 23),
-                    CL_UINT_MAX);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (cl_uint)CONVERT_UINT( srcVector[ i ], MAKE_HEX_FLOAT( 0x1.fffffep31f, 0x1fffffe, 31-23), CL_UINT_MAX  );
             break;
         }
 #ifdef CL_SFIXED14_APPLE
-        case CL_SFIXED14_APPLE: {
-            cl_ushort *ptr = (cl_ushort *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
+        case CL_SFIXED14_APPLE:
+        {
+            cl_ushort *ptr = (cl_ushort*)outData;
+            for( unsigned int i = 0; i < channelCount; i++ )
             {
-                cl_float f = fmaxf(srcVector[i], -1.0f);
-                f = fminf(f, 3.0f);
+                cl_float f = fmaxf( srcVector[i], -1.0f );
+                f = fminf( f, 3.0f );
                 cl_int d = rintf(f * 0x1.0p14f);
                 d += 16384;
-                if (d > CL_USHRT_MAX) d = CL_USHRT_MAX;
+                if( d > CL_USHRT_MAX )
+                    d = CL_USHRT_MAX;
                 ptr[i] = d;
             }
             break;
         }
 #endif
         default:
-            log_error("INTERNAL ERROR: unknown format (%d)\n",
-                      imageFormat->image_channel_data_type);
+            log_error( "INTERNAL ERROR: unknown format (%d)\n", imageFormat->image_channel_data_type);
             exit(-1);
             break;
     }
 }
 
-void pack_image_pixel_error(const float *srcVector,
-                            const cl_image_format *imageFormat,
-                            const void *results, float *errors)
+void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results, float *errors )
 {
-    size_t channelCount = get_format_channel_count(imageFormat);
-    switch (imageFormat->image_channel_data_type)
+    size_t channelCount = get_format_channel_count( imageFormat );
+    switch( imageFormat->image_channel_data_type )
     {
-        case CL_HALF_FLOAT: {
-            const cl_half *ptr = (const cl_half *)results;
-
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = Ulp_Error_Half(ptr[i], srcVector[i]);
-
-            break;
-        }
-
-        case CL_FLOAT: {
+        case CL_HALF_FLOAT:
+        {
             const cl_ushort *ptr = (const cl_ushort *)results;
 
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = Ulp_Error(ptr[i], srcVector[i]);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = Ulp_Error_Half( ptr[i], srcVector[i] );
 
             break;
         }
 
-        case CL_SNORM_INT8: {
+        case CL_FLOAT:
+        {
+            const cl_ushort *ptr = (const cl_ushort *)results;
+
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = Ulp_Error( ptr[i], srcVector[i] );
+
+            break;
+        }
+
+        case CL_SNORM_INT8:
+        {
             const cl_char *ptr = (const cl_char *)results;
 
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = ptr[i]
-                    - NORMALIZE_SIGNED_UNROUNDED(srcVector[i], -127.0f, 127.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( srcVector[ i ], -127.0f, 127.f );
 
             break;
         }
-        case CL_SNORM_INT16: {
+        case CL_SNORM_INT16:
+        {
             const cl_short *ptr = (const cl_short *)results;
 
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = ptr[i]
-                    - NORMALIZE_SIGNED_UNROUNDED(srcVector[i], -32767.f,
-                                                 32767.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( srcVector[ i ], -32767.f, 32767.f  );
 
             break;
         }
-        case CL_UNORM_INT8: {
+        case CL_UNORM_INT8:
+        {
             const cl_uchar *ptr = (const cl_uchar *)results;
 
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = ptr[i] - NORMALIZE_UNROUNDED(srcVector[i], 255.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = ptr[i] - NORMALIZE_UNROUNDED( srcVector[ i ], 255.f  );
 
             break;
         }
-        case CL_UNORM_INT16: {
+        case CL_UNORM_INT16:
+        {
             const cl_ushort *ptr = (const cl_ushort *)results;
 
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = ptr[i] - NORMALIZE_UNROUNDED(srcVector[i], 65535.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = ptr[i] - NORMALIZE_UNROUNDED( srcVector[ i ], 65535.f  );
 
             break;
         }
-        case CL_UNORM_SHORT_555: {
+        case CL_UNORM_SHORT_555:
+        {
             const cl_ushort *ptr = (const cl_ushort *)results;
 
-            errors[0] =
-                ((ptr[0] >> 10) & 31) - NORMALIZE_UNROUNDED(srcVector[0], 31.f);
-            errors[1] =
-                ((ptr[0] >> 5) & 31) - NORMALIZE_UNROUNDED(srcVector[1], 31.f);
-            errors[2] =
-                ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED(srcVector[2], 31.f);
+            errors[0] = ((ptr[0] >> 10) & 31) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 31.f );
+            errors[1] = ((ptr[0] >>  5) & 31) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 31.f );
+            errors[2] = ((ptr[0] >>  0) & 31) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 31.f );
 
             break;
         }
-        case CL_UNORM_SHORT_565: {
+        case CL_UNORM_SHORT_565:
+        {
             const cl_ushort *ptr = (const cl_ushort *)results;
 
-            errors[0] =
-                ((ptr[0] >> 11) & 31) - NORMALIZE_UNROUNDED(srcVector[0], 31.f);
-            errors[1] =
-                ((ptr[0] >> 5) & 63) - NORMALIZE_UNROUNDED(srcVector[1], 63.f);
-            errors[2] =
-                ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED(srcVector[2], 31.f);
+            errors[0] = ((ptr[0] >> 11) & 31) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 31.f );
+            errors[1] = ((ptr[0] >>  5) & 63) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 63.f );
+            errors[2] = ((ptr[0] >>  0) & 31) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 31.f );
 
             break;
         }
-        case CL_UNORM_INT_101010: {
+        case CL_UNORM_INT_101010:
+        {
             const cl_uint *ptr = (const cl_uint *)results;
 
-            errors[0] = ((ptr[0] >> 20) & 1023)
-                - NORMALIZE_UNROUNDED(srcVector[0], 1023.f);
-            errors[1] = ((ptr[0] >> 10) & 1023)
-                - NORMALIZE_UNROUNDED(srcVector[1], 1023.f);
-            errors[2] = ((ptr[0] >> 0) & 1023)
-                - NORMALIZE_UNROUNDED(srcVector[2], 1023.f);
+            errors[0] = ((ptr[0] >> 20) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 1023.f );
+            errors[1] = ((ptr[0] >> 10) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 1023.f );
+            errors[2] = ((ptr[0] >>  0) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 1023.f );
 
             break;
         }
-        case CL_SIGNED_INT8: {
+        case CL_SIGNED_INT8:
+        {
             const cl_char *ptr = (const cl_char *)results;
 
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] =
-                    ptr[i] - CONVERT_INT(srcVector[i], -127.0f, 127.f, 127);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[ i ] = ptr[i] - CONVERT_INT( srcVector[ i ], -127.0f, 127.f, 127 );
 
             break;
         }
-        case CL_SIGNED_INT16: {
+        case CL_SIGNED_INT16:
+        {
             const cl_short *ptr = (const cl_short *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = ptr[i]
-                    - CONVERT_INT(srcVector[i], -32767.f, 32767.f, 32767);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = ptr[ i ] - CONVERT_INT( srcVector[ i ], -32767.f, 32767.f, 32767  );
             break;
         }
-        case CL_SIGNED_INT32: {
+        case CL_SIGNED_INT32:
+        {
             const cl_int *ptr = (const cl_int *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = (cl_float)(
-                    (cl_long)ptr[i]
-                    - (cl_long)CONVERT_INT(
-                        srcVector[i], MAKE_HEX_FLOAT(-0x1.0p31f, -1, 31),
-                        MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffe, 30 - 23),
-                        CL_INT_MAX));
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = (cl_float)((cl_long) ptr[ i ] - (cl_long) CONVERT_INT( srcVector[ i ], MAKE_HEX_FLOAT( -0x1.0p31f, -1, 31), MAKE_HEX_FLOAT( 0x1.fffffep30f, 0x1fffffe, 30-23), CL_INT_MAX  ));
             break;
         }
-        case CL_UNSIGNED_INT8: {
+        case CL_UNSIGNED_INT8:
+        {
             const cl_uchar *ptr = (const cl_uchar *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = (cl_int)ptr[i]
-                    - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = (cl_int) ptr[ i ] - (cl_int) CONVERT_UINT( srcVector[ i ], 255.f, CL_UCHAR_MAX );
             break;
         }
-        case CL_UNSIGNED_INT16: {
+        case CL_UNSIGNED_INT16:
+        {
             const cl_ushort *ptr = (const cl_ushort *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = (cl_int)ptr[i]
-                    - (cl_int)CONVERT_UINT(srcVector[i], 32767.f, CL_USHRT_MAX);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = (cl_int) ptr[ i ] - (cl_int) CONVERT_UINT( srcVector[ i ], 32767.f, CL_USHRT_MAX );
             break;
         }
-        case CL_UNSIGNED_INT32: {
+        case CL_UNSIGNED_INT32:
+        {
             const cl_uint *ptr = (const cl_uint *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = (cl_float)(
-                    (cl_long)ptr[i]
-                    - (cl_long)CONVERT_UINT(
-                        srcVector[i],
-                        MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffe, 31 - 23),
-                        CL_UINT_MAX));
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = (cl_float)((cl_long) ptr[ i ] - (cl_long)CONVERT_UINT( srcVector[ i ], MAKE_HEX_FLOAT( 0x1.fffffep31f, 0x1fffffe, 31-23), CL_UINT_MAX  ));
             break;
         }
 #ifdef CL_SFIXED14_APPLE
-        case CL_SFIXED14_APPLE: {
+        case CL_SFIXED14_APPLE:
+        {
             const cl_ushort *ptr = (const cl_ushort *)results;
 
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = ptr[i]
-                    - NORMALIZE_SIGNED_UNROUNDED(((int)srcVector[i] - 16384),
-                                                 -16384.f, 49151.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( ((int) srcVector[ i ] - 16384), -16384.f, 49151.f  );
 
             break;
         }
 #endif
         default:
-            log_error("INTERNAL ERROR: unknown format (%d)\n",
-                      imageFormat->image_channel_data_type);
+            log_error( "INTERNAL ERROR: unknown format (%d)\n", imageFormat->image_channel_data_type);
             exit(-1);
             break;
     }
@@ -2944,238 +2932,208 @@
 
 //
 //  Autodetect which rounding mode is used for image writes to CL_HALF_FLOAT
-//  This should be called lazily before attempting to verify image writes,
-//  otherwise an error will occur.
+//  This should be called lazily before attempting to verify image writes, otherwise an error will occur.
 //
-int DetectFloatToHalfRoundingMode(
-    cl_command_queue q) // Returns CL_SUCCESS on success
+int  DetectFloatToHalfRoundingMode( cl_command_queue q )  // Returns CL_SUCCESS on success
 {
     cl_int err = CL_SUCCESS;
 
-    if (gFloatToHalfRoundingMode == kDefaultRoundingMode)
+    if( gFloatToHalfRoundingMode == kDefaultRoundingMode )
     {
-        // Some numbers near 0.5f, that we look at to see how the values are
-        // rounded.
-        static const cl_uint inData[4 * 4] = {
-            0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U,
-            0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U,
-            0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U,
-            0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U
-        };
-        static const size_t count = sizeof(inData) / (4 * sizeof(inData[0]));
-        const float *inp = (const float *)inData;
+        // Some numbers near 0.5f, that we look at to see how the values are rounded.
+        static const cl_uint  inData[4*4] = {   0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U, 0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U,
+                                                0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U, 0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U    };
+        static const size_t count = sizeof( inData ) / (4*sizeof( inData[0] ));
+        const float *inp = (const float*) inData;
         cl_context context = NULL;
 
-        // Create an input buffer
-        err = clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(context),
-                                    &context, NULL);
-        if (err)
+    // Create an input buffer
+        err = clGetCommandQueueInfo( q, CL_QUEUE_CONTEXT, sizeof(context), &context, NULL );
+        if( err )
         {
-            log_error("Error:  could not get context from command queue in "
-                      "DetectFloatToHalfRoundingMode  (%d)",
-                      err);
+            log_error( "Error:  could not get context from command queue in DetectFloatToHalfRoundingMode  (%d)", err );
             return err;
         }
 
-        cl_mem inBuf = clCreateBuffer(context,
-                                      CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR
-                                          | CL_MEM_ALLOC_HOST_PTR,
-                                      sizeof(inData), (void *)inData, &err);
-        if (NULL == inBuf || err)
+        cl_mem inBuf = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, sizeof( inData ), (void*) inData, &err );
+        if( NULL == inBuf || err )
         {
-            log_error("Error:  could not create input buffer in "
-                      "DetectFloatToHalfRoundingMode  (err: %d)",
-                      err);
+            log_error( "Error:  could not create input buffer in DetectFloatToHalfRoundingMode  (err: %d)", err );
             return err;
         }
 
-        // Create a small output image
+    // Create a small output image
         cl_image_format fmt = { CL_RGBA, CL_HALF_FLOAT };
-        cl_mem outImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &fmt,
-                                          count, 1, 0, NULL, &err);
-        if (NULL == outImage || err)
+        cl_mem outImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &fmt, count, 1, 0, NULL, &err );
+        if( NULL == outImage || err )
         {
-            log_error("Error:  could not create half float out image in "
-                      "DetectFloatToHalfRoundingMode  (err: %d)",
-                      err);
-            clReleaseMemObject(inBuf);
+            log_error( "Error:  could not create half float out image in DetectFloatToHalfRoundingMode  (err: %d)", err );
+            clReleaseMemObject( inBuf );
             return err;
         }
 
-        // Create our program, and a kernel
-        const char *kernelSource[1] = {
-            "kernel void detect_round( global float4 *in, write_only image2d_t "
-            "out )\n"
+    // Create our program, and a kernel
+        const char *kernel[1] = {
+            "kernel void detect_round( global float4 *in, write_only image2d_t out )\n"
             "{\n"
-            "   write_imagef( out, (int2)(get_global_id(0),0), "
-            "in[get_global_id(0)] );\n"
-            "}\n"
-        };
+            "   write_imagef( out, (int2)(get_global_id(0),0), in[get_global_id(0)] );\n"
+            "}\n" };
 
         clProgramWrapper program;
-        clKernelWrapper kernel;
-        err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                          kernelSource, "detect_round");
+        err = create_single_kernel_helper_create_program(context, &program, 1, kernel);
 
-        if (NULL == program || err)
+        if( NULL == program || err )
         {
-            log_error("Error:  could not create program in "
-                      "DetectFloatToHalfRoundingMode (err: %d)",
-                      err);
-            clReleaseMemObject(inBuf);
-            clReleaseMemObject(outImage);
+            log_error( "Error:  could not create program in DetectFloatToHalfRoundingMode (err: %d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
             return err;
         }
 
         cl_device_id device = NULL;
-        err = clGetCommandQueueInfo(q, CL_QUEUE_DEVICE, sizeof(device), &device,
-                                    NULL);
-        if (err)
+        err = clGetCommandQueueInfo( q, CL_QUEUE_DEVICE, sizeof(device), &device, NULL );
+        if( err )
         {
-            log_error("Error:  could not get device from command queue in "
-                      "DetectFloatToHalfRoundingMode  (%d)",
-                      err);
-            clReleaseMemObject(inBuf);
-            clReleaseMemObject(outImage);
+            log_error( "Error:  could not get device from command queue in DetectFloatToHalfRoundingMode  (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
             return err;
         }
 
-        err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inBuf);
-        if (err)
+        err = clBuildProgram( program, 1, &device, "", NULL, NULL );
+        if( err )
         {
-            log_error("Error: could not set argument 0 of kernel in "
-                      "DetectFloatToHalfRoundingMode (%d)",
-                      err);
-            clReleaseMemObject(inBuf);
-            clReleaseMemObject(outImage);
+            log_error( "Error:  could not build program in DetectFloatToHalfRoundingMode  (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
             return err;
         }
 
-        err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outImage);
-        if (err)
+        cl_kernel k = clCreateKernel( program, "detect_round", &err );
+        if( NULL == k || err )
         {
-            log_error("Error: could not set argument 1 of kernel in "
-                      "DetectFloatToHalfRoundingMode (%d)",
-                      err);
-            clReleaseMemObject(inBuf);
-            clReleaseMemObject(outImage);
+            log_error( "Error:  could not create kernel in DetectFloatToHalfRoundingMode  (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
             return err;
         }
 
-        // Run the kernel
+        err = clSetKernelArg( k, 0, sizeof( cl_mem ), &inBuf );
+        if( err )
+        {
+            log_error( "Error: could not set argument 0 of kernel in DetectFloatToHalfRoundingMode (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
+            clReleaseKernel( k );
+            return err;
+        }
+
+        err = clSetKernelArg( k, 1, sizeof( cl_mem ), &outImage );
+        if( err )
+        {
+            log_error( "Error: could not set argument 1 of kernel in DetectFloatToHalfRoundingMode (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
+            clReleaseKernel( k );
+            return err;
+        }
+
+    // Run the kernel
         size_t global_work_size = count;
-        err = clEnqueueNDRangeKernel(q, kernel, 1, NULL, &global_work_size,
-                                     NULL, 0, NULL, NULL);
-        if (err)
+        err = clEnqueueNDRangeKernel( q, k, 1, NULL, &global_work_size, NULL, 0, NULL, NULL );
+        if( err )
         {
-            log_error("Error: could not enqueue kernel in "
-                      "DetectFloatToHalfRoundingMode (%d)",
-                      err);
-            clReleaseMemObject(inBuf);
-            clReleaseMemObject(outImage);
+            log_error( "Error: could not enqueue kernel in DetectFloatToHalfRoundingMode (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
+            clReleaseKernel( k );
             return err;
         }
 
-        // read the results
-        cl_half outBuf[count * 4];
-        memset(outBuf, -1, sizeof(outBuf));
-        size_t origin[3] = { 0, 0, 0 };
-        size_t region[3] = { count, 1, 1 };
-        err = clEnqueueReadImage(q, outImage, CL_TRUE, origin, region, 0, 0,
-                                 outBuf, 0, NULL, NULL);
-        if (err)
+    // read the results
+        cl_ushort outBuf[count*4];
+        memset( outBuf, -1, sizeof( outBuf ) );
+        size_t origin[3] = {0,0,0};
+        size_t region[3] = {count,1,1};
+        err = clEnqueueReadImage( q, outImage, CL_TRUE, origin, region, 0, 0, outBuf, 0, NULL, NULL );
+        if( err )
         {
-            log_error("Error: could not read output image in "
-                      "DetectFloatToHalfRoundingMode (%d)",
-                      err);
-            clReleaseMemObject(inBuf);
-            clReleaseMemObject(outImage);
+            log_error( "Error: could not read output image in DetectFloatToHalfRoundingMode (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
+            clReleaseKernel( k );
             return err;
         }
 
-        // Generate our list of reference results
-        cl_half rte_ref[count * 4];
-        cl_half rtz_ref[count * 4];
-        for (size_t i = 0; i < 4 * count; i++)
+    // Generate our list of reference results
+        cl_ushort rte_ref[count*4];
+        cl_ushort rtz_ref[count*4];
+        for( size_t i = 0; i < 4 * count; i++ )
         {
-            rte_ref[i] = cl_half_from_float(inp[i], CL_HALF_RTE);
-            rtz_ref[i] = cl_half_from_float(inp[i], CL_HALF_RTZ);
+            rte_ref[i] = float2half_rte( inp[i] );
+            rtz_ref[i] = float2half_rtz( inp[i] );
         }
 
-        // Verify that we got something in either rtz or rte mode
-        if (0 == memcmp(rte_ref, outBuf, sizeof(rte_ref)))
+    // Verify that we got something in either rtz or rte mode
+        if( 0 == memcmp( rte_ref, outBuf, sizeof( rte_ref )) )
         {
-            log_info("Autodetected float->half rounding mode to be rte\n");
+            log_info( "Autodetected float->half rounding mode to be rte\n" );
             gFloatToHalfRoundingMode = kRoundToNearestEven;
         }
-        else if (0 == memcmp(rtz_ref, outBuf, sizeof(rtz_ref)))
+        else if ( 0 == memcmp( rtz_ref, outBuf, sizeof( rtz_ref )) )
         {
-            log_info("Autodetected float->half rounding mode to be rtz\n");
+            log_info( "Autodetected float->half rounding mode to be rtz\n" );
             gFloatToHalfRoundingMode = kRoundTowardZero;
         }
         else
         {
-            log_error("ERROR: float to half conversions proceed with invalid "
-                      "rounding mode!\n");
-            log_info("\nfor:");
-            for (size_t i = 0; i < count; i++)
-                log_info(" {%a, %a, %a, %a},", inp[4 * i], inp[4 * i + 1],
-                         inp[4 * i + 2], inp[4 * i + 3]);
-            log_info("\ngot:");
-            for (size_t i = 0; i < count; i++)
-                log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
-                         outBuf[4 * i], outBuf[4 * i + 1], outBuf[4 * i + 2],
-                         outBuf[4 * i + 3]);
-            log_info("\nrte:");
-            for (size_t i = 0; i < count; i++)
-                log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
-                         rte_ref[4 * i], rte_ref[4 * i + 1], rte_ref[4 * i + 2],
-                         rte_ref[4 * i + 3]);
-            log_info("\nrtz:");
-            for (size_t i = 0; i < count; i++)
-                log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
-                         rtz_ref[4 * i], rtz_ref[4 * i + 1], rtz_ref[4 * i + 2],
-                         rtz_ref[4 * i + 3]);
-            log_info("\n");
+            log_error( "ERROR: float to half conversions proceed with invalid rounding mode!\n" );
+            log_info( "\nfor:" );
+            for( size_t i = 0; i < count; i++ )
+                log_info( " {%a, %a, %a, %a},", inp[4*i], inp[4*i+1], inp[4*i+2], inp[4*i+3] );
+            log_info( "\ngot:" );
+            for( size_t i = 0; i < count; i++ )
+                log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", outBuf[4*i], outBuf[4*i+1], outBuf[4*i+2], outBuf[4*i+3] );
+            log_info( "\nrte:" );
+            for( size_t i = 0; i < count; i++ )
+                log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rte_ref[4*i], rte_ref[4*i+1], rte_ref[4*i+2], rte_ref[4*i+3] );
+            log_info( "\nrtz:" );
+            for( size_t i = 0; i < count; i++ )
+                log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rtz_ref[4*i], rtz_ref[4*i+1], rtz_ref[4*i+2], rtz_ref[4*i+3] );
+            log_info( "\n" );
             err = -1;
-            gFloatToHalfRoundingMode = kRoundingModeCount; // illegal value
+            gFloatToHalfRoundingMode = kRoundingModeCount;  // illegal value
         }
 
-        // clean up
-        clReleaseMemObject(inBuf);
-        clReleaseMemObject(outImage);
+    // clean up
+        clReleaseMemObject( inBuf );
+        clReleaseMemObject( outImage );
+        clReleaseKernel( k );
         return err;
     }
 
-    // Make sure that the rounding mode was successfully detected, if we checked
-    // earlier
-    if (gFloatToHalfRoundingMode != kRoundToNearestEven
-        && gFloatToHalfRoundingMode != kRoundTowardZero)
+    // Make sure that the rounding mode was successfully detected, if we checked earlier
+    if( gFloatToHalfRoundingMode != kRoundToNearestEven && gFloatToHalfRoundingMode != kRoundTowardZero)
         return -2;
 
     return err;
 }
 
-char *create_random_image_data(ExplicitType dataType,
-                               image_descriptor *imageInfo,
-                               BufferOwningPtr<char> &P, MTdata d,
-                               bool image2DFromBuffer)
+char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d, bool image2DFromBuffer )
 {
-    size_t allocSize, numPixels;
-    if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
-    {
-        allocSize = (size_t)(compute_mipmapped_image_size(*imageInfo) * 4
-                             * get_explicit_type_size(dataType))
-            / get_pixel_size(imageInfo->format);
-        numPixels = allocSize / (get_explicit_type_size(dataType) * 4);
-    }
-    else
-    {
-        numPixels = (image2DFromBuffer ? imageInfo->rowPitch : imageInfo->width)
-            * imageInfo->height * (imageInfo->depth ? imageInfo->depth : 1)
-            * (imageInfo->arraySize ? imageInfo->arraySize : 1);
-        allocSize = numPixels * 4 * get_explicit_type_size(dataType);
-    }
+  size_t allocSize, numPixels;
+  if ( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
+  {
+    allocSize = (size_t) (compute_mipmapped_image_size(*imageInfo) * 4 * get_explicit_type_size( dataType ))/get_pixel_size(imageInfo->format);
+    numPixels = allocSize / (get_explicit_type_size( dataType ) * 4);
+  }
+  else
+  {
+    numPixels = (image2DFromBuffer? imageInfo->rowPitch: imageInfo->width) * imageInfo->height
+      * (imageInfo->depth ? imageInfo->depth : 1)
+      * (imageInfo->arraySize ? imageInfo->arraySize : 1);
+    allocSize = numPixels * 4 * get_explicit_type_size( dataType );
+  }
 
 #if 0 // DEBUG
     {
@@ -3192,201 +3150,199 @@
     }
 #endif
 
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
     char *data = NULL;
-    if (gDeviceType == CL_DEVICE_TYPE_CPU)
-    {
-        size_t mapSize =
-            ((allocSize + 4095L) & -4096L) + 8192; // alloc two extra pages.
+    if (gDeviceType == CL_DEVICE_TYPE_CPU) {
+      size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192; // alloc two extra pages.
 
-        void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE,
-                         MAP_ANON | MAP_PRIVATE, 0, 0);
-        if (map == MAP_FAILED)
-        {
-            perror("create_random_image_data: mmap");
-            log_error("%s:%d: mmap failed, mapSize = %zu\n", __FILE__, __LINE__,
-                      mapSize);
-        }
-        intptr_t data_end = (intptr_t)map + mapSize - 4096;
-        data = (char *)(data_end - (intptr_t)allocSize);
+      void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+      if (map == MAP_FAILED)
+      {
+        perror("create_random_image_data: mmap");
+        log_error("%s:%d: mmap failed, mapSize = %zu\n",__FILE__,__LINE__,mapSize);
+      }
+      intptr_t data_end = (intptr_t)map + mapSize - 4096;
+      data = (char *)(data_end - (intptr_t)allocSize);
 
-        mprotect(map, 4096, PROT_NONE);
-        mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
-        P.reset(data, map, mapSize);
-    }
-    else
-    {
-        data = (char *)malloc(allocSize);
-        P.reset(data);
+      mprotect(map, 4096, PROT_NONE);
+      mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
+      P.reset(data, map, mapSize);
+    } else {
+      data = (char *)malloc(allocSize);
+      P.reset(data);
     }
 #else
-    char *data =
-        (char *)align_malloc(allocSize, get_pixel_alignment(imageInfo->format));
-    P.reset(data, NULL, 0, allocSize, true);
+    char *data = (char *)align_malloc(allocSize, get_pixel_size(imageInfo->format));
+    P.reset(data,NULL,0,allocSize,true);
 #endif
 
-    if (data == NULL)
-    {
-        log_error(
-            "ERROR: Unable to malloc %lu bytes for create_random_image_data\n",
-            allocSize);
+    if (data == NULL) {
+        log_error( "ERROR: Unable to malloc %lu bytes for create_random_image_data\n", allocSize );
         return NULL;
     }
 
-    switch (dataType)
+    switch( dataType )
     {
-        case kFloat: {
+        case kFloat:
+        {
             float *inputValues = (float *)data;
             switch (imageInfo->format->image_channel_data_type)
             {
-                case CL_HALF_FLOAT: {
-                    // Generate data that is (mostly) inside the range of a half
-                    // float const float HALF_MIN = 5.96046448e-08f;
-                    const float HALF_MAX = 65504.0f;
-
-                    size_t i = 0;
-                    inputValues[i++] = 0.f;
-                    inputValues[i++] = 1.f;
-                    inputValues[i++] = -1.f;
-                    inputValues[i++] = 2.f;
-                    for (; i < numPixels * 4; i++)
-                        inputValues[i] = get_random_float(-HALF_MAX - 2.f,
-                                                          HALF_MAX + 2.f, d);
-                }
-                break;
-#ifdef CL_SFIXED14_APPLE
-                case CL_SFIXED14_APPLE: {
-                    size_t i = 0;
-                    if (numPixels * 4 >= 8)
+                case CL_HALF_FLOAT:
                     {
-                        inputValues[i++] = INFINITY;
-                        inputValues[i++] = 0x1.0p14f;
-                        inputValues[i++] = 0x1.0p31f;
-                        inputValues[i++] = 0x1.0p32f;
-                        inputValues[i++] = -INFINITY;
-                        inputValues[i++] = -0x1.0p14f;
-                        inputValues[i++] = -0x1.0p31f;
-                        inputValues[i++] = -0x1.1p31f;
+                        // Generate data that is (mostly) inside the range of a half float
+                        // const float HALF_MIN = 5.96046448e-08f;
+                        const float HALF_MAX = 65504.0f;
+
+                        size_t i = 0;
+                        inputValues[ i++ ] = 0.f;
+                        inputValues[ i++ ] = 1.f;
+                        inputValues[ i++ ] = -1.f;
+                        inputValues[ i++ ] = 2.f;
+                        for( ; i < numPixels * 4; i++ )
+                            inputValues[ i ] = get_random_float( -HALF_MAX - 2.f, HALF_MAX + 2.f, d );
                     }
-                    for (; i < numPixels * 4; i++)
-                        inputValues[i] = get_random_float(-1.1f, 3.1f, d);
-                }
-                break;
+                    break;
+#ifdef CL_SFIXED14_APPLE
+                case CL_SFIXED14_APPLE:
+                    {
+                        size_t i = 0;
+                        if( numPixels * 4 >= 8 )
+                        {
+                            inputValues[ i++ ] = INFINITY;
+                            inputValues[ i++ ] = 0x1.0p14f;
+                            inputValues[ i++ ] = 0x1.0p31f;
+                            inputValues[ i++ ] = 0x1.0p32f;
+                            inputValues[ i++ ] = -INFINITY;
+                            inputValues[ i++ ] = -0x1.0p14f;
+                            inputValues[ i++ ] = -0x1.0p31f;
+                            inputValues[ i++ ] = -0x1.1p31f;
+                        }
+                        for( ; i < numPixels * 4; i++ )
+                            inputValues[ i ] = get_random_float( -1.1f, 3.1f, d );
+                    }
+                    break;
 #endif
-                case CL_FLOAT: {
-                    size_t i = 0;
-                    inputValues[i++] = INFINITY;
-                    inputValues[i++] = -INFINITY;
-                    inputValues[i++] = 0.0f;
-                    inputValues[i++] = 0.0f;
-                    cl_uint *p = (cl_uint *)data;
-                    for (; i < numPixels * 4; i++) p[i] = genrand_int32(d);
-                }
-                break;
+                case CL_FLOAT:
+                    {
+                        size_t i = 0;
+                        inputValues[ i++ ] = INFINITY;
+                        inputValues[ i++ ] = -INFINITY;
+                        inputValues[ i++ ] = 0.0f;
+                        inputValues[ i++ ] = 0.0f;
+                        cl_uint *p = (cl_uint *)data;
+                        for( ; i < numPixels * 4; i++ )
+                            p[ i ] = genrand_int32(d);
+                    }
+                    break;
 
                 default:
                     size_t i = 0;
-                    if (numPixels * 4 >= 36)
+                    if( numPixels * 4 >= 36 )
                     {
-                        inputValues[i++] = 0.0f;
-                        inputValues[i++] = 0.5f;
-                        inputValues[i++] = 31.5f;
-                        inputValues[i++] = 32.0f;
-                        inputValues[i++] = 127.5f;
-                        inputValues[i++] = 128.0f;
-                        inputValues[i++] = 255.5f;
-                        inputValues[i++] = 256.0f;
-                        inputValues[i++] = 1023.5f;
-                        inputValues[i++] = 1024.0f;
-                        inputValues[i++] = 32767.5f;
-                        inputValues[i++] = 32768.0f;
-                        inputValues[i++] = 65535.5f;
-                        inputValues[i++] = 65536.0f;
-                        inputValues[i++] = 2147483648.0f;
-                        inputValues[i++] = 4294967296.0f;
-                        inputValues[i++] = MAKE_HEX_FLOAT(0x1.0p63f, 1, 63);
-                        inputValues[i++] = MAKE_HEX_FLOAT(0x1.0p64f, 1, 64);
-                        inputValues[i++] = -0.0f;
-                        inputValues[i++] = -0.5f;
-                        inputValues[i++] = -31.5f;
-                        inputValues[i++] = -32.0f;
-                        inputValues[i++] = -127.5f;
-                        inputValues[i++] = -128.0f;
-                        inputValues[i++] = -255.5f;
-                        inputValues[i++] = -256.0f;
-                        inputValues[i++] = -1023.5f;
-                        inputValues[i++] = -1024.0f;
-                        inputValues[i++] = -32767.5f;
-                        inputValues[i++] = -32768.0f;
-                        inputValues[i++] = -65535.5f;
-                        inputValues[i++] = -65536.0f;
-                        inputValues[i++] = -2147483648.0f;
-                        inputValues[i++] = -4294967296.0f;
-                        inputValues[i++] = -MAKE_HEX_FLOAT(0x1.0p63f, 1, 63);
-                        inputValues[i++] = -MAKE_HEX_FLOAT(0x1.0p64f, 1, 64);
+                        inputValues[ i++ ] = 0.0f;
+                        inputValues[ i++ ] = 0.5f;
+                        inputValues[ i++ ] = 31.5f;
+                        inputValues[ i++ ] = 32.0f;
+                        inputValues[ i++ ] = 127.5f;
+                        inputValues[ i++ ] = 128.0f;
+                        inputValues[ i++ ] = 255.5f;
+                        inputValues[ i++ ] = 256.0f;
+                        inputValues[ i++ ] = 1023.5f;
+                        inputValues[ i++ ] = 1024.0f;
+                        inputValues[ i++ ] = 32767.5f;
+                        inputValues[ i++ ] = 32768.0f;
+                        inputValues[ i++ ] = 65535.5f;
+                        inputValues[ i++ ] = 65536.0f;
+                        inputValues[ i++ ] = 2147483648.0f;
+                        inputValues[ i++ ] = 4294967296.0f;
+                        inputValues[ i++ ] = MAKE_HEX_FLOAT( 0x1.0p63f, 1, 63 );
+                        inputValues[ i++ ] = MAKE_HEX_FLOAT( 0x1.0p64f, 1, 64 );
+                        inputValues[ i++ ] = -0.0f;
+                        inputValues[ i++ ] = -0.5f;
+                        inputValues[ i++ ] = -31.5f;
+                        inputValues[ i++ ] = -32.0f;
+                        inputValues[ i++ ] = -127.5f;
+                        inputValues[ i++ ] = -128.0f;
+                        inputValues[ i++ ] = -255.5f;
+                        inputValues[ i++ ] = -256.0f;
+                        inputValues[ i++ ] = -1023.5f;
+                        inputValues[ i++ ] = -1024.0f;
+                        inputValues[ i++ ] = -32767.5f;
+                        inputValues[ i++ ] = -32768.0f;
+                        inputValues[ i++ ] = -65535.5f;
+                        inputValues[ i++ ] = -65536.0f;
+                        inputValues[ i++ ] = -2147483648.0f;
+                        inputValues[ i++ ] = -4294967296.0f;
+                        inputValues[ i++ ] = -MAKE_HEX_FLOAT( 0x1.0p63f, 1, 63 );
+                        inputValues[ i++ ] = -MAKE_HEX_FLOAT( 0x1.0p64f, 1, 64 );
                     }
-                    if (is_format_signed(imageInfo->format))
+                    if( is_format_signed(imageInfo->format) )
                     {
-                        for (; i < numPixels * 4; i++)
-                            inputValues[i] = get_random_float(-1.1f, 1.1f, d);
+                        for( ; i < numPixels * 4; i++ )
+                            inputValues[ i ] = get_random_float( -1.1f, 1.1f, d );
                     }
                     else
                     {
-                        for (; i < numPixels * 4; i++)
-                            inputValues[i] = get_random_float(-0.1f, 1.1f, d);
+                        for( ; i < numPixels * 4; i++ )
+                            inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
                     }
                     break;
             }
             break;
         }
 
-        case kInt: {
+        case kInt:
+        {
             int *imageData = (int *)data;
 
             // We want to generate ints (mostly) in range of the target format
-            int formatMin = get_format_min_int(imageInfo->format);
-            size_t formatMax = get_format_max_int(imageInfo->format);
-            if (formatMin == 0)
+            int formatMin = get_format_min_int( imageInfo->format );
+            size_t formatMax = get_format_max_int( imageInfo->format );
+            if( formatMin == 0 )
             {
-                // Unsigned values, but we are only an int, so cap the actual
-                // max at the max of signed ints
-                if (formatMax > 2147483647L) formatMax = 2147483647L;
+                // Unsigned values, but we are only an int, so cap the actual max at the max of signed ints
+                if( formatMax > 2147483647L )
+                    formatMax = 2147483647L;
             }
-            // If the final format is small enough, give us a bit of room for
-            // out-of-range values to test
-            if (formatMax < 2147483647L) formatMax += 2;
-            if (formatMin > -2147483648LL) formatMin -= 2;
+            // If the final format is small enough, give us a bit of room for out-of-range values to test
+            if( formatMax < 2147483647L )
+                formatMax += 2;
+            if( formatMin > -2147483648LL )
+                formatMin -= 2;
 
             // Now gen
-            for (size_t i = 0; i < numPixels * 4; i++)
+            for( size_t i = 0; i < numPixels * 4; i++ )
             {
-                imageData[i] = random_in_range(formatMin, (int)formatMax, d);
+                imageData[ i ] = random_in_range( formatMin, (int)formatMax, d );
             }
             break;
         }
 
         case kUInt:
-        case kUnsignedInt: {
+        case kUnsignedInt:
+        {
             unsigned int *imageData = (unsigned int *)data;
 
             // We want to generate ints (mostly) in range of the target format
-            int formatMin = get_format_min_int(imageInfo->format);
-            size_t formatMax = get_format_max_int(imageInfo->format);
-            if (formatMin < 0) formatMin = 0;
-            // If the final format is small enough, give us a bit of room for
-            // out-of-range values to test
-            if (formatMax < 4294967295LL) formatMax += 2;
+            int formatMin = get_format_min_int( imageInfo->format );
+            size_t formatMax = get_format_max_int( imageInfo->format );
+            if( formatMin < 0 )
+                formatMin = 0;
+            // If the final format is small enough, give us a bit of room for out-of-range values to test
+            if( formatMax < 4294967295LL )
+                formatMax += 2;
 
             // Now gen
-            for (size_t i = 0; i < numPixels * 4; i++)
+            for( size_t i = 0; i < numPixels * 4; i++ )
             {
-                imageData[i] = random_in_range(formatMin, (int)formatMax, d);
+                imageData[ i ] = random_in_range( formatMin, (int)formatMax, d );
             }
             break;
         }
         default:
             // Unsupported source format
-            delete[] data;
+            delete [] data;
             return NULL;
     }
 
@@ -3395,8 +3351,7 @@
 
 /*
     deprecated
-bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t
-max, int &outValue )
+bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue )
 {
     int v = (int)value;
 
@@ -3416,8 +3371,8 @@
             return false;
 
         case CL_ADDRESS_MIRRORED_REPEAT:
-            log_info( "ERROR: unimplemented for CL_ADDRESS_MIRRORED_REPEAT. Do
-we ever use this? exit(-1);
+            log_info( "ERROR: unimplemented for CL_ADDRESS_MIRRORED_REPEAT. Do we ever use this?
+            exit(-1);
 
         default:
             if( v < 0 )
@@ -3437,203 +3392,170 @@
 }
 */
 
-void get_sampler_kernel_code(image_sampler_data *imageSampler, char *outLine)
+void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine )
 {
     const char *normalized;
     const char *addressMode;
     const char *filterMode;
 
-    if (imageSampler->addressing_mode == CL_ADDRESS_CLAMP)
+    if( imageSampler->addressing_mode == CL_ADDRESS_CLAMP )
         addressMode = "CLK_ADDRESS_CLAMP";
-    else if (imageSampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE)
+    else if( imageSampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE )
         addressMode = "CLK_ADDRESS_CLAMP_TO_EDGE";
-    else if (imageSampler->addressing_mode == CL_ADDRESS_REPEAT)
+    else if( imageSampler->addressing_mode == CL_ADDRESS_REPEAT )
         addressMode = "CLK_ADDRESS_REPEAT";
-    else if (imageSampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT)
+    else if( imageSampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT )
         addressMode = "CLK_ADDRESS_MIRRORED_REPEAT";
-    else if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
+    else if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
         addressMode = "CLK_ADDRESS_NONE";
     else
     {
-        log_error("**Error: Unknown addressing mode! Aborting...\n");
+        log_error( "**Error: Unknown addressing mode! Aborting...\n" );
         abort();
     }
 
-    if (imageSampler->normalized_coords)
+    if( imageSampler->normalized_coords )
         normalized = "CLK_NORMALIZED_COORDS_TRUE";
     else
         normalized = "CLK_NORMALIZED_COORDS_FALSE";
 
-    if (imageSampler->filter_mode == CL_FILTER_LINEAR)
+    if( imageSampler->filter_mode == CL_FILTER_LINEAR )
         filterMode = "CLK_FILTER_LINEAR";
     else
         filterMode = "CLK_FILTER_NEAREST";
 
-    sprintf(outLine, "    const sampler_t imageSampler = %s | %s | %s;\n",
-            addressMode, filterMode, normalized);
+    sprintf( outLine, "    const sampler_t imageSampler = %s | %s | %s;\n", addressMode, filterMode, normalized );
 }
 
-void copy_image_data(image_descriptor *srcImageInfo,
-                     image_descriptor *dstImageInfo, void *imageValues,
-                     void *destImageValues, const size_t sourcePos[],
-                     const size_t destPos[], const size_t regionSize[])
+void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues,
+                     const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] )
 {
-    //  assert( srcImageInfo->format == dstImageInfo->format );
+  //  assert( srcImageInfo->format == dstImageInfo->format );
 
-    size_t src_mip_level_offset = 0, dst_mip_level_offset = 0;
-    size_t sourcePos_lod[3], destPos_lod[3], src_lod, dst_lod;
-    size_t src_row_pitch_lod, src_slice_pitch_lod;
-    size_t dst_row_pitch_lod, dst_slice_pitch_lod;
+  size_t src_mip_level_offset = 0, dst_mip_level_offset = 0;
+  size_t sourcePos_lod[3], destPos_lod[3], src_lod, dst_lod;
+  size_t src_row_pitch_lod, src_slice_pitch_lod;
+  size_t dst_row_pitch_lod, dst_slice_pitch_lod;
 
-    size_t pixelSize = get_pixel_size(srcImageInfo->format);
+  size_t pixelSize = get_pixel_size( srcImageInfo->format );
 
-    sourcePos_lod[0] = sourcePos[0];
-    sourcePos_lod[1] = sourcePos[1];
-    sourcePos_lod[2] = sourcePos[2];
-    destPos_lod[0] = destPos[0];
-    destPos_lod[1] = destPos[1];
-    destPos_lod[2] = destPos[2];
-    src_row_pitch_lod = srcImageInfo->rowPitch;
-    dst_row_pitch_lod = dstImageInfo->rowPitch;
-    src_slice_pitch_lod = srcImageInfo->slicePitch;
-    dst_slice_pitch_lod = dstImageInfo->slicePitch;
+  sourcePos_lod[0] = sourcePos[0];
+  sourcePos_lod[1] = sourcePos[1];
+  sourcePos_lod[2] = sourcePos[2];
+  destPos_lod[0] = destPos[0];
+  destPos_lod[1] = destPos[1];
+  destPos_lod[2] = destPos[2];
+  src_row_pitch_lod = srcImageInfo->rowPitch;
+  dst_row_pitch_lod = dstImageInfo->rowPitch;
+  src_slice_pitch_lod = srcImageInfo->slicePitch;
+  dst_slice_pitch_lod = dstImageInfo->slicePitch;
 
-    if (srcImageInfo->num_mip_levels > 1)
+  if( srcImageInfo->num_mip_levels > 1)
+  {
+    size_t src_width_lod = 1/*srcImageInfo->width*/;
+    size_t src_height_lod = 1/*srcImageInfo->height*/;
+    size_t src_depth_lod = 1/*srcImageInfo->depth*/;
+
+    switch( srcImageInfo->type )
     {
-        size_t src_width_lod = 1 /*srcImageInfo->width*/;
-        size_t src_height_lod = 1 /*srcImageInfo->height*/;
-        size_t src_depth_lod = 1 /*srcImageInfo->depth*/;
+    case CL_MEM_OBJECT_IMAGE1D:
+      src_lod = sourcePos[1];
+      sourcePos_lod[1] = sourcePos_lod[2] = 0;
+      src_width_lod = (srcImageInfo->width >> src_lod ) ? ( srcImageInfo->width >> src_lod ): 1;
+      break;
+    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+    case CL_MEM_OBJECT_IMAGE2D:
+      src_lod = sourcePos[2];
+      sourcePos_lod[1] = sourcePos[1];
+      sourcePos_lod[2] = 0;
+      src_width_lod = (srcImageInfo->width >> src_lod ) ? ( srcImageInfo->width >> src_lod ): 1;
+      if( srcImageInfo->type == CL_MEM_OBJECT_IMAGE2D )
+        src_height_lod = (srcImageInfo->height >> src_lod ) ? ( srcImageInfo->height >> src_lod ): 1;
+      break;
+    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+    case CL_MEM_OBJECT_IMAGE3D:
+      src_lod = sourcePos[3];
+      sourcePos_lod[1] = sourcePos[1];
+      sourcePos_lod[2] = sourcePos[2];
+      src_width_lod = (srcImageInfo->width >> src_lod ) ? ( srcImageInfo->width >> src_lod ): 1;
+      src_height_lod = (srcImageInfo->height >> src_lod ) ? ( srcImageInfo->height >> src_lod ): 1;
+      if( srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D )
+        src_depth_lod = (srcImageInfo->depth >> src_lod ) ? ( srcImageInfo->depth >> src_lod ): 1;
+      break;
 
-        switch (srcImageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE1D:
-                src_lod = sourcePos[1];
-                sourcePos_lod[1] = sourcePos_lod[2] = 0;
-                src_width_lod = (srcImageInfo->width >> src_lod)
-                    ? (srcImageInfo->width >> src_lod)
-                    : 1;
-                break;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE2D:
-                src_lod = sourcePos[2];
-                sourcePos_lod[1] = sourcePos[1];
-                sourcePos_lod[2] = 0;
-                src_width_lod = (srcImageInfo->width >> src_lod)
-                    ? (srcImageInfo->width >> src_lod)
-                    : 1;
-                if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE2D)
-                    src_height_lod = (srcImageInfo->height >> src_lod)
-                        ? (srcImageInfo->height >> src_lod)
-                        : 1;
-                break;
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE3D:
-                src_lod = sourcePos[3];
-                sourcePos_lod[1] = sourcePos[1];
-                sourcePos_lod[2] = sourcePos[2];
-                src_width_lod = (srcImageInfo->width >> src_lod)
-                    ? (srcImageInfo->width >> src_lod)
-                    : 1;
-                src_height_lod = (srcImageInfo->height >> src_lod)
-                    ? (srcImageInfo->height >> src_lod)
-                    : 1;
-                if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
-                    src_depth_lod = (srcImageInfo->depth >> src_lod)
-                        ? (srcImageInfo->depth >> src_lod)
-                        : 1;
-                break;
-        }
-        src_mip_level_offset = compute_mip_level_offset(srcImageInfo, src_lod);
-        src_row_pitch_lod =
-            src_width_lod * get_pixel_size(srcImageInfo->format);
-        src_slice_pitch_lod = src_row_pitch_lod * src_height_lod;
+    }
+    src_mip_level_offset = compute_mip_level_offset( srcImageInfo, src_lod );
+    src_row_pitch_lod = src_width_lod * get_pixel_size( srcImageInfo->format );
+    src_slice_pitch_lod = src_row_pitch_lod * src_height_lod;
+  }
+
+  if( dstImageInfo->num_mip_levels > 1)
+  {
+    size_t dst_width_lod = 1/*dstImageInfo->width*/;
+    size_t dst_height_lod = 1/*dstImageInfo->height*/;
+    size_t dst_depth_lod = 1 /*dstImageInfo->depth*/;
+    switch( dstImageInfo->type )
+    {
+    case CL_MEM_OBJECT_IMAGE1D:
+      dst_lod = destPos[1];
+      destPos_lod[1] = destPos_lod[2] = 0;
+      dst_width_lod = (dstImageInfo->width >> dst_lod ) ? ( dstImageInfo->width >> dst_lod ): 1;
+      break;
+    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+    case CL_MEM_OBJECT_IMAGE2D:
+      dst_lod = destPos[2];
+      destPos_lod[1] = destPos[1];
+      destPos_lod[2] = 0;
+      dst_width_lod = (dstImageInfo->width >> dst_lod ) ? ( dstImageInfo->width >> dst_lod ): 1;
+      if( dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D )
+        dst_height_lod = (dstImageInfo->height >> dst_lod ) ? ( dstImageInfo->height >> dst_lod ): 1;
+      break;
+    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+    case CL_MEM_OBJECT_IMAGE3D:
+      dst_lod = destPos[3];
+      destPos_lod[1] = destPos[1];
+      destPos_lod[2] = destPos[2];
+      dst_width_lod = (dstImageInfo->width >> dst_lod ) ? ( dstImageInfo->width >> dst_lod ): 1;
+      dst_height_lod = (dstImageInfo->height >> dst_lod ) ? ( dstImageInfo->height >> dst_lod ): 1;
+      if( dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D )
+        dst_depth_lod = (dstImageInfo->depth >> dst_lod ) ? ( dstImageInfo->depth >> dst_lod ): 1;
+      break;
+
+    }
+    dst_mip_level_offset = compute_mip_level_offset( dstImageInfo, dst_lod );
+    dst_row_pitch_lod = dst_width_lod * get_pixel_size( dstImageInfo->format);
+    dst_slice_pitch_lod = dst_row_pitch_lod * dst_height_lod;
+  }
+
+  // Get initial pointers
+  char *sourcePtr = (char *)imageValues + sourcePos_lod[ 2 ] * src_slice_pitch_lod + sourcePos_lod[ 1 ] * src_row_pitch_lod + pixelSize * sourcePos_lod[ 0 ] + src_mip_level_offset;
+  char *destPtr = (char *)destImageValues + destPos_lod[ 2 ] * dst_slice_pitch_lod + destPos_lod[ 1 ] * dst_row_pitch_lod + pixelSize * destPos_lod[ 0 ] + dst_mip_level_offset;
+
+  for( size_t z = 0; z < ( regionSize[ 2 ] > 0 ? regionSize[ 2 ] : 1 ); z++ )
+  {
+    char *rowSourcePtr = sourcePtr;
+    char *rowDestPtr = destPtr;
+    for( size_t y = 0; y < regionSize[ 1 ]; y++ )
+    {
+      memcpy( rowDestPtr, rowSourcePtr, pixelSize * regionSize[ 0 ] );
+      rowSourcePtr += src_row_pitch_lod;
+      rowDestPtr += dst_row_pitch_lod;
     }
 
-    if (dstImageInfo->num_mip_levels > 1)
-    {
-        size_t dst_width_lod = 1 /*dstImageInfo->width*/;
-        size_t dst_height_lod = 1 /*dstImageInfo->height*/;
-        size_t dst_depth_lod = 1 /*dstImageInfo->depth*/;
-        switch (dstImageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE1D:
-                dst_lod = destPos[1];
-                destPos_lod[1] = destPos_lod[2] = 0;
-                dst_width_lod = (dstImageInfo->width >> dst_lod)
-                    ? (dstImageInfo->width >> dst_lod)
-                    : 1;
-                break;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE2D:
-                dst_lod = destPos[2];
-                destPos_lod[1] = destPos[1];
-                destPos_lod[2] = 0;
-                dst_width_lod = (dstImageInfo->width >> dst_lod)
-                    ? (dstImageInfo->width >> dst_lod)
-                    : 1;
-                if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D)
-                    dst_height_lod = (dstImageInfo->height >> dst_lod)
-                        ? (dstImageInfo->height >> dst_lod)
-                        : 1;
-                break;
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE3D:
-                dst_lod = destPos[3];
-                destPos_lod[1] = destPos[1];
-                destPos_lod[2] = destPos[2];
-                dst_width_lod = (dstImageInfo->width >> dst_lod)
-                    ? (dstImageInfo->width >> dst_lod)
-                    : 1;
-                dst_height_lod = (dstImageInfo->height >> dst_lod)
-                    ? (dstImageInfo->height >> dst_lod)
-                    : 1;
-                if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
-                    dst_depth_lod = (dstImageInfo->depth >> dst_lod)
-                        ? (dstImageInfo->depth >> dst_lod)
-                        : 1;
-                break;
-        }
-        dst_mip_level_offset = compute_mip_level_offset(dstImageInfo, dst_lod);
-        dst_row_pitch_lod =
-            dst_width_lod * get_pixel_size(dstImageInfo->format);
-        dst_slice_pitch_lod = dst_row_pitch_lod * dst_height_lod;
-    }
-
-    // Get initial pointers
-    char *sourcePtr = (char *)imageValues
-        + sourcePos_lod[2] * src_slice_pitch_lod
-        + sourcePos_lod[1] * src_row_pitch_lod + pixelSize * sourcePos_lod[0]
-        + src_mip_level_offset;
-    char *destPtr = (char *)destImageValues
-        + destPos_lod[2] * dst_slice_pitch_lod
-        + destPos_lod[1] * dst_row_pitch_lod + pixelSize * destPos_lod[0]
-        + dst_mip_level_offset;
-
-    for (size_t z = 0; z < (regionSize[2] > 0 ? regionSize[2] : 1); z++)
-    {
-        char *rowSourcePtr = sourcePtr;
-        char *rowDestPtr = destPtr;
-        for (size_t y = 0; y < regionSize[1]; y++)
-        {
-            memcpy(rowDestPtr, rowSourcePtr, pixelSize * regionSize[0]);
-            rowSourcePtr += src_row_pitch_lod;
-            rowDestPtr += dst_row_pitch_lod;
-        }
-
-        sourcePtr += src_slice_pitch_lod;
-        destPtr += dst_slice_pitch_lod;
-    }
+    sourcePtr += src_slice_pitch_lod;
+    destPtr += dst_slice_pitch_lod;
+  }
 }
 
 float random_float(float low, float high, MTdata d)
 {
-    float t = (float)genrand_real1(d);
+    float t = (float) genrand_real1(d);
     return (1.0f - t) * low + t * high;
 }
 
-CoordWalker::CoordWalker(void *coords, bool useFloats, size_t vecSize)
+CoordWalker::CoordWalker( void * coords, bool useFloats, size_t vecSize )
 {
-    if (useFloats)
+    if( useFloats )
     {
         mFloatCoords = (cl_float *)coords;
         mIntCoords = NULL;
@@ -3646,470 +3568,380 @@
     mVecSize = vecSize;
 }
 
-CoordWalker::~CoordWalker() {}
-
-cl_float CoordWalker::Get(size_t idx, size_t el)
+CoordWalker::~CoordWalker()
 {
-    if (mIntCoords != NULL)
-        return (cl_float)mIntCoords[idx * mVecSize + el];
+}
+
+cl_float CoordWalker::Get( size_t idx, size_t el )
+{
+    if( mIntCoords != NULL )
+        return (cl_float)mIntCoords[ idx * mVecSize + el ];
     else
-        return mFloatCoords[idx * mVecSize + el];
+        return mFloatCoords[ idx * mVecSize + el ];
 }
 
 
-void print_read_header(const cl_image_format *format,
-                       image_sampler_data *sampler, bool err, int t)
+void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err, int t )
 {
     const char *addressMode = NULL;
     const char *normalizedNames[2] = { "UNNORMALIZED", "NORMALIZED" };
 
-    if (sampler->addressing_mode == CL_ADDRESS_CLAMP)
+    if( sampler->addressing_mode == CL_ADDRESS_CLAMP )
         addressMode = "CL_ADDRESS_CLAMP";
-    else if (sampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE)
+    else if( sampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE )
         addressMode = "CL_ADDRESS_CLAMP_TO_EDGE";
-    else if (sampler->addressing_mode == CL_ADDRESS_REPEAT)
+    else if( sampler->addressing_mode == CL_ADDRESS_REPEAT )
         addressMode = "CL_ADDRESS_REPEAT";
-    else if (sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT)
+    else if( sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT )
         addressMode = "CL_ADDRESS_MIRRORED_REPEAT";
     else
         addressMode = "CL_ADDRESS_NONE";
 
-    if (t)
+    if( t )
     {
-        if (err)
-            log_error("[%-7s %-24s %d] - %s - %s - %s - %s\n",
-                      GetChannelOrderName(format->image_channel_order),
-                      GetChannelTypeName(format->image_channel_data_type),
-                      (int)get_format_channel_count(format),
-                      sampler->filter_mode == CL_FILTER_NEAREST
-                          ? "CL_FILTER_NEAREST"
-                          : "CL_FILTER_LINEAR",
+        if( err )
+            log_error( "[%-7s %-24s %d] - %s - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ),
+                      GetChannelTypeName( format->image_channel_data_type ),
+                      (int)get_format_channel_count( format ),
+                      sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR",
                       addressMode,
                       normalizedNames[sampler->normalized_coords ? 1 : 0],
-                      t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED");
+                      t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED" );
         else
-            log_info("[%-7s %-24s %d] - %s - %s - %s - %s\n",
-                     GetChannelOrderName(format->image_channel_order),
-                     GetChannelTypeName(format->image_channel_data_type),
-                     (int)get_format_channel_count(format),
-                     sampler->filter_mode == CL_FILTER_NEAREST
-                         ? "CL_FILTER_NEAREST"
-                         : "CL_FILTER_LINEAR",
+            log_info( "[%-7s %-24s %d] - %s - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ),
+                     GetChannelTypeName( format->image_channel_data_type ),
+                     (int)get_format_channel_count( format ),
+                     sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR",
                      addressMode,
                      normalizedNames[sampler->normalized_coords ? 1 : 0],
-                     t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED");
+                     t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED" );
     }
     else
     {
-        if (err)
-            log_error("[%-7s %-24s %d] - %s - %s - %s\n",
-                      GetChannelOrderName(format->image_channel_order),
-                      GetChannelTypeName(format->image_channel_data_type),
-                      (int)get_format_channel_count(format),
-                      sampler->filter_mode == CL_FILTER_NEAREST
-                          ? "CL_FILTER_NEAREST"
-                          : "CL_FILTER_LINEAR",
+        if( err )
+            log_error( "[%-7s %-24s %d] - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ),
+                      GetChannelTypeName( format->image_channel_data_type ),
+                      (int)get_format_channel_count( format ),
+                      sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR",
                       addressMode,
-                      normalizedNames[sampler->normalized_coords ? 1 : 0]);
+                      normalizedNames[sampler->normalized_coords ? 1 : 0] );
         else
-            log_info("[%-7s %-24s %d] - %s - %s - %s\n",
-                     GetChannelOrderName(format->image_channel_order),
-                     GetChannelTypeName(format->image_channel_data_type),
-                     (int)get_format_channel_count(format),
-                     sampler->filter_mode == CL_FILTER_NEAREST
-                         ? "CL_FILTER_NEAREST"
-                         : "CL_FILTER_LINEAR",
+            log_info( "[%-7s %-24s %d] - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ),
+                     GetChannelTypeName( format->image_channel_data_type ),
+                     (int)get_format_channel_count( format ),
+                     sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR",
                      addressMode,
-                     normalizedNames[sampler->normalized_coords ? 1 : 0]);
+                     normalizedNames[sampler->normalized_coords ? 1 : 0] );
     }
+
 }
 
-void print_write_header(const cl_image_format *format, bool err = false)
+void print_write_header( cl_image_format *format, bool err = false)
 {
-    if (err)
-        log_error("[%-7s %-24s %d]\n",
-                  GetChannelOrderName(format->image_channel_order),
-                  GetChannelTypeName(format->image_channel_data_type),
-                  (int)get_format_channel_count(format));
+    if( err )
+        log_error( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ),
+                  GetChannelTypeName( format->image_channel_data_type ),
+                  (int)get_format_channel_count( format ) );
     else
-        log_info("[%-7s %-24s %d]\n",
-                 GetChannelOrderName(format->image_channel_order),
-                 GetChannelTypeName(format->image_channel_data_type),
-                 (int)get_format_channel_count(format));
+        log_info( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ),
+                 GetChannelTypeName( format->image_channel_data_type ),
+                 (int)get_format_channel_count( format ) );
 }
 
 
-void print_header(const cl_image_format *format, bool err = false)
+void print_header( cl_image_format *format, bool err = false )
 {
-    if (err)
-    {
-        log_error("[%-7s %-24s %d]\n",
-                  GetChannelOrderName(format->image_channel_order),
-                  GetChannelTypeName(format->image_channel_data_type),
-                  (int)get_format_channel_count(format));
-    }
-    else
-    {
-        log_info("[%-7s %-24s %d]\n",
-                 GetChannelOrderName(format->image_channel_order),
-                 GetChannelTypeName(format->image_channel_data_type),
-                 (int)get_format_channel_count(format));
+    if (err) {
+        log_error( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ),
+                  GetChannelTypeName( format->image_channel_data_type ),
+                  (int)get_format_channel_count( format ) );
+    } else {
+        log_info( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ),
+                 GetChannelTypeName( format->image_channel_data_type ),
+                 (int)get_format_channel_count( format ) );
     }
 }
 
-bool find_format(cl_image_format *formatList, unsigned int numFormats,
-                 cl_image_format *formatToFind)
+bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind )
 {
-    for (unsigned int i = 0; i < numFormats; i++)
+    for( unsigned int i = 0; i < numFormats; i++ )
     {
-        if (formatList[i].image_channel_order
-                == formatToFind->image_channel_order
-            && formatList[i].image_channel_data_type
-                == formatToFind->image_channel_data_type)
+        if( formatList[ i ].image_channel_order == formatToFind->image_channel_order &&
+           formatList[ i ].image_channel_data_type == formatToFind->image_channel_data_type )
             return true;
     }
     return false;
 }
 
-void build_required_image_formats(
-    cl_mem_flags flags, cl_mem_object_type image_type, cl_device_id device,
-    std::vector<cl_image_format> &formatsToSupport)
+void build_required_image_formats(cl_mem_flags flags,
+                                  cl_mem_object_type image_type,
+                                  cl_device_id device,
+                                  std::vector<cl_image_format>& formatsToSupport)
 {
-    formatsToSupport.clear();
+	Version version = get_device_cl_version(device);
 
-    // Minimum list of supported image formats for reading or writing (embedded
-    // profile)
-    static std::vector<cl_image_format> embeddedProfile_readOrWrite{
-        // clang-format off
-        { CL_RGBA, CL_UNORM_INT8 },
-        { CL_RGBA, CL_UNORM_INT16 },
-        { CL_RGBA, CL_SIGNED_INT8 },
-        { CL_RGBA, CL_SIGNED_INT16 },
-        { CL_RGBA, CL_SIGNED_INT32 },
-        { CL_RGBA, CL_UNSIGNED_INT8 },
-        { CL_RGBA, CL_UNSIGNED_INT16 },
-        { CL_RGBA, CL_UNSIGNED_INT32 },
-        { CL_RGBA, CL_HALF_FLOAT },
-        { CL_RGBA, CL_FLOAT },
-        // clang-format on
-    };
+	formatsToSupport.clear();
 
-    // Minimum list of required image formats for reading or writing
-    // num_channels, for all image types.
-    static std::vector<cl_image_format> fullProfile_readOrWrite{
-        // clang-format off
-        { CL_RGBA, CL_UNORM_INT8 },
-        { CL_RGBA, CL_UNORM_INT16 },
-        { CL_RGBA, CL_SIGNED_INT8 },
-        { CL_RGBA, CL_SIGNED_INT16 },
-        { CL_RGBA, CL_SIGNED_INT32 },
-        { CL_RGBA, CL_UNSIGNED_INT8 },
-        { CL_RGBA, CL_UNSIGNED_INT16 },
-        { CL_RGBA, CL_UNSIGNED_INT32 },
-        { CL_RGBA, CL_HALF_FLOAT },
-        { CL_RGBA, CL_FLOAT },
-        { CL_BGRA, CL_UNORM_INT8 },
-        // clang-format on
-    };
+	// Required embedded formats.
+	static std::vector<cl_image_format> embeddedProfReadOrWriteFormats
+	{
+		{ CL_RGBA, CL_UNORM_INT8 },
+		{ CL_RGBA, CL_UNORM_INT16 },
+		{ CL_RGBA, CL_SIGNED_INT8 },
+		{ CL_RGBA, CL_SIGNED_INT16 },
+		{ CL_RGBA, CL_SIGNED_INT32 },
+		{ CL_RGBA, CL_UNSIGNED_INT8 },
+		{ CL_RGBA, CL_UNSIGNED_INT16 },
+		{ CL_RGBA, CL_UNSIGNED_INT32 },
+		{ CL_RGBA, CL_HALF_FLOAT },
+		{ CL_RGBA, CL_FLOAT },
+	};
 
-    // Minimum list of supported image formats for reading or writing
-    // (OpenCL 2.0, 2.1, or 2.2), for all image types.
-    static std::vector<cl_image_format> fullProfile_2x_readOrWrite{
-        // clang-format off
-        { CL_R, CL_UNORM_INT8 },
-        { CL_R, CL_UNORM_INT16 },
-        { CL_R, CL_SNORM_INT8 },
-        { CL_R, CL_SNORM_INT16 },
-        { CL_R, CL_SIGNED_INT8 },
-        { CL_R, CL_SIGNED_INT16 },
-        { CL_R, CL_SIGNED_INT32 },
-        { CL_R, CL_UNSIGNED_INT8 },
-        { CL_R, CL_UNSIGNED_INT16 },
-        { CL_R, CL_UNSIGNED_INT32 },
-        { CL_R, CL_HALF_FLOAT },
-        { CL_R, CL_FLOAT },
-        { CL_RG, CL_UNORM_INT8 },
-        { CL_RG, CL_UNORM_INT16 },
-        { CL_RG, CL_SNORM_INT8 },
-        { CL_RG, CL_SNORM_INT16 },
-        { CL_RG, CL_SIGNED_INT8 },
-        { CL_RG, CL_SIGNED_INT16 },
-        { CL_RG, CL_SIGNED_INT32 },
-        { CL_RG, CL_UNSIGNED_INT8 },
-        { CL_RG, CL_UNSIGNED_INT16 },
-        { CL_RG, CL_UNSIGNED_INT32 },
-        { CL_RG, CL_HALF_FLOAT },
-        { CL_RG, CL_FLOAT },
-        { CL_RGBA, CL_UNORM_INT8 },
-        { CL_RGBA, CL_UNORM_INT16 },
-        { CL_RGBA, CL_SNORM_INT8 },
-        { CL_RGBA, CL_SNORM_INT16 },
-        { CL_RGBA, CL_SIGNED_INT8 },
-        { CL_RGBA, CL_SIGNED_INT16 },
-        { CL_RGBA, CL_SIGNED_INT32 },
-        { CL_RGBA, CL_UNSIGNED_INT8 },
-        { CL_RGBA, CL_UNSIGNED_INT16 },
-        { CL_RGBA, CL_UNSIGNED_INT32 },
-        { CL_RGBA, CL_HALF_FLOAT },
-        { CL_RGBA, CL_FLOAT },
-        { CL_BGRA, CL_UNORM_INT8 },
-        // clang-format on
-    };
+	/*
+		Required full profile formats.
+		This array does not contain any full profile
+		formats that have restrictions on when they
+		are required.
+	*/
+	static std::vector<cl_image_format> fullProfReadOrWriteFormats
+	{
+		{ CL_RGBA, CL_UNORM_INT8 },
+		{ CL_RGBA, CL_UNORM_INT16 },
+		{ CL_RGBA, CL_SIGNED_INT8 },
+		{ CL_RGBA, CL_SIGNED_INT16 },
+		{ CL_RGBA, CL_SIGNED_INT32 },
+		{ CL_RGBA, CL_UNSIGNED_INT8 },
+		{ CL_RGBA, CL_UNSIGNED_INT16 },
+		{ CL_RGBA, CL_UNSIGNED_INT32 },
+		{ CL_RGBA, CL_HALF_FLOAT },
+		{ CL_RGBA, CL_FLOAT },
+		{ CL_BGRA, CL_UNORM_INT8 },
+	};
 
-    // Conditional addition to the 2x readOrWrite table:
-    // Support for the CL_DEPTH image channel order is required only for 2D
-    // images and 2D image arrays.
-    static std::vector<cl_image_format> fullProfile_2x_readOrWrite_Depth{
-        // clang-format off
-        { CL_DEPTH, CL_UNORM_INT16 },
-        { CL_DEPTH, CL_FLOAT },
-        // clang-format on
-    };
+	/*
+		Required full profile formats specifically for 2.x.
+		This array does not contain any full profile
+		formats that have restrictions on when they
+		are required.
+	*/
+	static std::vector<cl_image_format> fullProf2XReadOrWriteFormats
+	{
+		{ CL_R, CL_UNORM_INT8 },
+		{ CL_R, CL_UNORM_INT16 },
+		{ CL_R, CL_SNORM_INT8 },
+		{ CL_R, CL_SNORM_INT16 },
+		{ CL_R, CL_SIGNED_INT8 },
+		{ CL_R, CL_SIGNED_INT16 },
+		{ CL_R, CL_SIGNED_INT32 },
+		{ CL_R, CL_UNSIGNED_INT8 },
+		{ CL_R, CL_UNSIGNED_INT16 },
+		{ CL_R, CL_UNSIGNED_INT32 },
+		{ CL_R, CL_HALF_FLOAT },
+		{ CL_R, CL_FLOAT },
+		{ CL_RG, CL_UNORM_INT8 },
+		{ CL_RG, CL_UNORM_INT16 },
+		{ CL_RG, CL_SNORM_INT8 },
+		{ CL_RG, CL_SNORM_INT16 },
+		{ CL_RG, CL_SIGNED_INT8 },
+		{ CL_RG, CL_SIGNED_INT16 },
+		{ CL_RG, CL_SIGNED_INT32 },
+		{ CL_RG, CL_UNSIGNED_INT8 },
+		{ CL_RG, CL_UNSIGNED_INT16 },
+		{ CL_RG, CL_UNSIGNED_INT32 },
+		{ CL_RG, CL_HALF_FLOAT },
+		{ CL_RG, CL_FLOAT },
+		{ CL_RGBA, CL_SNORM_INT8 },
+		{ CL_RGBA, CL_SNORM_INT16 },
+	};
 
-    // Conditional addition to the 2x readOrWrite table:
-    // Support for reading from the CL_sRGBA image channel order is optional for
-    // 1D image buffers. Support for writing to the CL_sRGBA image channel order
-    // is optional for all image types.
-    static std::vector<cl_image_format> fullProfile_2x_readOrWrite_srgb{
-        { CL_sRGBA, CL_UNORM_INT8 },
-    };
+	/*
+		Required full profile formats for CL_DEPTH
+		(specifically 2.x).
+		There are cases whereby the format isn't required.
+	*/
+	static std::vector<cl_image_format> fullProf2XReadOrWriteDepthFormats
+	{
+		{ CL_DEPTH, CL_UNORM_INT16 },
+		{ CL_DEPTH, CL_FLOAT },
+	};
 
-    // Minimum list of required image formats for reading and writing.
-    static std::vector<cl_image_format> fullProfile_readAndWrite{
-        // clang-format off
-        { CL_R, CL_UNORM_INT8 },
-        { CL_R, CL_SIGNED_INT8 },
-        { CL_R, CL_SIGNED_INT16 },
-        { CL_R, CL_SIGNED_INT32 },
-        { CL_R, CL_UNSIGNED_INT8 },
-        { CL_R, CL_UNSIGNED_INT16 },
-        { CL_R, CL_UNSIGNED_INT32 },
-        { CL_R, CL_HALF_FLOAT },
-        { CL_R, CL_FLOAT },
-        { CL_RGBA, CL_UNORM_INT8 },
-        { CL_RGBA, CL_SIGNED_INT8 },
-        { CL_RGBA, CL_SIGNED_INT16 },
-        { CL_RGBA, CL_SIGNED_INT32 },
-        { CL_RGBA, CL_UNSIGNED_INT8 },
-        { CL_RGBA, CL_UNSIGNED_INT16 },
-        { CL_RGBA, CL_UNSIGNED_INT32 },
-        { CL_RGBA, CL_HALF_FLOAT },
-        { CL_RGBA, CL_FLOAT },
-        // clang-format on
-    };
+	/*
+		Required full profile formats for CL_sRGB
+		(specifically 2.x).
+		There are cases whereby the format isn't required.
+	*/
+	static std::vector<cl_image_format> fullProf2XSRGBFormats
+	{
+		{ CL_sRGBA, CL_UNORM_INT8 },
+	};
 
-    // Embedded profile
-    if (gIsEmbedded)
-    {
-        copy(embeddedProfile_readOrWrite.begin(),
-             embeddedProfile_readOrWrite.end(),
-             back_inserter(formatsToSupport));
-    }
-    // Full profile
-    else
-    {
-        Version version = get_device_cl_version(device);
-        if (version < Version(2, 0) || version >= Version(3, 0))
-        {
-            // Full profile, OpenCL 1.2 or 3.0.
-            if (flags & CL_MEM_KERNEL_READ_AND_WRITE)
-            {
-                // Note: assumes that read-write images are supported!
-                copy(fullProfile_readAndWrite.begin(),
-                     fullProfile_readAndWrite.end(),
-                     back_inserter(formatsToSupport));
-            }
-            else
-            {
-                copy(fullProfile_readOrWrite.begin(),
-                     fullProfile_readOrWrite.end(),
-                     back_inserter(formatsToSupport));
-            }
-        }
-        else
-        {
-            // Full profile, OpenCL 2.0, 2.1, 2.2.
-            if (flags & CL_MEM_KERNEL_READ_AND_WRITE)
-            {
-                copy(fullProfile_readAndWrite.begin(),
-                     fullProfile_readAndWrite.end(),
-                     back_inserter(formatsToSupport));
-            }
-            else
-            {
-                copy(fullProfile_2x_readOrWrite.begin(),
-                     fullProfile_2x_readOrWrite.end(),
-                     back_inserter(formatsToSupport));
+	// Embedded profile
+	if (gIsEmbedded)
+	{
+		copy(embeddedProfReadOrWriteFormats.begin(),
+		     embeddedProfReadOrWriteFormats.end(),
+		     back_inserter(formatsToSupport));
+	}
+	// Full profile
+	else
+	{
+		copy(fullProfReadOrWriteFormats.begin(),
+		     fullProfReadOrWriteFormats.end(),
+		     back_inserter(formatsToSupport));
+	}
 
-                // Support for the CL_DEPTH image channel order is required only
-                // for 2D images and 2D image arrays.
-                if (image_type == CL_MEM_OBJECT_IMAGE2D
-                    || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
-                {
-                    copy(fullProfile_2x_readOrWrite_Depth.begin(),
-                         fullProfile_2x_readOrWrite_Depth.end(),
-                         back_inserter(formatsToSupport));
-                }
+	// Full profile, OpenCL 2.0, 2.1, 2.2
+	if (!gIsEmbedded && version >= Version(2, 0) && version <= Version(2, 2))
+	{
+		copy(fullProf2XReadOrWriteFormats.begin(),
+		     fullProf2XReadOrWriteFormats.end(),
+		     back_inserter(formatsToSupport));
 
-                // Support for reading from the CL_sRGBA image channel order is
-                // optional for 1D image buffers. Support for writing to the
-                // CL_sRGBA image channel order is optional for all image types.
-                if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER
-                    && flags == CL_MEM_READ_ONLY)
-                {
-                    copy(fullProfile_2x_readOrWrite_srgb.begin(),
-                         fullProfile_2x_readOrWrite_srgb.end(),
-                         back_inserter(formatsToSupport));
-                }
-            }
-        }
-    }
+		// Depth images are only required for 2DArray and 2D images
+		if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D)
+		{
+			copy(fullProf2XReadOrWriteDepthFormats.begin(),
+			     fullProf2XReadOrWriteDepthFormats.end(),
+			     back_inserter(formatsToSupport));
+		}
+
+		// sRGB is not required for 1DImage Buffers
+		if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)
+		{
+			// sRGB is only required for reading
+			if (flags == CL_MEM_READ_ONLY)
+			{
+				copy(fullProf2XSRGBFormats.begin(),
+				     fullProf2XSRGBFormats.end(),
+				     back_inserter(formatsToSupport));
+			}
+		}
+	}
 }
 
-bool is_image_format_required(cl_image_format format, cl_mem_flags flags,
+bool is_image_format_required(cl_image_format format,
+                              cl_mem_flags flags,
                               cl_mem_object_type image_type,
                               cl_device_id device)
 {
-    std::vector<cl_image_format> formatsToSupport;
-    build_required_image_formats(flags, image_type, device, formatsToSupport);
+	std::vector<cl_image_format> formatsToSupport;
+	build_required_image_formats(flags, image_type, device, formatsToSupport);
 
-    for (auto &formatItr : formatsToSupport)
-    {
-        if (formatItr.image_channel_order == format.image_channel_order
-            && formatItr.image_channel_data_type
-                == format.image_channel_data_type)
-        {
-            return true;
-        }
-    }
+	for (auto &formatItr: formatsToSupport)
+	{
+		if (formatItr.image_channel_order == format.image_channel_order &&
+		    formatItr.image_channel_data_type == format.image_channel_data_type)
+		{
+			return true;
+		}
+	}
 
-    return false;
+	return false;
 }
 
-cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth)
+cl_uint compute_max_mip_levels( size_t width, size_t height, size_t depth)
 {
-    cl_uint retMaxMipLevels = 0, max_dim = 0;
+  cl_uint retMaxMipLevels=0, max_dim = 0;
 
-    max_dim = width;
-    max_dim = height > max_dim ? height : max_dim;
-    max_dim = depth > max_dim ? depth : max_dim;
+  max_dim = width;
+  max_dim = height > max_dim ? height : max_dim;
+  max_dim = depth > max_dim ? depth : max_dim;
 
-    while (max_dim)
-    {
-        retMaxMipLevels++;
-        max_dim >>= 1;
-    }
-    return retMaxMipLevels;
+  while(max_dim) {
+    retMaxMipLevels++;
+    max_dim >>= 1;
+  }
+  return retMaxMipLevels;
 }
 
-cl_ulong compute_mipmapped_image_size(image_descriptor imageInfo)
+cl_ulong compute_mipmapped_image_size( image_descriptor imageInfo)
 {
-    cl_ulong retSize = 0;
-    size_t curr_width, curr_height, curr_depth, curr_array_size;
-    curr_width = imageInfo.width;
-    curr_height = imageInfo.height;
-    curr_depth = imageInfo.depth;
-    curr_array_size = imageInfo.arraySize;
+  cl_ulong retSize = 0;
+  size_t curr_width, curr_height, curr_depth, curr_array_size;
+  curr_width = imageInfo.width;
+  curr_height = imageInfo.height;
+  curr_depth = imageInfo.depth;
+  curr_array_size = imageInfo.arraySize;
 
-    for (int i = 0; i < (int)imageInfo.num_mip_levels; i++)
+  for (int i=0; i < (int) imageInfo.num_mip_levels; i++)
+  {
+    switch ( imageInfo.type )
     {
-        switch (imageInfo.type)
-        {
-            case CL_MEM_OBJECT_IMAGE3D:
-                retSize += (cl_ulong)curr_width * curr_height * curr_depth
-                    * get_pixel_size(imageInfo.format);
-                break;
-            case CL_MEM_OBJECT_IMAGE2D:
-                retSize += (cl_ulong)curr_width * curr_height
-                    * get_pixel_size(imageInfo.format);
-                break;
-            case CL_MEM_OBJECT_IMAGE1D:
-                retSize +=
-                    (cl_ulong)curr_width * get_pixel_size(imageInfo.format);
-                break;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                retSize += (cl_ulong)curr_width * curr_array_size
-                    * get_pixel_size(imageInfo.format);
-                break;
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                retSize += (cl_ulong)curr_width * curr_height * curr_array_size
-                    * get_pixel_size(imageInfo.format);
-                break;
-        }
-
-        switch (imageInfo.type)
-        {
-            case CL_MEM_OBJECT_IMAGE3D:
-                curr_depth = curr_depth >> 1 ? curr_depth >> 1 : 1;
-            case CL_MEM_OBJECT_IMAGE2D:
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                curr_height = curr_height >> 1 ? curr_height >> 1 : 1;
-            case CL_MEM_OBJECT_IMAGE1D:
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                curr_width = curr_width >> 1 ? curr_width >> 1 : 1;
-        }
+    case CL_MEM_OBJECT_IMAGE3D :
+      retSize += (cl_ulong)curr_width * curr_height * curr_depth * get_pixel_size(imageInfo.format);
+      break;
+    case CL_MEM_OBJECT_IMAGE2D :
+      retSize += (cl_ulong)curr_width * curr_height * get_pixel_size(imageInfo.format);
+      break;
+    case CL_MEM_OBJECT_IMAGE1D :
+      retSize += (cl_ulong)curr_width * get_pixel_size(imageInfo.format);
+      break;
+    case CL_MEM_OBJECT_IMAGE1D_ARRAY :
+      retSize += (cl_ulong)curr_width * curr_array_size * get_pixel_size(imageInfo.format);
+      break;
+    case CL_MEM_OBJECT_IMAGE2D_ARRAY :
+      retSize += (cl_ulong)curr_width * curr_height * curr_array_size * get_pixel_size(imageInfo.format);
+      break;
     }
 
-    return retSize;
+    switch ( imageInfo.type )
+    {
+    case CL_MEM_OBJECT_IMAGE3D :
+      curr_depth = curr_depth >> 1 ? curr_depth >> 1: 1;
+    case CL_MEM_OBJECT_IMAGE2D :
+    case CL_MEM_OBJECT_IMAGE2D_ARRAY :
+      curr_height = curr_height >> 1? curr_height >> 1 : 1;
+    case CL_MEM_OBJECT_IMAGE1D :
+    case CL_MEM_OBJECT_IMAGE1D_ARRAY :
+      curr_width = curr_width >> 1? curr_width >> 1 : 1;
+    }
+  }
+
+  return retSize;
 }
 
-size_t compute_mip_level_offset(image_descriptor *imageInfo, size_t lod)
+size_t compute_mip_level_offset( image_descriptor * imageInfo , size_t lod)
 {
-    size_t retOffset = 0;
-    size_t width, height, depth;
-    width = imageInfo->width;
-    height = imageInfo->height;
-    depth = imageInfo->depth;
+  size_t retOffset = 0;
+  size_t width, height,  depth;
+  width = imageInfo->width;
+  height = imageInfo->height;
+  depth = imageInfo->depth;
 
-    for (size_t i = 0; i < lod; i++)
+  for(size_t i=0; i < lod; i++)
+  {
+    switch(imageInfo->type)
     {
-        switch (imageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                retOffset += (size_t)width * height * imageInfo->arraySize
-                    * get_pixel_size(imageInfo->format);
-                break;
-            case CL_MEM_OBJECT_IMAGE3D:
-                retOffset += (size_t)width * height * depth
-                    * get_pixel_size(imageInfo->format);
-                break;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                retOffset += (size_t)width * imageInfo->arraySize
-                    * get_pixel_size(imageInfo->format);
-                break;
-            case CL_MEM_OBJECT_IMAGE2D:
-                retOffset +=
-                    (size_t)width * height * get_pixel_size(imageInfo->format);
-                break;
-            case CL_MEM_OBJECT_IMAGE1D:
-                retOffset += (size_t)width * get_pixel_size(imageInfo->format);
-                break;
-        }
-
-        // Compute next lod dimensions
-        switch (imageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE3D: depth = (depth >> 1) ? (depth >> 1) : 1;
-            case CL_MEM_OBJECT_IMAGE2D:
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                height = (height >> 1) ? (height >> 1) : 1;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE1D: width = (width >> 1) ? (width >> 1) : 1;
-        }
+    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+      retOffset += (size_t) width * height * imageInfo->arraySize * get_pixel_size( imageInfo->format );
+      break;
+    case CL_MEM_OBJECT_IMAGE3D:
+      retOffset += (size_t) width * height * depth * get_pixel_size( imageInfo->format );
+      break;
+    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+      retOffset += (size_t) width * imageInfo->arraySize * get_pixel_size( imageInfo->format );
+      break;
+    case CL_MEM_OBJECT_IMAGE2D:
+      retOffset += (size_t) width * height * get_pixel_size( imageInfo->format );
+      break;
+    case CL_MEM_OBJECT_IMAGE1D:
+      retOffset += (size_t) width * get_pixel_size( imageInfo->format );
+      break;
     }
-    return retOffset;
-}
 
-const char *convert_image_type_to_string(cl_mem_object_type image_type)
-{
-    switch (image_type)
+    // Compute next lod dimensions
+    switch(imageInfo->type)
     {
-        case CL_MEM_OBJECT_IMAGE1D: return "1D";
-        case CL_MEM_OBJECT_IMAGE2D: return "2D";
-        case CL_MEM_OBJECT_IMAGE3D: return "3D";
-        case CL_MEM_OBJECT_IMAGE1D_ARRAY: return "1D array";
-        case CL_MEM_OBJECT_IMAGE2D_ARRAY: return "2D array";
-        case CL_MEM_OBJECT_IMAGE1D_BUFFER: return "1D image buffer";
-        default: return "unrecognized object type";
+    case CL_MEM_OBJECT_IMAGE3D:
+      depth = ( depth >> 1 ) ? ( depth >> 1 ) : 1;
+    case CL_MEM_OBJECT_IMAGE2D:
+    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+      height = ( height >> 1 ) ? ( height >> 1 ) : 1;
+    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+    case CL_MEM_OBJECT_IMAGE1D:
+      width = ( width >> 1 ) ? ( width >> 1 ) : 1;
     }
+
+  }
+  return retOffset;
 }

diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h
index 848ec65..26c9760 100644
--- a/test_common/harness/imageHelpers.h
+++ b/test_common/harness/imageHelpers.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -41,13 +41,10 @@
 #include "rounding_mode.h"
 #include "clImageHelper.h"
 
-#include <CL/cl_half.h>
-
 extern cl_device_type gDeviceType;
 extern bool gTestRounding;
 
-// Number of iterations per image format to test if not testing max images,
-// rounding, or small images
+// Number of iterations per image format to test if not testing max images, rounding, or small images
 #define NUM_IMAGE_ITERATIONS 3
 
 
@@ -56,64 +53,51 @@
 #define MAX_lRGB_TO_sRGB_CONVERSION_ERROR 0.6
 
 // Definition for our own sampler type, to mirror the cl_sampler internals
-typedef struct
-{
-    cl_addressing_mode addressing_mode;
-    cl_filter_mode filter_mode;
-    bool normalized_coords;
+typedef struct {
+ cl_addressing_mode addressing_mode;
+ cl_filter_mode     filter_mode;
+ bool               normalized_coords;
 } image_sampler_data;
 
-int round_to_even(float v);
+int round_to_even( float v );
 
-#define NORMALIZE(v, max) (v < 0 ? 0 : (v > 1.f ? max : round_to_even(v * max)))
-#define NORMALIZE_UNROUNDED(v, max) (v < 0 ? 0 : (v > 1.f ? max : v * max))
-#define NORMALIZE_SIGNED(v, min, max)                                          \
-    (v < -1.0f ? min : (v > 1.f ? max : round_to_even(v * max)))
-#define NORMALIZE_SIGNED_UNROUNDED(v, min, max)                                \
-    (v < -1.0f ? min : (v > 1.f ? max : v * max))
-#define CONVERT_INT(v, min, max, max_val)                                      \
-    (v < min ? min : (v > max ? max_val : round_to_even(v)))
-#define CONVERT_UINT(v, max, max_val)                                          \
-    (v < 0 ? 0 : (v > max ? max_val : round_to_even(v)))
+#define NORMALIZE( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max : round_to_even( v * max ) ) )
+#define NORMALIZE_UNROUNDED( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max :  v * max ) )
+#define NORMALIZE_SIGNED( v, min, max ) ( v  < -1.0f ? min : ( v > 1.f ? max : round_to_even( v * max ) ) )
+#define NORMALIZE_SIGNED_UNROUNDED( v, min, max ) ( v  < -1.0f ? min : ( v > 1.f ? max : v * max ) )
+#define CONVERT_INT( v, min, max, max_val)  ( v < min ? min : ( v > max ? max_val : round_to_even( v ) ) )
+#define CONVERT_UINT( v, max, max_val)  ( v < 0 ? 0 : ( v > max ? max_val : round_to_even( v ) ) )
 
-extern void print_read_header(const cl_image_format *format,
-                              image_sampler_data *sampler, bool err = false,
-                              int t = 0);
-extern void print_write_header(const cl_image_format *format, bool err);
-extern void print_header(const cl_image_format *format, bool err);
-extern bool find_format(cl_image_format *formatList, unsigned int numFormats,
-                        cl_image_format *formatToFind);
-extern bool is_image_format_required(cl_image_format format, cl_mem_flags flags,
+extern void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err = false, int t = 0 );
+extern void print_write_header( cl_image_format *format, bool err);
+extern void print_header( cl_image_format *format, bool err );
+extern bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind );
+extern bool is_image_format_required(cl_image_format format,
+                                     cl_mem_flags flags,
                                      cl_mem_object_type image_type,
                                      cl_device_id device);
-extern void
-build_required_image_formats(cl_mem_flags flags, cl_mem_object_type image_type,
-                             cl_device_id device,
-                             std::vector<cl_image_format> &formatsToSupport);
+extern void build_required_image_formats(cl_mem_flags flags,
+                                         cl_mem_object_type image_type,
+                                         cl_device_id device,
+                                         std::vector<cl_image_format>& formatsToSupport);
 
-extern uint32_t get_format_type_size(const cl_image_format *format);
-extern uint32_t get_channel_data_type_size(cl_channel_type channelType);
-extern uint32_t get_format_channel_count(const cl_image_format *format);
-extern uint32_t get_channel_order_channel_count(cl_channel_order order);
-cl_channel_type get_channel_type_from_name(const char *name);
-cl_channel_order get_channel_order_from_name(const char *name);
-extern int is_format_signed(const cl_image_format *format);
-extern uint32_t get_pixel_size(const cl_image_format *format);
+extern size_t get_format_type_size( const cl_image_format *format );
+extern size_t get_channel_data_type_size( cl_channel_type channelType );
+extern size_t get_format_channel_count( const cl_image_format *format );
+extern size_t get_channel_order_channel_count( cl_channel_order order );
+cl_channel_type  get_channel_type_from_name( const char *name );
+cl_channel_order  get_channel_order_from_name( const char *name );
+extern int    is_format_signed( const cl_image_format *format );
+extern size_t get_pixel_size( cl_image_format *format );
 
 /* Helper to get any ol image format as long as it is 8-bits-per-channel */
-extern int get_8_bit_image_format(cl_context context,
-                                  cl_mem_object_type objType,
-                                  cl_mem_flags flags, size_t channelCount,
-                                  cl_image_format *outFormat);
+extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
 
 /* Helper to get any ol image format as long as it is 32-bits-per-channel */
-extern int get_32_bit_image_format(cl_context context,
-                                   cl_mem_object_type objType,
-                                   cl_mem_flags flags, size_t channelCount,
-                                   cl_image_format *outFormat);
+extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
 
-int random_in_range(int minV, int maxV, MTdata d);
-int random_log_in_range(int minV, int maxV, MTdata d);
+int random_in_range( int minV, int maxV, MTdata d );
+int random_log_in_range( int minV, int maxV, MTdata d );
 
 typedef struct
 {
@@ -123,7 +107,7 @@
     size_t rowPitch;
     size_t slicePitch;
     size_t arraySize;
-    const cl_image_format *format;
+    cl_image_format *format;
     cl_mem buffer;
     cl_mem_object_type type;
     cl_uint num_mip_levels;
@@ -132,394 +116,376 @@
 typedef struct
 {
     float p[4];
-} FloatPixel;
-
-void print_first_pixel_difference_error(size_t where, const char *sourcePixel,
-                                        const char *destPixel,
-                                        image_descriptor *imageInfo, size_t y,
-                                        size_t thirdDim);
+}FloatPixel;
 
 void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
-                   size_t sizes[][3], size_t maxWidth, size_t maxHeight,
-                   size_t maxDepth, size_t maxArraySize,
-                   const cl_ulong maxIndividualAllocSize,
-                   const cl_ulong maxTotalAllocSize,
-                   cl_mem_object_type image_type, const cl_image_format *format,
-                   int usingMaxPixelSize = 0);
-extern size_t get_format_max_int(const cl_image_format *format);
+                   size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
+                   const cl_ulong maxIndividualAllocSize, const cl_ulong maxTotalAllocSize, cl_mem_object_type image_type, cl_image_format *format, int usingMaxPixelSize=0);
+extern size_t get_format_max_int( cl_image_format *format );
 
-extern cl_ulong get_image_size(image_descriptor const *imageInfo);
-extern cl_ulong get_image_size_mb(image_descriptor const *imageInfo);
+extern cl_ulong get_image_size( image_descriptor const *imageInfo );
+extern cl_ulong get_image_size_mb( image_descriptor const *imageInfo );
 
-extern char *generate_random_image_data(image_descriptor *imageInfo,
-                                        BufferOwningPtr<char> &Owner, MTdata d);
+extern char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr<char> &Owner, MTdata d );
 
-extern int debug_find_vector_in_image(void *imagePtr,
-                                      image_descriptor *imageInfo,
-                                      void *vectorToFind, size_t vectorSize,
-                                      int *outX, int *outY, int *outZ,
-                                      size_t lod = 0);
+extern int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                      void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ, size_t lod = 0 );
 
-extern int debug_find_pixel_in_image(void *imagePtr,
-                                     image_descriptor *imageInfo,
-                                     unsigned int *valuesToFind, int *outX,
-                                     int *outY, int *outZ, int lod = 0);
-extern int debug_find_pixel_in_image(void *imagePtr,
-                                     image_descriptor *imageInfo,
-                                     int *valuesToFind, int *outX, int *outY,
-                                     int *outZ, int lod = 0);
-extern int debug_find_pixel_in_image(void *imagePtr,
-                                     image_descriptor *imageInfo,
-                                     float *valuesToFind, int *outX, int *outY,
-                                     int *outZ, int lod = 0);
+extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                     unsigned int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
+extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                     int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
+extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                     float *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
 
-extern void copy_image_data(image_descriptor *srcImageInfo,
-                            image_descriptor *dstImageInfo, void *imageValues,
-                            void *destImageValues, const size_t sourcePos[],
-                            const size_t destPos[], const size_t regionSize[]);
+extern void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues,
+                            const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] );
 
-int has_alpha(const cl_image_format *format);
+int has_alpha(cl_image_format *format);
 
 extern bool is_sRGBA_order(cl_channel_order image_channel_order);
 
-inline float calculate_array_index(float coord, float extent);
+inline float calculate_array_index( float coord, float extent );
 
-cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth);
-cl_ulong compute_mipmapped_image_size(image_descriptor imageInfo);
-size_t compute_mip_level_offset(image_descriptor *imageInfo, size_t lod);
+cl_uint compute_max_mip_levels( size_t width, size_t height, size_t depth);
+cl_ulong compute_mipmapped_image_size( image_descriptor imageInfo);
+size_t compute_mip_level_offset( image_descriptor * imageInfo , size_t lod);
 
-template <class T>
-void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x,
-                      int y, int z, T *outData, int lod)
+template <class T> void read_image_pixel( void *imageData, image_descriptor *imageInfo,
+                                         int x, int y, int z, T *outData, int lod )
 {
-    size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
-           depth_lod = imageInfo->depth,
-           slice_pitch_lod = 0 /*imageInfo->slicePitch*/,
-           row_pitch_lod = 0 /*imageInfo->rowPitch*/;
-    width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
+    float convert_half_to_float( unsigned short halfValue );
+    size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth, slice_pitch_lod = 0/*imageInfo->slicePitch*/ , row_pitch_lod = 0/*imageInfo->rowPitch*/;
+    width_lod = ( imageInfo->width >> lod) ?( imageInfo->width >> lod):1;
 
-    if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY
-        && imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
-        height_lod =
-            (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
+    if ( imageInfo->type  != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
+        height_lod = ( imageInfo->height >> lod) ?( imageInfo->height >> lod):1;
 
-    if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D)
-        depth_lod = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-    row_pitch_lod = (imageInfo->num_mip_levels > 0)
-        ? (width_lod * get_pixel_size(imageInfo->format))
-        : imageInfo->rowPitch;
-    slice_pitch_lod = (imageInfo->num_mip_levels > 0)
-        ? (row_pitch_lod * height_lod)
-        : imageInfo->slicePitch;
+    if(imageInfo->type == CL_MEM_OBJECT_IMAGE3D)
+       depth_lod = ( imageInfo->depth >> lod) ? ( imageInfo->depth >> lod) : 1;
+    row_pitch_lod = (imageInfo->num_mip_levels > 0)? (width_lod * get_pixel_size( imageInfo->format )): imageInfo->rowPitch;
+    slice_pitch_lod = (imageInfo->num_mip_levels > 0)? (row_pitch_lod * height_lod): imageInfo->slicePitch;
 
     // correct depth_lod and height_lod for array image types in order to avoid
     // return
-    if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY && height_lod == 1
-        && depth_lod == 1)
-    {
-        depth_lod = 0;
-        height_lod = 0;
+    if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY && height_lod == 1 && depth_lod == 1) {
+    depth_lod = 0;
+    height_lod = 0;
+
     }
 
-    if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && depth_lod == 1)
-    {
-        depth_lod = 0;
+    if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && depth_lod == 1) {
+      depth_lod = 0;
     }
 
-    if (x < 0 || x >= (int)width_lod
-        || (height_lod != 0 && (y < 0 || y >= (int)height_lod))
-        || (depth_lod != 0 && (z < 0 || z >= (int)depth_lod))
-        || (imageInfo->arraySize != 0
-            && (z < 0 || z >= (int)imageInfo->arraySize)))
+    if ( x < 0 || x >= (int)width_lod
+               || ( height_lod != 0 && ( y < 0 || y >= (int)height_lod ) )
+               || ( depth_lod != 0 && ( z < 0 || z >= (int)depth_lod ) )
+               || ( imageInfo->arraySize != 0 && ( z < 0 || z >= (int)imageInfo->arraySize ) ) )
     {
         // Border color
         if (imageInfo->format->image_channel_order == CL_DEPTH)
         {
-            outData[0] = 1;
+            outData[ 0 ] = 1;
         }
-        else
-        {
-            outData[0] = outData[1] = outData[2] = outData[3] = 0;
-            if (!has_alpha(imageInfo->format)) outData[3] = 1;
+        else {
+            outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
+            if (!has_alpha(imageInfo->format))
+                outData[3] = 1;
         }
         return;
     }
 
-    const cl_image_format *format = imageInfo->format;
+    cl_image_format *format = imageInfo->format;
 
     unsigned int i;
-    T tempData[4];
+    T tempData[ 4 ];
 
     // Advance to the right spot
     char *ptr = (char *)imageData;
-    size_t pixelSize = get_pixel_size(format);
+    size_t pixelSize = get_pixel_size( format );
 
     ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
 
     // OpenCL only supports reading floats from certain formats
-    switch (format->image_channel_data_type)
+    switch( format->image_channel_data_type )
     {
-        case CL_SNORM_INT8: {
+        case CL_SNORM_INT8:
+        {
             cl_char *dPtr = (cl_char *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
             break;
         }
 
-        case CL_UNORM_INT8: {
+        case CL_UNORM_INT8:
+        {
             cl_uchar *dPtr = (cl_uchar *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
             break;
         }
 
-        case CL_SIGNED_INT8: {
+        case CL_SIGNED_INT8:
+        {
             cl_char *dPtr = (cl_char *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
             break;
         }
 
-        case CL_UNSIGNED_INT8: {
-            cl_uchar *dPtr = (cl_uchar *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+        case CL_UNSIGNED_INT8:
+        {
+            cl_uchar *dPtr = (cl_uchar*)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
             break;
         }
 
-        case CL_SNORM_INT16: {
+        case CL_SNORM_INT16:
+        {
             cl_short *dPtr = (cl_short *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
             break;
         }
 
-        case CL_UNORM_INT16: {
+        case CL_UNORM_INT16:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
             break;
         }
 
-        case CL_SIGNED_INT16: {
+        case CL_SIGNED_INT16:
+        {
             cl_short *dPtr = (cl_short *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
             break;
         }
 
-        case CL_UNSIGNED_INT16: {
+        case CL_UNSIGNED_INT16:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
             break;
         }
 
-        case CL_HALF_FLOAT: {
-            cl_half *dPtr = (cl_half *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)cl_half_to_float(dPtr[i]);
+        case CL_HALF_FLOAT:
+        {
+            cl_ushort *dPtr = (cl_ushort *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)convert_half_to_float( dPtr[ i ] );
             break;
         }
 
-        case CL_SIGNED_INT32: {
+        case CL_SIGNED_INT32:
+        {
             cl_int *dPtr = (cl_int *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
             break;
         }
 
-        case CL_UNSIGNED_INT32: {
+        case CL_UNSIGNED_INT32:
+        {
             cl_uint *dPtr = (cl_uint *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
             break;
         }
 
-        case CL_UNORM_SHORT_565: {
-            cl_ushort *dPtr = (cl_ushort *)ptr;
-            tempData[0] = (T)(dPtr[0] >> 11);
-            tempData[1] = (T)((dPtr[0] >> 5) & 63);
-            tempData[2] = (T)(dPtr[0] & 31);
+        case CL_UNORM_SHORT_565:
+        {
+            cl_ushort *dPtr = (cl_ushort*)ptr;
+            tempData[ 0 ] = (T)( dPtr[ 0 ] >> 11 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
+            tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
             break;
         }
 
 #ifdef OBSOLETE_FORMAT
-        case CL_UNORM_SHORT_565_REV: {
+        case CL_UNORM_SHORT_565_REV:
+        {
             unsigned short *dPtr = (unsigned short *)ptr;
-            tempData[2] = (T)(dPtr[0] >> 11);
-            tempData[1] = (T)((dPtr[0] >> 5) & 63);
-            tempData[0] = (T)(dPtr[0] & 31);
+            tempData[ 2 ] = (T)( dPtr[ 0 ] >> 11 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
             break;
         }
 
-        case CL_UNORM_SHORT_555_REV: {
+        case CL_UNORM_SHORT_555_REV:
+        {
             unsigned short *dPtr = (unsigned short *)ptr;
-            tempData[2] = (T)((dPtr[0] >> 10) & 31);
-            tempData[1] = (T)((dPtr[0] >> 5) & 31);
-            tempData[0] = (T)(dPtr[0] & 31);
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
             break;
         }
 
-        case CL_UNORM_INT_8888: {
+        case CL_UNORM_INT_8888:
+        {
             unsigned int *dPtr = (unsigned int *)ptr;
-            tempData[3] = (T)(dPtr[0] >> 24);
-            tempData[2] = (T)((dPtr[0] >> 16) & 0xff);
-            tempData[1] = (T)((dPtr[0] >> 8) & 0xff);
-            tempData[0] = (T)(dPtr[0] & 0xff);
+            tempData[ 3 ] = (T)( dPtr[ 0 ] >> 24 );
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 0xff );
             break;
         }
-        case CL_UNORM_INT_8888_REV: {
+        case CL_UNORM_INT_8888_REV:
+        {
             unsigned int *dPtr = (unsigned int *)ptr;
-            tempData[0] = (T)(dPtr[0] >> 24);
-            tempData[1] = (T)((dPtr[0] >> 16) & 0xff);
-            tempData[2] = (T)((dPtr[0] >> 8) & 0xff);
-            tempData[3] = (T)(dPtr[0] & 0xff);
+            tempData[ 0 ] = (T)( dPtr[ 0 ] >> 24 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
+            tempData[ 3 ] = (T)( dPtr[ 0 ] & 0xff );
             break;
         }
 
-        case CL_UNORM_INT_101010_REV: {
+        case CL_UNORM_INT_101010_REV:
+        {
             unsigned int *dPtr = (unsigned int *)ptr;
-            tempData[2] = (T)((dPtr[0] >> 20) & 0x3ff);
-            tempData[1] = (T)((dPtr[0] >> 10) & 0x3ff);
-            tempData[0] = (T)(dPtr[0] & 0x3ff);
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 0x3ff );
             break;
         }
 #endif
-        case CL_UNORM_SHORT_555: {
+        case CL_UNORM_SHORT_555:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            tempData[0] = (T)((dPtr[0] >> 10) & 31);
-            tempData[1] = (T)((dPtr[0] >> 5) & 31);
-            tempData[2] = (T)(dPtr[0] & 31);
+            tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
+            tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
             break;
         }
 
-        case CL_UNORM_INT_101010: {
+        case CL_UNORM_INT_101010:
+        {
             cl_uint *dPtr = (cl_uint *)ptr;
-            tempData[0] = (T)((dPtr[0] >> 20) & 0x3ff);
-            tempData[1] = (T)((dPtr[0] >> 10) & 0x3ff);
-            tempData[2] = (T)(dPtr[0] & 0x3ff);
+            tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
+            tempData[ 2 ] = (T)( dPtr[ 0 ] & 0x3ff );
             break;
         }
 
-        case CL_FLOAT: {
+        case CL_FLOAT:
+        {
             cl_float *dPtr = (cl_float *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
             break;
         }
 #ifdef CL_SFIXED14_APPLE
-        case CL_SFIXED14_APPLE: {
+        case CL_SFIXED14_APPLE:
+        {
             cl_float *dPtr = (cl_float *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i] + 0x4000;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ] + 0x4000;
             break;
         }
 #endif
     }
 
 
-    outData[0] = outData[1] = outData[2] = 0;
-    outData[3] = 1;
+    outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0;
+    outData[ 3 ] = 1;
 
-    if (format->image_channel_order == CL_A)
+    if( format->image_channel_order == CL_A )
     {
-        outData[3] = tempData[0];
+        outData[ 3 ] = tempData[ 0 ];
     }
-    else if (format->image_channel_order == CL_R)
+    else if( format->image_channel_order == CL_R   )
     {
-        outData[0] = tempData[0];
+        outData[ 0 ] = tempData[ 0 ];
     }
-    else if (format->image_channel_order == CL_Rx)
+    else if( format->image_channel_order == CL_Rx   )
     {
-        outData[0] = tempData[0];
+        outData[ 0 ] = tempData[ 0 ];
     }
-    else if (format->image_channel_order == CL_RA)
+    else if( format->image_channel_order == CL_RA )
     {
-        outData[0] = tempData[0];
-        outData[3] = tempData[1];
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 3 ] = tempData[ 1 ];
     }
-    else if (format->image_channel_order == CL_RG)
+    else if( format->image_channel_order == CL_RG  )
     {
-        outData[0] = tempData[0];
-        outData[1] = tempData[1];
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
     }
-    else if (format->image_channel_order == CL_RGx)
+    else if( format->image_channel_order == CL_RGx  )
     {
-        outData[0] = tempData[0];
-        outData[1] = tempData[1];
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
     }
-    else if ((format->image_channel_order == CL_RGB)
-             || (format->image_channel_order == CL_sRGB))
+    else if(( format->image_channel_order == CL_RGB  ) || ( format->image_channel_order == CL_sRGB  ))
     {
-        outData[0] = tempData[0];
-        outData[1] = tempData[1];
-        outData[2] = tempData[2];
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 2 ];
     }
-    else if ((format->image_channel_order == CL_RGBx)
-             || (format->image_channel_order == CL_sRGBx))
+    else if(( format->image_channel_order == CL_RGBx  ) || ( format->image_channel_order == CL_sRGBx  ))
     {
-        outData[0] = tempData[0];
-        outData[1] = tempData[1];
-        outData[2] = tempData[2];
-        outData[3] = 0;
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 2 ];
+        outData[ 3 ] = 0;
     }
-    else if ((format->image_channel_order == CL_RGBA)
-             || (format->image_channel_order == CL_sRGBA))
+    else if(( format->image_channel_order == CL_RGBA ) || ( format->image_channel_order == CL_sRGBA ))
     {
-        outData[0] = tempData[0];
-        outData[1] = tempData[1];
-        outData[2] = tempData[2];
-        outData[3] = tempData[3];
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 2 ];
+        outData[ 3 ] = tempData[ 3 ];
     }
-    else if (format->image_channel_order == CL_ARGB)
+    else if( format->image_channel_order == CL_ARGB )
     {
-        outData[0] = tempData[1];
-        outData[1] = tempData[2];
-        outData[2] = tempData[3];
-        outData[3] = tempData[0];
+        outData[ 0 ] = tempData[ 1 ];
+        outData[ 1 ] = tempData[ 2 ];
+        outData[ 2 ] = tempData[ 3 ];
+        outData[ 3 ] = tempData[ 0 ];
     }
-    else if ((format->image_channel_order == CL_BGRA)
-             || (format->image_channel_order == CL_sBGRA))
+    else if(( format->image_channel_order == CL_BGRA ) || ( format->image_channel_order == CL_sBGRA ))
     {
-        outData[0] = tempData[2];
-        outData[1] = tempData[1];
-        outData[2] = tempData[0];
-        outData[3] = tempData[3];
+        outData[ 0 ] = tempData[ 2 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 0 ];
+        outData[ 3 ] = tempData[ 3 ];
     }
-    else if (format->image_channel_order == CL_INTENSITY)
+    else if( format->image_channel_order == CL_INTENSITY )
     {
-        outData[0] = tempData[0];
-        outData[1] = tempData[0];
-        outData[2] = tempData[0];
-        outData[3] = tempData[0];
+        outData[ 1 ] = tempData[ 0 ];
+        outData[ 2 ] = tempData[ 0 ];
+        outData[ 3 ] = tempData[ 0 ];
     }
-    else if (format->image_channel_order == CL_LUMINANCE)
+    else if( format->image_channel_order == CL_LUMINANCE )
     {
-        outData[0] = tempData[0];
-        outData[1] = tempData[0];
-        outData[2] = tempData[0];
+        outData[ 1 ] = tempData[ 0 ];
+        outData[ 2 ] = tempData[ 0 ];
     }
-    else if (format->image_channel_order == CL_DEPTH)
+    else if( format->image_channel_order == CL_DEPTH  )
     {
-        outData[0] = tempData[0];
+        outData[ 0 ] = tempData[ 0 ];
     }
 #ifdef CL_1RGB_APPLE
-    else if (format->image_channel_order == CL_1RGB_APPLE)
+    else if( format->image_channel_order == CL_1RGB_APPLE )
     {
-        outData[0] = tempData[1];
-        outData[1] = tempData[2];
-        outData[2] = tempData[3];
-        outData[3] = 0xff;
+        outData[ 0 ] = tempData[ 1 ];
+        outData[ 1 ] = tempData[ 2 ];
+        outData[ 2 ] = tempData[ 3 ];
+        outData[ 3 ] = 0xff;
     }
 #endif
 #ifdef CL_BGR1_APPLE
-    else if (format->image_channel_order == CL_BGR1_APPLE)
+    else if( format->image_channel_order == CL_BGR1_APPLE )
     {
-        outData[0] = tempData[2];
-        outData[1] = tempData[1];
-        outData[2] = tempData[0];
-        outData[3] = 0xff;
+        outData[ 0 ] = tempData[ 2 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 0 ];
+        outData[ 3 ] = 0xff;
     }
 #endif
     else
@@ -529,32 +495,27 @@
     }
 }
 
-template <class T>
-void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x,
-                      int y, int z, T *outData)
+template <class T> void read_image_pixel( void *imageData, image_descriptor *imageInfo,
+                                         int x, int y, int z, T *outData )
 {
-    read_image_pixel<T>(imageData, imageInfo, x, y, z, outData, 0);
+  read_image_pixel<T>( imageData, imageInfo, x, y, z, outData, 0);
 }
 
 // Stupid template rules
-bool get_integer_coords(float x, float y, float z, size_t width, size_t height,
-                        size_t depth, image_sampler_data *imageSampler,
-                        image_descriptor *imageInfo, int &outX, int &outY,
-                        int &outZ);
-bool get_integer_coords_offset(float x, float y, float z, float xAddressOffset,
-                               float yAddressOffset, float zAddressOffset,
+bool get_integer_coords( float x, float y, float z,
+                        size_t width, size_t height, size_t depth,
+                        image_sampler_data *imageSampler, image_descriptor *imageInfo,
+                        int &outX, int &outY, int &outZ );
+bool get_integer_coords_offset( float x, float y, float z,
+                               float xAddressOffset, float yAddressOffset, float zAddressOffset,
                                size_t width, size_t height, size_t depth,
-                               image_sampler_data *imageSampler,
-                               image_descriptor *imageInfo, int &outX,
-                               int &outY, int &outZ);
+                               image_sampler_data *imageSampler, image_descriptor *imageInfo,
+                               int &outX, int &outY, int &outZ );
 
 
-template <class T>
-void sample_image_pixel_offset(void *imageData, image_descriptor *imageInfo,
-                               float x, float y, float z, float xAddressOffset,
-                               float yAddressOffset, float zAddressOffset,
-                               image_sampler_data *imageSampler, T *outData,
-                               int lod)
+template <class T> void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
+                                                  float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                                  image_sampler_data *imageSampler, T *outData, int lod )
 {
     int iX = 0, iY = 0, iZ = 0;
 
@@ -562,8 +523,7 @@
     float max_h;
     float max_d;
 
-    switch (imageInfo->type)
-    {
+    switch (imageInfo->type) {
         case CL_MEM_OBJECT_IMAGE1D_ARRAY:
             max_h = imageInfo->arraySize;
             max_d = 0;
@@ -578,155 +538,120 @@
             break;
     }
 
-    if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
+    if( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
     {
-        switch (imageInfo->type)
-        {
+        switch (imageInfo->type) {
             case CL_MEM_OBJECT_IMAGE3D:
-                max_d = (float)((imageInfo->depth >> lod)
-                                    ? (imageInfo->depth >> lod)
-                                    : 1);
+                max_d = (float)((imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1);
             case CL_MEM_OBJECT_IMAGE2D:
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                max_h = (float)((imageInfo->height >> lod)
-                                    ? (imageInfo->height >> lod)
-                                    : 1);
+                max_h = (float)((imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1);
                 break;
-            default:;
+            default:
+                ;
+
         }
-        max_w =
-            (float)((imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1);
+        max_w = (float)((imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1);
     }
-    get_integer_coords_offset(x, y, z, xAddressOffset, yAddressOffset,
-                              zAddressOffset, max_w, max_h, max_d, imageSampler,
-                              imageInfo, iX, iY, iZ);
+    get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, max_w, max_h, max_d, imageSampler, imageInfo, iX, iY, iZ );
 
-    read_image_pixel<T>(imageData, imageInfo, iX, iY, iZ, outData, lod);
+    read_image_pixel<T>( imageData, imageInfo, iX, iY, iZ, outData, lod );
 }
 
-template <class T>
-void sample_image_pixel_offset(void *imageData, image_descriptor *imageInfo,
-                               float x, float y, float z, float xAddressOffset,
-                               float yAddressOffset, float zAddressOffset,
-                               image_sampler_data *imageSampler, T *outData)
+template <class T> void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
+                                                  float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                                  image_sampler_data *imageSampler, T *outData)
 {
-    sample_image_pixel_offset<T>(imageData, imageInfo, x, y, z, xAddressOffset,
-                                 yAddressOffset, zAddressOffset, imageSampler,
-                                 outData, 0);
+  sample_image_pixel_offset<T>( imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset, zAddressOffset,
+    imageSampler,  outData, 0);
 }
 
-template <class T>
-void sample_image_pixel(void *imageData, image_descriptor *imageInfo, float x,
-                        float y, float z, image_sampler_data *imageSampler,
-                        T *outData)
+template <class T> void sample_image_pixel( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, image_sampler_data *imageSampler, T *outData )
 {
-    return sample_image_pixel_offset<T>(imageData, imageInfo, x, y, z, 0.0f,
-                                        0.0f, 0.0f, imageSampler, outData);
+    return sample_image_pixel_offset<T>(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData);
 }
 
-FloatPixel
-sample_image_pixel_float(void *imageData, image_descriptor *imageInfo, float x,
-                         float y, float z, image_sampler_data *imageSampler,
-                         float *outData, int verbose, int *containsDenorms);
+FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                    float x, float y, float z, image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms );
 
-FloatPixel sample_image_pixel_float(void *imageData,
-                                    image_descriptor *imageInfo, float x,
-                                    float y, float z,
-                                    image_sampler_data *imageSampler,
-                                    float *outData, int verbose,
-                                    int *containsDenorms, int lod);
+FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                    float x, float y, float z, image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms, int lod );
 
-FloatPixel sample_image_pixel_float_offset(
-    void *imageData, image_descriptor *imageInfo, float x, float y, float z,
-    float xAddressOffset, float yAddressOffset, float zAddressOffset,
-    image_sampler_data *imageSampler, float *outData, int verbose,
-    int *containsDenorms);
-FloatPixel sample_image_pixel_float_offset(
-    void *imageData, image_descriptor *imageInfo, float x, float y, float z,
-    float xAddressOffset, float yAddressOffset, float zAddressOffset,
-    image_sampler_data *imageSampler, float *outData, int verbose,
-    int *containsDenorms, int lod);
+FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                           image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms );
+FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                           image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms, int lod );
 
 
-extern void pack_image_pixel(unsigned int *srcVector,
-                             const cl_image_format *imageFormat, void *outData);
-extern void pack_image_pixel(int *srcVector, const cl_image_format *imageFormat,
-                             void *outData);
-extern void pack_image_pixel(float *srcVector,
-                             const cl_image_format *imageFormat, void *outData);
-extern void pack_image_pixel_error(const float *srcVector,
-                                   const cl_image_format *imageFormat,
-                                   const void *results, float *errors);
+extern void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData );
+extern void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData );
+extern void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData );
+extern void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results,  float *errors );
 
-extern char *create_random_image_data(ExplicitType dataType,
-                                      image_descriptor *imageInfo,
-                                      BufferOwningPtr<char> &P, MTdata d,
-                                      bool image2DFromBuffer = false);
+extern char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d, bool image2DFromBuffer = false );
 
 // deprecated
-// extern bool clamp_image_coord( image_sampler_data *imageSampler, float value,
-// size_t max, int &outValue );
+//extern bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue );
 
-extern void get_sampler_kernel_code(image_sampler_data *imageSampler,
-                                    char *outLine);
-extern float get_max_absolute_error(const cl_image_format *format,
-                                    image_sampler_data *sampler);
-extern float get_max_relative_error(const cl_image_format *format,
-                                    image_sampler_data *sampler, int is3D,
-                                    int isLinearFilter);
+extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
+extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
+extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
 
 
-#define errMax(_x, _y) ((_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y))
+#define errMax( _x , _y )       ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )
 
-static inline cl_uint abs_diff_uint(cl_uint x, cl_uint y)
+static inline cl_uint abs_diff_uint( cl_uint x, cl_uint y )
 {
     return y > x ? y - x : x - y;
 }
 
-static inline cl_uint abs_diff_int(cl_int x, cl_int y)
+static inline cl_uint abs_diff_int( cl_int x, cl_int y )
 {
-    return (cl_uint)(y > x ? y - x : x - y);
+    return (cl_uint) (y > x ? y - x : x - y);
 }
 
-static inline cl_float relative_error(float test, float expected)
+static inline cl_float relative_error( float test, float expected )
 {
     // 0-0/0 is 0 in this case, not NaN
-    if (test == 0.0f && expected == 0.0f) return 0.0f;
+    if( test == 0.0f && expected == 0.0f )
+        return 0.0f;
 
     return (test - expected) / expected;
 }
 
 extern float random_float(float low, float high);
 
-class CoordWalker {
+class CoordWalker
+{
 public:
-    CoordWalker(void *coords, bool useFloats, size_t vecSize);
+    CoordWalker( void * coords, bool useFloats, size_t vecSize );
     ~CoordWalker();
 
-    cl_float Get(size_t idx, size_t el);
+    cl_float    Get( size_t idx, size_t el );
 
 protected:
-    cl_float *mFloatCoords;
-    cl_int *mIntCoords;
-    size_t mVecSize;
+    cl_float * mFloatCoords;
+    cl_int * mIntCoords;
+    size_t    mVecSize;
 };
 
-extern cl_half convert_float_to_half(float f);
-extern int DetectFloatToHalfRoundingMode(
-    cl_command_queue); // Returns CL_SUCCESS on success
+extern int  DetectFloatToHalfRoundingMode( cl_command_queue );  // Returns CL_SUCCESS on success
 
 // sign bit: don't care, exponent: maximum value, significand: non-zero
-static int inline is_half_nan(cl_half half) { return (half & 0x7fff) > 0x7c00; }
+static int inline is_half_nan( cl_ushort half ){ return ( half & 0x7fff ) > 0x7c00; }
 
 // sign bit: don't care, exponent: zero, significand: non-zero
-static int inline is_half_denorm(cl_half half) { return IsHalfSubnormal(half); }
+static int inline is_half_denorm( cl_ushort half ){ return IsHalfSubnormal( half ); }
 
 // sign bit: don't care, exponent: zero, significand: zero
-static int inline is_half_zero(cl_half half) { return (half & 0x7fff) == 0; }
+static int inline is_half_zero( cl_ushort half ){ return ( half & 0x7fff ) == 0; }
+
+cl_ushort convert_float_to_half( cl_float f );
+cl_float  convert_half_to_float( cl_ushort h );
 
 extern double sRGBmap(float fc);
 
-extern const char *convert_image_type_to_string(cl_mem_object_type imageType);
-
-
 #endif // _imageHelpers_h

diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp
index 95b9555..1f76f36 100644
--- a/test_common/harness/kernelHelpers.cpp
+++ b/test_common/harness/kernelHelpers.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -28,8 +28,6 @@
 #include <fstream>
 #include <sstream>
 #include <iomanip>
-#include <mutex>
-#include <algorithm>
 
 #if defined(_WIN32)
 std::string slash = "\\";
@@ -37,23 +35,20 @@
 std::string slash = "/";
 #endif
 
-static std::mutex gCompilerMutex;
-
-static cl_int get_first_device_id(const cl_context context,
-                                  cl_device_id &device);
+static cl_int get_first_device_id(const cl_context context, cl_device_id &device);
 
 long get_file_size(const std::string &fileName)
 {
     std::ifstream ifs(fileName.c_str(), std::ios::binary);
-    if (!ifs.good()) return 0;
+    if (!ifs.good())
+        return 0;
     // get length of file:
     ifs.seekg(0, std::ios::end);
     std::ios::pos_type length = ifs.tellg();
     return static_cast<long>(length);
 }
 
-static std::string get_kernel_content(unsigned int numKernelLines,
-                                      const char *const *kernelProgram)
+static std::string get_kernel_content(unsigned int numKernelLines, const char *const *kernelProgram)
 {
     std::string kernel;
     for (size_t i = 0; i < numKernelLines; ++i)
@@ -77,11 +72,11 @@
         if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
             pos -= 2;
 
-        // check character before 'kernel' (white space expected)
+        //check character before 'kernel' (white space expected)
         size_t wsPos = source.find_last_of(" \t\r\n", pos);
         if (wsPos == std::string::npos || wsPos + 1 == pos)
         {
-            // check character after 'kernel' (white space expected)
+            //check character after 'kernel' (white space expected)
             size_t akPos = kPos + sizeof("kernel") - 1;
             wsPos = source.find_first_of(" \t\r\n", akPos);
             if (!(wsPos == akPos))
@@ -96,19 +91,21 @@
                 attributeFound = false;
                 // find '(' after kernel name name
                 size_t pPos = source.find("(", akPos);
-                if (!(pPos != std::string::npos)) continue;
+                if (!(pPos != std::string::npos))
+                    continue;
 
                 // check for not empty kernel name before '('
                 pos = source.find_last_not_of(" \t\r\n", pPos - 1);
-                if (!(pos != std::string::npos && pos > akPos)) continue;
+                if (!(pos != std::string::npos && pos > akPos))
+                    continue;
 
-                // find character before kernel name
+                //find character before kernel name
                 wsPos = source.find_last_of(" \t\r\n", pos);
-                if (!(wsPos != std::string::npos && wsPos >= akPos)) continue;
+                if (!(wsPos != std::string::npos && wsPos >= akPos))
+                    continue;
 
-                std::string name =
-                    source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
-                // check for kernel attribute
+                std::string name = source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
+                //check for kernel attribute
                 if (name == "__attribute__")
                 {
                     attributeFound = true;
@@ -148,17 +145,20 @@
     return oss.str();
 }
 
-static std::string
-get_offline_compilation_file_type_str(const CompilationMode compilationMode)
+static std::string get_offline_compilation_file_type_str(const CompilationMode compilationMode)
 {
     switch (compilationMode)
     {
-        default: assert(0 && "Invalid compilation mode"); abort();
+        default:
+            assert(0 && "Invalid compilation mode");
+            abort();
         case kOnline:
             assert(0 && "Invalid compilation mode for offline compilation");
             abort();
-        case kBinary: return "binary";
-        case kSpir_v: return "SPIR-V";
+        case kBinary:
+            return "binary";
+        case kSpir_v:
+            return "SPIR-V";
     }
 }
 
@@ -170,40 +170,34 @@
     std::string kernelName = get_kernel_name(kernel);
     cl_uint kernelCrc = crc32(kernel.data(), kernel.size());
     std::ostringstream oss;
-    oss << kernelName << std::hex << std::setfill('0') << std::setw(8)
-        << kernelCrc;
-    if (buildOptions)
-    {
+    oss << kernelName <<  std::hex << std::setfill('0') << std::setw(8) << kernelCrc;
+    if(buildOptions) {
         cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
-        oss << '.' << std::hex << std::setfill('0') << std::setw(8)
-            << bOptionsCrc;
+        oss << '.' << std::hex << std::setfill('0') << std::setw(8) << bOptionsCrc;
     }
     return oss.str();
 }
 
 
-static std::string
-get_cl_build_options_filename_with_path(const std::string &filePath,
-                                        const std::string &fileNamePrefix)
-{
+static std::string 
+get_cl_build_options_filename_with_path(const std::string& filePath,
+                                        const std::string& fileNamePrefix) {
     return filePath + slash + fileNamePrefix + ".options";
 }
 
-static std::string
-get_cl_source_filename_with_path(const std::string &filePath,
-                                 const std::string &fileNamePrefix)
-{
+static std::string 
+get_cl_source_filename_with_path(const std::string& filePath,
+                                 const std::string& fileNamePrefix) {
     return filePath + slash + fileNamePrefix + ".cl";
 }
 
-static std::string
-get_binary_filename_with_path(CompilationMode mode, cl_uint deviceAddrSpaceSize,
-                              const std::string &filePath,
-                              const std::string &fileNamePrefix)
-{
+static std::string 
+get_binary_filename_with_path(CompilationMode mode,
+                              cl_uint deviceAddrSpaceSize,
+                              const std::string& filePath,
+                              const std::string& fileNamePrefix) {
     std::string binaryFilename = filePath + slash + fileNamePrefix;
-    if (kSpir_v == mode)
-    {
+    if(kSpir_v == mode) {
         std::ostringstream extension;
         extension << ".spv" << deviceAddrSpaceSize;
         binaryFilename += extension.str();
@@ -211,43 +205,39 @@
     return binaryFilename;
 }
 
-static bool file_exist_on_disk(const std::string &filePath,
-                               const std::string &fileName)
-{
+static bool file_exist_on_disk(const std::string& filePath,
+                               const std::string& fileName) {
     std::string fileNameWithPath = filePath + slash + fileName;
     bool exist = false;
     std::ifstream ifs;
 
     ifs.open(fileNameWithPath.c_str(), std::ios::binary);
-    if (ifs.good()) exist = true;
+    if(ifs.good())
+        exist = true;
     ifs.close();
     return exist;
 }
 
 static bool should_save_kernel_source_to_disk(CompilationMode mode,
                                               CompilationCacheMode cacheMode,
-                                              const std::string &binaryPath,
-                                              const std::string &binaryName)
+                                              const std::string& binaryPath, 
+                                              const std::string& binaryName)
 {
     bool saveToDisk = false;
-    if (cacheMode == kCacheModeDumpCl
-        || (cacheMode == kCacheModeOverwrite && mode != kOnline))
-    {
+    if(cacheMode == kCacheModeDumpCl ||
+       (cacheMode == kCacheModeOverwrite && mode != kOnline)) {
         saveToDisk = true;
     }
-    if (cacheMode == kCacheModeCompileIfAbsent && mode != kOnline)
-    {
+    if(cacheMode == kCacheModeCompileIfAbsent && mode != kOnline) {
         saveToDisk = !file_exist_on_disk(binaryPath, binaryName);
     }
     return saveToDisk;
 }
 
-static int save_kernel_build_options_to_disk(const std::string &path,
-                                             const std::string &prefix,
-                                             const char *buildOptions)
-{
-    std::string filename =
-        get_cl_build_options_filename_with_path(path, prefix);
+static int save_kernel_build_options_to_disk(const std::string& path,
+                                             const std::string& prefix,
+                                             const char *buildOptions) {
+    std::string filename = get_cl_build_options_filename_with_path(path, prefix);
     std::ofstream ofs(filename.c_str(), std::ios::binary);
     if (!ofs.good())
     {
@@ -260,10 +250,9 @@
     return CL_SUCCESS;
 }
 
-static int save_kernel_source_to_disk(const std::string &path,
-                                      const std::string &prefix,
-                                      const std::string &source)
-{
+static int save_kernel_source_to_disk(const std::string& path,
+                                      const std::string& prefix,
+                                      const std::string& source) {
     std::string filename = get_cl_source_filename_with_path(path, prefix);
     std::ofstream ofs(filename.c_str(), std::ios::binary);
     if (!ofs.good())
@@ -277,86 +266,123 @@
     return CL_SUCCESS;
 }
 
-static int
-save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
-                                       const char *const *kernelProgram,
-                                       const char *buildOptions)
+static int save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
+                                                  const char *const *kernelProgram,
+                                                  const char *buildOptions)
 {
     int error;
 
     std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
-    std::string kernelNamePrefix =
-        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
+    std::string kernelNamePrefix = get_unique_filename_prefix(numKernelLines,
+                                                             kernelProgram,
+                                                             buildOptions);
 
     // save kernel source to disk
-    error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix,
-                                       kernel);
-
+    error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix, kernel);
+    
     // save kernel build options to disk if exists
     if (buildOptions != NULL)
-        error |= save_kernel_build_options_to_disk(
-            gCompilationCachePath, kernelNamePrefix, buildOptions);
+        error |= save_kernel_build_options_to_disk(gCompilationCachePath, kernelNamePrefix, buildOptions);
 
     return error;
 }
 
-static std::string
-get_compilation_mode_str(const CompilationMode compilationMode)
+static std::string get_compilation_mode_str(const CompilationMode compilationMode)
 {
     switch (compilationMode)
     {
-        default: assert(0 && "Invalid compilation mode"); abort();
-        case kOnline: return "online";
-        case kBinary: return "binary";
-        case kSpir_v: return "spir-v";
+        default:
+            assert(0 && "Invalid compilation mode");
+            abort();
+        case kOnline:
+            return "online";
+        case kBinary:
+            return "binary";
+        case kSpir_v:
+            return "spir-v";
     }
 }
 
-static cl_int get_cl_device_info_str(const cl_device_id device,
-                                     const cl_uint device_address_space_size,
-                                     const CompilationMode compilationMode,
-                                     std::string &clDeviceInfo)
+#ifdef KHRONOS_OFFLINE_COMPILER
+static std::string get_khronos_compiler_command(const cl_uint device_address_space_size,
+                                                const bool openclCXX,
+                                                const std::string &bOptions,
+                                                const std::string &sourceFilename,
+                                                const std::string &outputFilename)
+{
+    // Set compiler options
+    // Emit SPIR-V
+    std::string compilerOptions = " -cc1 -emit-spirv";
+    // <triple>: for 32 bit SPIR-V use spir-unknown-unknown, for 64 bit SPIR-V use spir64-unknown-unknown.
+    if(device_address_space_size == 32)
+    {
+        compilerOptions += " -triple=spir-unknown-unknown";
+    }
+    else
+    {
+        compilerOptions += " -triple=spir64-unknown-unknown";
+    }
+    // Set OpenCL C++ flag required by SPIR-V-ready clang (compiler provided by Khronos)
+    if(openclCXX)
+    {
+        compilerOptions = compilerOptions + " -cl-std=c++";
+    }
+    // Set correct includes
+    if(openclCXX)
+    {
+        compilerOptions += " -I ";
+        compilerOptions += STRINGIFY_VALUE(CL_LIBCLCXX_DIR);
+    }
+    else
+    {
+        compilerOptions += " -include opencl.h";
+    }
+
+#ifdef KHRONOS_OFFLINE_COMPILER_OPTIONS
+    compilerOptions += STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER_OPTIONS);
+#endif
+
+    // Add build options passed to this function
+    compilerOptions += " " + bOptions;
+    compilerOptions +=
+        " " + sourceFilename +
+        " -o " + outputFilename;
+    std::string runString = STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER) + compilerOptions;
+
+    return runString;
+}
+#endif // KHRONOS_OFFLINE_COMPILER
+
+static cl_int get_cl_device_info_str(const cl_device_id device, const cl_uint device_address_space_size,
+                                     const CompilationMode compilationMode, std::string &clDeviceInfo)
 {
     std::string extensionsString = get_device_extensions_string(device);
     std::string versionString = get_device_version_string(device);
 
     std::ostringstream clDeviceInfoStream;
-    std::string file_type =
-        get_offline_compilation_file_type_str(compilationMode);
-    clDeviceInfoStream << "# OpenCL device info affecting " << file_type
-                       << " offline compilation:" << std::endl
-                       << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size
-                       << std::endl
-                       << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\""
-                       << std::endl;
+    std::string file_type = get_offline_compilation_file_type_str(compilationMode);
+    clDeviceInfoStream << "# OpenCL device info affecting " << file_type << " offline compilation:" << std::endl
+                    << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size << std::endl
+                    << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\"" << std::endl;
     /* We only need the device's supported IL version(s) when compiling IL
-     * that will be loaded with clCreateProgramWithIL() */
+    * that will be loaded with clCreateProgramWithIL() */
     if (compilationMode == kSpir_v)
     {
         std::string ilVersionString = get_device_il_version_string(device);
-        clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString
-                           << "\"" << std::endl;
+        clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString << "\"" << std::endl;
     }
-    clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\""
-                       << std::endl;
-    clDeviceInfoStream << "CL_DEVICE_IMAGE_SUPPORT="
-                       << (0 == checkForImageSupport(device)) << std::endl;
-    clDeviceInfoStream << "CL_DEVICE_NAME=\"" << get_device_name(device).c_str()
-                       << "\"" << std::endl;
+    clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\"" << std::endl;
 
     clDeviceInfo = clDeviceInfoStream.str();
 
     return CL_SUCCESS;
 }
 
-static int write_cl_device_info(const cl_device_id device,
-                                const cl_uint device_address_space_size,
-                                const CompilationMode compilationMode,
-                                std::string &clDeviceInfoFilename)
+static int write_cl_device_info(const cl_device_id device, const cl_uint device_address_space_size,
+                                const CompilationMode compilationMode, std::string &clDeviceInfoFilename)
 {
     std::string clDeviceInfo;
-    int error = get_cl_device_info_str(device, device_address_space_size,
-                                       compilationMode, clDeviceInfo);
+    int error = get_cl_device_info_str(device, device_address_space_size, compilationMode, clDeviceInfo);
     if (error != CL_SUCCESS)
     {
         return error;
@@ -365,30 +391,25 @@
     cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size());
 
     /* Get the filename for the clDeviceInfo file.
-     * Note: the file includes the hash on its content, so it is usually
-     * unnecessary to delete it. */
+     * Note: the file includes the hash on its content, so it is usually unnecessary to delete it. */
     std::ostringstream clDeviceInfoFilenameStream;
-    clDeviceInfoFilenameStream << gCompilationCachePath << slash
-                               << "clDeviceInfo-";
-    clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8)
-                               << crc << ".txt";
+    clDeviceInfoFilenameStream << gCompilationCachePath << slash << "clDeviceInfo-";
+    clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8) << crc << ".txt";
 
     clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
 
-    if ((size_t)get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
+    if ((size_t) get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
     {
         /* The CL device info file has already been created.
          * Nothing to do. */
         return 0;
     }
 
-    /* The file does not exist or its length is not as expected.
-     * Create/overwrite it. */
+    /* The file does not exist or its length is not as expected.  Create/overwrite it. */
     std::ofstream ofs(clDeviceInfoFilename);
     if (!ofs.good())
     {
-        log_info("OfflineCompiler: can't create CL device info file: %s\n",
-                 clDeviceInfoFilename.c_str());
+        log_info("OfflineCompiler: can't create CL device info file: %s\n", clDeviceInfoFilename.c_str());
         return -1;
     }
     ofs << clDeviceInfo;
@@ -397,11 +418,12 @@
     return CL_SUCCESS;
 }
 
-static std::string get_offline_compilation_command(
-    const cl_uint device_address_space_size,
-    const CompilationMode compilationMode, const std::string &bOptions,
-    const std::string &sourceFilename, const std::string &outputFilename,
-    const std::string &clDeviceInfoFilename)
+static std::string get_offline_compilation_command(const cl_uint device_address_space_size,
+                                                   const CompilationMode compilationMode,
+                                                   const std::string &bOptions,
+                                                   const std::string &sourceFilename,
+                                                   const std::string &outputFilename,
+                                                   const std::string &clDeviceInfoFilename)
 {
     std::ostringstream wrapperOptions;
 
@@ -425,27 +447,45 @@
                                    const CompilationMode compilationMode,
                                    const std::string &bOptions,
                                    const std::string &sourceFilename,
-                                   const std::string &outputFilename)
+                                   const std::string &outputFilename,
+                                   const bool openclCXX)
 {
     std::string runString;
-    std::string clDeviceInfoFilename;
-
-    // See cl_offline_compiler-interface.txt for a description of the
-    // format of the CL device information file generated below, and
-    // the internal command line interface for invoking the offline
-    // compiler.
-
-    cl_int err = write_cl_device_info(device, device_address_space_size,
-                                      compilationMode, clDeviceInfoFilename);
-    if (err != CL_SUCCESS)
+    if (openclCXX)
     {
-        log_error("Failed writing CL device info file\n");
-        return err;
+#ifndef KHRONOS_OFFLINE_COMPILER
+        log_error("CL C++ compilation is not possible: KHRONOS_OFFLINE_COMPILER was not defined.\n");
+        return CL_INVALID_OPERATION;
+#else
+        if (compilationMode != kSpir_v)
+        {
+            log_error("Compilation mode must be SPIR-V for Khronos compiler");
+            return -1;
+        }
+        runString = get_khronos_compiler_command(device_address_space_size, openclCXX, bOptions,
+                                                 sourceFilename, outputFilename);
+#endif
     }
+    else
+    {
+        std::string clDeviceInfoFilename;
 
-    runString = get_offline_compilation_command(
-        device_address_space_size, compilationMode, bOptions, sourceFilename,
-        outputFilename, clDeviceInfoFilename);
+        // See cl_offline_compiler-interface.txt for a description of the
+        // format of the CL device information file generated below, and
+        // the internal command line interface for invoking the offline
+        // compiler.
+
+        cl_int err = write_cl_device_info(device, device_address_space_size, compilationMode,
+                                          clDeviceInfoFilename);
+        if (err != CL_SUCCESS)
+        {
+            log_error("Failed writing CL device info file\n");
+            return err;
+        }
+
+        runString = get_offline_compilation_command(device_address_space_size, compilationMode, bOptions,
+                                                    sourceFilename, outputFilename, clDeviceInfoFilename);
+    }
 
     // execute script
     log_info("Executing command: %s\n", runString.c_str());
@@ -460,12 +500,10 @@
     return CL_SUCCESS;
 }
 
-static cl_int get_first_device_id(const cl_context context,
-                                  cl_device_id &device)
+static cl_int get_first_device_id(const cl_context context, cl_device_id &device)
 {
     cl_uint numDevices = 0;
-    cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
-                                    sizeof(cl_uint), &numDevices, NULL);
+    cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL);
     test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");
 
     if (numDevices == 0)
@@ -475,60 +513,55 @@
     }
 
     std::vector<cl_device_id> devices(numDevices, 0);
-    error =
-        clGetContextInfo(context, CL_CONTEXT_DEVICES,
-                         numDevices * sizeof(cl_device_id), &devices[0], NULL);
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDevices*sizeof(cl_device_id), &devices[0], NULL);
     test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");
 
     device = devices[0];
     return CL_SUCCESS;
 }
 
-static cl_int get_device_address_bits(const cl_device_id device,
-                                      cl_uint &device_address_space_size)
+static cl_int get_device_address_bits(const cl_device_id device, cl_uint &device_address_space_size)
 {
-    cl_int error =
-        clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
-                        &device_address_space_size, NULL);
+    cl_int error = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), &device_address_space_size, NULL);
     test_error(error, "Unable to obtain device address bits");
 
     if (device_address_space_size != 32 && device_address_space_size != 64)
     {
-        log_error("ERROR: Unexpected number of device address bits: %u\n",
-                  device_address_space_size);
+        log_error("ERROR: Unexpected number of device address bits: %u\n", device_address_space_size);
         return -1;
     }
 
     return CL_SUCCESS;
 }
 
-static int get_offline_compiler_output(
-    std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize,
-    const CompilationMode compilationMode, const std::string &bOptions,
-    const std::string &kernelPath, const std::string &kernelNamePrefix)
+static int get_offline_compiler_output(std::ifstream &ifs,
+                                       const cl_device_id device,
+                                       cl_uint deviceAddrSpaceSize,
+                                       const bool openclCXX,
+                                       const CompilationMode compilationMode,
+                                       const std::string &bOptions,
+                                       const std::string &kernelPath,
+                                       const std::string &kernelNamePrefix)
 {
-    std::string sourceFilename =
-        get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
-    std::string outputFilename = get_binary_filename_with_path(
-        compilationMode, deviceAddrSpaceSize, kernelPath, kernelNamePrefix);
+    std::string sourceFilename = get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
+    std::string outputFilename = get_binary_filename_with_path(compilationMode,
+                                                               deviceAddrSpaceSize,
+                                                               kernelPath,
+                                                               kernelNamePrefix);
 
     ifs.open(outputFilename.c_str(), std::ios::binary);
-    if (!ifs.good())
-    {
-        std::string file_type =
-            get_offline_compilation_file_type_str(compilationMode);
-        if (gCompilationCacheMode == kCacheModeForceRead)
-        {
+    if(!ifs.good()) {
+       std::string file_type = get_offline_compilation_file_type_str(compilationMode);
+        if (gCompilationCacheMode == kCacheModeForceRead) {
             log_info("OfflineCompiler: can't open cached %s file: %s\n",
                      file_type.c_str(), outputFilename.c_str());
             return -1;
         }
-        else
-        {
-            int error = invoke_offline_compiler(device, deviceAddrSpaceSize,
-                                                compilationMode, bOptions,
-                                                sourceFilename, outputFilename);
-            if (error != CL_SUCCESS) return error;
+        else {
+            int error = invoke_offline_compiler(device, deviceAddrSpaceSize, compilationMode,
+                                                bOptions, sourceFilename, outputFilename, openclCXX);
+            if (error != CL_SUCCESS)
+                return error;
 
             // read output file
             ifs.open(outputFilename.c_str(), std::ios::binary);
@@ -538,18 +571,21 @@
                          file_type.c_str(), outputFilename.c_str());
                 return -1;
             }
-        }
+       }
     }
     return CL_SUCCESS;
 }
 
-static int create_single_kernel_helper_create_program_offline(
-    cl_context context, cl_device_id device, cl_program *outProgram,
-    unsigned int numKernelLines, const char *const *kernelProgram,
-    const char *buildOptions, CompilationMode compilationMode)
+static int create_single_kernel_helper_create_program_offline(cl_context context,
+                                                              cl_device_id device,
+                                                              cl_program *outProgram,
+                                                              unsigned int numKernelLines,
+                                                              const char *const *kernelProgram,
+                                                              const char *buildOptions,
+                                                              const bool openclCXX,
+                                                              CompilationMode compilationMode)
 {
-    if (kCacheModeDumpCl == gCompilationCacheMode)
-    {
+    if(kCacheModeDumpCl == gCompilationCacheMode) {
         return -1;
     }
 
@@ -562,27 +598,40 @@
         test_error(error, "Failed to get device ID for first device");
     }
     error = get_device_address_bits(device, device_address_space_size);
-    if (error != CL_SUCCESS) return error;
+    if (error != CL_SUCCESS)
+        return error;
 
     // set build options
     std::string bOptions;
     bOptions += buildOptions ? std::string(buildOptions) : "";
 
-    std::string kernelName =
-        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
+    std::string kernelName = get_unique_filename_prefix(numKernelLines,
+                                                        kernelProgram,
+                                                        buildOptions);
+
 
 
     std::ifstream ifs;
-    error = get_offline_compiler_output(ifs, device, device_address_space_size,
-                                        compilationMode, bOptions,
-                                        gCompilationCachePath, kernelName);
-    if (error != CL_SUCCESS) return error;
+    error = get_offline_compiler_output(ifs, device, device_address_space_size, openclCXX, compilationMode, bOptions, gCompilationCachePath, kernelName);
+    if (error != CL_SUCCESS)
+      return error;
+
+    // -----------------------------------------------------------------------------------
+    // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+    // -----------------------------------------------------------------------------------
+    // Only OpenCL C++ to SPIR-V compilation
+    #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    if(openclCXX)
+    {
+        return CL_SUCCESS;
+    }
+    #endif
 
     ifs.seekg(0, ifs.end);
     int length = ifs.tellg();
     ifs.seekg(0, ifs.beg);
 
-    // treat modifiedProgram as input for clCreateProgramWithBinary
+    //treat modifiedProgram as input for clCreateProgramWithBinary
     if (compilationMode == kBinary)
     {
         // read binary from file:
@@ -593,17 +642,15 @@
 
         size_t lengths = modifiedKernelBuf.size();
         const unsigned char *binaries = { &modifiedKernelBuf[0] };
-        log_info("offlineCompiler: clCreateProgramWithSource replaced with "
-                 "clCreateProgramWithBinary\n");
-        *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths,
-                                                &binaries, NULL, &error);
+        log_info("offlineCompiler: clCreateProgramWithSource replaced with clCreateProgramWithBinary\n");
+        *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths, &binaries, NULL, &error);
         if (*outProgram == NULL || error != CL_SUCCESS)
         {
             print_error(error, "clCreateProgramWithBinary failed");
             return error;
         }
     }
-    // treat modifiedProgram as input for clCreateProgramWithIL
+    //treat modifiedProgram as input for clCreateProgramWithIL
     else if (compilationMode == kSpir_v)
     {
         // read spir-v from file:
@@ -613,44 +660,12 @@
         ifs.close();
 
         size_t length = modifiedKernelBuf.size();
-        log_info("offlineCompiler: clCreateProgramWithSource replaced with "
-                 "clCreateProgramWithIL\n");
-        if (gCoreILProgram)
-        {
-            *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0],
-                                                length, &error);
-        }
-        else
-        {
-            cl_platform_id platform;
-            error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
-                                    sizeof(cl_platform_id), &platform, NULL);
-            test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
+        log_info("offlineCompiler: clCreateProgramWithSource replaced with clCreateProgramWithIL\n");
 
-            clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
-            clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
-                clGetExtensionFunctionAddressForPlatform(
-                    platform, "clCreateProgramWithILKHR");
-            if (clCreateProgramWithILKHR == NULL)
-            {
-                log_error(
-                    "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
-                return -1;
-            }
-            *outProgram = clCreateProgramWithILKHR(
-                context, &modifiedKernelBuf[0], length, &error);
-        }
-
+        *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0], length, &error);
         if (*outProgram == NULL || error != CL_SUCCESS)
         {
-            if (gCoreILProgram)
-            {
-                print_error(error, "clCreateProgramWithIL failed");
-            }
-            else
-            {
-                print_error(error, "clCreateProgramWithILKHR failed");
-            }
+            print_error(error, "clCreateProgramWithIL failed");
             return error;
         }
     }
@@ -658,24 +673,26 @@
     return CL_SUCCESS;
 }
 
-static int create_single_kernel_helper_create_program(
-    cl_context context, cl_device_id device, cl_program *outProgram,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *buildOptions, CompilationMode compilationMode)
+static int create_single_kernel_helper_create_program(cl_context context,
+                                                      cl_device_id device,
+                                                      cl_program *outProgram,
+                                                      unsigned int numKernelLines,
+                                                      const char **kernelProgram,
+                                                      const char *buildOptions,
+                                                      const bool openclCXX,
+                                                      CompilationMode compilationMode)
 {
-    std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
+    std::string filePrefix = get_unique_filename_prefix(numKernelLines,
+                                                        kernelProgram,
+                                                        buildOptions);
+    bool shouldSaveToDisk = should_save_kernel_source_to_disk(compilationMode, 
+                                                              gCompilationCacheMode,
+                                                              gCompilationCachePath,
+                                                              filePrefix);
 
-    std::string filePrefix =
-        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
-    bool shouldSaveToDisk = should_save_kernel_source_to_disk(
-        compilationMode, gCompilationCacheMode, gCompilationCachePath,
-        filePrefix);
-
-    if (shouldSaveToDisk)
+    if(shouldSaveToDisk)
     {
-        if (CL_SUCCESS
-            != save_kernel_source_and_options_to_disk(
-                numKernelLines, kernelProgram, buildOptions))
+        if(CL_SUCCESS != save_kernel_source_and_options_to_disk(numKernelLines, kernelProgram, buildOptions))
         {
             log_error("Unable to dump kernel source to disk");
             return -1;
@@ -686,8 +703,7 @@
         int error = CL_SUCCESS;
 
         /* Create the program object from source */
-        *outProgram = clCreateProgramWithSource(context, numKernelLines,
-                                                kernelProgram, NULL, &error);
+        *outProgram = clCreateProgramWithSource(context, numKernelLines, kernelProgram, NULL, &error);
         if (*outProgram == NULL || error != CL_SUCCESS)
         {
             print_error(error, "clCreateProgramWithSource failed");
@@ -697,9 +713,10 @@
     }
     else
     {
-        return create_single_kernel_helper_create_program_offline(
-            context, device, outProgram, numKernelLines, kernelProgram,
-            buildOptions, compilationMode);
+        return create_single_kernel_helper_create_program_offline(context, device, outProgram,
+                                                                  numKernelLines, kernelProgram,
+                                                                  buildOptions, openclCXX,
+                                                                  compilationMode);
     }
 }
 
@@ -707,111 +724,139 @@
                                                cl_program *outProgram,
                                                unsigned int numKernelLines,
                                                const char **kernelProgram,
-                                               const char *buildOptions)
+                                               const char *buildOptions,
+                                               const bool openclCXX)
 {
-    return create_single_kernel_helper_create_program(
-        context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
-        gCompilationMode);
+    return create_single_kernel_helper_create_program(context, NULL, outProgram,
+                                                      numKernelLines, kernelProgram,
+                                                      buildOptions, openclCXX,
+                                                      gCompilationMode);
 }
 
-int create_single_kernel_helper_create_program_for_device(
-    cl_context context, cl_device_id device, cl_program *outProgram,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *buildOptions)
+int create_single_kernel_helper_create_program_for_device(cl_context context,
+                                                          cl_device_id device,
+                                                          cl_program *outProgram,
+                                                          unsigned int numKernelLines,
+                                                          const char **kernelProgram,
+                                                          const char *buildOptions,
+                                                          const bool openclCXX)
 {
-    return create_single_kernel_helper_create_program(
-        context, device, outProgram, numKernelLines, kernelProgram,
-        buildOptions, gCompilationMode);
+    return create_single_kernel_helper_create_program(context, device, outProgram,
+                                                      numKernelLines, kernelProgram,
+                                                      buildOptions, openclCXX,
+                                                      gCompilationMode);
 }
 
-int create_single_kernel_helper_with_build_options(
-    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *kernelName, const char *buildOptions)
+int create_single_kernel_helper_with_build_options(cl_context context,
+                                                   cl_program *outProgram,
+                                                   cl_kernel *outKernel,
+                                                   unsigned int numKernelLines,
+                                                   const char **kernelProgram,
+                                                   const char *kernelName,
+                                                   const char *buildOptions,
+                                                   const bool openclCXX)
 {
-    return create_single_kernel_helper(context, outProgram, outKernel,
-                                       numKernelLines, kernelProgram,
-                                       kernelName, buildOptions);
+    return create_single_kernel_helper(context, outProgram, outKernel, numKernelLines, kernelProgram, kernelName, buildOptions, openclCXX);
 }
 
 // Creates and builds OpenCL C/C++ program, and creates a kernel
-int create_single_kernel_helper(cl_context context, cl_program *outProgram,
+int create_single_kernel_helper(cl_context context,
+                                cl_program *outProgram,
                                 cl_kernel *outKernel,
                                 unsigned int numKernelLines,
                                 const char **kernelProgram,
                                 const char *kernelName,
-                                const char *buildOptions)
+                                const char *buildOptions,
+                                const bool openclCXX)
 {
-    // For the logic that automatically adds -cl-std it is much cleaner if the
-    // build options have RAII. This buffer will store the potentially updated
-    // build options, in which case buildOptions will point at the string owned
-    // by this buffer.
-    std::string build_options_internal{ buildOptions ? buildOptions : "" };
-
-    // Check the build options for the -cl-std option.
-    if (!buildOptions || !strstr(buildOptions, "-cl-std"))
+    int error;
+    // Create OpenCL C++ program
+    if(openclCXX)
     {
-        // If the build option isn't present add it using the latest OpenCL-C
-        // version supported by the device. This allows calling code to force a
-        // particular CL C version if it is required, but also means that
-        // callers need not specify a version if they want to assume the most
-        // recent CL C.
-
-        auto version = get_max_OpenCL_C_for_context(context);
-
-        std::string cl_std{};
-        if (version >= Version(3, 0))
+    // -----------------------------------------------------------------------------------
+    // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+    // -----------------------------------------------------------------------------------
+    // Only OpenCL C++ to SPIR-V compilation
+    #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+        // Save global variable
+        bool tempgCompilationCacheMode = gCompilationCacheMode;
+        // Force OpenCL C++ -> SPIR-V compilation on every run
+        gCompilationCacheMode = kCacheModeOverwrite;
+    #endif
+        error = create_openclcpp_program(
+            context, outProgram, numKernelLines, kernelProgram, buildOptions
+        );
+        if (error != CL_SUCCESS)
         {
-            cl_std = "-cl-std=CL3.0";
+            log_error("Create program failed: %d, line: %d\n", error, __LINE__);
+            return error;
         }
-        else if (version >= Version(2, 0) && version < Version(3, 0))
-        {
-            cl_std = "-cl-std=CL2.0";
-        }
-        else
-        {
-            // If the -cl-std build option is not specified, the highest OpenCL
-            // C 1.x language version supported by each device is used when
-            // compiling the program for each device.
-            cl_std = "";
-        }
-        build_options_internal += ' ';
-        build_options_internal += cl_std;
-        buildOptions = build_options_internal.c_str();
-    }
-    int error = create_single_kernel_helper_create_program(
-        context, outProgram, numKernelLines, kernelProgram, buildOptions);
-    if (error != CL_SUCCESS)
-    {
-        log_error("Create program failed: %d, line: %d\n", error, __LINE__);
+    // -----------------------------------------------------------------------------------
+    // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+    // -----------------------------------------------------------------------------------
+    #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+        // Restore global variables
+        gCompilationCacheMode = tempgCompilationCacheMode;
+        log_info("WARNING: KERNEL %s WAS ONLY COMPILED TO SPIR-V\n", kernelName);
         return error;
+    #endif
     }
-
+    // Create OpenCL C program
+    else
+    {
+        error = create_single_kernel_helper_create_program(
+            context, outProgram, numKernelLines, kernelProgram, buildOptions
+        );
+        if (error != CL_SUCCESS)
+        {
+            log_error("Create program failed: %d, line: %d\n", error, __LINE__);
+            return error;
+        }
+    }
     // Remove offline-compiler-only build options
     std::string newBuildOptions;
     if (buildOptions != NULL)
     {
         newBuildOptions = buildOptions;
         std::string offlineCompierOptions[] = {
-            "-cl-fp16-enable", "-cl-fp64-enable", "-cl-zero-init-local-mem-vars"
+            "-cl-fp16-enable",
+            "-cl-fp64-enable",
+            "-cl-zero-init-local-mem-vars"
         };
-        for (auto &s : offlineCompierOptions)
+        for(auto& s : offlineCompierOptions)
         {
             std::string::size_type i = newBuildOptions.find(s);
-            if (i != std::string::npos) newBuildOptions.erase(i, s.length());
+            if (i != std::string::npos)
+                newBuildOptions.erase(i, s.length());
         }
     }
     // Build program and create kernel
     return build_program_create_kernel_helper(
-        context, outProgram, outKernel, numKernelLines, kernelProgram,
-        kernelName, newBuildOptions.c_str());
+        context, outProgram, outKernel, numKernelLines, kernelProgram, kernelName, newBuildOptions.c_str()
+    );
+}
+
+// Creates OpenCL C++ program
+int create_openclcpp_program(cl_context context,
+                             cl_program *outProgram,
+                             unsigned int numKernelLines,
+                             const char **kernelProgram,
+                             const char *buildOptions)
+{
+    // Create program
+    return create_single_kernel_helper_create_program(
+        context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions, true, kSpir_v
+    );
 }
 
 // Builds OpenCL C/C++ program and creates
-int build_program_create_kernel_helper(
-    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *kernelName, const char *buildOptions)
+int build_program_create_kernel_helper(cl_context context,
+                                       cl_program *outProgram,
+                                       cl_kernel *outKernel,
+                                       unsigned int numKernelLines,
+                                       const char **kernelProgram,
+                                       const char *kernelName,
+                                       const char *buildOptions)
 {
     int error;
     /* Compile the program */
@@ -826,13 +871,13 @@
         printedSource = 1;
         log_error("Build options: %s\n", buildOptions);
         log_error("Original source is: ------------\n");
-        for (i = 0; i < numKernelLines; i++) log_error("%s", kernelProgram[i]);
+        for (i = 0; i < numKernelLines; i++)
+            log_error("%s", kernelProgram[i]);
     }
 
     // Verify the build status on all devices
     cl_uint deviceCount = 0;
-    error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES,
-                             sizeof(deviceCount), &deviceCount, NULL);
+    error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES, sizeof(deviceCount), &deviceCount, NULL);
     if (error != CL_SUCCESS)
     {
         print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
@@ -845,14 +890,13 @@
         return -1;
     }
 
-    cl_device_id *devices =
-        (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
-    if (NULL == devices) return -1;
+    cl_device_id *devices = (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
+    if (NULL == devices)
+        return -1;
     BufferOwningPtr<cl_device_id> devicesBuf(devices);
 
     memset(devices, 0, deviceCount * sizeof(cl_device_id));
-    error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES,
-                             sizeof(cl_device_id) * deviceCount, devices, NULL);
+    error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * deviceCount, devices, NULL);
     if (error != CL_SUCCESS)
     {
         print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
@@ -864,8 +908,7 @@
     for (z = 0; z < deviceCount; z++)
     {
         char deviceName[4096] = "";
-        error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName),
-                                deviceName, NULL);
+        error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
         if (error != CL_SUCCESS || deviceName[0] == '\0')
         {
             log_error("Device \"%d\" failed to return a name\n", z);
@@ -873,22 +916,17 @@
         }
 
         cl_build_status buildStatus;
-        error = clGetProgramBuildInfo(*outProgram, devices[z],
-                                      CL_PROGRAM_BUILD_STATUS,
-                                      sizeof(buildStatus), &buildStatus, NULL);
+        error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
         if (error != CL_SUCCESS)
         {
-            print_error(error,
-                        "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
+            print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
             return error;
         }
 
-        if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed
-            && deviceCount == 1)
+        if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed && deviceCount == 1)
         {
             buildFailed = true;
-            log_error("clBuildProgram returned an error, but buildStatus is "
-                      "marked as CL_BUILD_SUCCESS.\n");
+            log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
         }
 
         if (buildStatus != CL_BUILD_SUCCESS)
@@ -907,35 +945,25 @@
                 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
 
             if (buildStatus != CL_BUILD_SUCCESS)
-                log_error(
-                    "Build not successful for device \"%s\", status: %s\n",
-                    deviceName, statusString);
+                log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
             size_t paramSize = 0;
-            error = clGetProgramBuildInfo(*outProgram, devices[z],
-                                          CL_PROGRAM_BUILD_LOG, 0, NULL,
-                                          &paramSize);
+            error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_LOG, 0, NULL, &paramSize);
             if (error != CL_SUCCESS)
             {
 
-                print_error(
-                    error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
+                print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
                 return error;
             }
 
             std::string log;
             log.resize(paramSize / sizeof(char));
-            error = clGetProgramBuildInfo(*outProgram, devices[z],
-                                          CL_PROGRAM_BUILD_LOG, paramSize,
-                                          &log[0], NULL);
+            error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL);
             if (error != CL_SUCCESS || log[0] == '\0')
             {
-                log_error("Device %d (%s) failed to return a build log\n", z,
-                          deviceName);
+                log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
                 if (error)
                 {
-                    print_error(
-                        error,
-                        "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
+                    print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
                     return error;
                 }
                 else
@@ -953,8 +981,7 @@
                     log_error("%s", kernelProgram[i]);
                 printedSource = 1;
             }
-            log_error("Build log for device \"%s\" is: ------------\n",
-                      deviceName);
+            log_error("Build log for device \"%s\" is: ------------\n", deviceName);
             log_error("%s\n", log.c_str());
             log_error("\n----------\n");
             return -1;
@@ -980,70 +1007,57 @@
     return 0;
 }
 
-int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
-                                    size_t *outMaxSize, size_t *outLimits)
+int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits )
 {
     cl_device_id *devices;
     size_t size, maxCommonSize = 0;
     int numDevices, i, j, error;
-    cl_uint numDims;
+  cl_uint numDims;
     size_t outSize;
-    size_t sizeLimit[] = { 1, 1, 1 };
+  size_t sizeLimit[]={1,1,1};
 
 
     /* Assume fewer than 16 devices will be returned */
-    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &outSize);
-    test_error(error, "Unable to obtain list of devices size for context");
-    devices = (cl_device_id *)malloc(outSize);
-    BufferOwningPtr<cl_device_id> devicesBuf(devices);
+  error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize );
+  test_error( error, "Unable to obtain list of devices size for context" );
+  devices = (cl_device_id *)malloc(outSize);
+  BufferOwningPtr<cl_device_id> devicesBuf(devices);
 
-    error =
-        clGetContextInfo(context, CL_CONTEXT_DEVICES, outSize, devices, NULL);
-    test_error(error, "Unable to obtain list of devices for context");
+  error = clGetContextInfo( context, CL_CONTEXT_DEVICES, outSize, devices, NULL );
+  test_error( error, "Unable to obtain list of devices for context" );
 
-    numDevices = (int)(outSize / sizeof(cl_device_id));
+    numDevices = (int)( outSize / sizeof( cl_device_id ) );
 
-    for (i = 0; i < numDevices; i++)
+    for( i = 0; i < numDevices; i++ )
     {
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE,
-                                sizeof(size), &size, NULL);
-        test_error(error, "Unable to obtain max work group size for device");
-        if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
+        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
+        test_error( error, "Unable to obtain max work group size for device" );
+        if( size < maxCommonSize || maxCommonSize == 0)
+            maxCommonSize = size;
 
-        error = clGetKernelWorkGroupInfo(kernel, devices[i],
-                                         CL_KERNEL_WORK_GROUP_SIZE,
-                                         sizeof(size), &size, NULL);
-        test_error(
-            error,
-            "Unable to obtain max work group size for device and kernel combo");
-        if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
+        error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
+        test_error( error, "Unable to obtain max work group size for device and kernel combo" );
+        if( size < maxCommonSize  || maxCommonSize == 0)
+            maxCommonSize = size;
 
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
-                                sizeof(numDims), &numDims, NULL);
-        test_error(
-            error,
-            "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
-        sizeLimit[0] = 1;
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES,
-                                numDims * sizeof(size_t), sizeLimit, NULL);
-        test_error(error,
-                   "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
+    error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL);
+    test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
+    sizeLimit[0] = 1;
+    error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims*sizeof(size_t), sizeLimit, NULL);
+        test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
 
         if (outLimits != NULL)
         {
-            if (i == 0)
-            {
-                for (j = 0; j < 3; j++) outLimits[j] = sizeLimit[j];
-            }
-            else
-            {
-                for (j = 0; j < (int)numDims; j++)
-                {
-                    if (sizeLimit[j] < outLimits[j])
-                        outLimits[j] = sizeLimit[j];
-                }
-            }
+      if (i == 0) {
+        for (j=0; j<3; j++)
+          outLimits[j] = sizeLimit[j];
+      } else {
+        for (j=0; j<(int)numDims; j++) {
+          if (sizeLimit[j] < outLimits[j])
+            outLimits[j] = sizeLimit[j];
         }
+      }
+    }
     }
 
     *outMaxSize = (unsigned int)maxCommonSize;
@@ -1051,230 +1065,204 @@
 }
 
 
-extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
-                                                        cl_kernel kernel,
-                                                        size_t *outSize)
+extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize )
 {
-    cl_uint maxDim;
-    size_t maxWgSize;
-    size_t *maxWgSizePerDim;
-    int error;
+    cl_uint      maxDim;
+    size_t       maxWgSize;
+    size_t       *maxWgSizePerDim;
+    int          error;
 
-    error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
-                                     sizeof(size_t), &maxWgSize, NULL);
-    test_error(error,
-               "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed");
+    error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( size_t ), &maxWgSize, NULL );
+    test_error( error, "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed" );
 
-    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
-                            sizeof(cl_uint), &maxDim, NULL);
-    test_error(error,
-               "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
-    maxWgSizePerDim = (size_t *)malloc(maxDim * sizeof(size_t));
-    if (!maxWgSizePerDim)
+    error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( cl_uint ), &maxDim, NULL );
+    test_error( error, "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed" );
+    maxWgSizePerDim = (size_t*)malloc( maxDim * sizeof( size_t ) );
+    if( !maxWgSizePerDim )
     {
-        log_error("Unable to allocate maxWgSizePerDim\n");
+        log_error( "Unable to allocate maxWgSizePerDim\n" );
         return -1;
     }
 
-    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
-                            maxDim * sizeof(size_t), maxWgSizePerDim, NULL);
-    if (error != CL_SUCCESS)
+    error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, maxDim * sizeof( size_t ), maxWgSizePerDim, NULL );
+    if( error != CL_SUCCESS)
     {
-        log_error("clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n");
-        free(maxWgSizePerDim);
+        log_error( "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n" );
+        free( maxWgSizePerDim );
         return error;
     }
 
     // "maxWgSize" is limited to that of the first dimension.
-    if (maxWgSize > maxWgSizePerDim[0])
+    if( maxWgSize > maxWgSizePerDim[0] )
     {
         maxWgSize = maxWgSizePerDim[0];
     }
 
-    free(maxWgSizePerDim);
+    free( maxWgSizePerDim );
 
     *outSize = maxWgSize;
     return 0;
 }
 
 
-int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
-                                   size_t globalThreadSize, size_t *outMaxSize)
+int get_max_common_work_group_size( cl_context context, cl_kernel kernel,
+                                   size_t globalThreadSize, size_t *outMaxSize )
 {
-    size_t sizeLimit[3];
-    int error =
-        get_max_allowed_work_group_size(context, kernel, outMaxSize, sizeLimit);
-    if (error != 0) return error;
+  size_t sizeLimit[3];
+    int error = get_max_allowed_work_group_size( context, kernel, outMaxSize, sizeLimit );
+    if( error != 0 )
+        return error;
 
-    /* Now find the largest factor of globalThreadSize that is <= maxCommonSize
-     */
-    /* Note for speed, we don't need to check the range of maxCommonSize, b/c
-     once it gets to 1, the modulo test will succeed and break the loop anyway
-   */
-    for (;
-         (globalThreadSize % *outMaxSize) != 0 || (*outMaxSize > sizeLimit[0]);
-         (*outMaxSize)--)
+    /* Now find the largest factor of globalThreadSize that is <= maxCommonSize */
+    /* Note for speed, we don't need to check the range of maxCommonSize, b/c once it gets to 1,
+     the modulo test will succeed and break the loop anyway */
+    for( ; ( globalThreadSize % *outMaxSize ) != 0 || (*outMaxSize > sizeLimit[0]); (*outMaxSize)-- )
         ;
     return 0;
 }
 
-int get_max_common_2D_work_group_size(cl_context context, cl_kernel kernel,
-                                      size_t *globalThreadSizes,
-                                      size_t *outMaxSizes)
+int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel,
+                                   size_t *globalThreadSizes, size_t *outMaxSizes )
 {
-    size_t sizeLimit[3];
+  size_t sizeLimit[3];
     size_t maxSize;
-    int error =
-        get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
-    if (error != 0) return error;
+    int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
+    if( error != 0 )
+        return error;
 
-    /* Now find a set of factors, multiplied together less than maxSize, but
-       each a factor of the global sizes */
+    /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
+       sizes */
 
     /* Simple case */
-    if (globalThreadSizes[0] * globalThreadSizes[1] <= maxSize)
+    if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] <= maxSize )
     {
-        if (globalThreadSizes[0] <= sizeLimit[0]
-            && globalThreadSizes[1] <= sizeLimit[1])
-        {
-            outMaxSizes[0] = globalThreadSizes[0];
-            outMaxSizes[1] = globalThreadSizes[1];
-            return 0;
-        }
+    if (globalThreadSizes[ 0 ] <= sizeLimit[0] &&  globalThreadSizes[ 1 ] <= sizeLimit[1]) {
+      outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
+      outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
+      return 0;
+    }
     }
 
-    size_t remainingSize, sizeForThisOne;
+  size_t remainingSize, sizeForThisOne;
+  remainingSize = maxSize;
+  int i, j;
+  for (i=0 ; i<2; i++) {
+    if (globalThreadSizes[i] > remainingSize)
+      sizeForThisOne = remainingSize;
+    else
+      sizeForThisOne = globalThreadSizes[i];
+    for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ;
+    outMaxSizes[i] = sizeForThisOne;
     remainingSize = maxSize;
-    int i, j;
-    for (i = 0; i < 2; i++)
-    {
-        if (globalThreadSizes[i] > remainingSize)
-            sizeForThisOne = remainingSize;
-        else
-            sizeForThisOne = globalThreadSizes[i];
-        for (; (globalThreadSizes[i] % sizeForThisOne) != 0
-             || (sizeForThisOne > sizeLimit[i]);
-             sizeForThisOne--)
-            ;
-        outMaxSizes[i] = sizeForThisOne;
-        remainingSize = maxSize;
-        for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
-    }
+    for (j=0; j<=i; j++)
+      remainingSize /=outMaxSizes[j];
+  }
 
     return 0;
 }
 
-int get_max_common_3D_work_group_size(cl_context context, cl_kernel kernel,
-                                      size_t *globalThreadSizes,
-                                      size_t *outMaxSizes)
+int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel,
+                                      size_t *globalThreadSizes, size_t *outMaxSizes )
 {
-    size_t sizeLimit[3];
+  size_t sizeLimit[3];
     size_t maxSize;
-    int error =
-        get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
-    if (error != 0) return error;
-    /* Now find a set of factors, multiplied together less than maxSize, but
-     each a factor of the global sizes */
+    int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
+    if( error != 0 )
+        return error;
+    /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
+     sizes */
 
     /* Simple case */
-    if (globalThreadSizes[0] * globalThreadSizes[1] * globalThreadSizes[2]
-        <= maxSize)
+    if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] * globalThreadSizes[ 2 ] <= maxSize )
     {
-        if (globalThreadSizes[0] <= sizeLimit[0]
-            && globalThreadSizes[1] <= sizeLimit[1]
-            && globalThreadSizes[2] <= sizeLimit[2])
-        {
-            outMaxSizes[0] = globalThreadSizes[0];
-            outMaxSizes[1] = globalThreadSizes[1];
-            outMaxSizes[2] = globalThreadSizes[2];
-            return 0;
-        }
+    if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1] && globalThreadSizes[ 2 ] <= sizeLimit[2]) {
+      outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
+      outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
+      outMaxSizes[ 2 ] = globalThreadSizes[ 2 ];
+      return 0;
+    }
     }
 
-    size_t remainingSize, sizeForThisOne;
+  size_t remainingSize, sizeForThisOne;
+  remainingSize = maxSize;
+  int i, j;
+  for (i=0 ; i<3; i++) {
+    if (globalThreadSizes[i] > remainingSize)
+      sizeForThisOne = remainingSize;
+    else
+      sizeForThisOne = globalThreadSizes[i];
+    for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ;
+    outMaxSizes[i] = sizeForThisOne;
     remainingSize = maxSize;
-    int i, j;
-    for (i = 0; i < 3; i++)
-    {
-        if (globalThreadSizes[i] > remainingSize)
-            sizeForThisOne = remainingSize;
-        else
-            sizeForThisOne = globalThreadSizes[i];
-        for (; (globalThreadSizes[i] % sizeForThisOne) != 0
-             || (sizeForThisOne > sizeLimit[i]);
-             sizeForThisOne--)
-            ;
-        outMaxSizes[i] = sizeForThisOne;
-        remainingSize = maxSize;
-        for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
-    }
+    for (j=0; j<=i; j++)
+      remainingSize /=outMaxSizes[j];
+  }
 
     return 0;
 }
 
 /* Helper to determine if a device supports an image format */
-int is_image_format_supported(cl_context context, cl_mem_flags flags,
-                              cl_mem_object_type image_type,
-                              const cl_image_format *fmt)
+int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt )
 {
     cl_image_format *list;
     cl_uint count = 0;
-    cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
-                                            NULL, &count);
-    if (count == 0) return 0;
+    cl_int err = clGetSupportedImageFormats( context, flags, image_type, 128, NULL, &count );
+    if( count == 0 )
+        return 0;
 
-    list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
-    if (NULL == list)
+    list = (cl_image_format*) malloc( count * sizeof( cl_image_format ) );
+    if( NULL == list )
     {
-        log_error("Error: unable to allocate %ld byte buffer for image format "
-                  "list at %s:%d (err = %d)\n",
-                  count * sizeof(cl_image_format), __FILE__, __LINE__, err);
+        log_error( "Error: unable to allocate %ld byte buffer for image format list at %s:%d (err = %d)\n", count * sizeof( cl_image_format ), __FILE__, __LINE__,  err );
         return 0;
     }
     BufferOwningPtr<cl_image_format> listBuf(list);
 
 
-    cl_int error = clGetSupportedImageFormats(context, flags, image_type, count,
-                                              list, NULL);
-    if (error)
+    cl_int error = clGetSupportedImageFormats( context, flags, image_type, count, list, NULL );
+    if( error )
     {
-        log_error("Error: failed to obtain supported image type list at %s:%d "
-                  "(err = %d)\n",
-                  __FILE__, __LINE__, err);
+        log_error( "Error: failed to obtain supported image type list at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
         return 0;
     }
 
     // iterate looking for a match.
     cl_uint i;
-    for (i = 0; i < count; i++)
+    for( i = 0; i < count; i++ )
     {
-        if (fmt->image_channel_data_type == list[i].image_channel_data_type
-            && fmt->image_channel_order == list[i].image_channel_order)
+        if( fmt->image_channel_data_type == list[ i ].image_channel_data_type &&
+            fmt->image_channel_order == list[ i ].image_channel_order )
             break;
     }
 
-    return (i < count) ? 1 : 0;
+    return ( i < count ) ? 1 : 0;
 }
 
-size_t get_pixel_bytes(const cl_image_format *fmt);
-size_t get_pixel_bytes(const cl_image_format *fmt)
+size_t get_pixel_bytes( const cl_image_format *fmt );
+size_t get_pixel_bytes( const cl_image_format *fmt )
 {
     size_t chanCount;
-    switch (fmt->image_channel_order)
+    switch( fmt->image_channel_order )
     {
         case CL_R:
         case CL_A:
         case CL_Rx:
         case CL_INTENSITY:
         case CL_LUMINANCE:
-        case CL_DEPTH: chanCount = 1; break;
+        case CL_DEPTH:
+            chanCount = 1;
+            break;
         case CL_RG:
         case CL_RA:
-        case CL_RGx: chanCount = 2; break;
+        case CL_RGx:
+            chanCount = 2;
+            break;
         case CL_RGB:
         case CL_RGBx:
         case CL_sRGB:
-        case CL_sRGBx: chanCount = 3; break;
+        case CL_sRGBx:
+            chanCount = 3;
+            break;
         case CL_RGBA:
         case CL_ARGB:
         case CL_BGRA:
@@ -1289,73 +1277,74 @@
             chanCount = 4;
             break;
         default:
-            log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__);
+            log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__ );
             abort();
             break;
     }
 
-    switch (fmt->image_channel_data_type)
+    switch( fmt->image_channel_data_type )
     {
-        case CL_UNORM_SHORT_565:
-        case CL_UNORM_SHORT_555: return 2;
+          case CL_UNORM_SHORT_565:
+          case CL_UNORM_SHORT_555:
+            return 2;
 
-        case CL_UNORM_INT_101010: return 4;
+          case CL_UNORM_INT_101010:
+            return 4;
 
-        case CL_SNORM_INT8:
-        case CL_UNORM_INT8:
-        case CL_SIGNED_INT8:
-        case CL_UNSIGNED_INT8: return chanCount;
+          case CL_SNORM_INT8:
+          case CL_UNORM_INT8:
+          case CL_SIGNED_INT8:
+          case CL_UNSIGNED_INT8:
+            return chanCount;
 
-        case CL_SNORM_INT16:
-        case CL_UNORM_INT16:
-        case CL_HALF_FLOAT:
-        case CL_SIGNED_INT16:
-        case CL_UNSIGNED_INT16:
+          case CL_SNORM_INT16:
+          case CL_UNORM_INT16:
+          case CL_HALF_FLOAT:
+          case CL_SIGNED_INT16:
+          case CL_UNSIGNED_INT16:
 #ifdef CL_SFIXED14_APPLE
-        case CL_SFIXED14_APPLE:
+          case CL_SFIXED14_APPLE:
 #endif
             return chanCount * 2;
 
-        case CL_SIGNED_INT32:
-        case CL_UNSIGNED_INT32:
-        case CL_FLOAT: return chanCount * 4;
+          case CL_SIGNED_INT32:
+          case CL_UNSIGNED_INT32:
+          case CL_FLOAT:
+            return chanCount * 4;
 
         default:
-            log_error("Unknown channel data type at %s:%d!\n", __FILE__,
-                      __LINE__);
+            log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__ );
             abort();
     }
 
     return 0;
 }
 
-test_status verifyImageSupport(cl_device_id device)
+test_status verifyImageSupport( cl_device_id device )
 {
-    int result = checkForImageSupport(device);
-    if (result == 0)
+    int result = checkForImageSupport( device );
+    if( result == 0 )
     {
         return TEST_PASS;
     }
-    if (result == CL_IMAGE_FORMAT_NOT_SUPPORTED)
+    if( result == CL_IMAGE_FORMAT_NOT_SUPPORTED )
     {
-        log_error("SKIPPED: Device does not supported images as required by "
-                  "this test!\n");
+        log_error( "SKIPPED: Device does not supported images as required by this test!\n" );
         return TEST_SKIP;
     }
     return TEST_FAIL;
 }
 
-int checkForImageSupport(cl_device_id device)
+int checkForImageSupport( cl_device_id device )
 {
     cl_uint i;
     int error;
 
 
     /* Check the device props to see if images are supported at all first */
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
-    test_error(error, "Unable to query device for image support");
-    if (i == 0)
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
+    test_error( error, "Unable to query device for image support" );
+    if( i == 0 )
     {
         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
     }
@@ -1364,78 +1353,37 @@
     return 0;
 }
 
-int checkFor3DImageSupport(cl_device_id device)
+int checkFor3DImageSupport( cl_device_id device )
 {
     cl_uint i;
     int error;
 
     /* Check the device props to see if images are supported at all first */
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
-    test_error(error, "Unable to query device for image support");
-    if (i == 0)
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
+    test_error( error, "Unable to query device for image support" );
+    if( i == 0 )
     {
         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
     }
 
     char profile[128];
-    error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile,
-                            NULL);
-    test_error(error, "Unable to query device for CL_DEVICE_PROFILE");
-    if (0 == strcmp(profile, "EMBEDDED_PROFILE"))
+    error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile ), profile, NULL );
+    test_error( error, "Unable to query device for CL_DEVICE_PROFILE" );
+    if( 0 == strcmp( profile, "EMBEDDED_PROFILE" ) )
     {
         size_t width = -1L;
         size_t height = -1L;
         size_t depth = -1L;
-        error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH,
-                                sizeof(width), &width, NULL);
-        test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH");
-        error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
-                                sizeof(height), &height, NULL);
-        test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT");
-        error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH,
-                                sizeof(depth), &depth, NULL);
-        test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH");
+        error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(width), &width, NULL );
+        test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" );
+        error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(height), &height, NULL );
+        test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT" );
+        error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(depth), &depth, NULL );
+        test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH" );
 
-        if (0 == (height | width | depth)) return CL_IMAGE_FORMAT_NOT_SUPPORTED;
-    }
-
-    /* So our support is good */
-    return 0;
-}
-
-int checkForReadWriteImageSupport(cl_device_id device)
-{
-    if (checkForImageSupport(device))
-    {
-        return CL_IMAGE_FORMAT_NOT_SUPPORTED;
-    }
-
-    auto device_cl_version = get_device_cl_version(device);
-    if (device_cl_version >= Version(3, 0))
-    {
-        // In OpenCL 3.0, Read-Write images are optional.
-        // Check if they are supported.
-        cl_uint are_rw_images_supported{};
-        test_error(
-            clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
-                            sizeof(are_rw_images_supported),
-                            &are_rw_images_supported, nullptr),
-            "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
-        if (0 == are_rw_images_supported)
-        {
-            log_info("READ_WRITE_IMAGE tests skipped, not supported.\n");
+        if( 0 == (height | width | depth ))
             return CL_IMAGE_FORMAT_NOT_SUPPORTED;
-        }
     }
-    // READ_WRITE images are not supported on 1.X devices.
-    else if (device_cl_version < Version(2, 0))
-    {
-        log_info("READ_WRITE_IMAGE tests skipped, Opencl 2.0+ is requried.");
-        return CL_IMAGE_FORMAT_NOT_SUPPORTED;
-    }
-    // Support for read-write image arguments is required
-    // for an 2.X device if the device supports images.
 
     /* So our support is good */
     return 0;
@@ -1445,43 +1393,51 @@
 {
     static cl_uint align_size = 0;
 
-    if (0 == align_size)
+    if( 0 == align_size )
     {
-        cl_device_id *devices;
+        cl_device_id * devices;
         size_t devices_size = 0;
         cl_uint result = 0;
         cl_int error;
         int i;
 
-        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
-                                 &devices_size);
+        error = clGetContextInfo (context,
+                                  CL_CONTEXT_DEVICES,
+                                  0,
+                                  NULL,
+                                  &devices_size);
         test_error_ret(error, "clGetContextInfo failed", 0);
 
-        devices = (cl_device_id *)malloc(devices_size);
-        if (devices == NULL)
-        {
-            print_error(error, "malloc failed");
+        devices = (cl_device_id*)malloc(devices_size);
+        if (devices == NULL) {
+            print_error( error, "malloc failed" );
             return 0;
         }
 
-        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size,
-                                 (void *)devices, NULL);
+        error = clGetContextInfo (context,
+                                  CL_CONTEXT_DEVICES,
+                                  devices_size,
+                                  (void*)devices,
+                                  NULL);
         test_error_ret(error, "clGetContextInfo failed", 0);
 
-        for (i = 0; i < (int)(devices_size / sizeof(cl_device_id)); i++)
+        for (i = 0; i < (int)(devices_size/sizeof(cl_device_id)); i++)
         {
             cl_uint alignment = 0;
 
-            error = clGetDeviceInfo(devices[i], CL_DEVICE_MEM_BASE_ADDR_ALIGN,
-                                    sizeof(cl_uint), (void *)&alignment, NULL);
+            error = clGetDeviceInfo (devices[i],
+                                     CL_DEVICE_MEM_BASE_ADDR_ALIGN,
+                                     sizeof(cl_uint),
+                                     (void*)&alignment,
+                                     NULL);
 
             if (error == CL_SUCCESS)
             {
-                alignment >>= 3; // convert bits to bytes
+                alignment >>= 3;    // convert bits to bytes
                 result = (alignment > result) ? alignment : result;
             }
             else
-                print_error(error, "clGetDeviceInfo failed");
+                print_error( error, "clGetDeviceInfo failed" );
         }
 
         align_size = result;
@@ -1491,291 +1447,59 @@
     return align_size;
 }
 
-cl_device_fp_config get_default_rounding_mode(cl_device_id device)
+cl_device_fp_config get_default_rounding_mode( cl_device_id device )
 {
     char profileStr[128] = "";
     cl_device_fp_config single = 0;
-    int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
-                                sizeof(single), &single, NULL);
-    if (error)
-        test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG",
-                       0);
+    int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
+    if( error )
+        test_error_ret( error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0 );
 
-    if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;
+    if( single & CL_FP_ROUND_TO_NEAREST )
+        return CL_FP_ROUND_TO_NEAREST;
 
-    if (0 == (single & CL_FP_ROUND_TO_ZERO))
-        test_error_ret(-1,
-                       "FAILURE: device must support either "
-                       "CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST",
-                       0);
+    if( 0 == (single & CL_FP_ROUND_TO_ZERO) )
+        test_error_ret( -1, "FAILURE: device must support either CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST", 0 );
 
     // Make sure we are an embedded device before allowing a pass
-    if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileStr),
-                                 &profileStr, NULL)))
-        test_error_ret(error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0);
+    if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ) ))
+        test_error_ret( error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0 );
 
-    if (strcmp(profileStr, "EMBEDDED_PROFILE"))
-        test_error_ret(error,
-                       "FAILURE: non-EMBEDDED_PROFILE devices must support "
-                       "CL_FP_ROUND_TO_NEAREST",
-                       0);
+    if( strcmp( profileStr, "EMBEDDED_PROFILE" ) )
+        test_error_ret( error, "FAILURE: non-EMBEDDED_PROFILE devices must support CL_FP_ROUND_TO_NEAREST", 0 );
 
     return CL_FP_ROUND_TO_ZERO;
 }
 
-int checkDeviceForQueueSupport(cl_device_id device,
-                               cl_command_queue_properties prop)
+int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop )
 {
     cl_command_queue_properties realProps;
-    cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
-                                   sizeof(realProps), &realProps, NULL);
-    test_error_ret(error, "FAILURE: Unable to get device queue properties", 0);
+    cl_int error = clGetDeviceInfo( device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof( realProps ), &realProps, NULL );
+    test_error_ret( error, "FAILURE: Unable to get device queue properties", 0 );
 
-    return (realProps & prop) ? 1 : 0;
+    return ( realProps & prop ) ? 1 : 0;
 }
 
-int printDeviceHeader(cl_device_id device)
+int printDeviceHeader( cl_device_id device )
 {
-    char deviceName[512], deviceVendor[512], deviceVersion[512],
-        cLangVersion[512];
+    char deviceName[ 512 ], deviceVendor[ 512 ], deviceVersion[ 512 ], cLangVersion[ 512 ];
     int error;
 
-    error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName),
-                            deviceName, NULL);
-    test_error(error, "Unable to get CL_DEVICE_NAME for device");
+    error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( deviceName ), deviceName, NULL );
+    test_error( error, "Unable to get CL_DEVICE_NAME for device" );
 
-    error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(deviceVendor),
-                            deviceVendor, NULL);
-    test_error(error, "Unable to get CL_DEVICE_VENDOR for device");
+    error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( deviceVendor ), deviceVendor, NULL );
+    test_error( error, "Unable to get CL_DEVICE_VENDOR for device" );
 
-    error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(deviceVersion),
-                            deviceVersion, NULL);
-    test_error(error, "Unable to get CL_DEVICE_VERSION for device");
+    error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( deviceVersion ), deviceVersion, NULL );
+    test_error( error, "Unable to get CL_DEVICE_VERSION for device" );
 
-    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
-                            sizeof(cLangVersion), cLangVersion, NULL);
-    test_error(error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device");
+    error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( cLangVersion ), cLangVersion, NULL );
+    test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" );
 
-    log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute "
-             "Device Version = %s%s%s\n",
-             deviceName, deviceVendor, deviceVersion,
-             (error == CL_SUCCESS) ? ", CL C Version = " : "",
-             (error == CL_SUCCESS) ? cLangVersion : "");
-
-    auto version = get_device_cl_version(device);
-    if (version >= Version(3, 0))
-    {
-        auto ctsVersion = get_device_info_string(
-            device, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED);
-        log_info("Device latest conformance version passed: %s\n",
-                 ctsVersion.c_str());
-    }
+    log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n",
+             deviceName, deviceVendor, deviceVersion, ( error == CL_SUCCESS ) ? ", CL C Version = " : "",
+             ( error == CL_SUCCESS ) ? cLangVersion : "" );
 
     return CL_SUCCESS;
 }
-
-Version get_device_cl_c_version(cl_device_id device)
-{
-    auto device_cl_version = get_device_cl_version(device);
-
-    // The second special case is OpenCL-1.0 where CL_DEVICE_OPENCL_C_VERSION
-    // did not exist, but since this is just the first version we can
-    // return 1.0.
-    if (device_cl_version == Version{ 1, 0 })
-    {
-        return Version{ 1, 0 };
-    }
-
-    // Otherwise we know we have a 1.1 <= device_version <= 2.0 where all CL C
-    // versions are backwards compatible, hence querying with the
-    // CL_DEVICE_OPENCL_C_VERSION query must return the most recent supported
-    // OpenCL C version.
-    size_t opencl_c_version_size_in_bytes{};
-    auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr,
-                                 &opencl_c_version_size_in_bytes);
-    test_error_ret(error,
-                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
-                   (Version{ -1, 0 }));
-
-    std::string opencl_c_version(opencl_c_version_size_in_bytes, '\0');
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
-                        opencl_c_version.size(), &opencl_c_version[0], nullptr);
-
-    test_error_ret(error,
-                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
-                   (Version{ -1, 0 }));
-
-    // Scrape out the major, minor pair from the string.
-    auto major = opencl_c_version[opencl_c_version.find('.') - 1];
-    auto minor = opencl_c_version[opencl_c_version.find('.') + 1];
-
-    return Version{ major - '0', minor - '0' };
-}
-
-Version get_device_latest_cl_c_version(cl_device_id device)
-{
-    auto device_cl_version = get_device_cl_version(device);
-
-    // If the device version >= 3.0 it must support the
-    // CL_DEVICE_OPENCL_C_ALL_VERSIONS query from which we can extract the most
-    // recent CL C version supported by the device.
-    if (device_cl_version >= Version{ 3, 0 })
-    {
-        size_t opencl_c_all_versions_size_in_bytes{};
-        auto error =
-            clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
-                            &opencl_c_all_versions_size_in_bytes);
-        test_error_ret(
-            error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
-            (Version{ -1, 0 }));
-        std::vector<cl_name_version> name_versions(
-            opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
-        error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
-                                opencl_c_all_versions_size_in_bytes,
-                                name_versions.data(), nullptr);
-        test_error_ret(
-            error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
-            (Version{ -1, 0 }));
-
-        Version max_supported_cl_c_version{};
-        for (const auto &name_version : name_versions)
-        {
-            Version current_version{ CL_VERSION_MAJOR(name_version.version),
-                                     CL_VERSION_MINOR(name_version.version) };
-            max_supported_cl_c_version =
-                (current_version > max_supported_cl_c_version)
-                ? current_version
-                : max_supported_cl_c_version;
-        }
-        return max_supported_cl_c_version;
-    }
-
-    return get_device_cl_c_version(device);
-}
-
-Version get_max_OpenCL_C_for_context(cl_context context)
-{
-    // Get all the devices in the context and find the maximum
-    // universally supported OpenCL C version.
-    size_t devices_size_in_bytes{};
-    auto error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr,
-                                  &devices_size_in_bytes);
-    test_error_ret(error, "clGetDeviceInfo failed for CL_CONTEXT_DEVICES",
-                   (Version{ -1, 0 }));
-    std::vector<cl_device_id> devices(devices_size_in_bytes
-                                      / sizeof(cl_device_id));
-    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size_in_bytes,
-                             devices.data(), nullptr);
-    auto current_version = get_device_latest_cl_c_version(devices[0]);
-    std::for_each(std::next(devices.begin()), devices.end(),
-                  [&current_version](cl_device_id device) {
-                      auto device_version =
-                          get_device_latest_cl_c_version(device);
-                      // OpenCL 3.0 is not backwards compatible with 2.0.
-                      // If we have 3.0 and 2.0 in the same driver we
-                      // use 1.2.
-                      if (((device_version >= Version(2, 0)
-                            && device_version < Version(3, 0))
-                           && current_version >= Version(3, 0))
-                          || (device_version >= Version(3, 0)
-                              && (current_version >= Version(2, 0)
-                                  && current_version < Version(3, 0))))
-                      {
-                          current_version = Version(1, 2);
-                      }
-                      else
-                      {
-                          current_version =
-                              (std::min)(device_version, current_version);
-                      }
-                  });
-    return current_version;
-}
-
-bool device_supports_cl_c_version(cl_device_id device, Version version)
-{
-    auto device_cl_version = get_device_cl_version(device);
-
-    // In general, a device does not support an OpenCL C version if it is <=
-    // CL_DEVICE_OPENCL_C_VERSION AND it does not appear in the
-    // CL_DEVICE_OPENCL_C_ALL_VERSIONS query.
-
-    // If the device version >= 3.0 it must support the
-    // CL_DEVICE_OPENCL_C_ALL_VERSIONS query, and the version of OpenCL C being
-    // used must appear in the query result if it's <=
-    // CL_DEVICE_OPENCL_C_VERSION.
-    if (device_cl_version >= Version{ 3, 0 })
-    {
-        size_t opencl_c_all_versions_size_in_bytes{};
-        auto error =
-            clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
-                            &opencl_c_all_versions_size_in_bytes);
-        test_error_ret(
-            error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
-            (false));
-        std::vector<cl_name_version> name_versions(
-            opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
-        error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
-                                opencl_c_all_versions_size_in_bytes,
-                                name_versions.data(), nullptr);
-        test_error_ret(
-            error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
-            (false));
-
-        for (const auto &name_version : name_versions)
-        {
-            Version current_version{ CL_VERSION_MAJOR(name_version.version),
-                                     CL_VERSION_MINOR(name_version.version) };
-            if (current_version == version)
-            {
-                return true;
-            }
-        }
-    }
-
-    return version <= get_device_cl_c_version(device);
-}
-
-bool poll_until(unsigned timeout_ms, unsigned interval_ms,
-                std::function<bool()> fn)
-{
-    unsigned time_spent_ms = 0;
-    bool ret = false;
-
-    while (time_spent_ms < timeout_ms)
-    {
-        ret = fn();
-        if (ret)
-        {
-            break;
-        }
-        usleep(interval_ms * 1000);
-        time_spent_ms += interval_ms;
-    }
-
-    return ret;
-}
-
-bool device_supports_double(cl_device_id device)
-{
-    if (is_extension_available(device, "cl_khr_fp64"))
-    {
-        return true;
-    }
-    else
-    {
-        cl_device_fp_config double_fp_config;
-        cl_int err = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
-                                     sizeof(double_fp_config),
-                                     &double_fp_config, nullptr);
-        test_error(err,
-                   "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed");
-        return double_fp_config != 0;
-    }
-}
-
-bool device_supports_half(cl_device_id device)
-{
-    return is_extension_available(device, "cl_khr_fp16");
-}

diff --git a/test_common/harness/kernelHelpers.h b/test_common/harness/kernelHelpers.h
index 4d8f2a8..e97ec1e 100644
--- a/test_common/harness/kernelHelpers.h
+++ b/test_common/harness/kernelHelpers.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -25,26 +25,23 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#if defined(__MINGW32__)
+#if defined (__MINGW32__)
 #include <malloc.h>
 #endif
 
 #include <string.h>
 
 #ifdef __APPLE__
-#include <OpenCL/opencl.h>
+    #include <OpenCL/opencl.h>
 #else
-#include <CL/opencl.h>
+    #include <CL/opencl.h>
 #endif
 
 #include "deviceInfo.h"
 #include "harness/alloc.h"
 
-#include <functional>
-
 /*
- *  The below code is intended to be used at the top of kernels that appear
- * inline in files to set line and file info for the kernel:
+ *  The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel:
  *
  *  const char *source = {
  *      INIT_OPENCL_DEBUG_INFO
@@ -54,164 +51,126 @@
  *      "}\n"
  *  };
  */
-#define INIT_OPENCL_DEBUG_INFO SET_OPENCL_LINE_INFO(__LINE__, __FILE__)
-#define SET_OPENCL_LINE_INFO(_line, _file)                                     \
-    "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
+#define INIT_OPENCL_DEBUG_INFO                      SET_OPENCL_LINE_INFO( __LINE__, __FILE__ )
+#define SET_OPENCL_LINE_INFO(_line, _file)          "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
 #ifndef STRINGIFY_VALUE
-#define STRINGIFY_VALUE(_x) STRINGIFY(_x)
+    #define STRINGIFY_VALUE(_x)                     STRINGIFY(_x)
 #endif
 #ifndef STRINGIFY
-#define STRINGIFY(_x) #_x
+    #define STRINGIFY(_x)                           #_x
 #endif
 
 const int MAX_LEN_FOR_KERNEL_LIST = 20;
 
-/* Helper that creates a single program and kernel from a single-kernel program
- * source */
-extern int
-create_single_kernel_helper(cl_context context, cl_program *outProgram,
-                            cl_kernel *outKernel, unsigned int numKernelLines,
-                            const char **kernelProgram, const char *kernelName,
-                            const char *buildOptions = NULL);
+/* Helper that creates a single program and kernel from a single-kernel program source */
+extern int create_single_kernel_helper(cl_context context, 
+                                       cl_program *outProgram,
+                                       cl_kernel *outKernel,
+                                       unsigned int numKernelLines,
+                                       const char **kernelProgram,
+                                       const char *kernelName,
+                                       const char *buildOptions = NULL,
+                                       const bool openclCXX = false);
 
-extern int create_single_kernel_helper_with_build_options(
-    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *kernelName, const char *buildOptions);
+extern int create_single_kernel_helper_with_build_options(cl_context context, 
+                                                          cl_program *outProgram,
+                                                          cl_kernel *outKernel,
+                                                          unsigned int numKernelLines,
+                                                          const char **kernelProgram,
+                                                          const char *kernelName,
+                                                          const char *buildOptions,
+                                                          const bool openclCXX = false);
 
-extern int create_single_kernel_helper_create_program(
-    cl_context context, cl_program *outProgram, unsigned int numKernelLines,
-    const char **kernelProgram, const char *buildOptions = NULL);
+extern int create_single_kernel_helper_create_program(cl_context context, 
+                                                      cl_program *outProgram,
+                                                      unsigned int numKernelLines,
+                                                      const char **kernelProgram,
+                                                      const char *buildOptions = NULL,
+                                                      const bool openclCXX = false);
+                                                      
+extern int create_single_kernel_helper_create_program_for_device(cl_context context,
+                                                                 cl_device_id device,
+                                                                 cl_program *outProgram,
+                                                                 unsigned int numKernelLines,
+                                                                 const char **kernelProgram,
+                                                                 const char *buildOptions = NULL,
+                                                                 const bool openclCXX = false);
 
-extern int create_single_kernel_helper_create_program_for_device(
-    cl_context context, cl_device_id device, cl_program *outProgram,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *buildOptions = NULL);
-
-/* Creates OpenCL C++ program. This one must be used for creating OpenCL C++
- * program. */
-extern int create_openclcpp_program(cl_context context, cl_program *outProgram,
+/* Creates OpenCL C++ program. This one must be used for creating OpenCL C++ program. */
+extern int create_openclcpp_program(cl_context context, 
+                                    cl_program *outProgram,
                                     unsigned int numKernelLines,
                                     const char **kernelProgram,
                                     const char *buildOptions = NULL);
 
 /* Builds program (outProgram) and creates one kernel */
-int build_program_create_kernel_helper(
-    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *kernelName, const char *buildOptions = NULL);
+int build_program_create_kernel_helper(cl_context context,
+                                       cl_program *outProgram,
+                                       cl_kernel *outKernel,
+                                       unsigned int numKernelLines,
+                                       const char **kernelProgram,
+                                       const char *kernelName,
+                                       const char *buildOptions = NULL);
 
-/* Helper to obtain the biggest fit work group size for all the devices in a
- * given group and for the given global thread size */
-extern int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
-                                          size_t globalThreadSize,
-                                          size_t *outSize);
+/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
+extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize );
 
-/* Helper to obtain the biggest fit work group size for all the devices in a
- * given group and for the given global thread size */
-extern int get_max_common_2D_work_group_size(cl_context context,
-                                             cl_kernel kernel,
-                                             size_t *globalThreadSize,
-                                             size_t *outSizes);
+/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
+extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
 
-/* Helper to obtain the biggest fit work group size for all the devices in a
- * given group and for the given global thread size */
-extern int get_max_common_3D_work_group_size(cl_context context,
-                                             cl_kernel kernel,
-                                             size_t *globalThreadSize,
-                                             size_t *outSizes);
+/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
+extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
 
-/* Helper to obtain the biggest allowed work group size for all the devices in a
- * given group */
-extern int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
-                                           size_t *outSize, size_t *outLimits);
+/* Helper to obtain the biggest allowed work group size for all the devices in a given group */
+extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits );
 
 /* Helper to obtain the biggest allowed 1D work group size on a given device */
-extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
-                                                        cl_kernel kernel,
-                                                        size_t *outSize);
+extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize );
 
 /* Helper to determine if a device supports an image format */
-extern int is_image_format_supported(cl_context context, cl_mem_flags flags,
-                                     cl_mem_object_type image_type,
-                                     const cl_image_format *fmt);
+extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt );
 
 /* Helper to get pixel size for a pixel format */
-size_t get_pixel_bytes(const cl_image_format *fmt);
+size_t get_pixel_bytes( const cl_image_format *fmt );
 
 /* Verify the given device supports images. */
-extern test_status verifyImageSupport(cl_device_id device);
+extern test_status verifyImageSupport( cl_device_id device );
 
-/* Checks that the given device supports images. Same as verify, but doesn't
- * print an error */
-extern int checkForImageSupport(cl_device_id device);
-extern int checkFor3DImageSupport(cl_device_id device);
-extern int checkForReadWriteImageSupport(cl_device_id device);
+/* Checks that the given device supports images. Same as verify, but doesn't print an error */
+extern int checkForImageSupport( cl_device_id device );
+extern int checkFor3DImageSupport( cl_device_id device );
 
-/* Checks that a given queue property is supported on the specified device.
- * Returns 1 if supported, 0 if not or an error. */
-extern int checkDeviceForQueueSupport(cl_device_id device,
-                                      cl_command_queue_properties prop);
+/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */
+extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop );
 
-/* Helper to obtain the min alignment for a given context, i.e the max of all
- * min alignments for devices attached to the context*/
+/* Helper to obtain the min alignment for a given context, i.e the max of all min alignments for devices attached to the context*/
 size_t get_min_alignment(cl_context context);
 
-/* Helper to obtain the default rounding mode for single precision computation.
- * (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
-cl_device_fp_config get_default_rounding_mode(cl_device_id device);
+/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
+cl_device_fp_config get_default_rounding_mode( cl_device_id device );
 
-#define PASSIVE_REQUIRE_IMAGE_SUPPORT(device)                                  \
-    if (checkForImageSupport(device))                                          \
-    {                                                                          \
-        log_info(                                                              \
-            "\n\tNote: device does not support images. Skipping test...\n");   \
-        return TEST_SKIPPED_ITSELF;                                            \
+#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device )    \
+    if( checkForImageSupport( device ) )    \
+    {    \
+        log_info( "\n\tNote: device does not support images. Skipping test...\n" );    \
+        return 0;    \
     }
 
-#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)                               \
-    if (checkFor3DImageSupport(device))                                        \
-    {                                                                          \
-        log_info("\n\tNote: device does not support 3D images. Skipping "      \
-                 "test...\n");                                                 \
-        return TEST_SKIPPED_ITSELF;                                            \
+#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )    \
+    if( checkFor3DImageSupport( device ) )    \
+    {    \
+        log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" );    \
+        return 0;    \
     }
 
-#define PASSIVE_REQUIRE_FP16_SUPPORT(device)                                   \
-    if (!device_supports_half(device))                                         \
-    {                                                                          \
-        log_info(                                                              \
-            "\n\tNote: device does not support fp16. Skipping test...\n");     \
-        return TEST_SKIPPED_ITSELF;                                            \
+#define PASSIVE_REQUIRE_FP16_SUPPORT(device)                            \
+    if (!is_extension_available(device, "cl_khr_fp16"))                 \
+    {                                                                   \
+        log_info("\n\tNote: device does not support fp16. Skipping test...\n"); \
+        return 0;                                                       \
     }
 
-/* Prints out the standard device header for all tests given the device to print
- * for */
-extern int printDeviceHeader(cl_device_id device);
-
-// Execute the CL_DEVICE_OPENCL_C_VERSION query and return the OpenCL C version
-// is supported by the device.
-Version get_device_cl_c_version(cl_device_id device);
-
-// Gets the latest (potentially non-backward compatible) OpenCL C version
-// supported by the device.
-Version get_device_latest_cl_c_version(cl_device_id device);
-
-// Gets the maximum universally supported OpenCL C version in a context, i.e.
-// the OpenCL C version supported by all devices in a context.
-Version get_max_OpenCL_C_for_context(cl_context context);
-
-// Checks whether a particular OpenCL C version is supported by the device.
-bool device_supports_cl_c_version(cl_device_id device, Version version);
-
-// Poll fn every interval_ms until timeout_ms or it returns true
-bool poll_until(unsigned timeout_ms, unsigned interval_ms,
-                std::function<bool()> fn);
-
-// Checks whether the device supports double data types
-bool device_supports_double(cl_device_id device);
-
-// Checks whether the device supports half data types
-bool device_supports_half(cl_device_id device);
+/* Prints out the standard device header for all tests given the device to print for */
+extern int printDeviceHeader( cl_device_id device );
 
 #endif // _kernelHelpers_h

diff --git a/test_common/harness/mingw_compat.c b/test_common/harness/mingw_compat.c
index 5b38472..54c4463 100644
--- a/test_common/harness/mingw_compat.c
+++ b/test_common/harness/mingw_compat.c

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -19,44 +19,41 @@
 #include <stdio.h>
 #include <string.h>
 
-// This function is unavailable on various mingw compilers,
-// especially 64 bit so implementing it here
-const char *basename_dot = ".";
-char *basename(char *path)
+//This function is unavailable on various mingw compilers,
+//especially 64 bit so implementing it here
+const char *basename_dot=".";
+char*
+basename(char *path)
 {
     char *p = path, *b = NULL;
     int len = strlen(path);
 
-    if (path == NULL)
-    {
-        return (char *)basename_dot;
+    if (path == NULL) {
+        return (char*)basename_dot;
     }
 
     // Not absolute path on windows
-    if (path[1] != ':')
-    {
+    if (path[1] != ':') {
         return path;
     }
 
     // Trim trailing path seperators
-    if (path[len - 1] == '\\' || path[len - 1] == '/')
-    {
+    if (path[len - 1]  == '\\' ||
+        path[len - 1]  == '/' ) {
         len--;
         path[len] = '\0';
     }
 
-    while (len)
-    {
-        while ((*p != '\\' || *p != '/') && len)
-        {
+    while (len) {
+        while((*p != '\\' || *p != '/')  && len) {
             p++;
             len--;
         }
         p++;
         b = p;
-    }
+     }
 
-    return b;
+     return b;
 }
 
 #endif
\ No newline at end of file

diff --git a/test_common/harness/mingw_compat.h b/test_common/harness/mingw_compat.h
index a509c75..ab28f39 100644
--- a/test_common/harness/mingw_compat.h
+++ b/test_common/harness/mingw_compat.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -21,7 +21,7 @@
 #include <malloc.h>
 
 #if defined(__MINGW64__)
-// mingw-w64 doesnot have __mingw_aligned_malloc, instead it has _aligned_malloc
+//mingw-w64 doesnot have __mingw_aligned_malloc, instead it has _aligned_malloc
 #define __mingw_aligned_malloc _aligned_malloc
 #define __mingw_aligned_free _aligned_free
 #include <stddef.h>

diff --git a/test_common/harness/msvc9.c b/test_common/harness/msvc9.c
index 29b45d6..1c0cf2b 100644
--- a/test_common/harness/msvc9.c
+++ b/test_common/harness/msvc9.c

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -15,7 +15,7 @@
 //
 #include "compat.h"
 
-#if defined(_MSC_VER)
+#if defined ( _MSC_VER )
 
 #include <limits.h>
 #include <stdlib.h>
@@ -24,7 +24,7 @@
 
 #include <windows.h>
 
-#if _MSC_VER < 1900 && !defined(__INTEL_COMPILER)
+#if _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
 
 ///////////////////////////////////////////////////////////////////
 //
@@ -32,12 +32,9 @@
 //
 ///////////////////////////////////////////////////////////////////
 
-float copysignf(float x, float y)
+float copysignf( float x, float y )
 {
-    union {
-        cl_uint u;
-        float f;
-    } ux, uy;
+    union{ cl_uint u; float f; }ux, uy;
 
     ux.f = x;
     uy.f = y;
@@ -47,12 +44,9 @@
     return ux.f;
 }
 
-double copysign(double x, double y)
+double copysign( double x, double y )
 {
-    union {
-        cl_ulong u;
-        double f;
-    } ux, uy;
+    union{ cl_ulong u; double f; }ux, uy;
 
     ux.f = x;
     uy.f = y;
@@ -62,16 +56,13 @@
     return ux.f;
 }
 
-long double copysignl(long double x, long double y)
+long double copysignl( long double x, long double y )
 {
-    union {
+    union
+    {
         long double f;
-        struct
-        {
-            cl_ulong m;
-            cl_ushort sexp;
-        } u;
-    } ux, uy;
+        struct{ cl_ulong m; cl_ushort sexp; }u;
+    }ux, uy;
 
     ux.f = x;
     uy.f = y;
@@ -85,12 +76,12 @@
 {
     float absx = fabsf(x);
 
-    if (absx < 8388608.0f /* 0x1.0p23f */)
+    if( absx < 8388608.0f /* 0x1.0p23f */ )
     {
-        float magic = copysignf(8388608.0f /* 0x1.0p23f */, x);
+        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
         float rounded = x + magic;
         rounded -= magic;
-        x = copysignf(rounded, x);
+        x = copysignf( rounded, x );
     }
 
     return x;
@@ -100,12 +91,12 @@
 {
     double absx = fabs(x);
 
-    if (absx < 4503599627370496.0 /* 0x1.0p52f */)
+    if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
     {
-        double magic = copysign(4503599627370496.0 /* 0x1.0p52 */, x);
+        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
         double rounded = x + magic;
         rounded -= magic;
-        x = copysign(rounded, x);
+        x = copysign( rounded, x );
     }
 
     return x;
@@ -115,13 +106,12 @@
 {
     double absx = fabs(x);
 
-    if (absx < 9223372036854775808.0L /* 0x1.0p64f */)
+    if( absx < 9223372036854775808.0L /* 0x1.0p64f */ )
     {
-        long double magic =
-            copysignl(9223372036854775808.0L /* 0x1.0p63L */, x);
+        long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x );
         long double rounded = x + magic;
         rounded -= magic;
-        x = copysignl(rounded, x);
+        x = copysignl( rounded, x );
     }
 
     return x;
@@ -135,31 +125,30 @@
 //
 ///////////////////////////////////////////////////////////////////
 #ifndef FP_ILOGB0
-#define FP_ILOGB0 INT_MIN
+    #define FP_ILOGB0   INT_MIN
 #endif
 
 #ifndef FP_ILOGBNAN
-#define FP_ILOGBNAN INT_MIN
+    #define FP_ILOGBNAN INT_MIN
 #endif
 
-int ilogb(double x)
+int ilogb (double x)
 {
-    union {
-        double f;
-        cl_ulong u;
-    } u;
+    union{ double f; cl_ulong u;} u;
     u.f = x;
 
     cl_ulong absx = u.u & CL_LONG_MAX;
-    if (absx - 0x0001000000000000ULL
-        >= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
+    if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
     {
-        switch (absx)
+        switch( absx )
         {
-            case 0: return FP_ILOGB0;
-            case 0x7ff0000000000000ULL: return INT_MAX;
+            case 0:
+                return FP_ILOGB0;
+            case 0x7ff0000000000000ULL:
+                return INT_MAX;
             default:
-                if (absx > 0x7ff0000000000000ULL) return FP_ILOGBNAN;
+                if( absx > 0x7ff0000000000000ULL )
+                    return FP_ILOGBNAN;
 
                 // subnormal
                 u.u = absx | 0x3ff0000000000000ULL;
@@ -172,23 +161,23 @@
 }
 
 
-int ilogbf(float x)
+int ilogbf (float x)
 {
-    union {
-        float f;
-        cl_uint u;
-    } u;
+    union{ float f; cl_uint u;} u;
     u.f = x;
 
     cl_uint absx = u.u & 0x7fffffff;
-    if (absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
+    if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
     {
-        switch (absx)
+        switch( absx )
         {
-            case 0: return FP_ILOGB0;
-            case 0x7f800000U: return INT_MAX;
+            case 0:
+                return FP_ILOGB0;
+            case 0x7f800000U:
+                return INT_MAX;
             default:
-                if (absx > 0x7f800000) return FP_ILOGBNAN;
+                if( absx > 0x7f800000 )
+                    return FP_ILOGBNAN;
 
                 // subnormal
                 u.u = absx | 0x3f800000U;
@@ -200,33 +189,32 @@
     return (absx >> 23) - 127;
 }
 
-int ilogbl(long double x)
+int ilogbl (long double x)
 {
-    union {
+    union
+    {
         long double f;
-        struct
-        {
-            cl_ulong m;
-            cl_ushort sexp;
-        } u;
+        struct{ cl_ulong m; cl_ushort sexp; }u;
     } u;
     u.f = x;
 
     int exp = u.u.sexp & 0x7fff;
-    if (0 == exp)
+    if( 0 == exp )
     {
-        if (0 == u.u.m) return FP_ILOGB0;
+        if( 0 == u.u.m )
+            return FP_ILOGB0;
 
-        // subnormal
+        //subnormal
         u.u.sexp = 0x3fff;
         u.f -= 1.0f;
         exp = u.u.sexp & 0x7fff;
 
         return exp - (0x3fff + 0x3ffe);
     }
-    else if (0x7fff == exp)
+    else if( 0x7fff == exp )
     {
-        if (u.u.m & CL_LONG_MAX) return FP_ILOGBNAN;
+        if( u.u.m & CL_LONG_MAX )
+            return FP_ILOGBNAN;
 
         return INT_MAX;
     }
@@ -244,10 +232,7 @@
 
 static void GET_BITS_SP32(float fx, unsigned int* ux)
 {
-    volatile union {
-        float f;
-        unsigned int u;
-    } _bitsy;
+    volatile union {float f; unsigned int u;} _bitsy;
     _bitsy.f = (fx);
     *ux = _bitsy.u;
 }
@@ -259,10 +244,7 @@
 /* } */
 static void PUT_BITS_SP32(unsigned int ux, float* fx)
 {
-    volatile union {
-        float f;
-        unsigned int u;
-    } _bitsy;
+    volatile union {float f; unsigned int u;} _bitsy;
     _bitsy.u = (ux);
     *fx = _bitsy.f;
 }
@@ -274,19 +256,13 @@
 /* } */
 static void GET_BITS_DP64(double dx, unsigned __int64* lx)
 {
-    volatile union {
-        double d;
-        unsigned __int64 l;
-    } _bitsy;
+    volatile union {double d; unsigned __int64 l;} _bitsy;
     _bitsy.d = (dx);
     *lx = _bitsy.l;
 }
 static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
 {
-    volatile union {
-        double d;
-        unsigned __int64 l;
-    } _bitsy;
+    volatile union {double d; unsigned __int64 l;} _bitsy;
     _bitsy.l = (lx);
     *dx = _bitsy.d;
 }
@@ -311,7 +287,8 @@
    that x is NaN; gcc does. */
 double fmax(double x, double y)
 {
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
 
     return x >= y ? x : y;
 }
@@ -324,15 +301,17 @@
 
 double fmin(double x, double y)
 {
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
 
     return x <= y ? x : y;
 }
 
 
-float fmaxf(float x, float y)
+float fmaxf( float x, float y )
 {
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
 
     return x >= y ? x : y;
 }
@@ -344,31 +323,31 @@
 
 float fminf(float x, float y)
 {
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
 
     return x <= y ? x : y;
 }
 
 long double scalblnl(long double x, long n)
 {
-    union {
+    union
+    {
         long double d;
-        struct
-        {
-            cl_ulong m;
-            cl_ushort sexp;
-        } u;
-    } u;
+        struct{ cl_ulong m; cl_ushort sexp;}u;
+    }u;
     u.u.m = CL_LONG_MIN;
 
-    if (x == 0.0L || n < -2200) return copysignl(0.0L, x);
+    if( x == 0.0L || n < -2200)
+        return copysignl( 0.0L, x );
 
-    if (n > 2200) return INFINITY;
+    if( n > 2200 )
+        return INFINITY;
 
-    if (n < 0)
+    if( n < 0 )
     {
         u.u.sexp = 0x3fff - 1022;
-        while (n <= -1022)
+        while( n <= -1022 )
         {
             x *= u.d;
             n += 1022;
@@ -378,10 +357,10 @@
         return x;
     }
 
-    if (n > 0)
+    if( n > 0 )
     {
         u.u.sexp = 0x3fff + 1023;
-        while (n >= 1023)
+        while( n >= 1023 )
         {
             x *= u.d;
             n -= 1023;
@@ -399,12 +378,15 @@
 //                          log2
 //
 ///////////////////////////////////////////////////////////////////
-const static cl_double log_e_base2 = 1.4426950408889634074;
-const static cl_double log_10_base2 = 3.3219280948873623478;
+const static cl_double log_e_base2   = 1.4426950408889634074;
+const static cl_double log_10_base2  = 3.3219280948873623478;
 
-// double log10(double x);
+//double log10(double x);
 
-double log2(double x) { return 1.44269504088896340735992468100189214 * log(x); }
+double log2(double x)
+{
+    return 1.44269504088896340735992468100189214 * log(x);
+}
 
 long double log2l(long double x)
 {
@@ -415,23 +397,23 @@
 {
     double absx = fabs(x);
 
-    if (absx < 4503599627370496.0 /* 0x1.0p52f */)
+    if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
     {
         cl_long rounded = x;
-        x = copysign((double)rounded, x);
+        x = copysign( (double) rounded, x );
     }
 
     return x;
 }
 
-float truncf(float x)
+float  truncf(float x)
 {
     float absx = fabsf(x);
 
-    if (absx < 8388608.0f /* 0x1.0p23f */)
+    if( absx < 8388608.0f /* 0x1.0p23f */ )
     {
         cl_int rounded = x;
-        x = copysignf((float)rounded, x);
+        x = copysignf( (float) rounded, x );
     }
 
     return x;
@@ -441,69 +423,75 @@
 {
     double absx = fabs(x);
 
-    if (absx < 0.5) return 0;
+    if( absx < 0.5 )
+        return 0;
 
-    if (absx < 4503599627370496.0 /* 0x1.0p52 */)
+    if( absx < 4503599627370496.0 /* 0x1.0p52 */)
     {
         absx += 0.5;
         cl_long rounded = absx;
         absx = rounded;
-        x = copysign(absx, x);
+        x = copysign( absx, x );
     }
 
-    if (x >= (double)LONG_MAX) return LONG_MAX;
+    if( x >= (double) LONG_MAX )
+        return LONG_MAX;
 
-    return (long)x;
+    return (long) x;
 }
 
 long lroundf(float x)
 {
     float absx = fabsf(x);
 
-    if (absx < 0.5f) return 0;
+    if( absx < 0.5f )
+        return 0;
 
-    if (absx < 8388608.0f)
+    if( absx < 8388608.0f )
     {
         absx += 0.5f;
         cl_int rounded = absx;
         absx = rounded;
-        x = copysignf(absx, x);
+        x = copysignf(  absx, x );
     }
 
-    if (x >= (float)LONG_MAX) return LONG_MAX;
+    if( x >= (float) LONG_MAX )
+        return LONG_MAX;
 
-    return (long)x;
+    return (long) x;
 }
 
 double round(double x)
 {
     double absx = fabs(x);
 
-    if (absx < 0.5) return copysign(0.0, x);
+    if( absx < 0.5 )
+        return copysign( 0.0, x);
 
-    if (absx < 4503599627370496.0 /* 0x1.0p52 */)
+    if( absx < 4503599627370496.0 /* 0x1.0p52 */)
     {
         absx += 0.5;
         cl_long rounded = absx;
         absx = rounded;
-        x = copysign(absx, x);
+        x = copysign( absx, x );
     }
 
     return x;
 }
 
-float roundf(float x)
+float  roundf(float x)
 {
     float absx = fabsf(x);
 
-    if (absx < 0.5f) return copysignf(0.0f, x);
+    if( absx < 0.5f )
+        return copysignf( 0.0f, x);
 
-    if (absx < 8388608.0f)
+    if( absx < 8388608.0f )
     {
         absx += 0.5f;
         cl_int rounded = absx;
         absx = rounded;
-        x = copysignf(absx, x);
+        x = copysignf( absx, x );
     }
 
     return x;
@@ -513,59 +501,65 @@
 {
     long double absx = fabsl(x);
 
-    if (absx < 0.5L) return copysignl(0.0L, x);
+    if( absx < 0.5L )
+        return copysignl( 0.0L, x);
 
-    if (absx < 9223372036854775808.0L /*0x1.0p63L*/)
+    if( absx < 9223372036854775808.0L /*0x1.0p63L*/ )
     {
         absx += 0.5L;
         cl_ulong rounded = absx;
         absx = rounded;
-        x = copysignl(absx, x);
+        x = copysignl( absx, x );
     }
 
     return x;
 }
 
-float cbrtf(float x)
+float cbrtf( float x )
 {
-    float z = pow(fabs((double)x), 1.0 / 3.0);
-    return copysignf(z, x);
+    float z = pow( fabs((double) x), 1.0 / 3.0 );
+    return copysignf( z, x );
 }
 
-double cbrt(double x) { return copysign(pow(fabs(x), 1.0 / 3.0), x); }
+double cbrt( double x )
+{
+    return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
+}
 
-long int lrint(double x)
+long int lrint (double x)
 {
     double absx = fabs(x);
 
-    if (x >= (double)LONG_MAX) return LONG_MAX;
+    if( x >= (double) LONG_MAX )
+        return LONG_MAX;
 
-    if (absx < 4503599627370496.0 /* 0x1.0p52 */)
+    if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
     {
-        double magic = copysign(4503599627370496.0 /* 0x1.0p52 */, x);
+        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
         double rounded = x + magic;
         rounded -= magic;
-        return (long int)rounded;
+        return (long int) rounded;
     }
 
-    return (long int)x;
+    return (long int) x;
 }
 
-long int lrintf(float x)
+long int lrintf (float x)
 {
     float absx = fabsf(x);
 
-    if (x >= (float)LONG_MAX) return LONG_MAX;
+    if( x >= (float) LONG_MAX )
+        return LONG_MAX;
 
-    if (absx < 8388608.0f /* 0x1.0p23f */)
+    if( absx < 8388608.0f /* 0x1.0p23f */ )
     {
-        float magic = copysignf(8388608.0f /* 0x1.0p23f */, x);
+        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
         float rounded = x + magic;
         rounded -= magic;
-        return (long int)rounded;
+        return (long int) rounded;
     }
 
-    return (long int)x;
+    return (long int) x;
 }
 
 #endif // _MSC_VER < 1900
@@ -580,12 +574,13 @@
 int fetestexcept(int excepts)
 {
     unsigned int status = _statusfp();
-    return excepts
-        & (((status & _SW_INEXACT) ? FE_INEXACT : 0)
-           | ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)
-           | ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)
-           | ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0)
-           | ((status & _SW_INVALID) ? FE_INVALID : 0));
+    return excepts & (
+        ((status & _SW_INEXACT) ? FE_INEXACT : 0)      |
+        ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)  |
+        ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)    |
+        ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
+        ((status & _SW_INVALID) ? FE_INVALID : 0)
+    );
 }
 
 int feclearexcept(int excepts)
@@ -597,36 +592,33 @@
 
 #endif // __INTEL_COMPILER
 
-#if _MSC_VER < 1900 && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER < 1300)
+#if _MSC_VER < 1900 && ( ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 )
 
-float nanf(const char* str)
+float nanf( const char* str)
 {
-    cl_uint u = atoi(str);
+    cl_uint u = atoi( str );
     u |= 0x7fc00000U;
-    return *(float*)(&u);
+    return *( float*)(&u);
 }
 
 
-double nan(const char* str)
+double nan( const char* str)
 {
-    cl_ulong u = atoi(str);
+    cl_ulong u = atoi( str );
     u |= 0x7ff8000000000000ULL;
-    return *(double*)(&u);
+    return *( double*)(&u);
 }
 
 // double check this implementatation
-long double nanl(const char* str)
+long double nanl( const char* str)
 {
-    union {
+    union
+    {
         long double f;
-        struct
-        {
-            cl_ulong m;
-            cl_ushort sexp;
-        } u;
-    } u;
+        struct { cl_ulong m; cl_ushort sexp; }u;
+    }u;
     u.u.sexp = 0x7fff;
-    u.u.m = 0x8000000000000000ULL | atoi(str);
+    u.u.m = 0x8000000000000000ULL | atoi( str );
 
     return u.f;
 }
@@ -640,35 +632,32 @@
 ///////////////////////////////////////////////////////////////////
 
 /*
-// This function is commented out because the Windows implementation should
-never call munmap.
+// This function is commented out because the Windows implementation should never call munmap.
 // If it is calling it, we have a bug. Please file a bugzilla.
 int munmap(void *addr, size_t len)
 {
-// FIXME: this is not correct.  munmap is like free()
-// http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
+// FIXME: this is not correct.  munmap is like free()    http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
 
     return (int)VirtualAlloc( (LPVOID)addr, len,
                   MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
 }
 */
 
-uint64_t ReadTime(void)
+uint64_t ReadTime( void )
 {
     LARGE_INTEGER current;
     QueryPerformanceCounter(&current);
     return (uint64_t)current.QuadPart;
 }
 
-double SubtractTime(uint64_t endTime, uint64_t startTime)
+double SubtractTime( uint64_t endTime, uint64_t startTime )
 {
     static double PerformanceFrequency = 0.0;
 
-    if (PerformanceFrequency == 0.0)
-    {
+    if (PerformanceFrequency == 0.0) {
         LARGE_INTEGER frequency;
         QueryPerformanceFrequency(&frequency);
-        PerformanceFrequency = (double)frequency.QuadPart;
+        PerformanceFrequency = (double) frequency.QuadPart;
     }
 
     return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
@@ -676,38 +665,40 @@
 
 int cf_signbit(double x)
 {
-    union {
+    union
+    {
         double f;
         cl_ulong u;
-    } u;
+    }u;
     u.f = x;
     return u.u >> 63;
 }
 
 int cf_signbitf(float x)
 {
-    union {
+    union
+    {
         float f;
         cl_uint u;
-    } u;
+    }u;
     u.f = x;
     return u.u >> 31;
 }
 
-float int2float(int32_t ix)
+float int2float (int32_t ix)
 {
     union {
-        float f;
+        float   f;
         int32_t i;
     } u;
     u.i = ix;
     return u.f;
 }
 
-int32_t float2int(float fx)
+int32_t float2int (float   fx)
 {
     union {
-        float f;
+        float   f;
         int32_t i;
     } u;
     u.f = fx;
@@ -731,50 +722,27 @@
     return 31 - res;
 #endif
     unsigned long index;
-    unsigned char res = _BitScanReverse(&index, pattern);
-    if (res)
-    {
-        return 8 * sizeof(int) - 1 - index;
-    }
-    else
-    {
-        return 8 * sizeof(int);
+    unsigned char res = _BitScanReverse( &index, pattern);
+    if (res) {
+        return 8*sizeof(int) - 1 - index;
+    } else {
+        return 8*sizeof(int);
     }
 }
 #else
 int __builtin_clz(unsigned int pattern)
 {
-    int count;
-    if (pattern == 0u)
-    {
-        return 32;
-    }
-    count = 31;
-    if (pattern >= 1u << 16)
-    {
-        pattern >>= 16;
-        count -= 16;
-    }
-    if (pattern >= 1u << 8)
-    {
-        pattern >>= 8;
-        count -= 8;
-    }
-    if (pattern >= 1u << 4)
-    {
-        pattern >>= 4;
-        count -= 4;
-    }
-    if (pattern >= 1u << 2)
-    {
-        pattern >>= 2;
-        count -= 2;
-    }
-    if (pattern >= 1u << 1)
-    {
-        count -= 1;
-    }
-    return count;
+   int count;
+   if (pattern == 0u) {
+       return 32;
+   }
+   count = 31;
+   if (pattern >= 1u<<16) { pattern >>= 16; count -= 16; }
+   if (pattern >=  1u<<8) { pattern >>=  8; count -=  8; }
+   if (pattern >=  1u<<4) { pattern >>=  4; count -=  4; }
+   if (pattern >=  1u<<2) { pattern >>=  2; count -=  2; }
+   if (pattern >=  1u<<1) {                 count -=  1; }
+   return count;
 }
 
 #endif // !defined(_WIN64)
@@ -788,9 +756,9 @@
     return 0;
 }
 
-unsigned int sleep(unsigned int sec)
+unsigned int sleep( unsigned int sec )
 {
-    Sleep(sec * 1000);
+    Sleep( sec * 1000 );
     return 0;
 }
 

diff --git a/test_common/harness/mt19937.cpp b/test_common/harness/mt19937.cpp
index c32d9ba..a4fbf59 100644
--- a/test_common/harness/mt19937.cpp
+++ b/test_common/harness/mt19937.cpp

@@ -26,8 +26,8 @@
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
-   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
@@ -51,15 +51,15 @@
 #include "harness/alloc.h"
 
 #ifdef __SSE2__
-#include <emmintrin.h>
+    #include <emmintrin.h>
 #endif
 
 /* Period parameters */
-#define N 624 /* vector code requires multiple of 4 here */
+#define N 624   /* vector code requires multiple of 4 here */
 #define M 397
-#define MATRIX_A (cl_uint)0x9908b0dfUL /* constant vector a */
-#define UPPER_MASK (cl_uint)0x80000000UL /* most significant w-r bits */
-#define LOWER_MASK (cl_uint)0x7fffffffUL /* least significant r bits */
+#define MATRIX_A    (cl_uint) 0x9908b0dfUL   /* constant vector a */
+#define UPPER_MASK  (cl_uint) 0x80000000UL /* most significant w-r bits */
+#define LOWER_MASK  (cl_uint) 0x7fffffffUL /* least significant r bits */
 
 typedef struct _MTdata
 {
@@ -67,27 +67,26 @@
 #ifdef __SSE2__
     cl_uint cache[N];
 #endif
-    cl_int mti;
-} _MTdata;
+    cl_int  mti;
+}_MTdata;
 
 /* initializes mt[N] with a seed */
 MTdata init_genrand(cl_uint s)
 {
-    MTdata r = (MTdata)align_malloc(sizeof(_MTdata), 16);
-    if (NULL != r)
+    MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
+    if( NULL != r )
     {
         cl_uint *mt = r->mt;
         int mti = 0;
-        mt[0] = s; // & 0xffffffffUL;
-        for (mti = 1; mti < N; mti++)
-        {
-            mt[mti] = (cl_uint)(
-                1812433253UL * (mt[mti - 1] ^ (mt[mti - 1] >> 30)) + mti);
+        mt[0]= s; // & 0xffffffffUL;
+        for (mti=1; mti<N; mti++) {
+            mt[mti] = (cl_uint)
+            (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
             /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
             /* In the previous versions, MSBs of the seed affect   */
             /* only MSBs of the array mt[].                        */
             /* 2002/01/09 modified by Makoto Matsumoto             */
-            // mt[mti] &= 0xffffffffUL;
+    //        mt[mti] &= 0xffffffffUL;
             /* for >32 bit machines */
         }
         r->mti = mti;
@@ -96,22 +95,20 @@
     return r;
 }
 
-void free_mtdata(MTdata d)
+void    free_mtdata( MTdata d )
 {
-    if (d) align_free(d);
+    if(d)
+        align_free(d);
 }
 
 /* generates a random number on [0,0xffffffff]-interval */
-cl_uint genrand_int32(MTdata d)
+cl_uint genrand_int32( MTdata d)
 {
     /* mag01[x] = x * MATRIX_A  for x=0,1 */
-    static const cl_uint mag01[2] = { 0x0UL, MATRIX_A };
+    static const cl_uint mag01[2]={0x0UL, MATRIX_A};
 #ifdef __SSE2__
     static volatile int init = 0;
-    static union {
-        __m128i v;
-        cl_uint s[4];
-    } upper_mask, lower_mask, one, matrix_a, c0, c1;
+    static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
 #endif
 
 
@@ -123,17 +120,14 @@
         int kk;
 
 #ifdef __SSE2__
-        if (0 == init)
+        if( 0 == init )
         {
-            upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] =
-                upper_mask.s[3] = UPPER_MASK;
-            lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] =
-                lower_mask.s[3] = LOWER_MASK;
+            upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
+            lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
             one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
-            matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] =
-                MATRIX_A;
-            c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint)0x9d2c5680UL;
-            c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint)0xefc60000UL;
+            matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
+            c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
+            c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
             init = 1;
         }
 #endif
@@ -141,89 +135,61 @@
         kk = 0;
 #ifdef __SSE2__
         // vector loop
-        for (; kk + 4 <= N - M; kk += 4)
+        for( ; kk + 4 <= N-M; kk += 4 )
         {
-            // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
-            __m128i vy = _mm_or_si128(
-                _mm_and_si128(_mm_load_si128((__m128i *)(mt + kk)),
-                              upper_mask.v),
-                _mm_and_si128(_mm_loadu_si128((__m128i *)(mt + kk + 1)),
-                              lower_mask.v));
+            __m128i vy = _mm_or_si128(  _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
+                                        _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v ));        //  ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
 
-            // y & 1 ? -1 : 0
-            __m128i mask = _mm_cmpeq_epi32(_mm_and_si128(vy, one.v), one.v);
-            // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
-            __m128i vmag01 = _mm_and_si128(mask, matrix_a.v);
-            // mt[kk+M] ^ (y >> 1)
-            __m128i vr =
-                _mm_xor_si128(_mm_loadu_si128((__m128i *)(mt + kk + M)),
-                              (__m128i)_mm_srli_epi32(vy, 1));
-            // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
-            vr = _mm_xor_si128(vr, vmag01);
-            _mm_store_si128((__m128i *)(mt + kk), vr);
+            __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v );                                         // y & 1 ? -1 : 0
+            __m128i vmag01 = _mm_and_si128( mask, matrix_a.v );                                                         // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
+            __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) );    // mt[kk+M] ^ (y >> 1)
+            vr = _mm_xor_si128( vr, vmag01 );                                                                           // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
+            _mm_store_si128( (__m128i*) (mt + kk ), vr );
         }
 #endif
-        for (; kk < N - M; kk++)
-        {
-            y = (cl_uint)((mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK));
-            mt[kk] = mt[kk + M] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
+        for ( ;kk<N-M;kk++) {
+            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
+            mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
         }
 
 #ifdef __SSE2__
         // advance to next aligned location
-        for (; kk < N - 1 && (kk & 3); kk++)
-        {
-            y = (cl_uint)((mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK));
-            mt[kk] = mt[kk + (M - N)] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
+        for (;kk<N-1 && (kk & 3);kk++) {
+            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
+            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
         }
 
         // vector loop
-        for (; kk + 4 <= N - 1; kk += 4)
+        for( ; kk + 4 <= N-1; kk += 4 )
         {
-            __m128i vy = _mm_or_si128(
-                _mm_and_si128(_mm_load_si128((__m128i *)(mt + kk)),
-                              upper_mask.v),
-                // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
-                _mm_and_si128(_mm_loadu_si128((__m128i *)(mt + kk + 1)),
-                              lower_mask.v));
+            __m128i vy = _mm_or_si128(  _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
+                                        _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v ));        //  ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
 
-            // y & 1 ? -1 : 0
-            __m128i mask = _mm_cmpeq_epi32(_mm_and_si128(vy, one.v), one.v);
-            // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
-            __m128i vmag01 = _mm_and_si128(mask, matrix_a.v);
-            // mt[kk+M-N] ^ (y >> 1)
-            __m128i vr =
-                _mm_xor_si128(_mm_loadu_si128((__m128i *)(mt + kk + M - N)),
-                              _mm_srli_epi32(vy, 1));
-            // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
-            vr = _mm_xor_si128(vr, vmag01);
-            _mm_store_si128((__m128i *)(mt + kk), vr);
+            __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v );                                         // y & 1 ? -1 : 0
+            __m128i vmag01 = _mm_and_si128( mask, matrix_a.v );                                                         // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
+            __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) );          // mt[kk+M-N] ^ (y >> 1)
+            vr = _mm_xor_si128( vr, vmag01 );                                                                           // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
+            _mm_store_si128( (__m128i*) (mt + kk ), vr );
         }
 #endif
 
-        for (; kk < N - 1; kk++)
-        {
-            y = (cl_uint)((mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK));
-            mt[kk] = mt[kk + (M - N)] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
+        for (;kk<N-1;kk++) {
+            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
+            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
         }
-        y = (cl_uint)((mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK));
-        mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
+        y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
+        mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
 
 #ifdef __SSE2__
         // Do the tempering ahead of time in vector code
-        for (kk = 0; kk + 4 <= N; kk += 4)
+        for( kk = 0; kk + 4 <= N; kk += 4 )
         {
-            // y = mt[k];
-            __m128i vy = _mm_load_si128((__m128i *)(mt + kk));
-            // y ^= (y >> 11);
-            vy = _mm_xor_si128(vy, _mm_srli_epi32(vy, 11));
-            // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
-            vy = _mm_xor_si128(vy, _mm_and_si128(_mm_slli_epi32(vy, 7), c0.v));
-            // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
-            vy = _mm_xor_si128(vy, _mm_and_si128(_mm_slli_epi32(vy, 15), c1.v));
-            // y ^= (y >> 18);
-            vy = _mm_xor_si128(vy, _mm_srli_epi32(vy, 18));
-            _mm_store_si128((__m128i *)(d->cache + kk), vy);
+            __m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) );                            // y = mt[k];
+            vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) );                             // y ^= (y >> 11);
+            vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) );        // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
+            vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) );       // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
+            vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) );                             // y ^= (y >> 18);
+            _mm_store_si128( (__m128i*)(d->cache+kk), vy );
         }
 #endif
 
@@ -236,8 +202,8 @@
 
     /* Tempering */
     y ^= (y >> 11);
-    y ^= (y << 7) & (cl_uint)0x9d2c5680UL;
-    y ^= (y << 15) & (cl_uint)0xefc60000UL;
+    y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
+    y ^= (y << 15) & (cl_uint) 0xefc60000UL;
     y ^= (y >> 18);
 #endif
 
@@ -245,35 +211,35 @@
     return y;
 }
 
-cl_ulong genrand_int64(MTdata d)
+cl_ulong genrand_int64( MTdata d)
 {
-    return ((cl_ulong)genrand_int32(d) << 32) | (cl_uint)genrand_int32(d);
+    return ((cl_ulong) genrand_int32(d) << 32) | (cl_uint) genrand_int32(d);
 }
 
 /* generates a random number on [0,1]-real-interval */
 double genrand_real1(MTdata d)
 {
-    return genrand_int32(d) * (1.0 / 4294967295.0);
+    return genrand_int32(d)*(1.0/4294967295.0);
     /* divided by 2^32-1 */
 }
 
 /* generates a random number on [0,1)-real-interval */
 double genrand_real2(MTdata d)
 {
-    return genrand_int32(d) * (1.0 / 4294967296.0);
+    return genrand_int32(d)*(1.0/4294967296.0);
     /* divided by 2^32 */
 }
 
 /* generates a random number on (0,1)-real-interval */
 double genrand_real3(MTdata d)
 {
-    return (((double)genrand_int32(d)) + 0.5) * (1.0 / 4294967296.0);
+    return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0);
     /* divided by 2^32 */
 }
 
 /* generates a random number on [0,1) with 53-bit resolution*/
 double genrand_res53(MTdata d)
 {
-    unsigned long a = genrand_int32(d) >> 5, b = genrand_int32(d) >> 6;
-    return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
+    unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6;
+    return(a*67108864.0+b)*(1.0/9007199254740992.0);
 }

diff --git a/test_common/harness/mt19937.h b/test_common/harness/mt19937.h
index 35c8493..85786e7 100644
--- a/test_common/harness/mt19937.h
+++ b/test_common/harness/mt19937.h

@@ -31,8 +31,8 @@
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
@@ -47,12 +47,12 @@
  */
 
 #ifndef MT19937_H
-#define MT19937_H 1
+#define MT19937_H   1
 
-#if defined(__APPLE__)
-#include <OpenCL/cl_platform.h>
+#if defined( __APPLE__ )
+    #include <OpenCL/cl_platform.h>
 #else
-#include <CL/cl_platform.h>
+    #include <CL/cl_platform.h>
 #endif
 
 /*
@@ -61,50 +61,52 @@
  *      on each thread.
  */
 
-typedef struct _MTdata *MTdata;
+typedef struct _MTdata  *MTdata;
 
 /* Create the random number generator with seed */
-MTdata init_genrand(cl_uint /*seed*/);
+MTdata init_genrand( cl_uint /*seed*/ );
 
 /* release memory used by a MTdata private data */
-void free_mtdata(MTdata /*data*/);
+void   free_mtdata( MTdata /*data*/ );
 
 /* generates a random number on [0,0xffffffff]-interval */
-cl_uint genrand_int32(MTdata /*data*/);
+cl_uint genrand_int32( MTdata /*data*/);
 
 /* generates a random number on [0,0xffffffffffffffffULL]-interval */
-cl_ulong genrand_int64(MTdata /*data*/);
+cl_ulong genrand_int64( MTdata /*data*/);
 
 /* generates a random number on [0,1]-real-interval */
-double genrand_real1(MTdata /*data*/);
+double genrand_real1( MTdata /*data*/);
 
 /* generates a random number on [0,1)-real-interval */
-double genrand_real2(MTdata /*data*/);
+double genrand_real2( MTdata /*data*/);
 
 /* generates a random number on (0,1)-real-interval */
-double genrand_real3(MTdata /*data*/);
+double genrand_real3( MTdata /*data*/);
 
 /* generates a random number on [0,1) with 53-bit resolution*/
-double genrand_res53(MTdata /*data*/);
+double genrand_res53( MTdata /*data*/ );
 
 
 #ifdef __cplusplus
 
 #include <cassert>
 
-struct MTdataHolder
-{
-    MTdataHolder(cl_uint seed)
-    {
+struct MTdataHolder {
+    MTdataHolder(cl_uint seed) {
         m_mtdata = init_genrand(seed);
         assert(m_mtdata != nullptr);
     }
 
-    MTdataHolder(MTdata mtdata): m_mtdata(mtdata) {}
+    MTdataHolder(MTdata mtdata) : m_mtdata(mtdata) {}
 
-    ~MTdataHolder() { free_mtdata(m_mtdata); }
+    ~MTdataHolder() {
+        free_mtdata(m_mtdata);
+    }
 
-    operator MTdata() const { return m_mtdata; }
+    operator MTdata () const {
+        return m_mtdata;
+    }
 
 private:
     MTdata m_mtdata;
@@ -112,4 +114,4 @@
 
 #endif // #ifdef __cplusplus
 
-#endif /* MT19937_H */
+#endif  /* MT19937_H */

diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp
index cd350cf..00e7a6b 100644
--- a/test_common/harness/os_helpers.cpp
+++ b/test_common/harness/os_helpers.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -20,10 +20,10 @@
 // C++ interface.
 // =================================================================================================
 
-#include <cerrno> // errno, error constants
-#include <climits> // PATH_MAX
-#include <cstdlib> // abort, _splitpath, _makepath
-#include <cstring> // strdup, strerror_r
+#include <cerrno>     // errno, error constants
+#include <climits>    // PATH_MAX
+#include <cstdlib>    // abort, _splitpath, _makepath
+#include <cstring>    // strdup, strerror_r
 #include <sstream>
 
 #include <vector>
@@ -32,141 +32,131 @@
 #include <android/api-level.h>
 #endif
 
-#define CHECK_PTR(ptr)                                                         \
-    if ((ptr) == NULL)                                                         \
-    {                                                                          \
-        abort();                                                               \
+#define CHECK_PTR( ptr )    \
+    if ( (ptr) == NULL ) {  \
+        abort();            \
     }
 
-typedef std::vector<char> buffer_t;
+typedef std::vector< char > buffer_t;
 
-#if !defined(PATH_MAX)
-#define PATH_MAX 1000
+#if ! defined( PATH_MAX )
+    #define PATH_MAX 1000
 #endif
 
-int const _size = PATH_MAX + 1; // Initial buffer size for path.
-int const _count = 8; // How many times we will try to double buffer size.
+int const _size  = PATH_MAX + 1;    // Initial buffer size for path.
+int const _count = 8;               // How many times we will try to double buffer size.
 
 // -------------------------------------------------------------------------------------------------
 // MacOS X
 // -------------------------------------------------------------------------------------------------
 
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
 
 
-#include <mach-o/dyld.h> // _NSGetExecutablePath
-#include <libgen.h> // dirname
+    #include <mach-o/dyld.h>    // _NSGetExecutablePath
+    #include <libgen.h>         // dirname
 
 
-static std::string
-_err_msg(int err, // Error number (e. g. errno).
-         int level // Nesting level, for avoiding infinite recursion.
-)
-{
+    static
+    std::string
+    _err_msg(
+        int err,     // Error number (e. g. errno).
+        int level    // Nesting level, for avoiding infinite recursion.
+    ) {
 
-    /*
-        There are 3 incompatible versions of strerror_r:
+        /*
+            There are 3 incompatible versions of strerror_r:
 
-            char * strerror_r( int, char *, size_t );  // GNU version
-            int    strerror_r( int, char *, size_t );  // BSD version
-            int    strerror_r( int, char *, size_t );  // XSI version
+                char * strerror_r( int, char *, size_t );  // GNU version
+                int    strerror_r( int, char *, size_t );  // BSD version
+                int    strerror_r( int, char *, size_t );  // XSI version
 
-        BSD version returns error code, while XSI version returns 0 or -1 and
-       sets errno.
+            BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
 
-    */
+        */
 
-    // BSD version of strerror_r.
-    buffer_t buffer(100);
-    int count = _count;
-    for (;;)
-    {
-        int rc = strerror_r(err, &buffer.front(), buffer.size());
-        if (rc == EINVAL)
-        {
-            // Error code is not recognized, but anyway we got the message.
-            return &buffer.front();
-        }
-        else if (rc == ERANGE)
-        {
-            // Buffer is not enough.
-            if (count > 0)
-            {
-                // Enlarge the buffer.
-                --count;
-                buffer.resize(buffer.size() * 2);
-            }
-            else
-            {
+        // BSD version of strerror_r.
+        buffer_t buffer( 100 );
+        int      count = _count;
+        for ( ; ; ) {
+            int rc = strerror_r( err, & buffer.front(), buffer.size() );
+            if ( rc == EINVAL ) {
+                // Error code is not recognized, but anyway we got the message.
+                return & buffer.front();
+            } else if ( rc == ERANGE ) {
+                // Buffer is not enough.
+                if ( count > 0 ) {
+                    // Enlarge the buffer.
+                    -- count;
+                    buffer.resize( buffer.size() * 2 );
+                } else {
+                    std::stringstream ostr;
+                    ostr
+                        << "Error " << err << " "
+                        << "(Getting error message failed: "
+                        << "Buffer of " << buffer.size() << " bytes is still too small"
+                        << ")";
+                    return ostr.str();
+                }; // if
+            } else if ( rc == 0 ) {
+                // We got the message.
+                return & buffer.front();
+            } else {
                 std::stringstream ostr;
-                ostr << "Error " << err << " "
-                     << "(Getting error message failed: "
-                     << "Buffer of " << buffer.size()
-                     << " bytes is still too small"
-                     << ")";
+                ostr
+                    << "Error " << err << " "
+                    << "(Getting error message failed: "
+                    << ( level < 2 ? _err_msg( rc, level + 1 ) : "Oops" )
+                    << ")";
                 return ostr.str();
             }; // if
-        }
-        else if (rc == 0)
-        {
-            // We got the message.
-            return &buffer.front();
-        }
-        else
-        {
-            std::stringstream ostr;
-            ostr << "Error " << err << " "
-                 << "(Getting error message failed: "
-                 << (level < 2 ? _err_msg(rc, level + 1) : "Oops") << ")";
-            return ostr.str();
-        }; // if
-    }; // forever
+        }; // forever
 
-} // _err_msg
+    } // _err_msg
 
 
-std::string dir_sep() { return "/"; } // dir_sep
+    std::string
+    dir_sep(
+    ) {
+        return "/";
+    } // dir_sep
 
 
-std::string exe_path()
-{
-    buffer_t path(_size);
-    int count = _count;
-    for (;;)
-    {
-        uint32_t size = path.size();
-        int rc = _NSGetExecutablePath(&path.front(), &size);
-        if (rc == 0)
-        {
-            break;
-        }; // if
-        if (count > 0)
-        {
-            --count;
-            path.resize(size);
-        }
-        else
-        {
-            log_error("ERROR: Getting executable path failed: "
-                      "_NSGetExecutablePath failed: Buffer of %lu bytes is "
-                      "still too small\n",
-                      (unsigned long)path.size());
-            exit(2);
-        }; // if
-    }; // forever
-    return &path.front();
-} // exe_path
+    std::string
+    exe_path(
+    ) {
+        buffer_t path( _size );
+        int      count = _count;
+        for ( ; ; ) {
+            uint32_t size = path.size();
+            int rc = _NSGetExecutablePath( & path.front(), & size );
+            if ( rc == 0 ) {
+                break;
+            }; // if
+            if ( count > 0 ) {
+                -- count;
+                path.resize( size );
+            } else {
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "_NSGetExecutablePath failed: Buffer of %lu bytes is still too small\n",
+                    (unsigned long) path.size()
+                );
+                exit( 2 );
+            }; // if
+        }; // forever
+        return & path.front();
+    } // exe_path
 
 
-std::string exe_dir()
-{
-    std::string path = exe_path();
-    // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its
-    // argument.
-    buffer_t buffer(path.c_str(),
-                    path.c_str() + path.size() + 1); // Copy with trailing zero.
-    return dirname(&buffer.front());
-} // exe_dir
+    std::string
+    exe_dir(
+    ) {
+        std::string path = exe_path();
+        // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
+        buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
+        return dirname( & buffer.front() );
+    } // exe_dir
 
 
 #endif // __APPLE__
@@ -175,153 +165,149 @@
 // Linux
 // -------------------------------------------------------------------------------------------------
 
-#if defined(__linux__)
+#if defined( __linux__ )
 
 
-#include <cerrno> // errno
-#include <libgen.h> // dirname
-#include <unistd.h> // readlink
+    #include <cerrno>      // errno
+    #include <libgen.h>    // dirname
+    #include <unistd.h>    // readlink
 
 
-static std::string _err_msg(int err, int level)
-{
+    static
+    std::string
+    _err_msg(
+        int err,
+        int level
+    ) {
 
-    /*
-        There are 3 incompatible versions of strerror_r:
+        /*
+            There are 3 incompatible versions of strerror_r:
 
-            char * strerror_r( int, char *, size_t );  // GNU version
-            int    strerror_r( int, char *, size_t );  // BSD version
-            int    strerror_r( int, char *, size_t );  // XSI version
+                char * strerror_r( int, char *, size_t );  // GNU version
+                int    strerror_r( int, char *, size_t );  // BSD version
+                int    strerror_r( int, char *, size_t );  // XSI version
 
-        BSD version returns error code, while XSI version returns 0 or -1 and
-       sets errno.
+            BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
 
-    */
+        */
 
-#if (defined(__ANDROID__) && __ANDROID_API__ < 23)                             \
-    || ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE)
+        #if (defined(__ANDROID__) && __ANDROID_API__ < 23) || ( ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 ) && ! _GNU_SOURCE )
 
-// XSI version of strerror_r.
-#warning Not tested!
-    buffer_t buffer(200);
-    int count = _count;
-    for (;;)
-    {
-        int rc = strerror_r(err, &buffer.front(), buffer.size());
-        if (rc == -1)
-        {
-            int _err = errno;
-            if (_err == ERANGE)
-            {
-                if (count > 0)
-                {
-                    // Enlarge the buffer.
-                    --count;
-                    buffer.resize(buffer.size() * 2);
-                }
-                else
-                {
-                    std::stringstream ostr;
-                    ostr << "Error " << err << " "
-                         << "(Getting error message failed: "
-                         << "Buffer of " << buffer.size()
-                         << " bytes is still too small"
-                         << ")";
-                    return ostr.str();
+            // XSI version of strerror_r.
+            #warning Not tested!
+            buffer_t buffer( 200 );
+            int      count = _count;
+            for ( ; ; ) {
+                int rc = strerror_r( err, & buffer.front(), buffer.size() );
+                if ( rc == -1 ) {
+                    int _err = errno;
+                    if ( _err == ERANGE ) {
+                        if ( count > 0 ) {
+                            // Enlarge the buffer.
+                            -- count;
+                            buffer.resize( buffer.size() * 2 );
+                        } else {
+                            std::stringstream ostr;
+                            ostr
+                                << "Error " << err << " "
+                                << "(Getting error message failed: "
+                                << "Buffer of " << buffer.size() << " bytes is still too small"
+                                << ")";
+                            return ostr.str();
+                        }; // if
+                    } else {
+                        std::stringstream ostr;
+                        ostr
+                            << "Error " << err << " "
+                            << "(Getting error message failed: "
+                            << ( level < 2 ? _err_msg( _err, level + 1 ) : "Oops" )
+                            << ")";
+                        return ostr.str();
+                    }; // if
+                } else {
+                    // We got the message.
+                    return & buffer.front();
                 }; // if
-            }
-            else
-            {
-                std::stringstream ostr;
-                ostr << "Error " << err << " "
-                     << "(Getting error message failed: "
-                     << (level < 2 ? _err_msg(_err, level + 1) : "Oops") << ")";
-                return ostr.str();
+            }; // forever
+
+        #else
+
+            // GNU version of strerror_r.
+            char buffer[ 2000 ];
+            return strerror_r( err, buffer, sizeof( buffer ) );
+
+        #endif
+
+    } // _err_msg
+
+
+    std::string
+    dir_sep(
+    ) {
+        return "/";
+    } // dir_sep
+
+
+    std::string
+    exe_path(
+    ) {
+
+        static std::string const exe = "/proc/self/exe";
+
+        buffer_t    path( _size );
+        int         count = _count;  // Max number of iterations.
+
+        for ( ; ; ) {
+
+            ssize_t len = readlink( exe.c_str(), & path.front(), path.size() );
+
+            if ( len < 0 ) {
+                // Oops.
+                int err = errno;
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "Reading symlink `%s' failed: %s\n",
+                    exe.c_str(), err_msg( err ).c_str()
+                );
+                exit( 2 );
             }; // if
-        }
-        else
-        {
-            // We got the message.
-            return &buffer.front();
-        }; // if
-    }; // forever
 
-#else
+            if ( len < path.size() ) {
+                // We got the path.
+                path.resize( len );
+                break;
+            }; // if
 
-    // GNU version of strerror_r.
-    char buffer[2000];
-    return strerror_r(err, buffer, sizeof(buffer));
+            // Oops, buffer is too small.
+            if ( count > 0 ) {
+                -- count;
+                // Enlarge the buffer.
+                path.resize( path.size() * 2 );
+            } else {
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "Reading symlink `%s' failed: Buffer of %lu bytes is still too small\n",
+                    exe.c_str(),
+                    (unsigned long) path.size()
+                );
+                exit( 2 );
+            }; // if
 
-#endif
+        }; // forever
 
-} // _err_msg
+        return std::string( & path.front(), path.size() );
+
+    } // exe_path
 
 
-std::string dir_sep() { return "/"; } // dir_sep
-
-
-std::string exe_path()
-{
-
-    static std::string const exe = "/proc/self/exe";
-
-    buffer_t path(_size);
-    int count = _count; // Max number of iterations.
-
-    for (;;)
-    {
-
-        ssize_t len = readlink(exe.c_str(), &path.front(), path.size());
-
-        if (len < 0)
-        {
-            // Oops.
-            int err = errno;
-            log_error("ERROR: Getting executable path failed: "
-                      "Reading symlink `%s' failed: %s\n",
-                      exe.c_str(), err_msg(err).c_str());
-            exit(2);
-        }; // if
-
-        if (len < path.size())
-        {
-            // We got the path.
-            path.resize(len);
-            break;
-        }; // if
-
-        // Oops, buffer is too small.
-        if (count > 0)
-        {
-            --count;
-            // Enlarge the buffer.
-            path.resize(path.size() * 2);
-        }
-        else
-        {
-            log_error("ERROR: Getting executable path failed: "
-                      "Reading symlink `%s' failed: Buffer of %lu bytes is "
-                      "still too small\n",
-                      exe.c_str(), (unsigned long)path.size());
-            exit(2);
-        }; // if
-
-    }; // forever
-
-    return std::string(&path.front(), path.size());
-
-} // exe_path
-
-
-std::string exe_dir()
-{
-    std::string path = exe_path();
-    // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its
-    // argument.
-    buffer_t buffer(path.c_str(),
-                    path.c_str() + path.size() + 1); // Copy with trailing zero.
-    return dirname(&buffer.front());
-} // exe_dir
+    std::string
+    exe_dir(
+    ) {
+        std::string path = exe_path();
+        // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
+        buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
+        return dirname( & buffer.front() );
+    } // exe_dir
 
 #endif // __linux__
 
@@ -329,196 +315,212 @@
 // MS Windows
 // -------------------------------------------------------------------------------------------------
 
-#if defined(_WIN32)
+#if defined( _WIN32 )
 
 
-#include <windows.h>
-#if defined(max)
-#undef max
-#endif
+    #include <windows.h>
+    #if defined( max )
+        #undef max
+    #endif
 
-#include <cctype>
-#include <algorithm>
+    #include <cctype>
+    #include <algorithm>
 
 
-static std::string _err_msg(int err, int level)
-{
+    static
+    std::string
+    _err_msg(
+        int err,
+        int level
+    ) {
 
-    std::string msg;
+        std::string msg;
 
-    LPSTR buffer = NULL;
-    DWORD flags = FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM
-        | FORMAT_MESSAGE_IGNORE_INSERTS;
+        LPSTR  buffer = NULL;
+        DWORD  flags  =
+            FORMAT_MESSAGE_ALLOCATE_BUFFER |
+            FORMAT_MESSAGE_FROM_SYSTEM |
+            FORMAT_MESSAGE_IGNORE_INSERTS;
 
-    DWORD len = FormatMessageA(flags, NULL, err, LANG_USER_DEFAULT,
-                               reinterpret_cast<LPSTR>(&buffer), 0, NULL);
-
-    if (buffer == NULL || len == 0)
-    {
-
-        int _err = GetLastError();
-        char str[1024] = { 0 };
-        snprintf(str, sizeof(str),
-                 "Error 0x%08x (Getting error message failed: %s )", err,
-                 (level < 2 ? _err_msg(_err, level + 1).c_str() : "Oops"));
-        msg = std::string(str);
-    }
-    else
-    {
-
-        // Trim trailing whitespace (including `\r' and `\n').
-        while (len > 0 && isspace(buffer[len - 1]))
-        {
-            --len;
-        }; // while
-
-        // Drop trailing full stop.
-        if (len > 0 && buffer[len - 1] == '.')
-        {
-            --len;
-        }; // if
-
-        msg.assign(buffer, len);
-
-    }; // if
-
-    if (buffer != NULL)
-    {
-        LocalFree(buffer);
-    }; // if
-
-    return msg;
-
-} // _get_err_msg
-
-
-std::string dir_sep() { return "\\"; } // dir_sep
-
-
-std::string exe_path()
-{
-
-    buffer_t path(_size);
-    int count = _count;
-
-    for (;;)
-    {
-
-        DWORD len = GetModuleFileNameA(NULL, &path.front(), path.size());
-
-        if (len == 0)
-        {
-            int err = GetLastError();
-            log_error("ERROR: Getting executable path failed: %s\n",
-                      err_msg(err).c_str());
-            exit(2);
-        }; // if
-
-        if (len < path.size())
-        {
-            path.resize(len);
-            break;
-        }; // if
-
-        // Buffer too small.
-        if (count > 0)
-        {
-            --count;
-            path.resize(path.size() * 2);
-        }
-        else
-        {
-            log_error("ERROR: Getting executable path failed: "
-                      "Buffer of %lu bytes is still too small\n",
-                      (unsigned long)path.size());
-            exit(2);
-        }; // if
-
-    }; // forever
-
-    return std::string(&path.front(), path.size());
-
-} // exe_path
-
-
-std::string exe_dir()
-{
-
-    std::string exe = exe_path();
-    int count = 0;
-
-    // Splitting path into components.
-    buffer_t drv(_MAX_DRIVE);
-    buffer_t dir(_MAX_DIR);
-    count = _count;
-#if defined(_MSC_VER)
-    for (;;)
-    {
-        int rc =
-            _splitpath_s(exe.c_str(), &drv.front(), drv.size(), &dir.front(),
-                         dir.size(), NULL, 0, // We need neither name
-                         NULL, 0 // nor extension
+        DWORD len =
+            FormatMessageA(
+                flags,
+                NULL,
+                err,
+                LANG_USER_DEFAULT,
+                reinterpret_cast< LPSTR >( & buffer ),
+                0,
+                NULL
             );
-        if (rc == 0)
-        {
-            break;
-        }
-        else if (rc == ERANGE)
-        {
-            if (count > 0)
-            {
-                --count;
-                // Buffer is too small, but it is not clear which one.
-                // So we have to enlarge all.
-                drv.resize(drv.size() * 2);
-                dir.resize(dir.size() * 2);
-            }
-            else
-            {
-                log_error("ERROR: Getting executable path failed: "
-                          "Splitting path `%s' to components failed: "
-                          "Buffers of %lu and %lu bytes are still too small\n",
-                          exe.c_str(), (unsigned long)drv.size(),
-                          (unsigned long)dir.size());
-                exit(2);
+
+        if ( buffer == NULL || len == 0 ) {
+
+            int _err = GetLastError();
+            char str[1024] = { 0 };
+            snprintf(str, sizeof(str), "Error 0x%08x (Getting error message failed: %s )", err, ( level < 2 ? _err_msg( _err, level + 1 ).c_str() : "Oops" ));
+            msg = std::string(str);
+
+        } else {
+
+            // Trim trailing whitespace (including `\r' and `\n').
+            while ( len > 0 && isspace( buffer[ len - 1 ] ) ) {
+                -- len;
+            }; // while
+
+            // Drop trailing full stop.
+            if ( len > 0 && buffer[ len - 1 ] == '.' ) {
+                -- len;
             }; // if
-        }
-        else
-        {
-            log_error("ERROR: Getting executable path failed: "
-                      "Splitting path `%s' to components failed: %s\n",
-                      exe.c_str(), err_msg(rc).c_str());
-            exit(2);
+
+            msg.assign( buffer, len );
+
+        }; //if
+
+        if ( buffer != NULL ) {
+            LocalFree( buffer );
         }; // if
-    }; // forever
+
+        return msg;
+
+    } // _get_err_msg
+
+
+    std::string
+    dir_sep(
+    ) {
+        return "\\";
+    } // dir_sep
+
+
+    std::string
+    exe_path(
+    ) {
+
+        buffer_t path( _size );
+        int      count = _count;
+
+        for ( ; ; ) {
+
+            DWORD len = GetModuleFileNameA( NULL, & path.front(), path.size() );
+
+            if ( len == 0 ) {
+                int err = GetLastError();
+                log_error( "ERROR: Getting executable path failed: %s\n", err_msg( err ).c_str() );
+                exit( 2 );
+            }; // if
+
+            if ( len < path.size() ) {
+                path.resize( len );
+                break;
+            }; // if
+
+            // Buffer too small.
+            if ( count > 0 ) {
+                -- count;
+                path.resize( path.size() * 2 );
+            } else {
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "Buffer of %lu bytes is still too small\n",
+                    (unsigned long) path.size()
+                );
+                exit( 2 );
+            }; // if
+
+        }; // forever
+
+        return std::string( & path.front(), path.size() );
+
+    } // exe_path
+
+
+    std::string
+    exe_dir(
+    ) {
+
+        std::string exe = exe_path();
+        int count = 0;
+
+        // Splitting path into components.
+        buffer_t drv( _MAX_DRIVE );
+        buffer_t dir( _MAX_DIR   );
+        count = _count;
+#if defined(_MSC_VER)
+        for ( ; ; ) {
+            int rc =
+                _splitpath_s(
+                    exe.c_str(),
+                    & drv.front(), drv.size(),
+                    & dir.front(), dir.size(),
+                    NULL, 0,   // We need neither name
+                    NULL, 0    // nor extension
+                );
+            if ( rc == 0 ) {
+                break;
+            } else if ( rc == ERANGE ) {
+                if ( count > 0 ) {
+                    -- count;
+                    // Buffer is too small, but it is not clear which one.
+                    // So we have to enlarge all.
+                    drv.resize( drv.size() * 2 );
+                    dir.resize( dir.size() * 2 );
+                } else {
+                    log_error(
+                        "ERROR: Getting executable path failed: "
+                        "Splitting path `%s' to components failed: "
+                        "Buffers of %lu and %lu bytes are still too small\n",
+                        exe.c_str(),
+                        (unsigned long) drv.size(),
+                        (unsigned long) dir.size()
+                    );
+                    exit( 2 );
+                }; // if
+            } else {
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "Splitting path `%s' to components failed: %s\n",
+                    exe.c_str(),
+                    err_msg( rc ).c_str()
+                );
+                exit( 2 );
+            }; // if
+        }; // forever
 
 #else // __MINGW32__
 
-    // MinGW does not have the "secure" _splitpath_s, use the insecure version
-    // instead.
-    _splitpath(exe.c_str(), &drv.front(), &dir.front(),
-               NULL, // We need neither name
-               NULL // nor extension
-    );
+        // MinGW does not have the "secure" _splitpath_s, use the insecure version instead.
+        _splitpath(
+            exe.c_str(),
+            & drv.front(),
+            & dir.front(),
+            NULL,   // We need neither name
+            NULL    // nor extension
+        );
 #endif // __MINGW32__
 
-    // Combining components back to path.
-    // I failed with "secure" `_makepath_s'. If buffer is too small, instead of
-    // returning ERANGE, `_makepath_s' pops up dialog box and offers to debug
-    // the program. D'oh! So let us try to guess the size of result and go with
-    // insecure `_makepath'.
-    buffer_t path(std::max(drv.size() + dir.size(), size_t(_MAX_PATH)) + 10);
-    _makepath(&path.front(), &drv.front(), &dir.front(), NULL, NULL);
+        // Combining components back to path.
+        // I failed with "secure" `_makepath_s'. If buffer is too small, instead of returning
+        // ERANGE, `_makepath_s' pops up dialog box and offers to debug the program. D'oh!
+        // So let us try to guess the size of result and go with insecure `_makepath'.
+        buffer_t path( std::max( drv.size() + dir.size(), size_t( _MAX_PATH ) ) + 10 );
+        _makepath( & path.front(), & drv.front(), & dir.front(), NULL, NULL );
 
-    return &path.front();
+        return & path.front();
 
-} // exe_dir
+    } // exe_dir
 
 
 #endif // _WIN32
 
 
-std::string err_msg(int err) { return _err_msg(err, 0); } // err_msg
+std::string
+err_msg(
+    int err
+) {
+
+    return _err_msg( err, 0 );
+
+} // err_msg
 
 
 // =================================================================================================
@@ -526,34 +528,39 @@
 // =================================================================================================
 
 
-char* get_err_msg(int err)
-{
-    char* msg = strdup(err_msg(err).c_str());
-    CHECK_PTR(msg);
+char *
+get_err_msg(
+    int err
+) {
+    char * msg = strdup( err_msg( err ).c_str() );
+    CHECK_PTR( msg );
     return msg;
 } // get_err_msg
 
 
-char* get_dir_sep()
-{
-    char* sep = strdup(dir_sep().c_str());
-    CHECK_PTR(sep);
+char *
+get_dir_sep(
+) {
+    char * sep = strdup( dir_sep().c_str() );
+    CHECK_PTR( sep );
     return sep;
 } // get_dir_sep
 
 
-char* get_exe_path()
-{
-    char* path = strdup(exe_path().c_str());
-    CHECK_PTR(path);
+char *
+get_exe_path(
+) {
+    char * path = strdup( exe_path().c_str() );
+    CHECK_PTR( path );
     return path;
 } // get_exe_path
 
 
-char* get_exe_dir()
-{
-    char* dir = strdup(exe_dir().c_str());
-    CHECK_PTR(dir);
+char *
+get_exe_dir(
+) {
+    char * dir = strdup( exe_dir().c_str() );
+    CHECK_PTR( dir );
     return dir;
 } // get_exe_dir
 

diff --git a/test_common/harness/os_helpers.h b/test_common/harness/os_helpers.h
index aa3080d..7c4463f 100644
--- a/test_common/harness/os_helpers.h
+++ b/test_common/harness/os_helpers.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -24,12 +24,12 @@
 
 #ifdef __cplusplus
 
-#include <string>
+    #include <string>
 
-std::string err_msg(int err);
-std::string dir_sep();
-std::string exe_path();
-std::string exe_dir();
+    std::string err_msg( int err );
+    std::string dir_sep();
+    std::string exe_path();
+    std::string exe_dir();
 
 #endif // __cplusplus
 
@@ -37,9 +37,9 @@
 // C interface.
 // -------------------------------------------------------------------------------------------------
 
-char* get_err_msg(int err); // Returns system error message. Subject to free.
-char* get_dir_sep(); // Returns dir separator. Subject to free.
-char* get_exe_path(); // Returns path of current executable. Subject to free.
-char* get_exe_dir(); // Returns dir of current executable. Subject to free.
+char * get_err_msg( int err );  // Returns system error message. Subject to free.
+char * get_dir_sep();           // Returns dir separator. Subject to free.
+char * get_exe_path();          // Returns path of current executable. Subject to free.
+char * get_exe_dir();           // Returns dir of current executable. Subject to free.
 
 #endif // __os_helpers_h__

diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp
index b2ab5b0..1706730 100644
--- a/test_common/harness/parseParameters.cpp
+++ b/test_common/harness/parseParameters.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -29,58 +29,47 @@
 
 #define DEFAULT_COMPILATION_PROGRAM "cl_offline_compiler"
 
-CompilationMode gCompilationMode = kOnline;
+CompilationMode      gCompilationMode = kOnline;
 CompilationCacheMode gCompilationCacheMode = kCacheModeCompileIfAbsent;
-std::string gCompilationCachePath = ".";
-std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM;
+std::string          gCompilationCachePath = ".";
+std::string          gCompilationProgram = DEFAULT_COMPILATION_PROGRAM;
 
-void helpInfo()
+void helpInfo ()
 {
-    log_info(
-        R"(Common options:
-    -h, --help
-        This help
-    --compilation-mode <mode>
-        Specify a compilation mode.  Mode can be:
-            online     Use online compilation (default)
-            binary     Use binary offline compilation
-            spir-v     Use SPIR-V offline compilation
-
-For offline compilation (binary and spir-v modes) only:
-    --compilation-cache-mode <cache-mode>
-        Specify a compilation caching mode:
-            compile-if-absent
-                Read from cache if already populated, or else perform
-                offline compilation (default)
-            force-read
-                Force reading from the cache
-            overwrite
-                Disable reading from the cache
-            dump-cl-files
-                Dumps the .cl and build .options files used by the test suite
-    --compilation-cache-path <path>
-        Path for offline compiler output and CL source
-    --compilation-program <prog>
-        Program to use for offline compilation, defaults to:
-            )" DEFAULT_COMPILATION_PROGRAM "\n\n");
+    log_info("Common options:\n"
+             "        -h, --help                  This help\n"
+             "        --compilation-mode <mode>   Specify a compilation mode.  Mode can be:\n"
+             "                           online     Use online compilation (default)\n"
+             "                           binary     Use binary offline compilation\n"
+             "                           spir-v     Use SPIR-V offline compilation\n"
+             "\n"
+             "    For offline compilation (binary and spir-v modes) only:\n"
+             "        --compilation-cache-mode <cache-mode>  Specify a compilation caching mode:\n"
+             "                                 compile-if-absent  Read from cache if already populated, or\n"
+             "                                                    else perform offline compilation (default)\n"
+             "                                 force-read        Force reading from the cache\n"
+             "                                 overwrite         Disable reading from the cache\n"
+             "                                 dump-cl-files     Dumps the .cl and build .options files used by the test suite\n"
+             "        --compilation-cache-path <path>   Path for offline compiler output and CL source\n"
+             "        --compilation-program <prog>      Program to use for offline compilation,\n"
+             "                                          defaults to " DEFAULT_COMPILATION_PROGRAM "\n"
+             "\n");
 }
 
-int parseCustomParam(int argc, const char *argv[], const char *ignore)
+int parseCustomParam (int argc, const char *argv[], const char *ignore)
 {
     int delArg = 0;
 
-    for (int i = 1; i < argc; i++)
+    for (int i=1; i<argc; i++)
     {
-        if (ignore != 0)
+        if(ignore != 0)
         {
-            // skip parameters that require special/different treatment in
-            // application (generic interpretation and parameter removal will
-            // not be performed)
-            const char *ptr = strstr(ignore, argv[i]);
-            if (ptr != 0 && (ptr == ignore || ptr[-1] == ' ')
-                && // first on list or ' ' before
-                (ptr[strlen(argv[i])] == 0
-                 || ptr[strlen(argv[i])] == ' ')) // last on list or ' ' after
+            // skip parameters that require special/different treatment in application
+            // (generic interpretation and parameter removal will not be performed)
+            const char * ptr = strstr(ignore, argv[i]);
+            if(ptr != 0 &&
+               (ptr == ignore || ptr[-1] == ' ') && //first on list or ' ' before
+               (ptr[strlen(argv[i])] == 0 || ptr[strlen(argv[i])] == ' ')) // last on list or ' ' after
                 continue;
         }
 
@@ -91,7 +80,7 @@
             // Note: we don't increment delArg to delete this argument,
             // to allow the caller's argument parsing routine to see the
             // option and print its own help.
-            helpInfo();
+            helpInfo ();
         }
         else if (!strcmp(argv[i], "--compilation-mode"))
         {
@@ -153,18 +142,15 @@
                 }
                 else
                 {
-                    log_error("Compilation cache mode not recognized: %s\n",
-                              mode);
+                    log_error("Compilation cache mode not recognized: %s\n", mode);
                     return -1;
                 }
                 log_info("Compilation cache mode specified: %s\n", mode);
             }
             else
             {
-                log_error(
-                    "Compilation cache mode parameters are incorrect. Usage:\n"
-                    "  --compilation-cache-mode "
-                    "<compile-if-absent|force-read|overwrite>\n");
+                log_error("Compilation cache mode parameters are incorrect. Usage:\n"
+                          "  --compilation-cache-mode <compile-if-absent|force-read|overwrite>\n");
                 return -1;
             }
         }
@@ -178,8 +164,7 @@
             }
             else
             {
-                log_error("Path argument for --compilation-cache-path was not "
-                          "specified.\n");
+                log_error("Path argument for --compilation-cache-path was not specified.\n");
                 return -1;
             }
         }
@@ -193,34 +178,34 @@
             }
             else
             {
-                log_error("Program argument for --compilation-program was not "
-                          "specified.\n");
+                log_error("Program argument for --compilation-program was not specified.\n");
                 return -1;
             }
         }
 
-        // cleaning parameters from argv tab
-        for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg];
+        //cleaning parameters from argv tab
+        for (int j = i; j < argc - delArg; j++)
+            argv[j] = argv[j + delArg];
         argc -= delArg;
         i -= delArg;
     }
 
-    if ((gCompilationCacheMode == kCacheModeForceRead
-         || gCompilationCacheMode == kCacheModeOverwrite)
-        && gCompilationMode == kOnline)
+    if ((gCompilationCacheMode == kCacheModeForceRead || gCompilationCacheMode == kCacheModeOverwrite)
+         && gCompilationMode == kOnline)
     {
-        log_error("Compilation cache mode can only be specified when using an "
-                  "offline compilation mode.\n");
+        log_error("Compilation cache mode can only be specified when using an offline compilation mode.\n");
         return -1;
     }
 
     return argc;
 }
 
-bool is_power_of_two(int number) { return number && !(number & (number - 1)); }
+bool is_power_of_two(int number)
+{
+    return number && !(number & (number - 1));
+}
 
-extern void parseWimpyReductionFactor(const char *&arg,
-                                      int &wimpyReductionFactor)
+extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor)
 {
     const char *arg_temp = strchr(&arg[1], ']');
     if (arg_temp != 0)
@@ -229,15 +214,12 @@
         arg = arg_temp; // Advance until ']'
         if (is_power_of_two(new_factor))
         {
-            log_info("\n Wimpy reduction factor changed from %d to %d \n",
-                     wimpyReductionFactor, new_factor);
+            log_info("\n Wimpy reduction factor changed from %d to %d \n", wimpyReductionFactor, new_factor);
             wimpyReductionFactor = new_factor;
         }
         else
         {
-            log_info("\n WARNING: Incorrect wimpy reduction factor %d, must be "
-                     "power of 2. The default value will be used.\n",
-                     new_factor);
+            log_info("\n WARNING: Incorrect wimpy reduction factor %d, must be power of 2. The default value will be used.\n", new_factor);
         }
     }
 }

diff --git a/test_common/harness/parseParameters.h b/test_common/harness/parseParameters.h
index b0f8328..5dc28c5 100644
--- a/test_common/harness/parseParameters.h
+++ b/test_common/harness/parseParameters.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -39,10 +39,8 @@
 extern std::string gCompilationCachePath;
 extern std::string gCompilationProgram;
 
-extern int parseCustomParam(int argc, const char *argv[],
-                            const char *ignore = 0);
+extern int parseCustomParam (int argc, const char *argv[], const char *ignore = 0 );
 
-extern void parseWimpyReductionFactor(const char *&arg,
-                                      int &wimpyReductionFactor);
+extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor);
 
 #endif // _parseParameters_h

diff --git a/test_common/harness/propertyHelpers.cpp b/test_common/harness/propertyHelpers.cpp
deleted file mode 100644
index 3157ca8..0000000
--- a/test_common/harness/propertyHelpers.cpp
+++ /dev/null

@@ -1,126 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "propertyHelpers.h"
-#include "errorHelpers.h"
-
-#include <assert.h>
-
-#include <algorithm>
-#include <vector>
-
-static bool findProperty(const std::vector<cl_properties>& props,
-                         cl_properties prop, cl_properties& value)
-{
-    // This function assumes properties are valid:
-    assert(props.size() == 0 || props.back() == 0);
-    assert(props.size() == 0 || props.size() % 2 == 1);
-
-    for (cl_uint i = 0; i < props.size(); i = i + 2)
-    {
-        cl_properties check_prop = props[i];
-
-        if (check_prop == 0)
-        {
-            break;
-        }
-
-        if (check_prop == prop)
-        {
-            value = props[i + 1];
-            return true;
-        }
-    }
-
-    return false;
-}
-
-int compareProperties(const std::vector<cl_properties>& queried,
-                      const std::vector<cl_properties>& check)
-{
-    if (queried.size() != 0)
-    {
-        if (queried.back() != 0)
-        {
-            log_error("ERROR: queried properties do not end with 0!\n");
-            return TEST_FAIL;
-        }
-        if (queried.size() % 2 != 1)
-        {
-            log_error("ERROR: queried properties does not consist of "
-                      "property-value pairs!\n");
-            return TEST_FAIL;
-        }
-    }
-    if (check.size() != 0)
-    {
-        if (check.back() != 0)
-        {
-            log_error("ERROR: check properties do not end with 0!\n");
-            return TEST_FAIL;
-        }
-        if (check.size() % 2 != 1)
-        {
-            log_error("ERROR: check properties does not consist of "
-                      "property-value pairs!\n");
-            return TEST_FAIL;
-        }
-    }
-
-    if (queried != check)
-    {
-        for (cl_uint i = 0; i < check.size(); i = i + 2)
-        {
-            cl_properties check_prop = check[i];
-
-            if (check_prop == 0)
-            {
-                break;
-            }
-
-            cl_properties check_value = check[i + 1];
-            cl_properties queried_value = 0;
-
-            bool found = findProperty(queried, check_prop, queried_value);
-
-            if (!found)
-            {
-                log_error("ERROR: expected property 0x%x not found!\n",
-                          check_prop);
-                return TEST_FAIL;
-            }
-            else if (check_value != queried_value)
-            {
-                log_error("ERROR: mis-matched value for property 0x%x: wanted "
-                          "0x%x, got 0x%x\n",
-                          check_prop, check_value, queried_value);
-                return TEST_FAIL;
-            }
-        }
-
-        if (queried.size() > check.size())
-        {
-            log_error("ERROR: all properties found but there are extra "
-                      "properties: expected %d, got %d.\n",
-                      check.size(), queried.size());
-            return TEST_FAIL;
-        }
-
-        log_error("ERROR: properties were returned in the wrong order.\n");
-        return TEST_FAIL;
-    }
-
-    return TEST_PASS;
-}

diff --git a/test_common/harness/propertyHelpers.h b/test_common/harness/propertyHelpers.h
deleted file mode 100644
index 68d16f6..0000000
--- a/test_common/harness/propertyHelpers.h
+++ /dev/null

@@ -1,27 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef _propertyHelpers_h
-#define _propertyHelpers_h
-
-#include "compat.h"
-#include "testHarness.h"
-
-#include <vector>
-
-int compareProperties(const std::vector<cl_properties>& queried,
-                      const std::vector<cl_properties>& check);
-
-#endif // _propertyHelpers_h

diff --git a/test_common/harness/ref_counting.h b/test_common/harness/ref_counting.h
index cd6a316..1a2acee 100644
--- a/test_common/harness/ref_counting.h
+++ b/test_common/harness/ref_counting.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,39 +16,34 @@
 #ifndef _ref_counting_h
 #define _ref_counting_h
 
-#define MARK_REF_COUNT_BASE(c, type, bigType)                                  \
-    cl_uint c##_refCount;                                                      \
-    error = clGet##type##Info(c, CL_##bigType##_REFERENCE_COUNT,               \
-                              sizeof(c##_refCount), &c##_refCount, NULL);      \
-    test_error(error, "Unable to check reference count for " #type);
+#define MARK_REF_COUNT_BASE( c, type, bigType ) \
+    cl_uint c##_refCount; \
+    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
+    test_error( error, "Unable to check reference count for " #type );
 
-#define TEST_REF_COUNT_BASE(c, type, bigType)                                  \
-    cl_uint c##_refCount_new;                                                  \
-    error =                                                                    \
-        clGet##type##Info(c, CL_##bigType##_REFERENCE_COUNT,                   \
-                          sizeof(c##_refCount_new), &c##_refCount_new, NULL);  \
-    test_error(error, "Unable to check reference count for " #type);           \
-    if (c##_refCount != c##_refCount_new)                                      \
-    {                                                                          \
-        log_error("ERROR: Reference count for " #type                          \
-                  " changed! (was %d, now %d)\n",                              \
-                  c##_refCount, c##_refCount_new);                             \
-        return -1;                                                             \
+#define TEST_REF_COUNT_BASE( c, type, bigType ) \
+    cl_uint c##_refCount_new; \
+    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
+    test_error( error, "Unable to check reference count for " #type ); \
+    if( c##_refCount != c##_refCount_new ) \
+    {    \
+        log_error( "ERROR: Reference count for " #type " changed! (was %d, now %d)\n", c##_refCount, c##_refCount_new );    \
+        return -1; \
     }
 
-#define MARK_REF_COUNT_CONTEXT(c) MARK_REF_COUNT_BASE(c, Context, CONTEXT)
-#define TEST_REF_COUNT_CONTEXT(c) TEST_REF_COUNT_BASE(c, Context, CONTEXT)
+#define MARK_REF_COUNT_CONTEXT( c ) MARK_REF_COUNT_BASE( c, Context, CONTEXT )
+#define TEST_REF_COUNT_CONTEXT( c ) TEST_REF_COUNT_BASE( c, Context, CONTEXT )
 
-#define MARK_REF_COUNT_DEVICE(c) MARK_REF_COUNT_BASE(c, Device, DEVICE)
-#define TEST_REF_COUNT_DEVICE(c) TEST_REF_COUNT_BASE(c, Device, DEVICE)
+#define MARK_REF_COUNT_DEVICE( c ) MARK_REF_COUNT_BASE( c, Device, DEVICE )
+#define TEST_REF_COUNT_DEVICE( c ) TEST_REF_COUNT_BASE( c, Device, DEVICE )
 
-#define MARK_REF_COUNT_QUEUE(c) MARK_REF_COUNT_BASE(c, CommandQueue, QUEUE)
-#define TEST_REF_COUNT_QUEUE(c) TEST_REF_COUNT_BASE(c, CommandQueue, QUEUE)
+#define MARK_REF_COUNT_QUEUE( c ) MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE )
+#define TEST_REF_COUNT_QUEUE( c ) TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE )
 
-#define MARK_REF_COUNT_PROGRAM(c) MARK_REF_COUNT_BASE(c, Program, PROGRAM)
-#define TEST_REF_COUNT_PROGRAM(c) TEST_REF_COUNT_BASE(c, Program, PROGRAM)
+#define MARK_REF_COUNT_PROGRAM( c ) MARK_REF_COUNT_BASE( c, Program, PROGRAM )
+#define TEST_REF_COUNT_PROGRAM( c ) TEST_REF_COUNT_BASE( c, Program, PROGRAM )
 
-#define MARK_REF_COUNT_MEM(c) MARK_REF_COUNT_BASE(c, MemObject, MEM)
-#define TEST_REF_COUNT_MEM(c) TEST_REF_COUNT_BASE(c, MemObject, MEM)
+#define MARK_REF_COUNT_MEM( c ) MARK_REF_COUNT_BASE( c, MemObject, MEM )
+#define TEST_REF_COUNT_MEM( c ) TEST_REF_COUNT_BASE( c, MemObject, MEM )
 
 #endif // _ref_counting_h

diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp
index 681ccdd..ff38a7e 100644
--- a/test_common/harness/rounding_mode.cpp
+++ b/test_common/harness/rounding_mode.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -15,49 +15,46 @@
 //
 #include "rounding_mode.h"
 
-#if (defined(__arm__) || defined(__aarch64__))
-#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
-#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode:
+#if (defined( __arm__ ) || defined(__aarch64__))
+    #define FPSCR_FZ    (1 << 24)       // Flush-To-Zero mode
+    #define FPSCR_ROUND_MASK (3 << 22)  // Rounding mode:
 
-#define _ARM_FE_FTZ 0x1000000
-#define _ARM_FE_NFTZ 0x0
-#if defined(__aarch64__)
-#define _FPU_GETCW(cw) __asm__("MRS %0,FPCR" : "=r"(cw))
-#define _FPU_SETCW(cw) __asm__("MSR FPCR,%0" : : "ri"(cw))
-#else
-#define _FPU_GETCW(cw) __asm__("VMRS %0,FPSCR" : "=r"(cw))
-#define _FPU_SETCW(cw) __asm__("VMSR FPSCR,%0" : : "ri"(cw))
-#endif
+    #define _ARM_FE_FTZ     0x1000000
+    #define _ARM_FE_NFTZ    0x0
+    #if defined(__aarch64__)
+        #define _FPU_GETCW(cw) __asm__ ("MRS %0,FPCR" : "=r" (cw))
+        #define _FPU_SETCW(cw) __asm__ ("MSR FPCR,%0" : :"ri" (cw))
+    #else
+        #define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw))
+        #define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
+    #endif
 #endif
 
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
-#define _ARM_FE_TONEAREST 0x0
-#define _ARM_FE_UPWARD 0x400000
-#define _ARM_FE_DOWNWARD 0x800000
-#define _ARM_FE_TOWARDZERO 0xc00000
-RoundingMode set_round(RoundingMode r, Type outType)
+#if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ )
+#define _ARM_FE_TONEAREST           0x0
+#define _ARM_FE_UPWARD              0x400000
+#define _ARM_FE_DOWNWARD            0x800000
+#define _ARM_FE_TOWARDZERO          0xc00000
+RoundingMode set_round( RoundingMode r, Type outType )
 {
-    static const int flt_rounds[kRoundingModeCount] = {
-        _ARM_FE_TONEAREST, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,
-        _ARM_FE_TOWARDZERO
-    };
-    static const int int_rounds[kRoundingModeCount] = {
-        _ARM_FE_TOWARDZERO, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,
-        _ARM_FE_TOWARDZERO
-    };
+    static const int flt_rounds[ kRoundingModeCount ] = { _ARM_FE_TONEAREST,
+                                                          _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
+    static const int int_rounds[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO,
+                                                          _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
     const int *p = int_rounds;
-    if (outType == kfloat || outType == kdouble) p = flt_rounds;
+    if( outType == kfloat || outType == kdouble )
+        p = flt_rounds;
 
     int fpscr = 0;
     RoundingMode oldRound = get_round();
 
     _FPU_GETCW(fpscr);
-    _FPU_SETCW(p[r] | (fpscr & ~FPSCR_ROUND_MASK));
+    _FPU_SETCW( p[r] | (fpscr & ~FPSCR_ROUND_MASK));
 
     return oldRound;
 }
 
-RoundingMode get_round(void)
+RoundingMode get_round( void )
 {
     int fpscr;
     int oldRound;
@@ -65,192 +62,180 @@
     _FPU_GETCW(fpscr);
     oldRound = (fpscr & FPSCR_ROUND_MASK);
 
-    switch (oldRound)
+    switch( oldRound )
     {
-        case _ARM_FE_TONEAREST: return kRoundToNearestEven;
-        case _ARM_FE_UPWARD: return kRoundUp;
-        case _ARM_FE_DOWNWARD: return kRoundDown;
-        case _ARM_FE_TOWARDZERO: return kRoundTowardZero;
+        case _ARM_FE_TONEAREST:
+            return kRoundToNearestEven;
+        case _ARM_FE_UPWARD:
+            return kRoundUp;
+        case _ARM_FE_DOWNWARD:
+            return kRoundDown;
+        case _ARM_FE_TOWARDZERO:
+            return kRoundTowardZero;
     }
 
     return kDefaultRoundingMode;
 }
 
 #elif !(defined(_WIN32) && defined(_MSC_VER))
-RoundingMode set_round(RoundingMode r, Type outType)
+RoundingMode set_round( RoundingMode r, Type outType )
 {
-    static const int flt_rounds[kRoundingModeCount] = {
-        FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
-    };
-    static const int int_rounds[kRoundingModeCount] = {
-        FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
-    };
+    static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
+    static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
     const int *p = int_rounds;
-    if (outType == kfloat || outType == kdouble) p = flt_rounds;
+    if( outType == kfloat || outType == kdouble )
+        p = flt_rounds;
     int oldRound = fegetround();
-    fesetround(p[r]);
+    fesetround( p[r] );
 
-    switch (oldRound)
+    switch( oldRound )
     {
-        case FE_TONEAREST: return kRoundToNearestEven;
-        case FE_UPWARD: return kRoundUp;
-        case FE_DOWNWARD: return kRoundDown;
-        case FE_TOWARDZERO: return kRoundTowardZero;
-        default: abort(); // ??!
+        case FE_TONEAREST:
+            return kRoundToNearestEven;
+        case FE_UPWARD:
+            return kRoundUp;
+        case FE_DOWNWARD:
+            return kRoundDown;
+        case FE_TOWARDZERO:
+            return kRoundTowardZero;
+        default:
+            abort();    // ??!
     }
-    return kDefaultRoundingMode; // never happens
+    return kDefaultRoundingMode;    //never happens
 }
 
-RoundingMode get_round(void)
+RoundingMode get_round( void )
 {
     int oldRound = fegetround();
 
-    switch (oldRound)
+    switch( oldRound )
     {
-        case FE_TONEAREST: return kRoundToNearestEven;
-        case FE_UPWARD: return kRoundUp;
-        case FE_DOWNWARD: return kRoundDown;
-        case FE_TOWARDZERO: return kRoundTowardZero;
+        case FE_TONEAREST:
+            return kRoundToNearestEven;
+        case FE_UPWARD:
+            return kRoundUp;
+        case FE_DOWNWARD:
+            return kRoundDown;
+        case FE_TOWARDZERO:
+            return kRoundTowardZero;
     }
 
     return kDefaultRoundingMode;
 }
 
 #else
-RoundingMode set_round(RoundingMode r, Type outType)
+RoundingMode set_round( RoundingMode r, Type outType )
 {
-    static const int flt_rounds[kRoundingModeCount] = { _RC_NEAR, _RC_NEAR,
-                                                        _RC_UP, _RC_DOWN,
-                                                        _RC_CHOP };
-    static const int int_rounds[kRoundingModeCount] = { _RC_CHOP, _RC_NEAR,
-                                                        _RC_UP, _RC_DOWN,
-                                                        _RC_CHOP };
-    const int *p =
-        (outType == kfloat || outType == kdouble) ? flt_rounds : int_rounds;
+    static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
+    static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
+    const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds;
     unsigned int oldRound;
 
-    int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound
-    if (err)
-    {
-        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n",
-                   __FILE__, __LINE__);
-        return kDefaultRoundingMode; // what else never happens
+    int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
+    if (err) {
+        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
+        return kDefaultRoundingMode;    //what else never happens
     }
 
     oldRound &= _MCW_RC;
 
-    RoundingMode old = (oldRound == _RC_NEAR)
-        ? kRoundToNearestEven
-        : (oldRound == _RC_UP) ? kRoundUp
-                               : (oldRound == _RC_DOWN)
-                ? kRoundDown
-                : (oldRound == _RC_CHOP) ? kRoundTowardZero
-                                         : kDefaultRoundingMode;
+    RoundingMode old =
+        (oldRound == _RC_NEAR)? kRoundToNearestEven :
+        (oldRound == _RC_UP)?   kRoundUp :
+        (oldRound == _RC_DOWN)? kRoundDown :
+        (oldRound == _RC_CHOP)? kRoundTowardZero:
+        kDefaultRoundingMode;
 
-    _controlfp_s(&oldRound, p[r], _MCW_RC); // setting new rounding mode
-    return old; // returning old rounding mode
+    _controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode
+    return old;    //returning old rounding mode
 }
 
-RoundingMode get_round(void)
+RoundingMode get_round( void )
 {
     unsigned int oldRound;
 
-    int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound
+    int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
     oldRound &= _MCW_RC;
-    return (oldRound == _RC_NEAR)
-        ? kRoundToNearestEven
-        : (oldRound == _RC_UP) ? kRoundUp
-                               : (oldRound == _RC_DOWN)
-                ? kRoundDown
-                : (oldRound == _RC_CHOP) ? kRoundTowardZero
-                                         : kDefaultRoundingMode;
+    return
+        (oldRound == _RC_NEAR)? kRoundToNearestEven :
+        (oldRound == _RC_UP)?   kRoundUp :
+        (oldRound == _RC_DOWN)? kRoundDown :
+        (oldRound == _RC_CHOP)? kRoundTowardZero:
+        kDefaultRoundingMode;
 }
 
 #endif
 
 //
-// FlushToZero() sets the host processor into ftz mode.  It is intended to have
-// a remote effect on the behavior of the code in basic_test_conversions.c. Some
-// host processors may not support this mode, which case you'll need to do some
-// clamping in software by testing against FLT_MIN or DBL_MIN in that file.
+// FlushToZero() sets the host processor into ftz mode.  It is intended to have a remote effect on the behavior of the code in
+// basic_test_conversions.c. Some host processors may not support this mode, which case you'll need to do some clamping in
+// software by testing against FLT_MIN or DBL_MIN in that file.
 //
-// Note: IEEE-754 says conversions are basic operations.  As such they do *NOT*
-// have the behavior in section 7.5.3 of the OpenCL spec. They *ALWAYS* flush to
-// zero for subnormal inputs or outputs when FTZ mode is on like other basic
+// Note: IEEE-754 says conversions are basic operations.  As such they do *NOT* have the behavior in section 7.5.3 of
+// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
 // operators do (e.g. add, subtract, multiply, divide, etc.)
 //
 // Configuring hardware to FTZ mode varies by platform.
-// CAUTION: Some C implementations may also fail to behave properly in this
-// mode.
+// CAUTION: Some C implementations may also fail to behave properly in this mode.
 //
 //  On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
-//  On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR --
-//  provided that SSE/SSE2
-//          is used for floating point computation! If your OS uses x87, you'll
-//          need to figure out how to turn that off for the conversions code in
-//          basic_test_conversions.c so that they flush to zero properly.
-//          Otherwise, you'll need to add appropriate software clamping to
-//          basic_test_conversions.c in which case, these function are at
-//          liberty to do nothing.
+//  On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
+//          is used for floating point computation! If your OS uses x87, you'll need to figure out how
+//          to turn that off for the conversions code in basic_test_conversions.c so that they flush to
+//          zero properly.  Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
+//          in which case, these function are at liberty to do nothing.
 //
-#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32)
-#include <xmmintrin.h>
-#elif defined(__PPC__)
-#include <fpu_control.h>
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
+    #include <xmmintrin.h>
+#elif defined( __PPC__ )
+    #include <fpu_control.h>
 #endif
-void *FlushToZero(void)
+void *FlushToZero( void )
 {
-#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
-    union {
-        int i;
-        void *p;
-    } u = { _mm_getcsr() };
-    _mm_setcsr(u.i | 0x8040);
-    return u.p;
-#elif defined(__arm__) || defined(__aarch64__)
-    int fpscr;
-    _FPU_GETCW(fpscr);
-    _FPU_SETCW(fpscr | FPSCR_FZ);
-    return NULL;
-#elif defined(__PPC__)
-    fpu_control_t flags = 0;
-    _FPU_GETCW(flags);
-    flags |= _FPU_MASK_NI;
-    _FPU_SETCW(flags);
-    return NULL;
+#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
+    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
+        union{ int i;  void *p; }u = { _mm_getcsr() };
+        _mm_setcsr( u.i | 0x8040 );
+        return u.p;
+    #elif defined( __arm__ ) || defined(__aarch64__)
+        int fpscr;
+        _FPU_GETCW(fpscr);
+        _FPU_SETCW(fpscr | FPSCR_FZ);
+        return NULL;
+    #elif defined( __PPC__ )
+        fpu_control_t flags = 0;
+        _FPU_GETCW(flags);
+        flags |= _FPU_MASK_NI;
+        _FPU_SETCW(flags);
+        return NULL;
+        #else
+        #error Unknown arch
+    #endif
 #else
-#error Unknown arch
-#endif
-#else
-#error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
+    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
 #endif
 }
 
-// Undo the effects of FlushToZero above, restoring the host to default
-// behavior, using the information passed in p.
-void UnFlushToZero(void *p)
+// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
+void UnFlushToZero( void *p)
 {
-#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
-    union {
-        void *p;
-        int i;
-    } u = { p };
-    _mm_setcsr(u.i);
-#elif defined(__arm__) || defined(__aarch64__)
-    int fpscr;
-    _FPU_GETCW(fpscr);
-    _FPU_SETCW(fpscr & ~FPSCR_FZ);
-#elif defined(__PPC__)
-    fpu_control_t flags = 0;
-    _FPU_GETCW(flags);
-    flags &= ~_FPU_MASK_NI;
-    _FPU_SETCW(flags);
+#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
+    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
+        union{ void *p; int i;  }u = { p };
+        _mm_setcsr( u.i );
+    #elif defined( __arm__ ) || defined(__aarch64__)
+        int fpscr;
+        _FPU_GETCW(fpscr);
+        _FPU_SETCW(fpscr & ~FPSCR_FZ);
+    #elif defined( __PPC__)
+        fpu_control_t flags = 0;
+        _FPU_GETCW(flags);
+        flags &= ~_FPU_MASK_NI;
+        _FPU_SETCW(flags);
+        #else
+        #error Unknown arch
+    #endif
 #else
-#error Unknown arch
-#endif
-#else
-#error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
+    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
 #endif
 }

diff --git a/test_common/harness/rounding_mode.h b/test_common/harness/rounding_mode.h
index 064a3a6..8c0e8a1 100644
--- a/test_common/harness/rounding_mode.h
+++ b/test_common/harness/rounding_mode.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,11 +16,9 @@
 #ifndef __ROUNDING_MODE_H__
 #define __ROUNDING_MODE_H__
 
-#pragma STDC FENV_ACCESS ON
-
 #include "compat.h"
 
-#if (defined(_WIN32) && defined(_MSC_VER))
+#if (defined(_WIN32) && defined (_MSC_VER))
 #include "errorHelpers.h"
 #include "testHarness.h"
 #endif
@@ -34,7 +32,7 @@
     kRoundTowardZero,
 
     kRoundingModeCount
-} RoundingMode;
+}RoundingMode;
 
 typedef enum
 {
@@ -49,14 +47,15 @@
     kulong = 8,
     klong = 9,
 
-    // This goes last
+    //This goes last
     kTypeCount
-} Type;
+}Type;
 
-extern RoundingMode set_round(RoundingMode r, Type outType);
-extern RoundingMode get_round(void);
-extern void *FlushToZero(void);
-extern void UnFlushToZero(void *p);
+extern RoundingMode set_round( RoundingMode r, Type outType );
+extern RoundingMode get_round( void );
+extern void *FlushToZero( void );
+extern void UnFlushToZero( void *p);
+
 
 
 #endif /* __ROUNDING_MODE_H__ */

diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
index 1aec3d0..04f12c7 100644
--- a/test_common/harness/testHarness.cpp
+++ b/test_common/harness/testHarness.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017-2019 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -15,7 +15,6 @@
 //
 #include "testHarness.h"
 #include "compat.h"
-#include <algorithm>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -35,13 +34,13 @@
 #include <unistd.h>
 #endif
 
-#if defined(__APPLE__)
+#if !defined(_WIN32) && !defined(__ANDROID__)
 #include <sys/sysctl.h>
 #endif
 
 #include <time.h>
 
-#if !defined(__APPLE__)
+#if !defined (__APPLE__)
 #include <CL/cl.h>
 #endif
 
@@ -52,114 +51,100 @@
 cl_uint gRandomSeed = 0;
 cl_uint gReSeed = 0;
 
-int gFlushDenormsToZero = 0;
-int gInfNanSupport = 1;
-int gIsEmbedded = 0;
-int gHasLong = 1;
-bool gCoreILProgram = true;
+int     gFlushDenormsToZero = 0;
+int     gInfNanSupport = 1;
+int     gIsEmbedded = 0;
+int     gIsOpenCL_C_1_0_Device = 0;
+int     gIsOpenCL_1_0_Device = 0;
+int     gHasLong = 1;
 
-#define DEFAULT_NUM_ELEMENTS 0x4000
+#define DEFAULT_NUM_ELEMENTS        0x4000
 
-int runTestHarness(int argc, const char *argv[], int testNum,
-                   test_definition testList[], int forceNoContextCreation,
-                   cl_command_queue_properties queueProps)
+int runTestHarness( int argc, const char *argv[], int testNum, test_definition testList[],
+                    int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps )
 {
-    return runTestHarnessWithCheck(argc, argv, testNum, testList,
-                                   forceNoContextCreation, queueProps, NULL);
+    return runTestHarnessWithCheck( argc, argv, testNum, testList, forceNoContextCreation, queueProps,
+                          ( imageSupportRequired ) ? verifyImageSupport : NULL );
 }
 
-int skip_init_info(int count)
-{
+int skip_init_info(int count) {
     log_info("Test skipped while initialization\n");
     log_info("SKIPPED %d of %d tests.\n", count, count);
     return EXIT_SUCCESS;
 }
 
-int fail_init_info(int count)
-{
+int fail_init_info(int count) {
     log_info("Test failed while initialization\n");
     log_info("FAILED %d of %d tests.\n", count, count);
     return EXIT_FAILURE;
 }
-void version_expected_info(const char *test_name, const char *api_name,
-                           const char *expected_version,
-                           const char *device_version)
-{
-    log_info("%s skipped (requires at least %s version %s, but the device "
-             "reports %s version %s)\n",
-             test_name, api_name, expected_version, api_name, device_version);
+void version_expected_info(const char * test_name, const char * expected_version, const char * device_version) {
+    log_info("%s skipped (requires at least version %s, but the device reports version %s)\n",
+        test_name, expected_version, device_version);
 }
-int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
-                            test_definition testList[],
-                            int forceNoContextCreation,
-                            cl_command_queue_properties queueProps,
-                            DeviceCheckFn deviceCheckFn)
+int runTestHarnessWithCheck( int argc, const char *argv[], int testNum, test_definition testList[],
+                             int forceNoContextCreation, cl_command_queue_properties queueProps,
+                             DeviceCheckFn deviceCheckFn )
 {
     test_start();
 
-    cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT;
-    cl_uint num_platforms = 0;
-    cl_platform_id *platforms;
-    cl_device_id device;
-    int num_elements = DEFAULT_NUM_ELEMENTS;
-    cl_uint num_devices = 0;
-    cl_device_id *devices = NULL;
-    cl_uint choosen_device_index = 0;
-    cl_uint choosen_platform_index = 0;
+    cl_device_type     device_type = CL_DEVICE_TYPE_DEFAULT;
+    cl_uint            num_platforms = 0;
+    cl_platform_id     *platforms;
+    cl_device_id       device;
+    int                num_elements = DEFAULT_NUM_ELEMENTS;
+    cl_uint            num_devices = 0;
+    cl_device_id       *devices = NULL;
+    cl_uint            choosen_device_index = 0;
+    cl_uint            choosen_platform_index = 0;
 
-    int err, ret;
+    int            err, ret;
     char *endPtr;
     int based_on_env_var = 0;
 
 
     /* Check for environment variable to set device type */
-    char *env_mode = getenv("CL_DEVICE_TYPE");
-    if (env_mode != NULL)
+    char *env_mode = getenv( "CL_DEVICE_TYPE" );
+    if( env_mode != NULL )
     {
         based_on_env_var = 1;
-        if (strcmp(env_mode, "gpu") == 0
-            || strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0)
+        if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 )
             device_type = CL_DEVICE_TYPE_GPU;
-        else if (strcmp(env_mode, "cpu") == 0
-                 || strcmp(env_mode, "CL_DEVICE_TYPE_CPU") == 0)
+        else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 )
             device_type = CL_DEVICE_TYPE_CPU;
-        else if (strcmp(env_mode, "accelerator") == 0
-                 || strcmp(env_mode, "CL_DEVICE_TYPE_ACCELERATOR") == 0)
+        else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
             device_type = CL_DEVICE_TYPE_ACCELERATOR;
-        else if (strcmp(env_mode, "default") == 0
-                 || strcmp(env_mode, "CL_DEVICE_TYPE_DEFAULT") == 0)
+        else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
             device_type = CL_DEVICE_TYPE_DEFAULT;
         else
         {
-            log_error("Unknown CL_DEVICE_TYPE env variable setting: "
-                      "%s.\nAborting...\n",
-                      env_mode);
+            log_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode );
             abort();
         }
     }
 
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
     {
         // report on any unusual library search path indirection
-        char *libSearchPath = getenv("DYLD_LIBRARY_PATH");
-        if (libSearchPath)
-            log_info("*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath);
+        char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
+        if( libSearchPath )
+            log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
 
         // report on any unusual framework search path indirection
-        char *frameworkSearchPath = getenv("DYLD_FRAMEWORK_PATH");
-        if (libSearchPath)
-            log_info("*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath);
+        char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
+        if( libSearchPath )
+            log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
     }
 #endif
 
-    env_mode = getenv("CL_DEVICE_INDEX");
-    if (env_mode != NULL)
+    env_mode = getenv( "CL_DEVICE_INDEX" );
+    if( env_mode != NULL )
     {
         choosen_device_index = atoi(env_mode);
     }
 
-    env_mode = getenv("CL_PLATFORM_INDEX");
-    if (env_mode != NULL)
+    env_mode = getenv( "CL_PLATFORM_INDEX" );
+    if( env_mode != NULL )
     {
         choosen_platform_index = atoi(env_mode);
     }
@@ -173,43 +158,34 @@
     }
 
     /* Special case: just list the tests */
-    if ((argc > 1)
-        && (!strcmp(argv[1], "-list") || !strcmp(argv[1], "-h")
-            || !strcmp(argv[1], "--help")))
+    if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" )))
     {
         char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
 
-        log_info(
-            "Usage: %s [<test name>*] [pid<num>] [id<num>] [<device type>]\n",
-            argv[0]);
-        log_info("\t<test name>\tOne or more of: (wildcard character '*') "
-                 "(default *)\n");
-        log_info("\tpid<num>\tIndicates platform at index <num> should be used "
-                 "(default 0).\n");
-        log_info("\tid<num>\t\tIndicates device at index <num> should be used "
-                 "(default 0).\n");
-        log_info("\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> "
-                 "(default CL_DEVICE_TYPE_DEFAULT)\n");
-        log_info("\n");
-        log_info("\tNOTE: You may pass environment variable "
-                 "CL_CONFORMANCE_RESULTS_FILENAME (currently '%s')\n",
-                 fileName != NULL ? fileName : "<undefined>");
-        log_info("\t      to save results to JSON file.\n");
+        log_info( "Usage: %s [<test name>*] [pid<num>] [id<num>] [<device type>]\n", argv[0] );
+        log_info( "\t<test name>\tOne or more of: (wildcard character '*') (default *)\n");
+        log_info( "\tpid<num>\tIndicates platform at index <num> should be used (default 0).\n" );
+        log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" );
+        log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" );
+        log_info( "\n" );
+        log_info( "\tNOTE: You may pass environment variable CL_CONFORMANCE_RESULTS_FILENAME (currently '%s')\n",
+                  fileName != NULL ? fileName : "<undefined>" );
+        log_info( "\t      to save results to JSON file.\n" );
 
-        log_info("\n");
-        log_info("Test names:\n");
-        for (int i = 0; i < testNum; i++)
+        log_info( "\n" );
+        log_info( "Test names:\n" );
+        for( int i = 0; i < testNum; i++ )
         {
-            log_info("\t%s\n", testList[i].name);
+            log_info( "\t%s\n", testList[i].name );
         }
         return EXIT_SUCCESS;
     }
 
     /* How are we supposed to seed the random # generators? */
-    if (argc > 1 && strcmp(argv[argc - 1], "randomize") == 0)
+    if( argc > 1 && strcmp( argv[ argc - 1 ], "randomize" ) == 0 )
     {
-        gRandomSeed = (cl_uint)time(NULL);
-        log_info("Random seed: %u.\n", gRandomSeed);
+        gRandomSeed = (cl_uint) time( NULL );
+        log_info( "Random seed: %u.\n", gRandomSeed );
         gReSeed = 1;
         argc--;
     }
@@ -218,44 +194,39 @@
         log_info(" Initializing random seed to 0.\n");
     }
 
-    /* Do we have an integer to specify the number of elements to pass to tests?
-     */
-    if (argc > 1)
+    /* Do we have an integer to specify the number of elements to pass to tests? */
+    if( argc > 1 )
     {
-        ret = (int)strtol(argv[argc - 1], &endPtr, 10);
-        if (endPtr != argv[argc - 1] && *endPtr == 0)
+        ret = (int)strtol( argv[ argc - 1 ], &endPtr, 10 );
+        if( endPtr != argv[ argc - 1 ] && *endPtr == 0 )
         {
-            /* By spec, this means the entire string was a valid integer, so we
-             * treat it as a num_elements spec */
+            /* By spec, this means the entire string was a valid integer, so we treat it as a num_elements spec */
             /* (hence why we stored the result in ret first) */
             num_elements = ret;
-            log_info("Testing with num_elements of %d\n", num_elements);
+            log_info( "Testing with num_elements of %d\n", num_elements );
             argc--;
         }
     }
 
     /* Do we have a CPU/GPU specification? */
-    if (argc > 1)
+    if( argc > 1 )
     {
-        if (strcmp(argv[argc - 1], "gpu") == 0
-            || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_GPU") == 0)
+        if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 )
         {
             device_type = CL_DEVICE_TYPE_GPU;
             argc--;
         }
-        else if (strcmp(argv[argc - 1], "cpu") == 0
-                 || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_CPU") == 0)
+        else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 )
         {
             device_type = CL_DEVICE_TYPE_CPU;
             argc--;
         }
-        else if (strcmp(argv[argc - 1], "accelerator") == 0
-                 || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_ACCELERATOR") == 0)
+        else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
         {
             device_type = CL_DEVICE_TYPE_ACCELERATOR;
             argc--;
         }
-        else if (strcmp(argv[argc - 1], "CL_DEVICE_TYPE_DEFAULT") == 0)
+        else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
         {
             device_type = CL_DEVICE_TYPE_DEFAULT;
             argc--;
@@ -263,74 +234,66 @@
     }
 
     /* Did we choose a specific device index? */
-    if (argc > 1)
+    if( argc > 1 )
     {
-        if (strlen(argv[argc - 1]) >= 3 && argv[argc - 1][0] == 'i'
-            && argv[argc - 1][1] == 'd')
+        if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' )
         {
-            choosen_device_index = atoi(&(argv[argc - 1][2]));
+            choosen_device_index = atoi( &(argv[ argc - 1 ][2]) );
             argc--;
         }
     }
 
     /* Did we choose a specific platform index? */
-    if (argc > 1)
+    if( argc > 1 )
     {
-        if (strlen(argv[argc - 1]) >= 3 && argv[argc - 1][0] == 'p'
-            && argv[argc - 1][1] == 'i' && argv[argc - 1][2] == 'd')
+        if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd')
         {
-            choosen_platform_index = atoi(&(argv[argc - 1][3]));
+            choosen_platform_index = atoi( &(argv[ argc - 1 ][3]) );
             argc--;
         }
     }
 
 
-    switch (device_type)
-    {
-        case CL_DEVICE_TYPE_GPU: log_info("Requesting GPU device "); break;
-        case CL_DEVICE_TYPE_CPU: log_info("Requesting CPU device "); break;
-        case CL_DEVICE_TYPE_ACCELERATOR:
-            log_info("Requesting Accelerator device ");
-            break;
-        case CL_DEVICE_TYPE_DEFAULT:
-            log_info("Requesting Default device ");
-            break;
-        default: log_error("Requesting unknown device "); return EXIT_FAILURE;
-    }
-    log_info(based_on_env_var ? "based on environment variable "
-                              : "based on command line ");
-    log_info("for platform index %d and device index %d\n",
-             choosen_platform_index, choosen_device_index);
 
-#if defined(__APPLE__)
-#if defined(__i386__) || defined(__x86_64__)
-#define kHasSSE3 0x00000008
-#define kHasSupplementalSSE3 0x00000100
-#define kHasSSE4_1 0x00000400
-#define kHasSSE4_2 0x00000800
+	switch (device_type)
+	{
+	case CL_DEVICE_TYPE_GPU:            log_info("Requesting GPU device "); break;
+	case CL_DEVICE_TYPE_CPU:            log_info("Requesting CPU device "); break;
+	case CL_DEVICE_TYPE_ACCELERATOR:    log_info("Requesting Accelerator device "); break;
+	case CL_DEVICE_TYPE_DEFAULT:        log_info("Requesting Default device "); break;
+	default:                            log_error("Requesting unknown device "); return EXIT_FAILURE;
+	}
+	log_info(based_on_env_var ? "based on environment variable " : "based on command line ");
+	log_info("for platform index %d and device index %d\n", choosen_platform_index, choosen_device_index);
+
+#if defined( __APPLE__ )
+#if defined( __i386__ ) || defined( __x86_64__ )
+#define    kHasSSE3                0x00000008
+#define kHasSupplementalSSE3    0x00000100
+#define    kHasSSE4_1              0x00000400
+#define    kHasSSE4_2              0x00000800
     /* check our environment for a hint to disable SSE variants */
     {
-        const char *env = getenv("CL_MAX_SSE");
-        if (env)
+        const char *env = getenv( "CL_MAX_SSE" );
+        if( env )
         {
             extern int _cpu_capabilities;
             int mask = 0;
-            if (0 == strcasecmp(env, "SSE4.1"))
+            if( 0 == strcasecmp( env, "SSE4.1" ) )
                 mask = kHasSSE4_2;
-            else if (0 == strcasecmp(env, "SSSE3"))
+            else if( 0 == strcasecmp( env, "SSSE3" ) )
                 mask = kHasSSE4_2 | kHasSSE4_1;
-            else if (0 == strcasecmp(env, "SSE3"))
+            else if( 0 == strcasecmp( env, "SSE3" ) )
                 mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
-            else if (0 == strcasecmp(env, "SSE2"))
-                mask =
-                    kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
+            else if( 0 == strcasecmp( env, "SSE2" ) )
+                mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
             else
             {
-                log_error("Error: Unknown CL_MAX_SSE setting: %s\n", env);
+                log_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env );
                 return EXIT_FAILURE;
             }
 
-            log_info("*** Environment: CL_MAX_SSE = %s ***\n", env);
+            log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
             _cpu_capabilities &= ~mask;
         }
     }
@@ -339,224 +302,204 @@
 
     /* Get the platform */
     err = clGetPlatformIDs(0, NULL, &num_platforms);
-    if (err)
-    {
+    if (err) {
         print_error(err, "clGetPlatformIDs failed");
         return EXIT_FAILURE;
     }
 
-    platforms =
-        (cl_platform_id *)malloc(num_platforms * sizeof(cl_platform_id));
-    if (!platforms || choosen_platform_index >= num_platforms)
-    {
-        log_error("platform index out of range -- choosen_platform_index (%d) "
-                  ">= num_platforms (%d)\n",
-                  choosen_platform_index, num_platforms);
+    platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) );
+    if (!platforms || choosen_platform_index >= num_platforms) {
+        log_error( "platform index out of range -- choosen_platform_index (%d) >= num_platforms (%d)\n", choosen_platform_index, num_platforms );
         return EXIT_FAILURE;
     }
     BufferOwningPtr<cl_platform_id> platformsBuf(platforms);
 
     err = clGetPlatformIDs(num_platforms, platforms, NULL);
-    if (err)
-    {
+    if (err) {
         print_error(err, "clGetPlatformIDs failed");
         return EXIT_FAILURE;
     }
 
     /* Get the number of requested devices */
-    err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, 0,
-                         NULL, &num_devices);
-    if (err)
-    {
+    err = clGetDeviceIDs(platforms[choosen_platform_index],  device_type, 0, NULL, &num_devices );
+    if (err) {
         print_error(err, "clGetDeviceIDs failed");
         return EXIT_FAILURE;
     }
 
-    devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
-    if (!devices || choosen_device_index >= num_devices)
-    {
-        log_error("device index out of range -- choosen_device_index (%d) >= "
-                  "num_devices (%d)\n",
-                  choosen_device_index, num_devices);
+    devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) );
+    if (!devices || choosen_device_index >= num_devices) {
+        log_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, num_devices );
         return EXIT_FAILURE;
     }
     BufferOwningPtr<cl_device_id> devicesBuf(devices);
 
 
     /* Get the requested device */
-    err = clGetDeviceIDs(platforms[choosen_platform_index], device_type,
-                         num_devices, devices, NULL);
-    if (err)
-    {
+    err = clGetDeviceIDs(platforms[choosen_platform_index],  device_type, num_devices, devices, NULL );
+    if (err) {
         print_error(err, "clGetDeviceIDs failed");
         return EXIT_FAILURE;
     }
 
     device = devices[choosen_device_index];
 
-    err = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(gDeviceType),
-                          &gDeviceType, NULL);
-    if (err)
+    err = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof(gDeviceType), &gDeviceType, NULL );
+    if( err )
     {
-        print_error(err, "Unable to get device type");
+        print_error( err, "Unable to get device type" );
         return TEST_FAIL;
     }
-
-    if (printDeviceHeader(device) != CL_SUCCESS)
+    
+    if( printDeviceHeader( device ) != CL_SUCCESS )
     {
         return EXIT_FAILURE;
     }
 
     cl_device_fp_config fpconfig = 0;
-    err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(fpconfig),
-                          &fpconfig, NULL);
-    if (err)
-    {
-        print_error(err,
-                    "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed");
+    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( fpconfig ), &fpconfig, NULL );
+    if (err) {
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed");
         return EXIT_FAILURE;
     }
 
-    gFlushDenormsToZero = (0 == (fpconfig & CL_FP_DENORM));
-    log_info("Supports single precision denormals: %s\n",
-             gFlushDenormsToZero ? "NO" : "YES");
-    log_info("sizeof( void*) = %d  (host)\n", (int)sizeof(void *));
+    gFlushDenormsToZero = ( 0 == (fpconfig & CL_FP_DENORM));
+    log_info( "Supports single precision denormals: %s\n", gFlushDenormsToZero ? "NO" : "YES" );
+    log_info( "sizeof( void*) = %d  (host)\n", (int) sizeof( void* ) );
 
-    // detect whether profile of the device is embedded
+    //detect whether profile of the device is embedded
     char profile[1024] = "";
-    err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile,
-                          NULL);
+    err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
     if (err)
     {
-        print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n");
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
         return EXIT_FAILURE;
     }
     gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
 
-    // detect the floating point capabilities
+    //detect the floating point capabilities
     cl_device_fp_config floatCapabilities = 0;
-    err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
-                          sizeof(floatCapabilities), &floatCapabilities, NULL);
+    err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL);
     if (err)
     {
-        print_error(err,
-                    "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n");
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n");
         return EXIT_FAILURE;
     }
 
     // Check for problems that only embedded will have
-    if (gIsEmbedded)
+    if( gIsEmbedded )
     {
-        // If the device is embedded, we need to detect if the device supports
-        // Infinity and NaN
-        if ((floatCapabilities & CL_FP_INF_NAN) == 0) gInfNanSupport = 0;
+        //If the device is embedded, we need to detect if the device supports Infinity and NaN
+        if ((floatCapabilities & CL_FP_INF_NAN) == 0)
+            gInfNanSupport = 0;
 
         // check the extensions list to see if ulong and long are supported
-        if (!is_extension_available(device, "cles_khr_int64")) gHasLong = 0;
+        if( !is_extension_available(device, "cles_khr_int64" ))
+            gHasLong = 0;
+    }
+
+    if( getenv( "OPENCL_1_0_DEVICE" ) )
+    {
+        char c_version[1024];
+        gIsOpenCL_1_0_Device = 1;
+        memset( c_version, 0, sizeof( c_version ) );
+
+        if( (err = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, NULL )) )
+        {
+            log_error( "FAILURE: unable to get CL_DEVICE_OPENCL_C_VERSION on 1.0 device. (%d)\n", err );
+            return EXIT_FAILURE;
+        }
+
+        if( 0 == strncmp( c_version, "OpenCL C 1.0 ", strlen( "OpenCL C 1.0 " ) ) )
+        {
+            gIsOpenCL_C_1_0_Device = 1;
+            log_info( "Device is a OpenCL C 1.0 device\n" );
+        }
+        else
+            log_info( "Device is a OpenCL 1.0 device, but supports OpenCL C 1.1\n" );
     }
 
     cl_uint device_address_bits = 0;
-    if ((err = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
-                               sizeof(device_address_bits),
-                               &device_address_bits, NULL)))
+    if( (err = clGetDeviceInfo( device, CL_DEVICE_ADDRESS_BITS, sizeof( device_address_bits ), &device_address_bits, NULL ) ))
     {
-        print_error(err, "Unable to obtain device address bits");
+        print_error( err, "Unable to obtain device address bits" );
         return EXIT_FAILURE;
     }
-    if (device_address_bits)
-        log_info("sizeof( void*) = %d  (device)\n", device_address_bits / 8);
+    if( device_address_bits )
+        log_info( "sizeof( void*) = %d  (device)\n", device_address_bits/8 );
     else
     {
         log_error("Invalid device address bit size returned by device.\n");
         return EXIT_FAILURE;
     }
-    if (gCompilationMode == kSpir_v)
-    {
-        test_status spirv_readiness = check_spirv_compilation_readiness(device);
-        if (spirv_readiness != TEST_PASS)
-        {
-            switch (spirv_readiness)
-            {
-                case TEST_PASS: break;
-                case TEST_FAIL: return fail_init_info(testNum);
-                case TEST_SKIP: return skip_init_info(testNum);
-                case TEST_SKIPPED_ITSELF: return skip_init_info(testNum);
-            }
-        }
-    }
+
 
     /* If we have a device checking function, run it */
-    if ((deviceCheckFn != NULL))
+    if( ( deviceCheckFn != NULL ) )
     {
-        test_status status = deviceCheckFn(device);
+        test_status status = deviceCheckFn( device );
         switch (status)
         {
-            case TEST_PASS: break;
-            case TEST_FAIL: return fail_init_info(testNum);
-            case TEST_SKIP: return skip_init_info(testNum);
-            case TEST_SKIPPED_ITSELF: return skip_init_info(testNum);
+            case TEST_PASS:
+                break;
+            case TEST_FAIL:
+                return fail_init_info(testNum);
+            case TEST_SKIP:
+                return skip_init_info(testNum);
         }
     }
 
-    if (num_elements <= 0) num_elements = DEFAULT_NUM_ELEMENTS;
+    if (num_elements <= 0)
+        num_elements = DEFAULT_NUM_ELEMENTS;
 
         // On most platforms which support denorm, default is FTZ off. However,
-        // on some hardware where the reference is computed, default might be
-        // flush denorms to zero e.g. arm. This creates issues in result
-        // verification. Since spec allows the implementation to either flush or
-        // not flush denorms to zero, an implementation may choose not be flush
-        // i.e. return denorm result whereas reference result may be zero
-        // (flushed denorm). Hence we need to disable denorm flushing on host
-        // side where reference is being computed to make sure we get
-        // non-flushed reference result. If implementation returns flushed
-        // result, we correctly take care of that in verification code.
+        // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+        // This creates issues in result verification. Since spec allows the implementation to either flush or
+        // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
+        // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+        // where reference is being computed to make sure we get non-flushed reference result. If implementation
+        // returns flushed result, we correctly take care of that in verification code.
 #if defined(__APPLE__) && defined(__arm__)
-    FPU_mode_type oldMode;
-    DisableFTZ(&oldMode);
+        FPU_mode_type oldMode;
+        DisableFTZ( &oldMode );
 #endif
 
-    int error = parseAndCallCommandLineTests(argc, argv, device, testNum,
-                                             testList, forceNoContextCreation,
-                                             queueProps, num_elements);
+    int error = parseAndCallCommandLineTests( argc, argv, device, testNum, testList, forceNoContextCreation, queueProps, num_elements );
 
-#if defined(__APPLE__) && defined(__arm__)
-    // Restore the old FP mode before leaving.
-    RestoreFPState(&oldMode);
+ #if defined(__APPLE__) && defined(__arm__)
+     // Restore the old FP mode before leaving.
+    RestoreFPState( &oldMode );
 #endif
 
     return (error == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
 }
 
-static int find_matching_tests(test_definition testList[],
-                               unsigned char selectedTestList[], int testNum,
-                               const char *argument, bool isWildcard)
+static int find_matching_tests( test_definition testList[], unsigned char selectedTestList[], int testNum,
+                                const char *argument, bool isWildcard )
 {
     int found_tests = 0;
-    size_t wildcard_length = strlen(argument) - 1; /* -1 for the asterisk */
+    size_t wildcard_length = strlen( argument ) - 1; /* -1 for the asterisk */
 
-    for (int i = 0; i < testNum; i++)
+    for( int i = 0; i < testNum; i++ )
     {
-        if ((!isWildcard && strcmp(testList[i].name, argument) == 0)
-            || (isWildcard
-                && strncmp(testList[i].name, argument, wildcard_length) == 0))
+        if( ( !isWildcard && strcmp( testList[i].name, argument ) == 0 ) ||
+            ( isWildcard && strncmp( testList[i].name, argument, wildcard_length ) == 0 ) )
         {
-            if (selectedTestList[i])
+            if( selectedTestList[i] )
             {
-                log_error("ERROR: Test '%s' has already been selected.\n",
-                          testList[i].name);
+                log_error( "ERROR: Test '%s' has already been selected.\n", testList[i].name );
                 return EXIT_FAILURE;
             }
-            else if (testList[i].func == NULL)
+            else if( testList[i].func == NULL )
             {
-                log_error("ERROR: Test '%s' is missing implementation.\n",
-                          testList[i].name);
+                log_error( "ERROR: Test '%s' is missing implementation.\n", testList[i].name );
                 return EXIT_FAILURE;
             }
             else
             {
                 selectedTestList[i] = 1;
                 found_tests = 1;
-                if (!isWildcard)
+                if( !isWildcard )
                 {
                     break;
                 }
@@ -564,26 +507,22 @@
         }
     }
 
-    if (!found_tests)
+    if( !found_tests )
     {
-        log_error("ERROR: The argument '%s' did not match any test names.\n",
-                  argument);
+        log_error( "ERROR: The argument '%s' did not match any test names.\n", argument );
         return EXIT_FAILURE;
     }
 
     return EXIT_SUCCESS;
 }
 
-static int saveResultsToJson(const char *fileName, const char *suiteName,
-                             test_definition testList[],
-                             unsigned char selectedTestList[],
-                             test_status resultTestList[], int testNum)
+static int saveResultsToJson( const char *fileName, const char *suiteName, test_definition testList[],
+                              unsigned char selectedTestList[], test_status resultTestList[], int testNum )
 {
-    FILE *file = fopen(fileName, "w");
-    if (NULL == file)
+    FILE *file = fopen( fileName, "w" );
+    if( NULL == file )
     {
-        log_error("ERROR: Failed to open '%s' for writing results.\n",
-                  fileName);
+        log_error( "ERROR: Failed to open '%s' for writing results.\n", fileName );
         return EXIT_FAILURE;
     }
 
@@ -592,226 +531,185 @@
     const char *linebreak[] = { "", ",\n" };
     int add_linebreak = 0;
 
-    fprintf(file, "{\n");
-    fprintf(file, "\t\"cmd\": \"%s\",\n", suiteName);
-    fprintf(file, "\t\"results\": {\n");
+    fprintf( file, "{\n" );
+    fprintf( file, "\t\"cmd\": \"%s\",\n", suiteName );
+    fprintf( file, "\t\"results\": {\n" );
 
-    for (int i = 0; i < testNum; ++i)
+    for( int i = 0; i < testNum; ++i )
     {
-        if (selectedTestList[i])
+        if( selectedTestList[i] )
         {
-            fprintf(file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak],
-                    testList[i].name, result_map[(int)resultTestList[i]]);
+            fprintf( file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak], testList[i].name, result_map[(int)resultTestList[i]] );
             add_linebreak = 1;
         }
     }
-    fprintf(file, "\n");
+    fprintf( file, "\n");
 
-    fprintf(file, "\t}\n");
-    fprintf(file, "}\n");
+    fprintf( file, "\t}\n" );
+    fprintf( file, "}\n" );
 
-    int ret = fclose(file) ? 1 : 0;
+    int ret = fclose( file ) ? 1 : 0;
 
-    log_info("Saving results to %s: %s!\n", fileName, save_map[ret]);
+    log_info( "Saving results to %s: %s!\n", fileName, save_map[ret] );
 
     return ret;
 }
 
-static void print_results(int failed, int count, const char *name)
+static void print_results( int failed, int count, const char* name )
 {
-    if (count < failed)
+    if( count < failed )
     {
         count = failed;
     }
 
-    if (failed == 0)
+    if( failed == 0 )
     {
-        if (count > 1)
+        if( count > 1 )
         {
-            log_info("PASSED %d of %d %ss.\n", count, count, name);
+            log_info( "PASSED %d of %d %ss.\n", count, count, name );
         }
         else
         {
-            log_info("PASSED %s.\n", name);
+            log_info( "PASSED %s.\n", name );
         }
     }
-    else if (failed > 0)
+    else if( failed > 0 )
     {
-        if (count > 1)
+        if( count > 1 )
         {
-            log_error("FAILED %d of %d %ss.\n", failed, count, name);
+            log_error( "FAILED %d of %d %ss.\n", failed, count, name );
         }
         else
         {
-            log_error("FAILED %s.\n", name);
+            log_error( "FAILED %s.\n", name );
         }
     }
 }
 
-int parseAndCallCommandLineTests(int argc, const char *argv[],
-                                 cl_device_id device, int testNum,
-                                 test_definition testList[],
-                                 int forceNoContextCreation,
-                                 cl_command_queue_properties queueProps,
-                                 int num_elements)
+int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, int testNum,
+                                  test_definition testList[], int forceNoContextCreation,
+                                  cl_command_queue_properties queueProps, int num_elements )
 {
     int ret = EXIT_SUCCESS;
 
-    unsigned char *selectedTestList = (unsigned char *)calloc(testNum, 1);
+    unsigned char *selectedTestList = ( unsigned char* ) calloc( testNum, 1 );
     test_status *resultTestList = NULL;
 
-    if (argc == 1)
+    if( argc == 1 )
     {
         /* No actual arguments, all tests will be run. */
-        memset(selectedTestList, 1, testNum);
+        memset( selectedTestList, 1, testNum );
     }
     else
     {
-        for (int i = 1; i < argc; i++)
+        for( int i = 1; i < argc; i++ )
         {
-            if (strchr(argv[i], '*') != NULL)
+            if( strchr( argv[i], '*' ) != NULL )
             {
-                ret = find_matching_tests(testList, selectedTestList, testNum,
-                                          argv[i], true);
+                ret = find_matching_tests( testList, selectedTestList, testNum, argv[i], true );
             }
             else
             {
-                if (strcmp(argv[i], "all") == 0)
+                if( strcmp( argv[i], "all" ) == 0 )
                 {
-                    memset(selectedTestList, 1, testNum);
+                    memset( selectedTestList, 1, testNum );
                     break;
                 }
                 else
                 {
-                    ret = find_matching_tests(testList, selectedTestList,
-                                              testNum, argv[i], false);
+                    ret = find_matching_tests( testList, selectedTestList, testNum, argv[i], false );
                 }
             }
 
-            if (ret == EXIT_FAILURE)
+            if( ret == EXIT_FAILURE )
             {
                 break;
             }
         }
     }
 
-    if (ret == EXIT_SUCCESS)
+    if( ret == EXIT_SUCCESS )
     {
-        resultTestList =
-            (test_status *)calloc(testNum, sizeof(*resultTestList));
+        resultTestList = ( test_status* ) calloc( testNum, sizeof(*resultTestList) );
 
-        callTestFunctions(testList, selectedTestList, resultTestList, testNum,
-                          device, forceNoContextCreation, num_elements,
-                          queueProps);
+        callTestFunctions( testList, selectedTestList, resultTestList, testNum, device,
+                           forceNoContextCreation, num_elements, queueProps );
 
-        print_results(gFailCount, gTestCount, "sub-test");
-        print_results(gTestsFailed, gTestsFailed + gTestsPassed, "test");
+        print_results( gFailCount, gTestCount, "sub-test" );
+        print_results( gTestsFailed, gTestsFailed + gTestsPassed, "test" );
 
-        char *filename = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
-        if (filename != NULL)
+        char *filename = getenv( "CL_CONFORMANCE_RESULTS_FILENAME" );
+        if( filename != NULL )
         {
-            ret = saveResultsToJson(filename, argv[0], testList,
-                                    selectedTestList, resultTestList, testNum);
-        }
-
-        if (std::any_of(resultTestList, resultTestList + testNum,
-                        [](test_status result) {
-                            switch (result)
-                            {
-                                case TEST_PASS:
-                                case TEST_SKIP: return false;
-                                case TEST_FAIL:
-                                default: return true;
-                            };
-                        }))
-        {
-            ret = EXIT_FAILURE;
+            ret = saveResultsToJson( filename, argv[0], testList, selectedTestList, resultTestList, testNum );
         }
     }
 
-    free(selectedTestList);
-    free(resultTestList);
+    free( selectedTestList );
+    free( resultTestList );
 
     return ret;
 }
 
-void callTestFunctions(test_definition testList[],
-                       unsigned char selectedTestList[],
-                       test_status resultTestList[], int testNum,
-                       cl_device_id deviceToUse, int forceNoContextCreation,
-                       int numElementsToUse,
-                       cl_command_queue_properties queueProps)
+void callTestFunctions( test_definition testList[], unsigned char selectedTestList[], test_status resultTestList[],
+                        int testNum, cl_device_id deviceToUse, int forceNoContextCreation, int numElementsToUse,
+                        cl_command_queue_properties queueProps )
 {
-    for (int i = 0; i < testNum; ++i)
+    for( int i = 0; i < testNum; ++i )
     {
-        if (selectedTestList[i])
+        if( selectedTestList[i] )
         {
-            resultTestList[i] = callSingleTestFunction(
-                testList[i], deviceToUse, forceNoContextCreation,
-                numElementsToUse, queueProps);
+            resultTestList[i] = callSingleTestFunction( testList[i], deviceToUse, forceNoContextCreation,
+                                                        numElementsToUse, queueProps );
         }
     }
 }
 
-void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info,
-                                 size_t cb, void *user_data)
+void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
 {
-    log_info("%s\n", errinfo);
+    log_info( "%s\n", errinfo );
 }
 
 // Actual function execution
-test_status callSingleTestFunction(test_definition test,
-                                   cl_device_id deviceToUse,
-                                   int forceNoContextCreation,
-                                   int numElementsToUse,
-                                   const cl_queue_properties queueProps)
+test_status callSingleTestFunction( test_definition test, cl_device_id deviceToUse, int forceNoContextCreation,
+                                    int numElementsToUse, const cl_queue_properties queueProps )
 {
     test_status status;
     cl_int error;
     cl_context context = NULL;
     cl_command_queue queue = NULL;
 
-    log_info("%s...\n", test.name);
-    fflush(stdout);
+    log_info( "%s...\n", test.name );
+    fflush( stdout );
 
     const Version device_version = get_device_cl_version(deviceToUse);
     if (test.min_version > device_version)
     {
-        version_expected_info(test.name, "OpenCL",
-                              test.min_version.to_string().c_str(),
-                              device_version.to_string().c_str());
+        version_expected_info(test.name, test.min_version.to_string().c_str(), device_version.to_string().c_str());
         return TEST_SKIP;
     }
 
     /* Create a context to work with, unless we're told not to */
-    if (!forceNoContextCreation)
+    if( !forceNoContextCreation )
     {
-        context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL,
-                                  &error);
+        context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL, &error );
         if (!context)
         {
-            print_error(error, "Unable to create testing context");
+            print_error( error, "Unable to create testing context" );
             return TEST_FAIL;
         }
 
-        if (device_version < Version(2, 0))
-        {
-            queue =
-                clCreateCommandQueue(context, deviceToUse, queueProps, &error);
-        }
-        else
-        {
-            const cl_command_queue_properties cmd_queueProps =
-                (queueProps) ? CL_QUEUE_PROPERTIES : 0;
-            cl_command_queue_properties queueCreateProps[] = { cmd_queueProps,
-                                                               queueProps, 0 };
-            queue = clCreateCommandQueueWithProperties(
-                context, deviceToUse, &queueCreateProps[0], &error);
+        if (device_version < Version(2, 0)) {
+            queue = clCreateCommandQueue(context, deviceToUse, queueProps, &error);
+        } else {
+            const cl_command_queue_properties cmd_queueProps = (queueProps)?CL_QUEUE_PROPERTIES:0;
+            cl_command_queue_properties queueCreateProps[] = {cmd_queueProps, queueProps, 0};
+            queue = clCreateCommandQueueWithProperties( context, deviceToUse, &queueCreateProps[0], &error );
         }
 
-        if (queue == NULL)
+        if( queue == NULL )
         {
-            print_error(error, "Unable to create testing command queue");
+            print_error( error, "Unable to create testing command queue" );
             return TEST_FAIL;
         }
     }
@@ -820,35 +718,38 @@
     error = check_functions_for_offline_compiler(test.name, deviceToUse);
     test_missing_support_offline_cmpiler(error, test.name);
 
-    if (test.func == NULL)
+    if( test.func == NULL )
     {
-        // Skip unimplemented test, can happen when all of the tests are
-        // selected
+        // Skip unimplemented test, can happen when all of the tests are selected
         log_info("%s test currently not implemented\n", test.name);
         status = TEST_SKIP;
     }
     else
     {
-        int ret = test.func(deviceToUse, context, queue, numElementsToUse);
-        if (ret == TEST_SKIPPED_ITSELF)
+        int ret = test.func(deviceToUse, context, queue, numElementsToUse);        //test_threaded_function( ptr_basefn_list[i], group, context, num_elements);
+        if( ret == TEST_NOT_IMPLEMENTED )
         {
-            /* Tests can also let us know they're not supported by the
-             * implementation */
+            /* Tests can also let us know they're not implemented yet */
+            log_info("%s test currently not implemented\n", test.name);
+            status = TEST_SKIP;
+        }
+        else if (ret == TEST_SKIPPED_ITSELF)
+        {
+            /* Tests can also let us know they're not supported by the implementation */
             log_info("%s test not supported\n", test.name);
             status = TEST_SKIP;
         }
         else
         {
             /* Print result */
-            if (ret == 0)
-            {
-                log_info("%s passed\n", test.name);
+            if( ret == 0 ) {
+                log_info( "%s passed\n", test.name );
                 gTestsPassed++;
                 status = TEST_PASS;
             }
             else
             {
-                log_error("%s FAILED\n", test.name);
+                log_error( "%s FAILED\n", test.name );
                 gTestsFailed++;
                 status = TEST_FAIL;
             }
@@ -856,50 +757,50 @@
     }
 
     /* Release the context */
-    if (!forceNoContextCreation)
+    if( !forceNoContextCreation )
     {
         int error = clFinish(queue);
-        if (error)
-        {
-            log_error("clFinish failed: %s\n", IGetErrorString(error));
+        if (error) {
+            log_error("clFinish failed: %d", error);
             status = TEST_FAIL;
         }
-        clReleaseCommandQueue(queue);
-        clReleaseContext(context);
+        clReleaseCommandQueue( queue );
+        clReleaseContext( context );
     }
 
     return status;
 }
 
-#if !defined(__APPLE__)
-void memset_pattern4(void *dest, const void *src_pattern, size_t bytes)
+#if ! defined( __APPLE__ )
+void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
 {
-    uint32_t pat = ((uint32_t *)src_pattern)[0];
+    uint32_t pat = ((uint32_t*) src_pattern)[0];
     size_t count = bytes / 4;
     size_t i;
-    uint32_t *d = (uint32_t *)dest;
+    uint32_t *d = (uint32_t*)dest;
 
-    for (i = 0; i < count; i++) d[i] = pat;
+    for( i = 0; i < count; i++ )
+        d[i] = pat;
 
     d += i;
 
     bytes &= 3;
-    if (bytes) memcpy(d, src_pattern, bytes);
+    if( bytes )
+        memcpy( d, src_pattern, bytes );
 }
 #endif
 
-cl_device_type GetDeviceType(cl_device_id d)
+cl_device_type GetDeviceType( cl_device_id d )
 {
     cl_device_type result = -1;
-    cl_int err =
-        clGetDeviceInfo(d, CL_DEVICE_TYPE, sizeof(result), &result, NULL);
-    if (CL_SUCCESS != err)
-        log_error("ERROR: Unable to get device type for device %p\n", d);
+    cl_int err = clGetDeviceInfo( d, CL_DEVICE_TYPE, sizeof( result ), &result, NULL );
+    if( CL_SUCCESS != err )
+        log_error( "ERROR: Unable to get device type for device %p\n", d );
     return result;
 }
 
 
-cl_device_id GetOpposingDevice(cl_device_id device)
+cl_device_id GetOpposingDevice( cl_device_id device )
 {
     cl_int error;
     cl_device_id *otherDevices;
@@ -907,59 +808,53 @@
     cl_platform_id plat;
 
     // Get the platform of the device to use for getting a list of devices
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL);
-    if (error != CL_SUCCESS)
+    error = clGetDeviceInfo( device, CL_DEVICE_PLATFORM, sizeof( plat ), &plat, NULL );
+    if( error != CL_SUCCESS )
     {
-        print_error(error, "Unable to get device's platform");
+        print_error( error, "Unable to get device's platform" );
         return NULL;
     }
 
     // Get a list of all devices
-    error = clGetDeviceIDs(plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount);
-    if (error != CL_SUCCESS)
+    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount );
+    if( error != CL_SUCCESS )
     {
-        print_error(error, "Unable to get list of devices size");
+        print_error( error, "Unable to get list of devices size" );
         return NULL;
     }
-    otherDevices = (cl_device_id *)malloc(actualCount * sizeof(cl_device_id));
-    if (NULL == otherDevices)
-    {
-        print_error(error, "Unable to allocate list of other devices.");
+    otherDevices = (cl_device_id *)malloc(actualCount*sizeof(cl_device_id));
+    if (NULL == otherDevices) {
+        print_error( error, "Unable to allocate list of other devices." );
         return NULL;
     }
     BufferOwningPtr<cl_device_id> otherDevicesBuf(otherDevices);
 
-    error = clGetDeviceIDs(plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices,
-                           NULL);
-    if (error != CL_SUCCESS)
+    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices, NULL );
+    if( error != CL_SUCCESS )
     {
-        print_error(error, "Unable to get list of devices");
+        print_error( error, "Unable to get list of devices" );
         return NULL;
     }
 
-    if (actualCount == 1)
+    if( actualCount == 1 )
     {
-        return device; // NULL means error, returning self means we couldn't
-                       // find another one
+        return device;    // NULL means error, returning self means we couldn't find another one
     }
 
     // Loop and just find one that isn't the one we were given
     cl_uint i;
-    for (i = 0; i < actualCount; i++)
+    for( i = 0; i < actualCount; i++ )
     {
-        if (otherDevices[i] != device)
+        if( otherDevices[ i ] != device )
         {
             cl_device_type newType;
-            error = clGetDeviceInfo(otherDevices[i], CL_DEVICE_TYPE,
-                                    sizeof(newType), &newType, NULL);
-            if (error != CL_SUCCESS)
+            error = clGetDeviceInfo( otherDevices[ i ], CL_DEVICE_TYPE, sizeof( newType ), &newType, NULL );
+            if( error != CL_SUCCESS )
             {
-                print_error(error,
-                            "Unable to get device type for other device");
+                print_error( error, "Unable to get device type for other device" );
                 return NULL;
             }
-            cl_device_id result = otherDevices[i];
+            cl_device_id result = otherDevices[ i ];
             return result;
         }
     }
@@ -975,8 +870,7 @@
     ASSERT_SUCCESS(err, "clGetDeviceInfo");
 
     std::vector<char> str(str_size);
-    err =
-        clGetDeviceInfo(device, CL_DEVICE_VERSION, str_size, str.data(), NULL);
+    err = clGetDeviceInfo(device, CL_DEVICE_VERSION, str_size, str.data(), NULL);
     ASSERT_SUCCESS(err, "clGetDeviceInfo");
 
     if (strstr(str.data(), "OpenCL 1.0") != NULL)
@@ -994,229 +888,55 @@
     else if (strstr(str.data(), "OpenCL 3.0") != NULL)
         return Version(3, 0);
 
-    throw std::runtime_error(std::string("Unknown OpenCL version: ")
-                             + str.data());
+    throw std::runtime_error(std::string("Unknown OpenCL version: ") + str.data());
 }
 
-bool check_device_spirv_version_reported(cl_device_id device)
+void PrintArch( void )
 {
-    size_t str_size;
-    cl_int err;
-    std::vector<char> str;
-    if (gCoreILProgram)
-    {
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION, 0, NULL, &str_size);
-        if (err != CL_SUCCESS)
-        {
-            log_error(
-                "clGetDeviceInfo: cannot read CL_DEVICE_IL_VERSION size;");
-            return false;
-        }
-
-        str.resize(str_size);
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION, str_size,
-                              str.data(), NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error(
-                "clGetDeviceInfo: cannot read CL_DEVICE_IL_VERSION value;");
-            return false;
-        }
-    }
-    else
-    {
-        cl_int err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION_KHR, 0, NULL,
-                                     &str_size);
-        if (err != CL_SUCCESS)
-        {
-            log_error(
-                "clGetDeviceInfo: cannot read CL_DEVICE_IL_VERSION_KHR size;");
-            return false;
-        }
-
-        str.resize(str_size);
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION_KHR, str_size,
-                              str.data(), NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error(
-                "clGetDeviceInfo: cannot read CL_DEVICE_IL_VERSION_KHR value;");
-            return false;
-        }
-    }
-
-    if (strstr(str.data(), "SPIR-V") == NULL)
-    {
-        log_info("This device does not support SPIR-V offline compilation.\n");
-        return false;
-    }
-    else
-    {
-        Version spirv_version = get_device_spirv_il_version(device);
-        log_info("This device supports SPIR-V offline compilation. SPIR-V "
-                 "version is %s\n",
-                 spirv_version.to_string().c_str());
-    }
-    return true;
-}
-
-Version get_device_spirv_il_version(cl_device_id device)
-{
-    size_t str_size;
-    cl_int err;
-    std::vector<char> str;
-    if (gCoreILProgram)
-    {
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION, 0, NULL, &str_size);
-        ASSERT_SUCCESS(err, "clGetDeviceInfo");
-
-        str.resize(str_size);
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION, str_size,
-                              str.data(), NULL);
-        ASSERT_SUCCESS(err, "clGetDeviceInfo");
-    }
-    else
-    {
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION_KHR, 0, NULL,
-                              &str_size);
-        ASSERT_SUCCESS(err, "clGetDeviceInfo");
-
-        str.resize(str_size);
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION_KHR, str_size,
-                              str.data(), NULL);
-        ASSERT_SUCCESS(err, "clGetDeviceInfo");
-    }
-
-    if (strstr(str.data(), "SPIR-V_1.0") != NULL)
-        return Version(1, 0);
-    else if (strstr(str.data(), "SPIR-V_1.1") != NULL)
-        return Version(1, 1);
-    else if (strstr(str.data(), "SPIR-V_1.2") != NULL)
-        return Version(1, 2);
-    else if (strstr(str.data(), "SPIR-V_1.3") != NULL)
-        return Version(1, 3);
-    else if (strstr(str.data(), "SPIR-V_1.4") != NULL)
-        return Version(1, 4);
-    else if (strstr(str.data(), "SPIR-V_1.5") != NULL)
-        return Version(1, 5);
-
-    throw std::runtime_error(std::string("Unknown SPIR-V version: ")
-                             + str.data());
-}
-
-test_status check_spirv_compilation_readiness(cl_device_id device)
-{
-    auto ocl_version = get_device_cl_version(device);
-    auto ocl_expected_min_version = Version(2, 1);
-
-    if (ocl_version < ocl_expected_min_version)
-    {
-        if (is_extension_available(device, "cl_khr_il_program"))
-        {
-            gCoreILProgram = false;
-            bool spirv_supported = check_device_spirv_version_reported(device);
-            if (spirv_supported == false)
-            {
-                log_error("SPIR-V intermediate language not supported !!! "
-                          "OpenCL %s requires support.\n",
-                          ocl_version.to_string().c_str());
-                return TEST_FAIL;
-            }
-            else
-            {
-                return TEST_PASS;
-            }
-        }
-        else
-        {
-            log_error("SPIR-V intermediate language support on OpenCL version "
-                      "%s requires cl_khr_il_program extension.\n",
-                      ocl_version.to_string().c_str());
-            return TEST_SKIP;
-        }
-    }
-
-    bool spirv_supported = check_device_spirv_version_reported(device);
-    if (ocl_version >= ocl_expected_min_version && ocl_version <= Version(2, 2))
-    {
-        if (spirv_supported == false)
-        {
-            log_error("SPIR-V intermediate language not supported !!! OpenCL "
-                      "%s requires support.\n",
-                      ocl_version.to_string().c_str());
-            return TEST_FAIL;
-        }
-    }
-
-    if (ocl_version > Version(2, 2))
-    {
-        if (spirv_supported == false)
-        {
-            log_info("SPIR-V intermediate language not supported in OpenCL %s. "
-                     "Test skipped.\n",
-                     ocl_version.to_string().c_str());
-            return TEST_SKIP;
-        }
-    }
-    return TEST_PASS;
-}
-
-cl_platform_id getPlatformFromDevice(cl_device_id deviceID)
-{
-    cl_platform_id platform = nullptr;
-    cl_int err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform),
-                                 &platform, nullptr);
-    ASSERT_SUCCESS(err, "clGetDeviceInfo");
-    return platform;
-}
-
-void PrintArch(void)
-{
-    vlog("sizeof( void*) = %ld\n", sizeof(void *));
-#if defined(__ppc__)
-    vlog("ARCH:\tppc\n");
-#elif defined(__ppc64__)
-    vlog("ARCH:\tppc64\n");
-#elif defined(__PPC__)
-    vlog("ARCH:\tppc\n");
-#elif defined(__i386__)
-    vlog("ARCH:\ti386\n");
-#elif defined(__x86_64__)
-    vlog("ARCH:\tx86_64\n");
-#elif defined(__arm__)
-    vlog("ARCH:\tarm\n");
+    vlog( "sizeof( void*) = %ld\n", sizeof( void *) );
+#if defined( __ppc__ )
+    vlog( "ARCH:\tppc\n" );
+#elif defined( __ppc64__ )   
+    vlog( "ARCH:\tppc64\n" );
+#elif defined( __PPC__ )   
+    vlog( "ARCH:\tppc\n" );
+#elif defined( __i386__ )   
+    vlog( "ARCH:\ti386\n" );
+#elif defined( __x86_64__ )   
+    vlog( "ARCH:\tx86_64\n" );
+#elif defined( __arm__ )   
+    vlog( "ARCH:\tarm\n" );
 #elif defined(__aarch64__)
-    vlog("ARCH:\taarch64\n");
-#elif defined(_WIN32)
-    vlog("ARCH:\tWindows\n");
+    vlog( "ARCH:\taarch64\n" );
+#elif defined (_WIN32)
+    vlog( "ARCH:\tWindows\n" );
 #else
 #error unknown arch
 #endif
 
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
 
     int type = 0;
-    size_t typeSize = sizeof(type);
-    sysctlbyname("hw.cputype", &type, &typeSize, NULL, 0);
-    vlog("cpu type:\t%d\n", type);
-    typeSize = sizeof(type);
-    sysctlbyname("hw.cpusubtype", &type, &typeSize, NULL, 0);
-    vlog("cpu subtype:\t%d\n", type);
+    size_t typeSize = sizeof( type );
+    sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 );
+    vlog( "cpu type:\t%d\n", type );
+    typeSize = sizeof( type );
+    sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 );
+    vlog( "cpu subtype:\t%d\n", type );
 
-#elif defined(__linux__)
+#elif defined( __linux__ )
     struct utsname buffer;
-
-    if (uname(&buffer) != 0)
-    {
-        vlog("uname error");
+ 
+    if (uname(&buffer) != 0) {
+       vlog("uname error");
     }
-    else
-    {
-        vlog("system name = %s\n", buffer.sysname);
-        vlog("node name   = %s\n", buffer.nodename);
-        vlog("release     = %s\n", buffer.release);
-        vlog("version     = %s\n", buffer.version);
-        vlog("machine     = %s\n", buffer.machine);
+    else {
+       vlog("system name = %s\n", buffer.sysname);
+       vlog("node name   = %s\n", buffer.nodename);
+       vlog("release     = %s\n", buffer.release);
+       vlog("version     = %s\n", buffer.version);
+       vlog("machine     = %s\n", buffer.machine);
     }
 #endif
 }
+

diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h
index d6054de..7132e77 100644
--- a/test_common/harness/testHarness.h
+++ b/test_common/harness/testHarness.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017-2019 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -23,26 +23,18 @@
 
 #include <string>
 
-class Version {
+class Version
+{
 public:
-    Version(): m_major(0), m_minor(0) {}
-    Version(int major, int minor): m_major(major), m_minor(minor) {}
-    bool operator>(const Version &rhs) const { return to_int() > rhs.to_int(); }
-    bool operator<(const Version &rhs) const { return to_int() < rhs.to_int(); }
-    bool operator<=(const Version &rhs) const
-    {
-        return to_int() <= rhs.to_int();
-    }
-    bool operator>=(const Version &rhs) const
-    {
-        return to_int() >= rhs.to_int();
-    }
-    bool operator==(const Version &rhs) const
-    {
-        return to_int() == rhs.to_int();
-    }
+    Version() : m_major(0), m_minor(0) {}
+    Version(int major, int minor) : m_major(major), m_minor(minor) {}
+    bool operator>(const Version& rhs) const { return to_int() > rhs.to_int(); }
+    bool operator<(const Version& rhs) const { return to_int() < rhs.to_int(); }
+    bool operator<=(const Version& rhs) const { return to_int() <= rhs.to_int(); }
+    bool operator>=(const Version& rhs) const { return to_int() >= rhs.to_int(); }
+    bool operator==(const Version& rhs) const { return to_int() == rhs.to_int(); }
     int to_int() const { return m_major * 10 + m_minor; }
-    std::string to_string() const
+    std::string to_string() const 
     {
         std::stringstream ss;
         ss << m_major << "." << m_minor;
@@ -64,13 +56,17 @@
     {                                                                          \
         test_##fn, #fn, ver                                                    \
     }
+#define NOT_IMPLEMENTED_TEST(fn)                                               \
+    {                                                                          \
+        NULL, #fn, Version(0, 0)                                               \
+    }
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 
 typedef struct test_definition
 {
     basefn func;
-    const char *name;
+    const char* name;
     Version min_version;
 } test_definition;
 
@@ -80,7 +76,6 @@
     TEST_PASS = 0,
     TEST_FAIL = 1,
     TEST_SKIP = 2,
-    TEST_SKIPPED_ITSELF = -100,
 } test_status;
 
 extern int gFailCount;
@@ -88,103 +83,74 @@
 extern cl_uint gReSeed;
 extern cl_uint gRandomSeed;
 
-// Supply a list of functions to test here. This will allocate a CL device,
-// create a context, all that setup work, and then call each function in turn as
-// dictatated by the passed arguments. Returns EXIT_SUCCESS iff all tests
-// succeeded or the tests were listed, otherwise return EXIT_FAILURE.
-extern int runTestHarness(int argc, const char *argv[], int testNum,
-                          test_definition testList[],
-                          int forceNoContextCreation,
-                          cl_command_queue_properties queueProps);
+// Supply a list of functions to test here. This will allocate a CL device, create a context, all that
+// setup work, and then call each function in turn as dictated by the passed arguments.
+// Returns EXIT_SUCCESS iff all tests succeeded or the tests were listed,
+// otherwise return EXIT_FAILURE.
+extern int runTestHarness( int argc, const char *argv[], int testNum, test_definition testList[],
+                           int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
 
-// Device checking function. See runTestHarnessWithCheck. If this function
-// returns anything other than TEST_PASS, the harness exits.
-typedef test_status (*DeviceCheckFn)(cl_device_id device);
+// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than TEST_PASS, the harness exits.
+typedef test_status (*DeviceCheckFn)( cl_device_id device );
 
-// Same as runTestHarness, but also supplies a function that checks the created
-// device for required functionality. Returns EXIT_SUCCESS iff all tests
-// succeeded or the tests were listed, otherwise return EXIT_FAILURE.
-extern int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
-                                   test_definition testList[],
-                                   int forceNoContextCreation,
-                                   cl_command_queue_properties queueProps,
-                                   DeviceCheckFn deviceCheckFn);
+// Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
+// Returns EXIT_SUCCESS iff all tests succeeded or the tests were listed,
+// otherwise return EXIT_FAILURE.
+extern int runTestHarnessWithCheck( int argc, const char *argv[], int testNum, test_definition testList[],
+                                    int forceNoContextCreation, cl_command_queue_properties queueProps,
+                                    DeviceCheckFn deviceCheckFn );
 
-// The command line parser used by runTestHarness to break up parameters into
-// calls to callTestFunctions
-extern int parseAndCallCommandLineTests(int argc, const char *argv[],
-                                        cl_device_id device, int testNum,
-                                        test_definition testList[],
-                                        int forceNoContextCreation,
-                                        cl_command_queue_properties queueProps,
-                                        int num_elements);
+// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions
+extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, int testNum,
+                                         test_definition testList[], int forceNoContextCreation,
+                                         cl_command_queue_properties queueProps, int num_elements );
 
-// Call this function if you need to do all the setup work yourself, and just
-// need the function list called/ managed.
+// Call this function if you need to do all the setup work yourself, and just need the function list called/
+// managed.
 //    testList is the data structure that contains test functions and its names
-//    selectedTestList is an array of integers (treated as bools) which tell
-//    which function is to be called,
-//       each element at index i, corresponds to the element in testList at
-//       index i
-//    resultTestList is an array of statuses which contain the result of each
-//    selected test testNum is the number of tests in testList, selectedTestList
-//    and resultTestList contextProps are used to create a testing context for
-//    each test deviceToUse and numElementsToUse are all just passed to each
-//    test function
-extern void callTestFunctions(test_definition testList[],
-                              unsigned char selectedTestList[],
-                              test_status resultTestList[], int testNum,
-                              cl_device_id deviceToUse,
-                              int forceNoContextCreation, int numElementsToUse,
-                              cl_command_queue_properties queueProps);
+//    selectedTestList is an array of integers (treated as bools) which tell which function is to be called,
+//       each element at index i, corresponds to the element in testList at index i
+//    resultTestList is an array of statuses which contain the result of each selected test
+//    testNum is the number of tests in testList, selectedTestList and resultTestList
+//    contextProps are used to create a testing context for each test
+//    deviceToUse and numElementsToUse are all just passed to each test function
+extern void callTestFunctions( test_definition testList[], unsigned char selectedTestList[], test_status resultTestList[],
+                               int testNum, cl_device_id deviceToUse, int forceNoContextCreation, int numElementsToUse,
+                               cl_command_queue_properties queueProps );
 
-// This function is called by callTestFunctions, once per function, to do setup,
-// call, logging and cleanup
-extern test_status
-callSingleTestFunction(test_definition test, cl_device_id deviceToUse,
-                       int forceNoContextCreation, int numElementsToUse,
-                       cl_command_queue_properties queueProps);
+// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup
+extern test_status callSingleTestFunction( test_definition test, cl_device_id deviceToUse, int forceNoContextCreation,
+                                           int numElementsToUse, cl_command_queue_properties queueProps );
 
 ///// Miscellaneous steps
 
 // standard callback function for context pfn_notify
-extern void CL_CALLBACK notify_callback(const char *errinfo,
-                                        const void *private_info, size_t cb,
-                                        void *user_data);
+extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data);
 
-extern cl_device_type GetDeviceType(cl_device_id);
+extern cl_device_type GetDeviceType( cl_device_id );
 
-// Given a device (most likely passed in by the harness, but not required), will
-// attempt to find a DIFFERENT device and return it. Useful for finding another
-// device to run multi-device tests against. Note that returning NULL means an
-// error was hit, but if no error was hit and the device passed in is the only
-// device available, the SAME device is returned, so check!
-extern cl_device_id GetOpposingDevice(cl_device_id device);
+// Given a device (most likely passed in by the harness, but not required), will attempt to find
+// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against.
+// Note that returning NULL means an error was hit, but if no error was hit and the device passed in
+// is the only device available, the SAME device is returned, so check!
+extern cl_device_id GetOpposingDevice( cl_device_id device );
 
-Version get_device_spirv_il_version(cl_device_id device);
-bool check_device_spirv_il_support(cl_device_id device);
-void version_expected_info(const char *test_name, const char *api_name,
-                           const char *expected_version,
-                           const char *device_version);
-test_status check_spirv_compilation_readiness(cl_device_id device);
+void version_expected_info(const char * test_name, const char * expected_version, const char * device_version);
 
 
-extern int gFlushDenormsToZero; // This is set to 1 if the device does not
-                                // support denorms (CL_FP_DENORM)
-extern int gInfNanSupport; // This is set to 1 if the device supports infinities
-                           // and NaNs
-extern int gIsEmbedded; // This is set to 1 if the device is an embedded device
-extern int gHasLong; // This is set to 1 if the device suppots long and ulong
-                     // types in OpenCL C.
-extern bool gCoreILProgram;
+extern int      gFlushDenormsToZero;    // This is set to 1 if the device does not support denorms (CL_FP_DENORM)
+extern int      gInfNanSupport;         // This is set to 1 if the device supports infinities and NaNs
+extern int        gIsEmbedded;            // This is set to 1 if the device is an embedded device
+extern int        gHasLong;               // This is set to 1 if the device supports long and ulong types in OpenCL C.
+extern int      gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0.
 
-extern cl_platform_id getPlatformFromDevice(cl_device_id deviceID);
-
-#if !defined(__APPLE__)
-void memset_pattern4(void *, const void *, size_t);
+#if ! defined( __APPLE__ )
+    void     memset_pattern4(void *, const void *, size_t);
 #endif
 
 extern void PrintArch(void);
 
 
 #endif // _testHarness_h
+
+

diff --git a/test_common/harness/test_mt19937.c b/test_common/harness/test_mt19937.c
index fa57fd3..c0498ea 100644
--- a/test_common/harness/test_mt19937.c
+++ b/test_common/harness/test_mt19937.c

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,26 +16,24 @@
 #include "mt19937.h"
 #include <stdio.h>
 
-int main(void)
+int main( void )
 {
     MTdata d = init_genrand(42);
     int i;
-    const cl_uint reference[16] = {
-        0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4, 0x55c31f24, 0x8bcd21ab,
-        0x14d5fef5, 0x9416d2b6, 0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
-        0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92
-    };
+    const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4,
+                                    0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6,
+                                    0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
+                                    0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 };
     int errcount = 0;
 
-    for (i = 0; i < 65536; i++)
+    for( i = 0; i < 65536; i++ )
     {
-        cl_uint u = genrand_int32(d);
-        if (0 == (i & 4095))
+        cl_uint u = genrand_int32( d );
+        if( 0 == (i & 4095) )
         {
-            if (u != reference[i >> 12])
+            if( u != reference[i>>12] )
             {
-                printf("ERROR: expected *0x%8.8x at %d.  Got 0x%8.8x\n",
-                       reference[i >> 12], i, u);
+                printf("ERROR: expected *0x%8.8x at %d.  Got 0x%8.8x\n", reference[i>>12], i, u );
                 errcount++;
             }
         }
@@ -43,7 +41,7 @@
 
     free_mtdata(d);
 
-    if (errcount)
+    if( errcount )
         printf("mt19937 test failed.\n");
     else
         printf("mt19937 test passed.\n");

diff --git a/test_common/harness/threadTesting.cpp b/test_common/harness/threadTesting.cpp
index 875ee59..1a07f97 100644
--- a/test_common/harness/threadTesting.cpp
+++ b/test_common/harness/threadTesting.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -96,3 +96,5 @@
     return (int)((intptr_t)retVal);
 }
 #endif
+
+

diff --git a/test_common/harness/threadTesting.h b/test_common/harness/threadTesting.h
index 765eabc..71d5797 100644
--- a/test_common/harness/threadTesting.h
+++ b/test_common/harness/threadTesting.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -17,15 +17,17 @@
 #define _threadTesting_h
 
 #ifdef __APPLE__
-#include <OpenCL/opencl.h>
+    #include <OpenCL/opencl.h>
 #else
-#include <CL/opencl.h>
+    #include <CL/opencl.h>
 #endif
 
-typedef int (*basefn)(cl_device_id deviceID, cl_context context,
-                      cl_command_queue queue, int num_elements);
-extern int test_threaded_function(basefn fnToTest, cl_device_id device,
-                                  cl_context context, cl_command_queue queue,
-                                  int numElements);
+#define TEST_NOT_IMPLEMENTED        -99
+#define TEST_SKIPPED_ITSELF         -100
+
+typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
 
 #endif // _threadTesting_h
+
+

diff --git a/test_common/harness/typeWrappers.cpp b/test_common/harness/typeWrappers.cpp
index e6520b1..d4e08fb 100644
--- a/test_common/harness/typeWrappers.cpp
+++ b/test_common/harness/typeWrappers.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -19,310 +19,256 @@
 #include <stdlib.h>
 #include "clImageHelper.h"
 
-#define ROUND_SIZE_UP(_size, _align)                                           \
-    (((size_t)(_size) + (size_t)(_align)-1) & -((size_t)(_align)))
+#define ROUND_SIZE_UP( _size, _align )      (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align)))
 
-#if defined(__APPLE__)
-#define kPageSize 4096
-#include <sys/mman.h>
-#include <stdlib.h>
+#if defined( __APPLE__ )
+    #define kPageSize       4096
+    #include <sys/mman.h>
+    #include <stdlib.h>
 #elif defined(__linux__)
-#include <unistd.h>
-#define kPageSize (getpagesize())
+    #include <unistd.h>
+    #define kPageSize  (getpagesize())
 #endif
 
-clProtectedImage::clProtectedImage(cl_context context, cl_mem_flags mem_flags,
-                                   const cl_image_format *fmt, size_t width,
-                                   cl_int *errcode_ret)
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret )
 {
-    cl_int err = Create(context, mem_flags, fmt, width);
-    if (errcode_ret != NULL) *errcode_ret = err;
+    cl_int err = Create( context, mem_flags, fmt, width );
+    if( errcode_ret != NULL )
+        *errcode_ret = err;
 }
 
-cl_int clProtectedImage::Create(cl_context context, cl_mem_flags mem_flags,
-                                const cl_image_format *fmt, size_t width)
+cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width )
 {
     cl_int error;
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
     int protect_pages = 1;
     cl_device_id devices[16];
     size_t number_of_devices;
-    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices),
-                             devices, &number_of_devices);
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
     test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
 
     number_of_devices /= sizeof(cl_device_id);
-    for (int i = 0; i < (int)number_of_devices; i++)
-    {
+    for (int i=0; i<(int)number_of_devices; i++) {
         cl_device_type type;
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type,
-                                NULL);
+        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
         test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
-        if (type == CL_DEVICE_TYPE_GPU)
-        {
+        if (type == CL_DEVICE_TYPE_GPU) {
             protect_pages = 0;
             break;
         }
     }
 
-    if (protect_pages)
-    {
+    if (protect_pages) {
         size_t pixelBytes = get_pixel_bytes(fmt);
-        size_t rowBytes = ROUND_SIZE_UP(width * pixelBytes, kPageSize);
+        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
         size_t rowStride = rowBytes + kPageSize;
 
         // create backing store
         backingStoreSize = rowStride + 8 * rowStride;
-        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE,
-                            MAP_ANON | MAP_PRIVATE, 0, 0);
+        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
 
         // add guard pages
         size_t row;
-        char *p = (char *)backingStore;
-        char *imagePtr = (char *)backingStore + 4 * rowStride;
-        for (row = 0; row < 4; row++)
+        char *p = (char*) backingStore;
+        char *imagePtr = (char*) backingStore + 4 * rowStride;
+        for( row = 0; row < 4; row++ )
         {
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
         }
         p += rowBytes;
-        mprotect(p, kPageSize, PROT_NONE);
-        p += rowStride;
+        mprotect( p, kPageSize, PROT_NONE );        p += rowStride;
         p -= rowBytes;
-        for (row = 0; row < 4; row++)
+        for( row = 0; row < 4; row++ )
         {
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
         }
 
-        if (getenv("CL_ALIGN_RIGHT"))
+        if(  getenv( "CL_ALIGN_RIGHT" ) )
         {
             static int spewEnv = 1;
-            if (spewEnv)
+            if(spewEnv)
             {
-                log_info("***CL_ALIGN_RIGHT is set. Aligning images at right "
-                         "edge of page\n");
+                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                 spewEnv = 0;
             }
             imagePtr += rowBytes - pixelBytes * width;
         }
 
-        image = create_image_1d(context, mem_flags | CL_MEM_USE_HOST_PTR, fmt,
-                                width, rowStride, imagePtr, NULL, &error);
-    }
-    else
-    {
+        image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
+    } else {
         backingStore = NULL;
-        image = create_image_1d(context, mem_flags, fmt, width, 0, NULL, NULL,
-                                &error);
+        image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
+
     }
 #else
 
     backingStore = NULL;
-    image =
-        create_image_1d(context, mem_flags, fmt, width, 0, NULL, NULL, &error);
+    image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
 
 #endif
     return error;
 }
 
 
-clProtectedImage::clProtectedImage(cl_context context, cl_mem_flags mem_flags,
-                                   const cl_image_format *fmt, size_t width,
-                                   size_t height, cl_int *errcode_ret)
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret )
 {
-    cl_int err = Create(context, mem_flags, fmt, width, height);
-    if (errcode_ret != NULL) *errcode_ret = err;
+    cl_int err = Create( context, mem_flags, fmt, width, height );
+    if( errcode_ret != NULL )
+        *errcode_ret = err;
 }
 
-cl_int clProtectedImage::Create(cl_context context, cl_mem_flags mem_flags,
-                                const cl_image_format *fmt, size_t width,
-                                size_t height)
+cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height )
 {
     cl_int error;
-#if defined(__APPLE__)
-    int protect_pages = 1;
-    cl_device_id devices[16];
-    size_t number_of_devices;
-    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices),
-                             devices, &number_of_devices);
-    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+#if defined( __APPLE__ )
+  int protect_pages = 1;
+  cl_device_id devices[16];
+  size_t number_of_devices;
+  error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
+  test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
 
-    number_of_devices /= sizeof(cl_device_id);
-    for (int i = 0; i < (int)number_of_devices; i++)
+  number_of_devices /= sizeof(cl_device_id);
+  for (int i=0; i<(int)number_of_devices; i++) {
+    cl_device_type type;
+    error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
+    if (type == CL_DEVICE_TYPE_GPU) {
+      protect_pages = 0;
+      break;
+    }
+  }
+
+  if (protect_pages) {
+    size_t pixelBytes = get_pixel_bytes(fmt);
+    size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
+    size_t rowStride = rowBytes + kPageSize;
+
+    // create backing store
+    backingStoreSize = height * rowStride + 8 * rowStride;
+    backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+
+    // add guard pages
+    size_t row;
+    char *p = (char*) backingStore;
+    char *imagePtr = (char*) backingStore + 4 * rowStride;
+    for( row = 0; row < 4; row++ )
     {
-        cl_device_type type;
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type,
-                                NULL);
-        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
-        if (type == CL_DEVICE_TYPE_GPU)
-        {
-            protect_pages = 0;
-            break;
-        }
+        mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+    }
+    p += rowBytes;
+    for( row = 0; row < height; row++ )
+    {
+        mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
+    }
+    p -= rowBytes;
+    for( row = 0; row < 4; row++ )
+    {
+        mprotect( p, rowStride, PROT_NONE );    p += rowStride;
     }
 
-    if (protect_pages)
+    if(  getenv( "CL_ALIGN_RIGHT" ) )
     {
-        size_t pixelBytes = get_pixel_bytes(fmt);
-        size_t rowBytes = ROUND_SIZE_UP(width * pixelBytes, kPageSize);
-        size_t rowStride = rowBytes + kPageSize;
-
-        // create backing store
-        backingStoreSize = height * rowStride + 8 * rowStride;
-        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE,
-                            MAP_ANON | MAP_PRIVATE, 0, 0);
-
-        // add guard pages
-        size_t row;
-        char *p = (char *)backingStore;
-        char *imagePtr = (char *)backingStore + 4 * rowStride;
-        for (row = 0; row < 4; row++)
-        {
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
-        }
-        p += rowBytes;
-        for (row = 0; row < height; row++)
-        {
-            mprotect(p, kPageSize, PROT_NONE);
-            p += rowStride;
-        }
-        p -= rowBytes;
-        for (row = 0; row < 4; row++)
-        {
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
-        }
-
-        if (getenv("CL_ALIGN_RIGHT"))
-        {
-            static int spewEnv = 1;
-            if (spewEnv)
-            {
-                log_info("***CL_ALIGN_RIGHT is set. Aligning images at right "
-                         "edge of page\n");
-                spewEnv = 0;
-            }
-            imagePtr += rowBytes - pixelBytes * width;
-        }
-
-        image = create_image_2d(context, mem_flags | CL_MEM_USE_HOST_PTR, fmt,
-                                width, height, rowStride, imagePtr, &error);
+      static int spewEnv = 1;
+      if(spewEnv)
+      {
+        log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
+        spewEnv = 0;
+      }
+      imagePtr += rowBytes - pixelBytes * width;
     }
-    else
-    {
-        backingStore = NULL;
-        image = create_image_2d(context, mem_flags, fmt, width, height, 0, NULL,
-                                &error);
-    }
+
+      image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
+  } else {
+    backingStore = NULL;
+      image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
+
+  }
 #else
 
-    backingStore = NULL;
-    image = create_image_2d(context, mem_flags, fmt, width, height, 0, NULL,
-                            &error);
+  backingStore = NULL;
+  image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
 
 #endif
     return error;
 }
 
-clProtectedImage::clProtectedImage(cl_context context, cl_mem_flags mem_flags,
-                                   const cl_image_format *fmt, size_t width,
-                                   size_t height, size_t depth,
-                                   cl_int *errcode_ret)
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret )
 {
-    cl_int err = Create(context, mem_flags, fmt, width, height, depth);
-    if (errcode_ret != NULL) *errcode_ret = err;
+    cl_int err = Create( context, mem_flags, fmt, width, height, depth );
+    if( errcode_ret != NULL )
+        *errcode_ret = err;
 }
 
-cl_int clProtectedImage::Create(cl_context context, cl_mem_flags mem_flags,
-                                const cl_image_format *fmt, size_t width,
-                                size_t height, size_t depth)
+cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth )
 {
     cl_int error;
 
-#if defined(__APPLE__)
-    int protect_pages = 1;
-    cl_device_id devices[16];
-    size_t number_of_devices;
-    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices),
-                             devices, &number_of_devices);
-    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+#if defined( __APPLE__ )
+  int protect_pages = 1;
+  cl_device_id devices[16];
+  size_t number_of_devices;
+  error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
+  test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
 
-    number_of_devices /= sizeof(cl_device_id);
-    for (int i = 0; i < (int)number_of_devices; i++)
+  number_of_devices /= sizeof(cl_device_id);
+  for (int i=0; i<(int)number_of_devices; i++) {
+    cl_device_type type;
+    error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
+    if (type == CL_DEVICE_TYPE_GPU) {
+      protect_pages = 0;
+      break;
+    }
+  }
+
+  if (protect_pages) {
+    size_t pixelBytes = get_pixel_bytes(fmt);
+    size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
+    size_t rowStride = rowBytes + kPageSize;
+
+    // create backing store
+    backingStoreSize = height * depth * rowStride + 8 * rowStride;
+    backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+
+    // add guard pages
+    size_t row;
+    char *p = (char*) backingStore;
+    char *imagePtr = (char*) backingStore + 4 * rowStride;
+    for( row = 0; row < 4; row++ )
     {
-        cl_device_type type;
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type,
-                                NULL);
-        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
-        if (type == CL_DEVICE_TYPE_GPU)
-        {
-            protect_pages = 0;
-            break;
-        }
+        mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+    }
+    p += rowBytes;
+    for( row = 0; row < height*depth; row++ )
+    {
+        mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
+    }
+    p -= rowBytes;
+    for( row = 0; row < 4; row++ )
+    {
+        mprotect( p, rowStride, PROT_NONE );    p += rowStride;
     }
 
-    if (protect_pages)
+    if(  getenv( "CL_ALIGN_RIGHT" ) )
     {
-        size_t pixelBytes = get_pixel_bytes(fmt);
-        size_t rowBytes = ROUND_SIZE_UP(width * pixelBytes, kPageSize);
-        size_t rowStride = rowBytes + kPageSize;
-
-        // create backing store
-        backingStoreSize = height * depth * rowStride + 8 * rowStride;
-        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE,
-                            MAP_ANON | MAP_PRIVATE, 0, 0);
-
-        // add guard pages
-        size_t row;
-        char *p = (char *)backingStore;
-        char *imagePtr = (char *)backingStore + 4 * rowStride;
-        for (row = 0; row < 4; row++)
+        static int spewEnv = 1;
+        if(spewEnv)
         {
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
+            log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
+            spewEnv = 0;
         }
-        p += rowBytes;
-        for (row = 0; row < height * depth; row++)
-        {
-            mprotect(p, kPageSize, PROT_NONE);
-            p += rowStride;
-        }
-        p -= rowBytes;
-        for (row = 0; row < 4; row++)
-        {
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
-        }
-
-        if (getenv("CL_ALIGN_RIGHT"))
-        {
-            static int spewEnv = 1;
-            if (spewEnv)
-            {
-                log_info("***CL_ALIGN_RIGHT is set. Aligning images at right "
-                         "edge of page\n");
-                spewEnv = 0;
-            }
-            imagePtr += rowBytes - pixelBytes * width;
-        }
-
-        image = create_image_3d(context, mem_flags | CL_MEM_USE_HOST_PTR, fmt,
-                                width, height, depth, rowStride,
-                                height * rowStride, imagePtr, &error);
+        imagePtr += rowBytes - pixelBytes * width;
     }
-    else
-    {
-        backingStore = NULL;
-        image = create_image_3d(context, mem_flags, fmt, width, height, depth,
-                                0, 0, NULL, &error);
-    }
+
+    image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
+  } else {
+    backingStore = NULL;
+    image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
+  }
 #else
 
     backingStore = NULL;
-    image = create_image_3d(context, mem_flags, fmt, width, height, depth, 0, 0,
-                            NULL, &error);
+    image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
 
 #endif
 
@@ -330,51 +276,37 @@
 }
 
 
-clProtectedImage::clProtectedImage(cl_context context,
-                                   cl_mem_object_type imageType,
-                                   cl_mem_flags mem_flags,
-                                   const cl_image_format *fmt, size_t width,
-                                   size_t height, size_t depth,
-                                   size_t arraySize, cl_int *errcode_ret)
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret )
 {
-    cl_int err = Create(context, imageType, mem_flags, fmt, width, height,
-                        depth, arraySize);
-    if (errcode_ret != NULL) *errcode_ret = err;
+    cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );
+    if( errcode_ret != NULL )
+        *errcode_ret = err;
 }
 
-cl_int clProtectedImage::Create(cl_context context,
-                                cl_mem_object_type imageType,
-                                cl_mem_flags mem_flags,
-                                const cl_image_format *fmt, size_t width,
-                                size_t height, size_t depth, size_t arraySize)
+cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize )
 {
     cl_int error;
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
     int protect_pages = 1;
     cl_device_id devices[16];
     size_t number_of_devices;
-    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices),
-                             devices, &number_of_devices);
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
     test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
 
     number_of_devices /= sizeof(cl_device_id);
-    for (int i = 0; i < (int)number_of_devices; i++)
-    {
+    for (int i=0; i<(int)number_of_devices; i++) {
         cl_device_type type;
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type,
-                                NULL);
+        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
         test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
-        if (type == CL_DEVICE_TYPE_GPU)
-        {
+        if (type == CL_DEVICE_TYPE_GPU) {
             protect_pages = 0;
             break;
         }
     }
 
-    if (protect_pages)
-    {
+    if (protect_pages) {
         size_t pixelBytes = get_pixel_bytes(fmt);
-        size_t rowBytes = ROUND_SIZE_UP(width * pixelBytes, kPageSize);
+        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
         size_t rowStride = rowBytes + kPageSize;
 
         // create backing store
@@ -393,44 +325,37 @@
                 backingStoreSize = arraySize * rowStride + 8 * rowStride;
                 break;
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                backingStoreSize =
-                    height * arraySize * rowStride + 8 * rowStride;
+                backingStoreSize = height * arraySize * rowStride + 8 * rowStride;
                 break;
         }
-        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE,
-                            MAP_ANON | MAP_PRIVATE, 0, 0);
+        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
 
         // add guard pages
         size_t row;
-        char *p = (char *)backingStore;
-        char *imagePtr = (char *)backingStore + 4 * rowStride;
-        for (row = 0; row < 4; row++)
+        char *p = (char*) backingStore;
+        char *imagePtr = (char*) backingStore + 4 * rowStride;
+        for( row = 0; row < 4; row++ )
         {
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
         }
         p += rowBytes;
-        size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1)
-            * (arraySize > 0 ? arraySize : 1);
-        for (row = 0; row < sz; row++)
+        size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1) * (arraySize > 0 ? arraySize : 1);
+        for( row = 0; row < sz; row++ )
         {
-            mprotect(p, kPageSize, PROT_NONE);
-            p += rowStride;
+            mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
         }
         p -= rowBytes;
-        for (row = 0; row < 4; row++)
+        for( row = 0; row < 4; row++ )
         {
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
         }
 
-        if (getenv("CL_ALIGN_RIGHT"))
+        if(  getenv( "CL_ALIGN_RIGHT" ) )
         {
             static int spewEnv = 1;
-            if (spewEnv)
+            if(spewEnv)
             {
-                log_info("***CL_ALIGN_RIGHT is set. Aligning images at right "
-                         "edge of page\n");
+                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                 spewEnv = 0;
             }
             imagePtr += rowBytes - pixelBytes * width;
@@ -439,61 +364,42 @@
         switch (imageType)
         {
             case CL_MEM_OBJECT_IMAGE1D:
-                image = create_image_1d(
-                    context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width,
-                    rowStride, imagePtr, NULL, &error);
+                image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
                 break;
             case CL_MEM_OBJECT_IMAGE2D:
-                image = create_image_2d(
-                    context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width,
-                    height, rowStride, imagePtr, &error);
+                image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
                 break;
             case CL_MEM_OBJECT_IMAGE3D:
-                image =
-                    create_image_3d(context, mem_flags | CL_MEM_USE_HOST_PTR,
-                                    fmt, width, height, depth, rowStride,
-                                    height * rowStride, imagePtr, &error);
+                image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
                 break;
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                image = create_image_1d_array(
-                    context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width,
-                    arraySize, rowStride, rowStride, imagePtr, &error);
+                image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error );
                 break;
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                image = create_image_2d_array(
-                    context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width,
-                    height, arraySize, rowStride, height * rowStride, imagePtr,
-                    &error);
+                image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error );
                 break;
         }
-    }
-    else
-    {
+    } else {
         backingStore = NULL;
         switch (imageType)
         {
             case CL_MEM_OBJECT_IMAGE1D:
-                image = create_image_1d(context, mem_flags, fmt, width, 0, NULL,
-                                        NULL, &error);
+                image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
                 break;
             case CL_MEM_OBJECT_IMAGE2D:
-                image = create_image_2d(context, mem_flags, fmt, width, height,
-                                        0, NULL, &error);
+                image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
                 break;
             case CL_MEM_OBJECT_IMAGE3D:
-                image = create_image_3d(context, mem_flags, fmt, width, height,
-                                        depth, 0, 0, NULL, &error);
+                image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
                 break;
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                image = create_image_1d_array(context, mem_flags, fmt, width,
-                                              arraySize, 0, 0, NULL, &error);
+                image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
                 break;
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                image = create_image_2d_array(context, mem_flags, fmt, width,
-                                              height, arraySize, 0, 0, NULL,
-                                              &error);
+                image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
                 break;
         }
+
     }
 #else
 
@@ -501,25 +407,19 @@
     switch (imageType)
     {
         case CL_MEM_OBJECT_IMAGE1D:
-            image = create_image_1d(context, mem_flags, fmt, width, 0, NULL,
-                                    NULL, &error);
+            image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
             break;
         case CL_MEM_OBJECT_IMAGE2D:
-            image = create_image_2d(context, mem_flags, fmt, width, height, 0,
-                                    NULL, &error);
+            image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
             break;
         case CL_MEM_OBJECT_IMAGE3D:
-            image = create_image_3d(context, mem_flags, fmt, width, height,
-                                    depth, 0, 0, NULL, &error);
+            image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
             break;
         case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-            image = create_image_1d_array(context, mem_flags, fmt, width,
-                                          arraySize, 0, 0, NULL, &error);
+            image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
             break;
         case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-            image =
-                create_image_2d_array(context, mem_flags, fmt, width, height,
-                                      arraySize, 0, 0, NULL, &error);
+            image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
             break;
     }
 #endif
@@ -527,52 +427,55 @@
 }
 
 
+
 /*******
  * clProtectedArray implementation
  *******/
-clProtectedArray::clProtectedArray() { mBuffer = mValidBuffer = NULL; }
-
-clProtectedArray::clProtectedArray(size_t sizeInBytes)
+clProtectedArray::clProtectedArray()
 {
     mBuffer = mValidBuffer = NULL;
-    Allocate(sizeInBytes);
+}
+
+clProtectedArray::clProtectedArray( size_t sizeInBytes )
+{
+    mBuffer = mValidBuffer = NULL;
+    Allocate( sizeInBytes );
 }
 
 clProtectedArray::~clProtectedArray()
 {
-    if (mBuffer != NULL)
-    {
-#if defined(__APPLE__)
-        int error = munmap(mBuffer, mRealSize);
-        if (error) log_error("WARNING: munmap failed in clProtectedArray.\n");
+    if( mBuffer != NULL ) {
+#if defined( __APPLE__ )
+        int error = munmap( mBuffer, mRealSize );
+      if (error) log_error("WARNING: munmap failed in clProtectedArray.\n");
 #else
-        free(mBuffer);
+    free( mBuffer );
 #endif
-    }
+  }
 }
 
-void clProtectedArray::Allocate(size_t sizeInBytes)
+void clProtectedArray::Allocate( size_t sizeInBytes )
 {
 
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
 
-    // Allocate enough space to: round up our actual allocation to an even
-    // number of pages and allocate two pages on either side
-    mRoundedSize = ROUND_SIZE_UP(sizeInBytes, kPageSize);
+    // Allocate enough space to: round up our actual allocation to an even number of pages
+    // and allocate two pages on either side
+    mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
     mRealSize = mRoundedSize + kPageSize * 2;
 
-    // Use mmap here to ensure we start on a page boundary, so the mprotect
-    // calls will work OK
-    mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE,
-                           MAP_ANON | MAP_PRIVATE, 0, 0);
+    // Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
+    mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
 
     mValidBuffer = mBuffer + kPageSize;
 
     // Protect guard area from access
-    mprotect(mValidBuffer - kPageSize, kPageSize, PROT_NONE);
-    mprotect(mValidBuffer + mRoundedSize, kPageSize, PROT_NONE);
+    mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE );
+    mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE );
 #else
-    mRoundedSize = mRealSize = sizeInBytes;
-    mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
+  mRoundedSize = mRealSize = sizeInBytes;
+  mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
 #endif
 }
+
+

diff --git a/test_common/harness/typeWrappers.h b/test_common/harness/typeWrappers.h
index 9a58a9d..384493f 100644
--- a/test_common/harness/typeWrappers.h
+++ b/test_common/harness/typeWrappers.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -31,376 +31,299 @@
 
 /* cl_context wrapper */
 
-class clContextWrapper {
-public:
-    clContextWrapper() { mContext = NULL; }
-    clContextWrapper(cl_context program) { mContext = program; }
-    ~clContextWrapper()
-    {
-        if (mContext != NULL) clReleaseContext(mContext);
-    }
+class clContextWrapper
+{
+    public:
+        clContextWrapper() { mContext = NULL; }
+        clContextWrapper( cl_context program ) { mContext = program; }
+        ~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); }
 
-    clContextWrapper &operator=(const cl_context &rhs)
-    {
-        mContext = rhs;
-        return *this;
-    }
-    operator cl_context() const { return mContext; }
+        clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
+        operator cl_context() const { return mContext; }
 
-    cl_context *operator&() { return &mContext; }
+        cl_context * operator&() { return &mContext; }
 
-    bool operator==(const cl_context &rhs) { return mContext == rhs; }
+        bool operator==( const cl_context &rhs ) { return mContext == rhs; }
 
-protected:
-    cl_context mContext;
+    protected:
+
+        cl_context mContext;
 };
 
 /* cl_program wrapper */
 
-class clProgramWrapper {
-public:
-    clProgramWrapper() { mProgram = NULL; }
-    clProgramWrapper(cl_program program) { mProgram = program; }
-    ~clProgramWrapper()
-    {
-        if (mProgram != NULL) clReleaseProgram(mProgram);
-    }
+class clProgramWrapper
+{
+    public:
+        clProgramWrapper() { mProgram = NULL; }
+        clProgramWrapper( cl_program program ) { mProgram = program; }
+        ~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); }
 
-    clProgramWrapper &operator=(const cl_program &rhs)
-    {
-        mProgram = rhs;
-        return *this;
-    }
-    operator cl_program() const { return mProgram; }
+        clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
+        operator cl_program() const { return mProgram; }
 
-    cl_program *operator&() { return &mProgram; }
+        cl_program * operator&() { return &mProgram; }
 
-    bool operator==(const cl_program &rhs) { return mProgram == rhs; }
+        bool operator==( const cl_program &rhs ) { return mProgram == rhs; }
 
-protected:
-    cl_program mProgram;
+    protected:
+
+        cl_program mProgram;
 };
 
 /* cl_kernel wrapper */
 
-class clKernelWrapper {
-public:
-    clKernelWrapper() { mKernel = NULL; }
-    clKernelWrapper(cl_kernel kernel) { mKernel = kernel; }
-    ~clKernelWrapper()
-    {
-        if (mKernel != NULL) clReleaseKernel(mKernel);
-    }
+class clKernelWrapper
+{
+    public:
+        clKernelWrapper() { mKernel = NULL; }
+        clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; }
+        ~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); }
 
-    clKernelWrapper &operator=(const cl_kernel &rhs)
-    {
-        mKernel = rhs;
-        return *this;
-    }
-    operator cl_kernel() const { return mKernel; }
+        clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
+        operator cl_kernel() const { return mKernel; }
 
-    cl_kernel *operator&() { return &mKernel; }
+        cl_kernel * operator&() { return &mKernel; }
 
-    bool operator==(const cl_kernel &rhs) { return mKernel == rhs; }
+        bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; }
 
-protected:
-    cl_kernel mKernel;
+    protected:
+
+        cl_kernel mKernel;
 };
 
 /* cl_mem (stream) wrapper */
 
-class clMemWrapper {
-public:
-    clMemWrapper() { mMem = NULL; }
-    clMemWrapper(cl_mem mem) { mMem = mem; }
-    ~clMemWrapper()
-    {
-        if (mMem != NULL) clReleaseMemObject(mMem);
-    }
+class clMemWrapper
+{
+    public:
+        clMemWrapper() { mMem = NULL; }
+        clMemWrapper( cl_mem mem ) { mMem = mem; }
+        ~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); }
 
-    clMemWrapper &operator=(const cl_mem &rhs)
-    {
-        mMem = rhs;
-        return *this;
-    }
-    operator cl_mem() const { return mMem; }
+        clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
+        operator cl_mem() const { return mMem; }
 
-    cl_mem *operator&() { return &mMem; }
+        cl_mem * operator&() { return &mMem; }
 
-    bool operator==(const cl_mem &rhs) { return mMem == rhs; }
+        bool operator==( const cl_mem &rhs ) { return mMem == rhs; }
 
-protected:
-    cl_mem mMem;
+    protected:
+
+        cl_mem mMem;
 };
 
-class clProtectedImage {
-public:
-    clProtectedImage()
-    {
-        image = NULL;
-        backingStore = NULL;
-    }
-    clProtectedImage(cl_context context, cl_mem_flags flags,
-                     const cl_image_format *fmt, size_t width,
-                     cl_int *errcode_ret);
-    clProtectedImage(cl_context context, cl_mem_flags flags,
-                     const cl_image_format *fmt, size_t width, size_t height,
-                     cl_int *errcode_ret);
-    clProtectedImage(cl_context context, cl_mem_flags flags,
-                     const cl_image_format *fmt, size_t width, size_t height,
-                     size_t depth, cl_int *errcode_ret);
-    clProtectedImage(cl_context context, cl_mem_object_type imageType,
-                     cl_mem_flags flags, const cl_image_format *fmt,
-                     size_t width, size_t height, size_t depth,
-                     size_t arraySize, cl_int *errcode_ret);
-    ~clProtectedImage()
-    {
-        if (image != NULL) clReleaseMemObject(image);
+class clProtectedImage
+{
+    public:
+        clProtectedImage() { image = NULL; backingStore = NULL; }
+        clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret );
+        clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret );
+        clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
+        clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret );
+        ~clProtectedImage()
+        {
+            if( image != NULL )
+                clReleaseMemObject( image );
 
-#if defined(__APPLE__)
-        if (backingStore) munmap(backingStore, backingStoreSize);
+#if defined( __APPLE__ )
+            if(backingStore)
+                munmap(backingStore, backingStoreSize);
 #endif
-    }
+        }
 
-    cl_int Create(cl_context context, cl_mem_flags flags,
-                  const cl_image_format *fmt, size_t width);
-    cl_int Create(cl_context context, cl_mem_flags flags,
-                  const cl_image_format *fmt, size_t width, size_t height);
-    cl_int Create(cl_context context, cl_mem_flags flags,
-                  const cl_image_format *fmt, size_t width, size_t height,
-                  size_t depth);
-    cl_int Create(cl_context context, cl_mem_object_type imageType,
-                  cl_mem_flags flags, const cl_image_format *fmt, size_t width,
-                  size_t height, size_t depth, size_t arraySize);
+        cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width );
+        cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
+        cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
+        cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize );
 
-    clProtectedImage &operator=(const cl_mem &rhs)
-    {
-        image = rhs;
-        backingStore = NULL;
-        return *this;
-    }
-    operator cl_mem() { return image; }
+        clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
+        operator cl_mem() { return image; }
 
-    cl_mem *operator&() { return &image; }
+        cl_mem * operator&() { return &image; }
 
-    bool operator==(const cl_mem &rhs) { return image == rhs; }
+        bool operator==( const cl_mem &rhs ) { return image == rhs; }
 
-protected:
-    void *backingStore;
-    size_t backingStoreSize;
-    cl_mem image;
+    protected:
+        void *backingStore;
+        size_t backingStoreSize;
+        cl_mem  image;
 };
 
 /* cl_command_queue wrapper */
-class clCommandQueueWrapper {
-public:
-    clCommandQueueWrapper() { mMem = NULL; }
-    clCommandQueueWrapper(cl_command_queue mem) { mMem = mem; }
-    ~clCommandQueueWrapper()
-    {
-        if (mMem != NULL)
-        {
-            clReleaseCommandQueue(mMem);
-        }
-    }
+class clCommandQueueWrapper
+{
+    public:
+        clCommandQueueWrapper() { mMem = NULL; }
+        clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; }
+  ~clCommandQueueWrapper() { if( mMem != NULL ) { clReleaseCommandQueue( mMem ); } }
 
-    clCommandQueueWrapper &operator=(const cl_command_queue &rhs)
-    {
-        mMem = rhs;
-        return *this;
-    }
-    operator cl_command_queue() const { return mMem; }
+        clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
+        operator cl_command_queue() const { return mMem; }
 
-    cl_command_queue *operator&() { return &mMem; }
+        cl_command_queue * operator&() { return &mMem; }
 
-    bool operator==(const cl_command_queue &rhs) { return mMem == rhs; }
+        bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; }
 
-protected:
-    cl_command_queue mMem;
+    protected:
+
+        cl_command_queue mMem;
 };
 
 /* cl_sampler wrapper */
-class clSamplerWrapper {
-public:
-    clSamplerWrapper() { mMem = NULL; }
-    clSamplerWrapper(cl_sampler mem) { mMem = mem; }
-    ~clSamplerWrapper()
-    {
-        if (mMem != NULL) clReleaseSampler(mMem);
-    }
+class clSamplerWrapper
+{
+    public:
+        clSamplerWrapper() { mMem = NULL; }
+        clSamplerWrapper( cl_sampler mem ) { mMem = mem; }
+        ~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); }
 
-    clSamplerWrapper &operator=(const cl_sampler &rhs)
-    {
-        mMem = rhs;
-        return *this;
-    }
-    operator cl_sampler() const { return mMem; }
+        clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
+        operator cl_sampler() const { return mMem; }
 
-    cl_sampler *operator&() { return &mMem; }
+        cl_sampler * operator&() { return &mMem; }
 
-    bool operator==(const cl_sampler &rhs) { return mMem == rhs; }
+        bool operator==( const cl_sampler &rhs ) { return mMem == rhs; }
 
-protected:
-    cl_sampler mMem;
+    protected:
+
+        cl_sampler mMem;
 };
 
 /* cl_event wrapper */
-class clEventWrapper {
-public:
-    clEventWrapper() { mMem = NULL; }
-    clEventWrapper(cl_event mem) { mMem = mem; }
-    ~clEventWrapper()
-    {
-        if (mMem != NULL) clReleaseEvent(mMem);
-    }
+class clEventWrapper
+{
+    public:
+        clEventWrapper() { mMem = NULL; }
+        clEventWrapper( cl_event mem ) { mMem = mem; }
+        ~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); }
 
-    clEventWrapper &operator=(const cl_event &rhs)
-    {
-        mMem = rhs;
-        return *this;
-    }
-    operator cl_event() const { return mMem; }
+        clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
+        operator cl_event() const { return mMem; }
 
-    cl_event *operator&() { return &mMem; }
+        cl_event * operator&() { return &mMem; }
 
-    bool operator==(const cl_event &rhs) { return mMem == rhs; }
+        bool operator==( const cl_event &rhs ) { return mMem == rhs; }
 
-protected:
-    cl_event mMem;
+    protected:
+
+        cl_event mMem;
 };
 
 /* Generic protected memory buffer, for verifying access within bounds */
-class clProtectedArray {
-public:
-    clProtectedArray();
-    clProtectedArray(size_t sizeInBytes);
-    virtual ~clProtectedArray();
+class clProtectedArray
+{
+    public:
+        clProtectedArray();
+        clProtectedArray( size_t sizeInBytes );
+        virtual ~clProtectedArray();
 
-    void Allocate(size_t sizeInBytes);
+        void    Allocate( size_t sizeInBytes );
 
-    operator void *() { return (void *)mValidBuffer; }
-    operator const void *() const { return (const void *)mValidBuffer; }
+        operator void *()        { return (void *)mValidBuffer; }
+        operator const void *() const { return (const void *)mValidBuffer; }
 
-protected:
-    char *mBuffer;
-    char *mValidBuffer;
-    size_t mRealSize, mRoundedSize;
+    protected:
+
+         char *    mBuffer;
+         char * mValidBuffer;
+        size_t    mRealSize, mRoundedSize;
 };
 
-class RandomSeed {
-public:
-    RandomSeed(cl_uint seed)
-    {
-        if (seed) log_info("(seed = %10.10u) ", seed);
-        mtData = init_genrand(seed);
-    }
-    ~RandomSeed()
-    {
-        if (gReSeed) gRandomSeed = genrand_int32(mtData);
-        free_mtdata(mtData);
-    }
+class RandomSeed
+{
+    public:
+        RandomSeed( cl_uint seed  ){ if(seed) log_info( "(seed = %10.10u) ", seed ); mtData = init_genrand(seed); }
+        ~RandomSeed()
+        {
+            if( gReSeed )
+                gRandomSeed = genrand_int32( mtData );
+            free_mtdata(mtData);
+        }
 
-    operator MTdata() { return mtData; }
+        operator MTdata ()     {return mtData;}
 
-protected:
-    MTdata mtData;
+    protected:
+        MTdata mtData;
 };
 
 
-template <typename T> class BufferOwningPtr {
-    BufferOwningPtr(BufferOwningPtr const &); // do not implement
-    void operator=(BufferOwningPtr const &); // do not implement
+template <typename T> class BufferOwningPtr
+{
+  BufferOwningPtr(BufferOwningPtr const &); // do not implement
+    void operator=(BufferOwningPtr const &);  // do not implement
 
     void *ptr;
     void *map;
-    // Bytes allocated total, pointed to by map:
-    size_t mapsize;
-    // Bytes allocated in unprotected pages, pointed to by ptr:
-    size_t allocsize;
-    bool aligned;
-
-public:
-    explicit BufferOwningPtr(void *p = 0)
-        : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false)
-    {}
-    explicit BufferOwningPtr(void *p, void *m, size_t s)
-        : ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
-    {
-#if !defined(__APPLE__)
-        if (m)
+  size_t mapsize;   // Bytes allocated total, pointed to by map.
+  size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr.
+  bool aligned;
+  public:
+  explicit BufferOwningPtr(void *p = 0) : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false) {}
+  explicit BufferOwningPtr(void *p, void *m, size_t s)
+    : ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
+      {
+#if ! defined( __APPLE__ )
+        if(m)
         {
-            log_error("ERROR: unhandled code path. BufferOwningPtr allocated "
-                      "with mapped buffer!");
+            log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
             abort();
         }
 #endif
-    }
-    ~BufferOwningPtr()
-    {
-        if (map)
-        {
-#if defined(__APPLE__)
-            int error = munmap(map, mapsize);
-            if (error)
-                log_error("WARNING: munmap failed in BufferOwningPtr.\n");
+      }
+    ~BufferOwningPtr() {
+      if (map) {
+#if defined( __APPLE__ )
+        int error = munmap(map, mapsize);
+        if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
 #endif
-        }
-        else
-        {
-            if (aligned)
-            {
-                align_free(ptr);
-            }
-            else
-            {
-                free(ptr);
-            }
-        }
+      } else {
+          if ( aligned )
+          {
+              align_free(ptr);
+          }
+          else
+          {
+            free(ptr);
+          }
+      }
     }
-    void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0,
-               bool aligned_ = false)
-    {
-        if (map)
-        {
-#if defined(__APPLE__)
-            int error = munmap(map, mapsize);
-            if (error)
-                log_error("WARNING: munmap failed in BufferOwningPtr.\n");
+  void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false) {
+      if (map){
+#if defined( __APPLE__ )
+        int error = munmap(map, mapsize);
+        if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
 #else
-            log_error("ERROR: unhandled code path. BufferOwningPtr reset with "
-                      "mapped buffer!");
-            abort();
+        log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" );
+        abort();
 #endif
-        }
-        else
+      } else {
+          if ( aligned )
+          {
+              align_free(ptr);
+          }
+          else
+          {
+            free(ptr);
+          }
+      }
+      ptr = p;
+      map = m;
+      mapsize = mapsize_;
+      allocsize =  (ptr != NULL) ? allocsize_ : 0; // Force allocsize to zero if ptr is NULL.
+      aligned = aligned_;
+#if ! defined( __APPLE__ )
+        if(m)
         {
-            if (aligned)
-            {
-                align_free(ptr);
-            }
-            else
-            {
-                free(ptr);
-            }
-        }
-        ptr = p;
-        map = m;
-        mapsize = mapsize_;
-        // Force allocsize to zero if ptr is NULL:
-        allocsize = (ptr != NULL) ? allocsize_ : 0;
-        aligned = aligned_;
-#if !defined(__APPLE__)
-        if (m)
-        {
-            log_error("ERROR: unhandled code path. BufferOwningPtr allocated "
-                      "with mapped buffer!");
+            log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
             abort();
         }
 #endif
     }
-    operator T *() { return (T *)ptr; }
+    operator T*() { return (T*)ptr; }
 
-    size_t getSize() const { return allocsize; };
+      size_t getSize() const { return allocsize; };
 };
 
 #endif // _typeWrappers_h
+

diff --git a/test_conformance/CMakeLists.txt b/test_conformance/CMakeLists.txt
index 363ece8..b5125be 100644
--- a/test_conformance/CMakeLists.txt
+++ b/test_conformance/CMakeLists.txt

@@ -21,7 +21,6 @@
 endif(D3D11_IS_SUPPORTED)
 add_subdirectory( device_partition )
 add_subdirectory( events )
-add_subdirectory( extensions )
 add_subdirectory( geometrics )
 if(GL_IS_SUPPORTED)
    add_subdirectory( gl )
@@ -40,7 +39,8 @@
 add_subdirectory( relationals )
 add_subdirectory( select )
 add_subdirectory( thread_dimensions )
-add_subdirectory( vectors )
+add_subdirectory( vec_align )
+add_subdirectory( vec_step )
 add_subdirectory( c11_atomics )
 add_subdirectory( device_execution )
 add_subdirectory( non_uniform_work_group )
@@ -50,15 +50,13 @@
 add_subdirectory( workgroups )
 add_subdirectory( pipes )
 add_subdirectory( device_timer )
+add_subdirectory( clcpp )
 add_subdirectory( spirv_new )
 add_subdirectory( spir )
 
 file(GLOB CSV_FILES "opencl_conformance_tests_*.csv")
 
-set(PY_FILES
-    generate_spirv_offline.py
-    run_conformance.py
-)
+set(PY_FILES run_conformance.py)
 
 # Copy .csv files
 foreach(FILE ${CSV_FILES})

diff --git a/test_conformance/SVM/main.cpp b/test_conformance/SVM/main.cpp
index 56fb24f..fe4aa59 100644
--- a/test_conformance/SVM/main.cpp
+++ b/test_conformance/SVM/main.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -213,15 +213,14 @@
       return -1;
     }
     bool extensions_supported = true;
-    for (auto extension : extensions_list)
+    for (auto extension : extensions_list) 
     {
-        if (!is_extension_available(devices[i], extension.c_str()))
-        {
-            log_error("Required extension not found - device id %d - %s\n", i,
-                      extension.c_str());
-            extensions_supported = false;
-            break;
-        }
+      if (!is_extension_available(devices[i], extension.c_str())) 
+      {
+        log_error("Required extension not found - device id %d - %s\n", i, extension.c_str());
+        extensions_supported = false;
+        break;
+      }
     }
     if((caps & required_svm_caps) == required_svm_caps && extensions_supported)
     {
@@ -250,11 +249,10 @@
     test_error(error, "clCreateCommandQueue failed");
   }
 
-  if (ppCodeString)
+  if(ppCodeString)
   {
-      error =
-          create_single_kernel_helper(*context, program, 0, 1, ppCodeString, 0);
-      test_error(error, "failed to create program");
+    error = create_single_kernel_helper(*context, program, 0, 1, ppCodeString, 0, "-cl-std=CL2.0");
+    test_error( error, "failed to create program" );
   }
 
   return 0;
@@ -282,12 +280,9 @@
 test_status InitCL(cl_device_id device) {
   auto version = get_device_cl_version(device);
   auto expected_min_version = Version(2, 0);
-  if (version < expected_min_version)
-  {
-      version_expected_info("Test", "OpenCL",
-                            expected_min_version.to_string().c_str(),
-                            version.to_string().c_str());
-      return TEST_SKIP;
+  if (version < expected_min_version) {
+    version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
+    return TEST_SKIP;
   }
 
   int error;

diff --git a/test_conformance/SVM/test_fine_grain_memory_consistency.cpp b/test_conformance/SVM/test_fine_grain_memory_consistency.cpp
index b28db41..42ea0bd 100644
--- a/test_conformance/SVM/test_fine_grain_memory_consistency.cpp
+++ b/test_conformance/SVM/test_fine_grain_memory_consistency.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,33 +16,27 @@
 #include "common.h"
 
 static char hash_table_kernel[] =
-    "#if 0\n"
-    "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
-    "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
-    "#endif\n"
-    "typedef struct BinNode {\n"
-    " int value;\n"
-    " atomic_uintptr_t pNext;\n"
-    "} BinNode;\n"
+  "#if 0\n"
+  "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
+  "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
+  "#endif\n"
+  "typedef struct BinNode {\n"
+  " int value;\n"
+  " atomic_uintptr_t pNext;\n"
+  "} BinNode;\n"
 
-    "__kernel void build_hash_table(__global uint* input, __global BinNode* "
-    "pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
-    "{\n"
-    " __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, "
-    "1u, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
-    " uint i = get_global_id(0);\n"
-    " uint b = input[i] % numBins;\n"
-    " pNew->value = input[i];\n"
-    " uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), "
-    "memory_order_seq_cst, memory_scope_all_svm_devices);\n"
-    " do\n"
-    " {\n"
-    "   atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, "
-    "memory_scope_all_svm_devices);\n" // always inserting at head of list
-    " } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), "
-    "&next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, "
-    "memory_scope_all_svm_devices));\n"
-    "}\n";
+  "__kernel void build_hash_table(__global uint* input, __global BinNode* pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
+  "{\n"
+  " __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, 1, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
+  " uint i = get_global_id(0);\n"
+  " uint b = input[i] % numBins;\n"
+  " pNew->value = input[i];\n"
+  " uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), memory_order_seq_cst, memory_scope_all_svm_devices);\n"
+  " do\n"
+  " {\n"
+  "   atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, memory_scope_all_svm_devices);\n" // always inserting at head of list
+  " } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), &next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, memory_scope_all_svm_devices));\n"
+  "}\n";
 
 typedef struct BinNode{
   cl_uint value;

diff --git a/test_conformance/SVM/test_fine_grain_sync_buffers.cpp b/test_conformance/SVM/test_fine_grain_sync_buffers.cpp
index 0b94cbf..4cc3495 100644
--- a/test_conformance/SVM/test_fine_grain_sync_buffers.cpp
+++ b/test_conformance/SVM/test_fine_grain_sync_buffers.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -17,19 +17,15 @@
 
 const char *find_targets_kernel[] = {
 
-    "__kernel void find_targets(__global uint* image, uint target, volatile "
-    "__global atomic_uint *numTargetsFound, volatile __global atomic_uint "
-    "*targetLocations)\n"
-    "{\n"
-    " size_t i = get_global_id(0);\n"
-    " uint index;\n"
-    " if(image[i] == target) {\n"
-    "   index = atomic_fetch_add_explicit(numTargetsFound, 1u, "
-    "memory_order_relaxed, memory_scope_device); \n"
-    "   atomic_exchange_explicit(&targetLocations[index], i, "
-    "memory_order_relaxed, memory_scope_all_svm_devices); \n"
-    " }\n"
-    "}\n"
+  "__kernel void find_targets(__global uint* image, uint target, volatile __global atomic_uint *numTargetsFound, volatile __global atomic_uint *targetLocations)\n"
+  "{\n"
+  " size_t i = get_global_id(0);\n"
+  " uint index;\n"
+  " if(image[i] == target) {\n"
+  "   index = atomic_fetch_add_explicit(numTargetsFound, 1, memory_order_relaxed, memory_scope_device); \n"
+  "   atomic_exchange_explicit(&targetLocations[index], i, memory_order_relaxed, memory_scope_all_svm_devices); \n"
+  " }\n"
+  "}\n"
 };
 
 

diff --git a/test_conformance/SVM/test_migrate.cpp b/test_conformance/SVM/test_migrate.cpp
index 2a1ce05..3a39eae 100644
--- a/test_conformance/SVM/test_migrate.cpp
+++ b/test_conformance/SVM/test_migrate.cpp

@@ -199,24 +199,6 @@
     error = clFlush(queues[1]);
     test_error(error, "clFlush failed");
 
-    // Check the event command type for clEnqueueSVMMigrateMem (OpenCL 3.0 and
-    // newer)
-    Version version = get_device_cl_version(deviceID);
-    if (version >= Version(3, 0))
-    {
-        cl_command_type commandType;
-        error = clGetEventInfo(evs[3], CL_EVENT_COMMAND_TYPE,
-                               sizeof(commandType), &commandType, NULL);
-        test_error(error, "clGetEventInfo failed");
-        if (commandType != CL_COMMAND_SVM_MIGRATE_MEM)
-        {
-            log_error("Invalid command type returned for "
-                      "clEnqueueSVMMigrateMem: %X\n",
-                      commandType);
-            return TEST_FAIL;
-        }
-    }
-
     error = wait_and_release("first batch", evs, 8);
     if (error)
         return -1;

diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt
index d3e6c6a..d330b4b 100644
--- a/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/api/CMakeLists.txt

@@ -2,19 +2,16 @@
 
 set(${MODULE_NAME}_SOURCES
          main.cpp
-         negative_platform.cpp
-         test_api_consistency.cpp
          test_bool.cpp
          test_retain.cpp
          test_retain_program.cpp
          test_queries.cpp
+         test_queries_compatibility.cpp
          test_create_kernels.cpp
          test_kernels.cpp
-         test_kernel_private_memory_size.cpp
          test_api_min_max.cpp
          test_kernel_arg_changes.cpp
          test_kernel_arg_multi_setup.cpp
-         test_kernel_attributes.cpp
          test_binary.cpp
          test_native_kernel.cpp
          test_mem_objects.cpp
@@ -22,19 +19,14 @@
          test_device_min_data_type_align_size_alignment.cpp
          test_platform.cpp
          test_kernel_arg_info.cpp
+         test_kernel_arg_info_compatibility.cpp
          test_null_buffer_arg.cpp
          test_mem_object_info.cpp
-         test_min_image_formats.cpp
-         test_queue.cpp
          test_queue_hint.cpp
          test_queue_properties.cpp
          test_sub_group_dispatch.cpp
          test_clone_kernel.cpp
          test_zero_sized_enqueue.cpp
-         test_context_destructor_callback.cpp
-         test_mem_object_properties_queries.cpp
-         test_queue_properties_queries.cpp
-         test_pipe_properties_queries.cpp
 )
 
 include(../CMakeCommon.txt)

diff --git a/test_conformance/api/main.cpp b/test_conformance/api/main.cpp
index fa76a40..72cbbea 100644
--- a/test_conformance/api/main.cpp
+++ b/test_conformance/api/main.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -26,133 +26,107 @@
 #endif
 
 // FIXME: To use certain functions in harness/imageHelpers.h
-// (for example, generate_random_image_data()), the tests are required to
-// declare the following variables (<rdar://problem/11111245>):
+// (for example, generate_random_image_data()), the tests are required to declare
+// the following variables (<rdar://problem/11111245>):
 
 test_definition test_list[] = {
-    ADD_TEST(get_platform_info),
-    ADD_TEST_VERSION(get_sampler_info, Version(2, 0)),
-    ADD_TEST(get_sampler_info_compatibility),
-    ADD_TEST_VERSION(get_command_queue_info, Version(2, 0)),
-    ADD_TEST(get_command_queue_info_compatibility),
-    ADD_TEST(get_context_info),
-    ADD_TEST(get_device_info),
-    ADD_TEST(enqueue_task),
-    ADD_TEST(binary_get),
-    ADD_TEST(binary_create),
-    ADD_TEST(kernel_required_group_size),
+    ADD_TEST( get_platform_info ),
+    ADD_TEST_VERSION( get_sampler_info, Version(2, 0) ),
+    ADD_TEST( get_sampler_info_compatibility ),
+    ADD_TEST_VERSION( get_command_queue_info, Version(2, 0) ),
+    ADD_TEST( get_command_queue_info_compatibility ),
+    ADD_TEST( get_context_info ),
+    ADD_TEST( get_device_info ),
+    ADD_TEST( enqueue_task ),
+    ADD_TEST( binary_get ),
+    ADD_TEST( binary_create ),
+    ADD_TEST( kernel_required_group_size ),
 
-    ADD_TEST(release_kernel_order),
-    ADD_TEST(release_during_execute),
+    ADD_TEST( release_kernel_order ),
+    ADD_TEST( release_during_execute ),
 
-    ADD_TEST(load_single_kernel),
-    ADD_TEST(load_two_kernels),
-    ADD_TEST(load_two_kernels_in_one),
-    ADD_TEST(load_two_kernels_manually),
-    ADD_TEST(get_program_info_kernel_names),
-    ADD_TEST(get_kernel_arg_info),
-    ADD_TEST(create_kernels_in_program),
-    ADD_TEST(get_kernel_info),
-    ADD_TEST(kernel_private_memory_size),
-    ADD_TEST(execute_kernel_local_sizes),
-    ADD_TEST(set_kernel_arg_by_index),
-    ADD_TEST(set_kernel_arg_constant),
-    ADD_TEST(set_kernel_arg_struct_array),
-    ADD_TEST(kernel_global_constant),
-    ADD_TEST(kernel_attributes),
+    ADD_TEST( load_single_kernel ),
+    ADD_TEST( load_two_kernels ),
+    ADD_TEST( load_two_kernels_in_one ),
+    ADD_TEST( load_two_kernels_manually ),
+    ADD_TEST( get_program_info_kernel_names ),
+    ADD_TEST( get_kernel_arg_info ),
+    ADD_TEST( get_kernel_arg_info_compatibility ),
+    ADD_TEST( create_kernels_in_program ),
+    ADD_TEST( get_kernel_info ),
+    ADD_TEST( execute_kernel_local_sizes ),
+    ADD_TEST( set_kernel_arg_by_index ),
+    ADD_TEST( set_kernel_arg_constant ),
+    ADD_TEST( set_kernel_arg_struct_array ),
+    ADD_TEST( kernel_global_constant ),
 
-    ADD_TEST(min_max_thread_dimensions),
-    ADD_TEST(min_max_work_items_sizes),
-    ADD_TEST(min_max_work_group_size),
-    ADD_TEST(min_max_read_image_args),
-    ADD_TEST(min_max_write_image_args),
-    ADD_TEST(min_max_mem_alloc_size),
-    ADD_TEST(min_max_image_2d_width),
-    ADD_TEST(min_max_image_2d_height),
-    ADD_TEST(min_max_image_3d_width),
-    ADD_TEST(min_max_image_3d_height),
-    ADD_TEST(min_max_image_3d_depth),
-    ADD_TEST(min_max_image_array_size),
-    ADD_TEST(min_max_image_buffer_size),
-    ADD_TEST(min_max_parameter_size),
-    ADD_TEST(min_max_samplers),
-    ADD_TEST(min_max_constant_buffer_size),
-    ADD_TEST(min_max_constant_args),
-    ADD_TEST(min_max_compute_units),
-    ADD_TEST(min_max_address_bits),
-    ADD_TEST(min_max_single_fp_config),
-    ADD_TEST(min_max_double_fp_config),
-    ADD_TEST(min_max_local_mem_size),
-    ADD_TEST(min_max_kernel_preferred_work_group_size_multiple),
-    ADD_TEST(min_max_execution_capabilities),
-    ADD_TEST(min_max_queue_properties),
-    ADD_TEST(min_max_device_version),
-    ADD_TEST(min_max_language_version),
+    ADD_TEST( min_max_thread_dimensions ),
+    ADD_TEST( min_max_work_items_sizes ),
+    ADD_TEST( min_max_work_group_size ),
+    ADD_TEST( min_max_read_image_args ),
+    ADD_TEST( min_max_write_image_args ),
+    ADD_TEST( min_max_mem_alloc_size ),
+    ADD_TEST( min_max_image_2d_width ),
+    ADD_TEST( min_max_image_2d_height ),
+    ADD_TEST( min_max_image_3d_width ),
+    ADD_TEST( min_max_image_3d_height ),
+    ADD_TEST( min_max_image_3d_depth ),
+    ADD_TEST( min_max_image_array_size ),
+    ADD_TEST( min_max_image_buffer_size ),
+    ADD_TEST( min_max_parameter_size ),
+    ADD_TEST( min_max_samplers ),
+    ADD_TEST( min_max_constant_buffer_size ),
+    ADD_TEST( min_max_constant_args ),
+    ADD_TEST( min_max_compute_units ),
+    ADD_TEST( min_max_address_bits ),
+    ADD_TEST( min_max_single_fp_config ),
+    ADD_TEST( min_max_double_fp_config ),
+    ADD_TEST( min_max_local_mem_size ),
+    ADD_TEST( min_max_kernel_preferred_work_group_size_multiple ),
+    ADD_TEST( min_max_execution_capabilities ),
+    ADD_TEST( min_max_queue_properties ),
+    ADD_TEST( min_max_device_version ),
+    ADD_TEST( min_max_language_version ),
 
-    ADD_TEST(kernel_arg_changes),
-    ADD_TEST(kernel_arg_multi_setup_random),
+    ADD_TEST( kernel_arg_changes ),
+    ADD_TEST( kernel_arg_multi_setup_random ),
 
-    ADD_TEST(native_kernel),
+    ADD_TEST( native_kernel ),
 
-    ADD_TEST(create_context_from_type),
+    ADD_TEST( create_context_from_type ),
 
-    ADD_TEST(platform_extensions),
-    ADD_TEST(get_platform_ids),
-    ADD_TEST(bool_type),
+    ADD_TEST( platform_extensions ),
+    ADD_TEST( get_platform_ids ),
+    ADD_TEST( bool_type ),
 
-    ADD_TEST(repeated_setup_cleanup),
+    ADD_TEST( repeated_setup_cleanup ),
 
-    ADD_TEST(retain_queue_single),
-    ADD_TEST(retain_queue_multiple),
-    ADD_TEST(retain_mem_object_single),
-    ADD_TEST(retain_mem_object_multiple),
-    ADD_TEST(retain_mem_object_set_kernel_arg),
-    ADD_TEST(min_data_type_align_size_alignment),
+    ADD_TEST( retain_queue_single ),
+    ADD_TEST( retain_queue_multiple ),
+    ADD_TEST( retain_mem_object_single ),
+    ADD_TEST( retain_mem_object_multiple ),
+    ADD_TEST( retain_mem_object_set_kernel_arg ),
+    ADD_TEST( min_data_type_align_size_alignment ),
 
-    ADD_TEST_VERSION(context_destructor_callback, Version(3, 0)),
-    ADD_TEST(mem_object_destructor_callback),
-    ADD_TEST(null_buffer_arg),
-    ADD_TEST(get_buffer_info),
-    ADD_TEST(get_image2d_info),
-    ADD_TEST(get_image3d_info),
-    ADD_TEST(get_image1d_info),
-    ADD_TEST(get_image1d_array_info),
-    ADD_TEST(get_image2d_array_info),
-    ADD_TEST(queue_flush_on_release),
-    ADD_TEST(queue_hint),
-    ADD_TEST(queue_properties),
-    ADD_TEST_VERSION(sub_group_dispatch, Version(2, 1)),
-    ADD_TEST_VERSION(clone_kernel, Version(2, 1)),
-    ADD_TEST_VERSION(zero_sized_enqueue, Version(2, 1)),
-
-    ADD_TEST_VERSION(buffer_properties_queries, Version(3, 0)),
-    ADD_TEST_VERSION(image_properties_queries, Version(3, 0)),
-    ADD_TEST_VERSION(queue_properties_queries, Version(3, 0)),
-    ADD_TEST_VERSION(pipe_properties_queries, Version(3, 0)),
-
-    ADD_TEST_VERSION(consistency_svm, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_memory_model, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_device_enqueue, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_pipes, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_progvar, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_non_uniform_work_group, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_read_write_images, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_2d_image_from_buffer, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_depth_images, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_device_and_host_timer, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_il_programs, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_subgroups, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_prog_ctor_dtor, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_3d_image_writes, Version(3, 0)),
-
-    ADD_TEST(min_image_formats),
-    ADD_TEST(negative_get_platform_info),
-    ADD_TEST(negative_get_platform_ids),
+    ADD_TEST( mem_object_destructor_callback ),
+    ADD_TEST( null_buffer_arg ),
+    ADD_TEST( get_buffer_info ),
+    ADD_TEST( get_image2d_info ),
+    ADD_TEST( get_image3d_info ),
+    ADD_TEST( get_image1d_info ),
+    ADD_TEST( get_image1d_array_info ),
+    ADD_TEST( get_image2d_array_info ),
+    ADD_TEST( queue_hint ),
+    ADD_TEST( queue_properties ),
+    ADD_TEST_VERSION( sub_group_dispatch, Version(2, 1) ),
+    ADD_TEST_VERSION( clone_kernel, Version(2, 1) ),
+    ADD_TEST_VERSION( zero_sized_enqueue, Version(2, 1) ),
 };
 
-const int test_num = ARRAY_SIZE(test_list);
+const int test_num = ARRAY_SIZE( test_list );
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
 }
+

diff --git a/test_conformance/api/negative_platform.cpp b/test_conformance/api/negative_platform.cpp
deleted file mode 100644
index 7d9de5d..0000000
--- a/test_conformance/api/negative_platform.cpp
+++ /dev/null

@@ -1,74 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "testBase.h"
-
-int test_negative_get_platform_ids(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements)
-{
-    cl_platform_id platform;
-    cl_int err = clGetPlatformIDs(0, &platform, nullptr);
-    test_failure_error_ret(
-        err, CL_INVALID_VALUE,
-        "clGetPlatformIDs should return CL_INVALID_VALUE when: \"num_entries "
-        "is equal to zero and platforms is not NULL\"",
-        TEST_FAIL);
-
-    err = clGetPlatformIDs(1, nullptr, nullptr);
-    test_failure_error_ret(
-        err, CL_INVALID_VALUE,
-        "clGetPlatformIDs should return CL_INVALID_VALUE when: \"both "
-        "num_platforms and platforms are NULL\"",
-        TEST_FAIL);
-
-    return TEST_PASS;
-}
-
-int test_negative_get_platform_info(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements)
-{
-    cl_platform_id platform = getPlatformFromDevice(deviceID);
-
-    cl_int err =
-        clGetPlatformInfo(reinterpret_cast<cl_platform_id>(deviceID),
-                          CL_PLATFORM_VERSION, sizeof(char*), nullptr, nullptr);
-    test_failure_error_ret(
-        err, CL_INVALID_PLATFORM,
-        "clGetPlatformInfo should return CL_INVALID_PLATFORM  when: \"platform "
-        "is not a valid platform\" using a valid object which is NOT a "
-        "platform",
-        TEST_FAIL);
-
-    constexpr cl_platform_info INVALID_PARAM_VALUE = 0;
-    err = clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
-    test_failure_error_ret(
-        err, CL_INVALID_VALUE,
-        "clGetPlatformInfo should return CL_INVALID_VALUE when: \"param_name "
-        "is not one of the supported values\"",
-        TEST_FAIL);
-
-    char* version;
-    err =
-        clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, &version, nullptr);
-    test_failure_error_ret(
-        err, CL_INVALID_VALUE,
-        "clGetPlatformInfo should return CL_INVALID_VALUE when: \"size in "
-        "bytes specified by param_value_size is < size of return type and "
-        "param_value is not a NULL value\"",
-        TEST_FAIL);
-
-    return TEST_PASS;
-}

diff --git a/test_conformance/api/procs.h b/test_conformance/api/procs.h
index 1bcb311..10b3ea3 100644
--- a/test_conformance/api/procs.h
+++ b/test_conformance/api/procs.h

@@ -47,10 +47,6 @@
 extern int        test_release_during_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
 extern int        test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_kernel_private_memory_size(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
 extern int        test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
@@ -102,14 +98,7 @@
 extern int        test_retain_mem_object_set_kernel_arg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
 
-extern int test_context_destructor_callback(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
-extern int test_mem_object_destructor_callback(cl_device_id deviceID,
-                                               cl_context context,
-                                               cl_command_queue queue,
-                                               int num_elements);
+extern int        test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
 extern int      test_null_buffer_arg( cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements );
 extern int      test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
@@ -119,88 +108,9 @@
 extern int      test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
 extern int      test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
 extern int      test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
+extern int      test_get_kernel_arg_info_compatibility( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
 extern int      test_queue_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_clone_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_zero_sized_enqueue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_queue_properties( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
-extern int test_queue_flush_on_release(cl_device_id deviceID,
-                                       cl_context context,
-                                       cl_command_queue queue,
-                                       int num_elements);
-extern int test_buffer_properties_queries(cl_device_id deviceID,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements);
-extern int test_image_properties_queries(cl_device_id deviceID,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         int num_elements);
-extern int test_queue_properties_queries(cl_device_id deviceID,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         int num_elements);
-int test_pipe_properties_queries(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements);
-
-extern int test_consistency_svm(cl_device_id deviceID, cl_context context,
-                                cl_command_queue queue, int num_elements);
-extern int test_consistency_memory_model(cl_device_id deviceID,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         int num_elements);
-extern int test_consistency_device_enqueue(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_consistency_pipes(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements);
-extern int test_consistency_progvar(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
-extern int test_consistency_non_uniform_work_group(cl_device_id deviceID,
-                                                   cl_context context,
-                                                   cl_command_queue queue,
-                                                   int num_elements);
-extern int test_consistency_read_write_images(cl_device_id deviceID,
-                                              cl_context context,
-                                              cl_command_queue queue,
-                                              int num_elements);
-extern int test_consistency_2d_image_from_buffer(cl_device_id deviceID,
-                                                 cl_context context,
-                                                 cl_command_queue queue,
-                                                 int num_elements);
-extern int test_consistency_depth_images(cl_device_id deviceID,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         int num_elements);
-extern int test_consistency_device_and_host_timer(cl_device_id deviceID,
-                                                  cl_context context,
-                                                  cl_command_queue queue,
-                                                  int num_elements);
-extern int test_consistency_il_programs(cl_device_id deviceID,
-                                        cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements);
-extern int test_consistency_subgroups(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements);
-extern int test_consistency_prog_ctor_dtor(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_consistency_3d_image_writes(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
-
-extern int test_min_image_formats(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements);
-extern int test_negative_get_platform_info(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_negative_get_platform_ids(cl_device_id deviceID,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements);
-extern int test_kernel_attributes(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements);

diff --git a/test_conformance/api/test_api_consistency.cpp b/test_conformance/api/test_api_consistency.cpp
deleted file mode 100644
index d6c4bba..0000000
--- a/test_conformance/api/test_api_consistency.cpp
+++ /dev/null

@@ -1,1150 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#include "harness/testHarness.h"
-#include "harness/deviceInfo.h"
-
-static const char* test_kernel = R"CLC(
-__kernel void test(__global int* dst) {
-    dst[0] = 0;
-}
-)CLC";
-
-int test_consistency_svm(cl_device_id deviceID, cl_context context,
-                         cl_command_queue queue, int num_elements)
-{
-    // clGetDeviceInfo, passing CL_DEVICE_SVM_CAPABILITIES:
-    // May return 0, indicating that device does not support Shared Virtual
-    // Memory.
-    cl_int error;
-
-    const size_t allocSize = 16;
-    clMemWrapper mem;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    cl_device_svm_capabilities svmCaps = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES,
-                            sizeof(svmCaps), &svmCaps, NULL);
-    test_error(error, "Unable to query CL_DEVICE_SVM_CAPABILITIES");
-
-    if (svmCaps == 0)
-    {
-        // Test setup:
-
-        mem =
-            clCreateBuffer(context, CL_MEM_READ_WRITE, allocSize, NULL, &error);
-        test_error(error, "Unable to create test buffer");
-
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            &test_kernel, "test");
-        test_error(error, "Unable to create test kernel");
-
-        // clGetMemObjectInfo, passing CL_MEM_USES_SVM_POINTER
-        // Returns CL_FALSE if no devices in the context associated with
-        // memobj support Shared Virtual Memory.
-        cl_bool usesSVMPointer;
-        error =
-            clGetMemObjectInfo(mem, CL_MEM_USES_SVM_POINTER,
-                               sizeof(usesSVMPointer), &usesSVMPointer, NULL);
-        test_error(error, "Unable to query CL_MEM_USES_SVM_POINTER");
-        test_assert_error(usesSVMPointer == CL_FALSE,
-                          "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-                          "CL_MEM_USES_SVM_POINTER did not return CL_FALSE");
-
-        // Check that the SVM APIs can be called.
-
-        // Returns NULL if no devices in context support Shared Virtual Memory.
-        void* ptr0 = clSVMAlloc(context, CL_MEM_READ_WRITE, allocSize, 0);
-        void* ptr1 = clSVMAlloc(context, CL_MEM_READ_WRITE, allocSize, 0);
-        test_assert_error(ptr0 == NULL && ptr1 == NULL,
-                          "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-                          "clSVMAlloc returned a non-NULL value");
-
-        // clEnqueueSVMFree, clEnqueueSVMMemcpy, clEnqueueSVMMemFill,
-        // clEnqueueSVMMap, clEnqueueSVMUnmap, clEnqueueSVMMigrateMem Returns
-        // CL_INVALID_OPERATION if the device associated with command_queue does
-        // not support Shared Virtual Memory.
-
-        // These calls purposefully pass bogus pointers to the functions to
-        // better test that they are a NOP when SVM is not supported.
-        void* bogus0 = (void*)0xDEADBEEF;
-        void* bogus1 = (void*)0xDEADDEAD;
-        cl_uint pattern = 0xAAAAAAAA;
-        error = clEnqueueSVMMemFill(queue, bogus0, &pattern, sizeof(pattern),
-                                    allocSize, 0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but clEnqueueSVMMemFill did "
-            "not return CL_INVALID_OPERATION");
-
-        error = clEnqueueSVMMemcpy(queue, CL_TRUE, bogus1, bogus0, allocSize, 0,
-                                   NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clEnqueueSVMMemcpy did not return CL_INVALID_OPERATION");
-
-        error = clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, bogus1, allocSize,
-                                0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clEnqueueSVMMap did not return CL_INVALID_OPERATION");
-
-        error = clEnqueueSVMUnmap(queue, bogus1, 0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clEnqueueSVMUnmap did not return CL_INVALID_OPERATION");
-
-        error = clEnqueueSVMMigrateMem(queue, 1, (const void**)&bogus1, NULL, 0,
-                                       0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clEnqueueSVMMigrateMem did not return CL_INVALID_OPERATION");
-
-        // If the enqueue calls above did not return errors, a clFinish would be
-        // needed here to ensure the SVM operations are complete before freeing
-        // the SVM pointers.
-
-        clSVMFree(context, bogus0);
-        error = clEnqueueSVMFree(queue, 1, &bogus0, NULL, NULL, 0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clEnqueueSVMFree did not return CL_INVALID_OPERATION");
-
-        // If the enqueue calls above did not return errors, a clFinish should
-        // be included here to ensure the enqueued SVM free is complete.
-
-        // clSetKernelArgSVMPointer, clSetKernelExecInfo
-        // Returns CL_INVALID_OPERATION if no devices in the context associated
-        // with kernel support Shared Virtual Memory.
-
-        error = clSetKernelArgSVMPointer(kernel, 0, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clSetKernelArgSVMPointer did not return CL_INVALID_OPERATION");
-
-        error =
-            clSetKernelExecInfo(kernel, CL_KERNEL_EXEC_INFO_SVM_PTRS, 0, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clSetKernelExecInfo did not return CL_INVALID_OPERATION");
-    }
-
-    return TEST_PASS;
-}
-
-static int check_atomic_capabilities(cl_device_atomic_capabilities atomicCaps,
-                                     cl_device_atomic_capabilities requiredCaps)
-{
-    if ((atomicCaps & requiredCaps) != requiredCaps)
-    {
-        log_error("Atomic capabilities %llx is missing support for at least "
-                  "one required capability %llx!\n",
-                  atomicCaps, requiredCaps);
-        return TEST_FAIL;
-    }
-
-    if ((atomicCaps & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) != 0
-        && (atomicCaps & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0)
-    {
-        log_error("Support for CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES requires "
-                  "support for CL_DEVICE_ATOMIC_SCOPE_DEVICE!\n");
-        return TEST_FAIL;
-    }
-
-    if ((atomicCaps & CL_DEVICE_ATOMIC_SCOPE_DEVICE) != 0
-        && (atomicCaps & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) == 0)
-    {
-        log_error("Support for CL_DEVICE_ATOMIC_SCOPE_DEVICE requires "
-                  "support for CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP!\n");
-        return TEST_FAIL;
-    }
-
-    if ((atomicCaps & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) != 0
-        && (atomicCaps & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0)
-    {
-        log_error("Support for CL_DEVICE_ATOMIC_ORDER_SEQ_CST requires "
-                  "support for CL_DEVICE_ATOMIC_ORDER_ACQ_REL!\n");
-        return TEST_FAIL;
-    }
-
-    if ((atomicCaps & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) != 0
-        && (atomicCaps & CL_DEVICE_ATOMIC_ORDER_RELAXED) == 0)
-    {
-        log_error("Support for CL_DEVICE_ATOMIC_ORDER_ACQ_REL requires "
-                  "support for CL_DEVICE_ATOMIC_ORDER_RELAXED!\n");
-        return TEST_FAIL;
-    }
-
-    return TEST_PASS;
-}
-
-int test_consistency_memory_model(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements)
-{
-    cl_int error;
-    cl_device_atomic_capabilities atomicCaps = 0;
-
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-                            sizeof(atomicCaps), &atomicCaps, NULL);
-    test_error(error, "Unable to query CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES");
-
-    error = check_atomic_capabilities(atomicCaps,
-                                      CL_DEVICE_ATOMIC_ORDER_RELAXED
-                                          | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP);
-    if (error == TEST_FAIL)
-    {
-        log_error("Checks failed for CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES\n");
-        return error;
-    }
-
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
-                            sizeof(atomicCaps), &atomicCaps, NULL);
-    test_error(error, "Unable to query CL_DEVICE_ATOMIC_FENCE_CAPABILITIES");
-
-    error = check_atomic_capabilities(atomicCaps,
-                                      CL_DEVICE_ATOMIC_ORDER_RELAXED
-                                          | CL_DEVICE_ATOMIC_ORDER_ACQ_REL
-                                          | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP);
-    if (error == TEST_FAIL)
-    {
-        log_error("Checks failed for CL_DEVICE_ATOMIC_FENCE_CAPABILITIES\n");
-        return error;
-    }
-
-    return TEST_PASS;
-}
-
-int test_consistency_device_enqueue(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements)
-{
-    // clGetDeviceInfo, passing CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES
-    // May return 0, indicating that device does not support Device-Side Enqueue
-    // and On-Device Queues.
-    cl_int error;
-
-    cl_device_device_enqueue_capabilities dseCaps = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES,
-                            sizeof(dseCaps), &dseCaps, NULL);
-    test_error(error, "Unable to query CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES");
-
-    if (dseCaps == 0)
-    {
-        // clGetDeviceInfo, passing CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES
-        // Returns 0 if device does not support Device-Side Enqueue and
-        // On-Device Queues.
-
-        cl_command_queue_properties devQueueProps = 0;
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
-                                sizeof(devQueueProps), &devQueueProps, NULL);
-        test_error(error,
-                   "Unable to query CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES");
-        test_assert_error(
-            devQueueProps == 0,
-            "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 but "
-            "CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES returned a non-zero value");
-
-        // clGetDeviceInfo, passing
-        // CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE,
-        // CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
-        // CL_DEVICE_MAX_ON_DEVICE_QUEUES, or
-        // CL_DEVICE_MAX_ON_DEVICE_EVENTS
-        // Returns 0 if device does not support Device-Side Enqueue and
-        // On-Device Queues.
-
-        cl_uint u = 0;
-
-        error =
-            clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE,
-                            sizeof(u), &u, NULL);
-        test_error(error,
-                   "Unable to query CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE");
-        test_assert_error(u == 0,
-                          "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 "
-                          "but CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE "
-                          "returned a non-zero value");
-
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
-                                sizeof(u), &u, NULL);
-        test_error(error, "Unable to query CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE");
-        test_assert_error(
-            u == 0,
-            "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 but "
-            "CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE returned a non-zero value");
-
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_ON_DEVICE_QUEUES,
-                                sizeof(u), &u, NULL);
-        test_error(error, "Unable to query CL_DEVICE_MAX_ON_DEVICE_QUEUES");
-        test_assert_error(
-            u == 0,
-            "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 but "
-            "CL_DEVICE_MAX_ON_DEVICE_QUEUES returned a non-zero value");
-
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_ON_DEVICE_EVENTS,
-                                sizeof(u), &u, NULL);
-        test_error(error, "Unable to query CL_DEVICE_MAX_ON_DEVICE_EVENTS");
-        test_assert_error(
-            u == 0,
-            "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 but "
-            "CL_DEVICE_MAX_ON_DEVICE_EVENTS returned a non-zero value");
-
-        // clGetCommandQueueInfo, passing CL_QUEUE_SIZE
-        // Returns CL_INVALID_COMMAND_QUEUE since command_queue cannot be a
-        // valid device command-queue.
-
-        error =
-            clGetCommandQueueInfo(queue, CL_QUEUE_SIZE, sizeof(u), &u, NULL);
-        test_failure_error(
-            error, CL_INVALID_COMMAND_QUEUE,
-            "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 but "
-            "CL_QUEUE_SIZE did not return CL_INVALID_COMMAND_QUEUE");
-
-        cl_command_queue q = NULL;
-        error = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE_DEFAULT, sizeof(q),
-                                      &q, NULL);
-        test_error(error, "Unable to query CL_QUEUE_DEVICE_DEFAULT");
-        test_assert_error(
-            q == NULL,
-            "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 but "
-            "CL_QUEUE_DEVICE_DEFAULT returned a non-NULL value");
-
-        // clSetDefaultDeviceCommandQueue
-        // Returns CL_INVALID_OPERATION if device does not support On-Device
-        // Queues.
-        error = clSetDefaultDeviceCommandQueue(context, deviceID, NULL);
-        test_failure_error(error, CL_INVALID_OPERATION,
-                           "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 "
-                           "but clSetDefaultDeviceCommandQueue did not return "
-                           "CL_INVALID_OPERATION");
-    }
-    else
-    {
-        if ((dseCaps & CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT) == 0)
-        {
-            // clSetDefaultDeviceCommandQueue
-            // Returns CL_INVALID_OPERATION if device does not support a
-            // replaceable default On-Device Queue.
-            error = clSetDefaultDeviceCommandQueue(context, deviceID, NULL);
-            test_failure_error(
-                error, CL_INVALID_OPERATION,
-                "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES did not "
-                "include CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT but "
-                "clSetDefaultDeviceCommandQueue did not return "
-                "CL_INVALID_OPERATION");
-        }
-
-        // If CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT is set,
-        // CL_DEVICE_QUEUE_SUPPORTED must also be set.
-        if ((dseCaps & CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT) != 0
-            && (dseCaps & CL_DEVICE_QUEUE_SUPPORTED) == 0)
-        {
-            log_error("DEVICE_QUEUE_REPLACEABLE_DEFAULT is set but "
-                      "DEVICE_QUEUE_SUPPORTED is not set\n");
-            return TEST_FAIL;
-        }
-
-        // Devices that set CL_DEVICE_QUEUE_SUPPORTED must also return CL_TRUE
-        // for CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT.
-        if ((dseCaps & CL_DEVICE_QUEUE_SUPPORTED) != 0)
-        {
-            cl_bool b;
-            error = clGetDeviceInfo(deviceID,
-                                    CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT,
-                                    sizeof(b), &b, NULL);
-            test_error(
-                error,
-                "Unable to query CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT");
-            test_assert_error(
-                b == CL_TRUE,
-                "DEVICE_QUEUE_SUPPORTED is set but "
-                "CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT returned CL_FALSE");
-        }
-    }
-
-    return TEST_PASS;
-}
-
-int test_consistency_pipes(cl_device_id deviceID, cl_context context,
-                           cl_command_queue queue, int num_elements)
-{
-    // clGetDeviceInfo, passing CL_DEVICE_PIPE_SUPPORT
-    // May return CL_FALSE, indicating that device does not support Pipes.
-    cl_int error;
-
-    cl_bool pipeSupport = CL_FALSE;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_SUPPORT,
-                            sizeof(pipeSupport), &pipeSupport, NULL);
-    test_error(error, "Unable to query CL_DEVICE_PIPE_SUPPORT");
-
-    if (pipeSupport == CL_FALSE)
-    {
-        // clGetDeviceInfo, passing
-        // CL_DEVICE_MAX_PIPE_ARGS,
-        // CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, or
-        // CL_DEVICE_PIPE_MAX_PACKET_SIZE
-        // Returns 0 if device does not support Pipes.
-
-        cl_uint u = 0;
-
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PIPE_ARGS, sizeof(u),
-                                &u, NULL);
-        test_error(error, "Unable to query CL_DEVICE_MAX_PIPE_ARGS");
-        test_assert_error(u == 0,
-                          "CL_DEVICE_PIPE_SUPPORT returned CL_FALSE, but "
-                          "CL_DEVICE_MAX_PIPE_ARGS returned a non-zero value");
-
-        error =
-            clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS,
-                            sizeof(u), &u, NULL);
-        test_error(error,
-                   "Unable to query CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS");
-        test_assert_error(u == 0,
-                          "CL_DEVICE_PIPE_SUPPORT returned CL_FALSE, but "
-                          "CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS returned "
-                          "a non-zero value");
-
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_MAX_PACKET_SIZE,
-                                sizeof(u), &u, NULL);
-        test_error(error, "Unable to query CL_DEVICE_PIPE_MAX_PACKET_SIZE");
-        test_assert_error(
-            u == 0,
-            "CL_DEVICE_PIPE_SUPPORT returned CL_FALSE, but "
-            "CL_DEVICE_PIPE_MAX_PACKET_SIZE returned a non-zero value");
-
-        // clCreatePipe
-        // Returns CL_INVALID_OPERATION if no devices in context support Pipes.
-        clMemWrapper mem =
-            clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, 4, 4, NULL, &error);
-        test_failure_error(error, CL_INVALID_OPERATION,
-                           "CL_DEVICE_PIPE_SUPPORT returned CL_FALSE but "
-                           "clCreatePipe did not return CL_INVALID_OPERATION");
-
-        // clGetPipeInfo
-        // Returns CL_INVALID_MEM_OBJECT since pipe cannot be a valid pipe
-        // object.
-        clMemWrapper not_a_pipe =
-            clCreateBuffer(context, CL_MEM_READ_WRITE, 4, NULL, &error);
-        test_error(error, "Unable to create non-pipe buffer");
-
-        error =
-            clGetPipeInfo(not_a_pipe, CL_PIPE_PACKET_SIZE, sizeof(u), &u, NULL);
-        test_failure_error(
-            error, CL_INVALID_MEM_OBJECT,
-            "CL_DEVICE_PIPE_SUPPORT returned CL_FALSE but clGetPipeInfo did "
-            "not return CL_INVALID_MEM_OBJECT");
-    }
-    else
-    {
-        // Devices that support pipes must also return CL_TRUE
-        // for CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT.
-        cl_bool b;
-        error =
-            clGetDeviceInfo(deviceID, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT,
-                            sizeof(b), &b, NULL);
-        test_error(error,
-                   "Unable to query CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT");
-        test_assert_error(
-            b == CL_TRUE,
-            "CL_DEVICE_PIPE_SUPPORT returned CL_TRUE but "
-            "CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT returned CL_FALSE");
-    }
-
-    return TEST_PASS;
-}
-
-int test_consistency_progvar(cl_device_id deviceID, cl_context context,
-                             cl_command_queue queue, int num_elements)
-{
-    // clGetDeviceInfo, passing CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE
-    // May return 0, indicating that device does not support Program Scope
-    // Global Variables.
-    cl_int error;
-
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    size_t maxGlobalVariableSize = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
-                            sizeof(maxGlobalVariableSize),
-                            &maxGlobalVariableSize, NULL);
-    test_error(error, "Unable to query CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE");
-
-    if (maxGlobalVariableSize == 0)
-    {
-        // Test setup:
-
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            &test_kernel, "test");
-        test_error(error, "Unable to create test kernel");
-
-        size_t sz = SIZE_MAX;
-
-        // clGetDeviceInfo, passing
-        // CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE
-        // Returns 0 if device does not support Program Scope Global Variables.
-
-        error = clGetDeviceInfo(deviceID,
-                                CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE,
-                                sizeof(sz), &sz, NULL);
-        test_error(
-            error,
-            "Unable to query CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE");
-        test_assert_error(
-            sz == 0,
-            "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE returned 0 but "
-            "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE returned a "
-            "non-zero value");
-
-        // clGetProgramBuildInfo, passing
-        // CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE
-        // Returns 0 if device does not support Program Scope Global Variables.
-
-        error = clGetProgramBuildInfo(
-            program, deviceID, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
-            sizeof(sz), &sz, NULL);
-        test_error(
-            error,
-            "Unable to query CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE");
-        test_assert_error(sz == 0,
-                          "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE returned 0 "
-                          "but CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE "
-                          "returned a non-zero value");
-    }
-
-    return TEST_PASS;
-}
-
-int test_consistency_non_uniform_work_group(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements)
-{
-    // clGetDeviceInfo, passing CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT:
-    // May return CL_FALSE, indicating that device does not support Non-Uniform
-    // Work Groups.
-    cl_int error;
-
-    const size_t allocSize = 16;
-    clMemWrapper mem;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    cl_bool nonUniformWorkGroupSupport = CL_FALSE;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
-                            sizeof(nonUniformWorkGroupSupport),
-                            &nonUniformWorkGroupSupport, NULL);
-    test_error(error,
-               "Unable to query CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT");
-
-    if (nonUniformWorkGroupSupport == CL_FALSE)
-    {
-        // Test setup:
-
-        mem =
-            clCreateBuffer(context, CL_MEM_READ_WRITE, allocSize, NULL, &error);
-        test_error(error, "Unable to create test buffer");
-
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            &test_kernel, "test");
-        test_error(error, "Unable to create test kernel");
-
-        error = clSetKernelArg(kernel, 0, sizeof(mem), &mem);
-
-        // clEnqueueNDRangeKernel
-        // Behaves as though Non-Uniform Work Groups were not enabled for
-        // kernel, if the device associated with command_queue does not support
-        // Non-Uniform Work Groups.
-
-        size_t global_work_size[] = { 3, 3, 3 };
-        size_t local_work_size[] = { 2, 2, 2 };
-
-        // First, check that a NULL local work size succeeds.
-        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
-                                       NULL, 0, NULL, NULL);
-        test_error(error,
-                   "Unable to enqueue kernel with a NULL local work size");
-
-        error = clFinish(queue);
-        test_error(error, "Error calling clFinish after NULL local work size");
-
-        // 1D non-uniform work group:
-        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
-                                       local_work_size, 0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_WORK_GROUP_SIZE,
-            "CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT returned CL_FALSE but 1D "
-            "clEnqueueNDRangeKernel did not return CL_INVALID_WORK_GROUP_SIZE");
-
-        // 2D non-uniform work group:
-        global_work_size[0] = local_work_size[0];
-        error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_work_size,
-                                       local_work_size, 0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_WORK_GROUP_SIZE,
-            "CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT returned CL_FALSE but 2D "
-            "clEnqueueNDRangeKernel did not return CL_INVALID_WORK_GROUP_SIZE");
-
-        // 3D non-uniform work group:
-        global_work_size[1] = local_work_size[1];
-        error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size,
-                                       local_work_size, 0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_WORK_GROUP_SIZE,
-            "CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT returned CL_FALSE but 3D "
-            "clEnqueueNDRangeKernel did not return CL_INVALID_WORK_GROUP_SIZE");
-    }
-
-    return TEST_PASS;
-}
-
-int test_consistency_read_write_images(cl_device_id deviceID,
-                                       cl_context context,
-                                       cl_command_queue queue, int num_elements)
-{
-    // clGetDeviceInfo, passing
-    // CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS May return 0,
-    // indicating that device does not support Read-Write Images.
-    cl_int error;
-
-    cl_uint maxReadWriteImageArgs = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
-                            sizeof(maxReadWriteImageArgs),
-                            &maxReadWriteImageArgs, NULL);
-    test_error(error,
-               "Unable to query "
-               "CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS");
-
-    // clGetSupportedImageFormats, passing
-    // CL_MEM_KERNEL_READ_AND_WRITE
-    // Returns an empty set (such as num_image_formats equal to 0), indicating
-    // that no image formats are supported for reading and writing in the same
-    // kernel, if no devices in context support Read-Write Images.
-
-    cl_uint totalReadWriteImageFormats = 0;
-
-    const cl_mem_object_type image_types[] = {
-        CL_MEM_OBJECT_IMAGE1D,       CL_MEM_OBJECT_IMAGE1D_BUFFER,
-        CL_MEM_OBJECT_IMAGE2D,       CL_MEM_OBJECT_IMAGE3D,
-        CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY,
-    };
-    for (int i = 0; i < ARRAY_SIZE(image_types); i++)
-    {
-        cl_uint numImageFormats = 0;
-        error = clGetSupportedImageFormats(
-            context, CL_MEM_KERNEL_READ_AND_WRITE, image_types[i], 0, NULL,
-            &numImageFormats);
-        test_error(error,
-                   "Unable to query number of CL_MEM_KERNEL_READ_AND_WRITE "
-                   "image formats");
-
-        totalReadWriteImageFormats += numImageFormats;
-    }
-
-    if (maxReadWriteImageArgs == 0)
-    {
-        test_assert_error(
-            totalReadWriteImageFormats == 0,
-            "CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS returned 0 "
-            "but clGetSupportedImageFormats(CL_MEM_KERNEL_READ_AND_WRITE) "
-            "returned a non-empty set");
-    }
-    else
-    {
-        test_assert_error(
-            totalReadWriteImageFormats != 0,
-            "CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS is non-zero "
-            "but clGetSupportedImageFormats(CL_MEM_KERNEL_READ_AND_WRITE) "
-            "returned an empty set");
-    }
-
-    return TEST_PASS;
-}
-
-int test_consistency_2d_image_from_buffer(cl_device_id deviceID,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements)
-{
-    // clGetDeviceInfo, passing CL_DEVICE_IMAGE_PITCH_ALIGNMENT or
-    // CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
-    // May return 0, indicating that device does not support Creating a 2D Image
-    // from a Buffer.
-    cl_int error;
-
-    const cl_image_format imageFormat = { CL_RGBA, CL_UNORM_INT8 };
-    const size_t imageDim = 2;
-    const size_t elementSize = 4;
-    const size_t bufferSize = imageDim * imageDim * elementSize;
-
-    clMemWrapper buffer;
-    clMemWrapper image;
-
-    cl_uint imagePitchAlignment = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_PITCH_ALIGNMENT,
-                            sizeof(imagePitchAlignment), &imagePitchAlignment,
-                            NULL);
-    test_error(error,
-               "Unable to query "
-               "CL_DEVICE_IMAGE_PITCH_ALIGNMENT");
-
-    cl_uint imageBaseAddressAlignment = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT,
-                            sizeof(imageBaseAddressAlignment),
-                            &imageBaseAddressAlignment, NULL);
-    test_error(error,
-               "Unable to query "
-               "CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT");
-
-    bool supports_cl_khr_image2d_from_buffer =
-        is_extension_available(deviceID, "cl_khr_image2d_from_buffer");
-
-    if (imagePitchAlignment == 0 || imageBaseAddressAlignment == 0)
-    {
-        // This probably means that Creating a 2D Image from a Buffer is not
-        // supported.
-
-        // Test setup:
-        buffer =
-            clCreateBuffer(context, CL_MEM_READ_ONLY, bufferSize, NULL, &error);
-        test_error(error, "Unable to create test buffer");
-
-        // Check that both queries return zero:
-        test_assert_error(
-            imagePitchAlignment == 0,
-            "CL_DEVICE_IMAGE_PITCH_ALIGNMENT returned a non-zero value but "
-            "CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT returned 0");
-        test_assert_error(
-            imageBaseAddressAlignment == 0,
-            "CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT returned a non-zero value "
-            "but CL_DEVICE_IMAGE_PITCH_ALIGNMENT returned 0");
-
-        // clGetDeviceInfo, passing CL_DEVICE_EXTENSIONS
-        // Will not describe support for the cl_khr_image2d_from_buffer
-        // extension if device does not support Creating a 2D Image from a
-        // Buffer.
-        test_assert_error(supports_cl_khr_image2d_from_buffer == false,
-                          "Device does not support Creating a 2D Image from a "
-                          "Buffer but does support cl_khr_image2d_from_buffer");
-
-        // clCreateImage or clCreateImageWithProperties, passing image_type
-        // equal to CL_MEM_OBJECT_IMAGE2D and mem_object not equal to
-        // NULL
-        // Returns CL_INVALID_OPERATION if no devices in context support
-        // Creating a 2D Image from a Buffer.
-
-        cl_image_desc imageDesc = { 0 };
-        imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
-        imageDesc.image_width = imageDim;
-        imageDesc.image_height = imageDim;
-        imageDesc.mem_object = buffer;
-
-        image = clCreateImage(context, CL_MEM_READ_ONLY, &imageFormat,
-                              &imageDesc, NULL, &error);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "Device does not support Creating a 2D Image from a "
-            "Buffer but clCreateImage did not return CL_INVALID_OPERATION");
-
-        image =
-            clCreateImageWithProperties(context, NULL, CL_MEM_READ_ONLY,
-                                        &imageFormat, &imageDesc, NULL, &error);
-        test_failure_error(error, CL_INVALID_OPERATION,
-                           "Device does not support Creating a 2D Image from a "
-                           "Buffer but clCreateImageWithProperties did not "
-                           "return CL_INVALID_OPERATION");
-    }
-    else
-    {
-        test_assert_error(supports_cl_khr_image2d_from_buffer,
-                          "Device supports Creating a 2D Image from a Buffer "
-                          "but does not support cl_khr_image2d_from_buffer");
-    }
-
-    return TEST_PASS;
-}
-
-// Nothing needed for sRGB Images:
-// All of the sRGB Image Channel Orders (such as CL_sRGBA) are optional for
-// devices supporting OpenCL 3.0.
-
-int test_consistency_depth_images(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements)
-{
-    // The CL_DEPTH Image Channel Order is optional for devices supporting
-    // OpenCL 3.0.
-    cl_int error;
-
-    cl_uint totalDepthImageFormats = 0;
-
-    const cl_mem_flags mem_flags[] = {
-        CL_MEM_WRITE_ONLY,
-        CL_MEM_READ_WRITE,
-        CL_MEM_KERNEL_READ_AND_WRITE,
-    };
-    for (int i = 0; i < ARRAY_SIZE(mem_flags); i++)
-    {
-        cl_uint numImageFormats = 0;
-        error = clGetSupportedImageFormats(context, mem_flags[i],
-                                           CL_MEM_OBJECT_IMAGE2D, 0, NULL,
-                                           &numImageFormats);
-        test_error(
-            error,
-            "Unable to query number of CL_MEM_OBJECT_IMAGE2D image formats");
-
-        std::vector<cl_image_format> imageFormats(numImageFormats);
-        error = clGetSupportedImageFormats(
-            context, mem_flags[i], CL_MEM_OBJECT_IMAGE2D, imageFormats.size(),
-            imageFormats.data(), NULL);
-        test_error(error,
-                   "Unable to query CL_MEM_OBJECT_IMAGE2D image formats");
-        for (auto& imageFormat : imageFormats)
-        {
-            if (imageFormat.image_channel_order == CL_DEPTH)
-            {
-                totalDepthImageFormats++;
-            }
-        }
-    }
-
-    bool supports_cl_khr_depth_images =
-        is_extension_available(deviceID, "cl_khr_depth_images");
-
-    if (totalDepthImageFormats == 0)
-    {
-        test_assert_error(supports_cl_khr_depth_images == false,
-                          "Device does not support Depth Images but does "
-                          "support cl_khr_depth_images");
-    }
-    else
-    {
-        test_assert_error(supports_cl_khr_depth_images,
-                          "Device supports Depth Images but does not support "
-                          "cl_khr_depth_images");
-    }
-
-    return TEST_PASS;
-}
-
-int test_consistency_device_and_host_timer(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements)
-{
-    // clGetPlatformInfo, passing CL_PLATFORM_HOST_TIMER_RESOLUTION
-    // May return 0, indicating that platform does not support Device and Host
-    // Timer Synchronization.
-    cl_int error;
-
-    cl_platform_id platform = NULL;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform),
-                            &platform, NULL);
-    test_error(error, "Unable to query CL_DEVICE_PLATFORM");
-
-    cl_ulong hostTimerResolution = 0;
-    error = clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
-                              sizeof(hostTimerResolution), &hostTimerResolution,
-                              NULL);
-    test_error(error, "Unable to query CL_PLATFORM_HOST_TIMER_RESOLUTION");
-
-    if (hostTimerResolution == 0)
-    {
-        // clGetDeviceAndHostTimer, clGetHostTimer
-        // Returns CL_INVALID_OPERATION if the platform associated with device
-        // does not support Device and Host Timer Synchronization.
-
-        cl_ulong dt = 0;
-        cl_ulong ht = 0;
-
-        error = clGetDeviceAndHostTimer(deviceID, &dt, &ht);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_PLATFORM_HOST_TIMER_RESOLUTION returned 0 but "
-            "clGetDeviceAndHostTimer did not return CL_INVALID_OPERATION");
-
-        error = clGetHostTimer(deviceID, &ht);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_PLATFORM_HOST_TIMER_RESOLUTION returned 0 but "
-            "clGetHostTimer did not return CL_INVALID_OPERATION");
-    }
-
-    return TEST_PASS;
-}
-
-int test_consistency_il_programs(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements)
-{
-    // clGetDeviceInfo, passing CL_DEVICE_IL_VERSION or
-    // CL_DEVICE_ILS_WITH_VERSION
-    // May return an empty string and empty array, indicating that device does
-    // not support Intermediate Language Programs.
-    cl_int error;
-
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    // Even if the device does not support Intermediate Language Programs the
-    // size of the string query should not be zero.
-    size_t sz = SIZE_MAX;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_IL_VERSION, 0, NULL, &sz);
-    test_error(error, "Unable to query CL_DEVICE_IL_VERSION");
-    test_assert_error(sz != 0,
-                      "CL_DEVICE_IL_VERSION should return a non-zero size");
-
-    std::string ilVersion = get_device_il_version_string(deviceID);
-
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_ILS_WITH_VERSION, 0, NULL, &sz);
-    test_error(error, "Unable to query CL_DEVICE_ILS_WITH_VERSION");
-
-    if (ilVersion == "" || sz == 0)
-    {
-        // This probably means that Intermediate Language Programs are not
-        // supported.
-
-        // Check that both queries are consistent:
-        test_assert_error(
-            ilVersion == "",
-            "CL_DEVICE_IL_VERSION returned a non-empty string but "
-            "CL_DEVICE_ILS_WITH_VERSION returned no supported ILs");
-
-        test_assert_error(sz == 0,
-                          "CL_DEVICE_ILS_WITH_VERSION returned supported ILs "
-                          "but CL_DEVICE_IL_VERSION returned an empty string");
-
-        bool supports_cl_khr_il_program =
-            is_extension_available(deviceID, "cl_khr_il_program");
-        test_assert_error(supports_cl_khr_il_program == false,
-                          "Device does not support IL Programs but does "
-                          "support cl_khr_il_program");
-
-        // Test setup:
-
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            &test_kernel, "test");
-        test_error(error, "Unable to create test kernel");
-
-        // clGetProgramInfo, passing CL_PROGRAM_IL
-        // Returns an empty buffer (such as param_value_size_ret equal to 0) if
-        // no devices in the context associated with program support
-        // Intermediate Language Programs.
-
-        error = clGetProgramInfo(program, CL_PROGRAM_IL, 0, NULL, &sz);
-        test_error(error, "Unable to query CL_PROGRAM_IL");
-        test_assert_error(sz == 0,
-                          "Device does not support IL Programs but "
-                          "CL_PROGRAM_IL returned a non-zero size");
-
-        // clCreateProgramWithIL
-        // Returns CL_INVALID_OPERATION if no devices in context support
-        // Intermediate Language Programs.
-
-        cl_uint bogus = 0xDEADBEEF;
-        clProgramWrapper ilProgram =
-            clCreateProgramWithIL(context, &bogus, sizeof(bogus), &error);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "Device does not support IL Programs but clCreateProgramWithIL did "
-            "not return CL_INVALID_OPERATION");
-
-        // clSetProgramSpecializationConstant
-        // Returns CL_INVALID_OPERATION if no devices associated with program
-        // support Intermediate Language Programs.
-
-        cl_uint specConst = 42;
-        error = clSetProgramSpecializationConstant(
-            program, 0, sizeof(specConst), &specConst);
-        test_failure_error(error, CL_INVALID_OPERATION,
-                           "Device does not support IL Programs but "
-                           "clSetProgramSpecializationConstant did not return "
-                           "CL_INVALID_OPERATION");
-    }
-
-    return TEST_PASS;
-}
-
-int test_consistency_subgroups(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements)
-{
-    // clGetDeviceInfo, passing CL_DEVICE_MAX_NUM_SUB_GROUPS
-    // May return 0, indicating that device does not support Subgroups.
-    cl_int error;
-
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    cl_uint maxNumSubGroups = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_NUM_SUB_GROUPS,
-                            sizeof(maxNumSubGroups), &maxNumSubGroups, NULL);
-    test_error(error, "Unable to query CL_DEVICE_MAX_NUM_SUB_GROUPS");
-
-    if (maxNumSubGroups == 0)
-    {
-        // Test setup:
-
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            &test_kernel, "test");
-        test_error(error, "Unable to create test kernel");
-
-        // clGetDeviceInfo, passing
-        // CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS
-        // Returns CL_FALSE if device does not support Subgroups.
-
-        cl_bool ifp = CL_FALSE;
-        error = clGetDeviceInfo(
-            deviceID, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
-            sizeof(ifp), &ifp, NULL);
-        test_error(
-            error,
-            "Unable to query CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS");
-        test_assert_error(ifp == CL_FALSE,
-                          "Device does not support Subgroups but "
-                          "CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS "
-                          "did not return CL_FALSE");
-
-        // clGetDeviceInfo, passing CL_DEVICE_EXTENSIONS
-        // Will not describe support for the cl_khr_subgroups extension if
-        // device does not support Subgroups.
-
-        bool supports_cl_khr_subgroups =
-            is_extension_available(deviceID, "cl_khr_subgroups");
-        test_assert_error(supports_cl_khr_subgroups == false,
-                          "Device does not support Subgroups but does "
-                          "support cl_khr_subgroups");
-
-        // clGetKernelSubGroupInfo
-        // Returns CL_INVALID_OPERATION if device does not support Subgroups.
-
-        size_t sz = SIZE_MAX;
-        error = clGetKernelSubGroupInfo(kernel, deviceID,
-                                        CL_KERNEL_MAX_NUM_SUB_GROUPS, 0, NULL,
-                                        sizeof(sz), &sz, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "Device does not support Subgroups but clGetKernelSubGroupInfo did "
-            "not return CL_INVALID_OPERATION");
-    }
-
-    return TEST_PASS;
-}
-
-static void CL_CALLBACK program_callback(cl_program, void*) {}
-
-int test_consistency_prog_ctor_dtor(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements)
-{
-    cl_int error;
-
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    // Test setup:
-
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        &test_kernel, "test");
-    test_error(error, "Unable to create test kernel");
-
-    // clGetProgramInfo, passing CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT or
-    // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT
-    // Returns CL_FALSE if no devices in the context associated with program
-    // support Program Initialization and Clean-Up Kernels.
-
-    cl_bool b = CL_FALSE;
-
-    error = clGetProgramInfo(program, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT,
-                             sizeof(b), &b, NULL);
-    test_error(error, "Unable to query CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT");
-    test_assert_error(
-        b == CL_FALSE,
-        "CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT did not return CL_FALSE");
-
-    error = clGetProgramInfo(program, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT,
-                             sizeof(b), &b, NULL);
-    test_error(error, "Unable to query CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT");
-    test_assert_error(
-        b == CL_FALSE,
-        "CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT did not return CL_FALSE");
-
-    // clSetProgramReleaseCallback
-    // Returns CL_INVALID_OPERATION if no devices in the context associated with
-    // program support Program Initialization and Clean-Up Kernels.
-
-    error = clSetProgramReleaseCallback(program, program_callback, NULL);
-    test_failure_error(
-        error, CL_INVALID_OPERATION,
-        "clSetProgramReleaseCallback did not return CL_INVALID_OPERATION");
-
-    return TEST_PASS;
-}
-
-int test_consistency_3d_image_writes(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements)
-{
-    // clGetSupportedImageFormats, passing CL_MEM_OBJECT_IMAGE3D and one of
-    // CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE, or CL_MEM_KERNEL_READ_AND_WRITE
-    // Returns an empty set (such as num_image_formats equal to 0),
-    // indicating that no image formats are supported for writing to 3D
-    // image objects, if no devices in context support Writing to 3D Image
-    // Objects.
-    cl_int error;
-
-    cl_uint total3DImageWriteFormats = 0;
-
-    const cl_mem_flags mem_flags[] = {
-        CL_MEM_WRITE_ONLY,
-        CL_MEM_READ_WRITE,
-        CL_MEM_KERNEL_READ_AND_WRITE,
-    };
-    for (int i = 0; i < ARRAY_SIZE(mem_flags); i++)
-    {
-        cl_uint numImageFormats = 0;
-        error = clGetSupportedImageFormats(context, mem_flags[i],
-                                           CL_MEM_OBJECT_IMAGE3D, 0, NULL,
-                                           &numImageFormats);
-        test_error(
-            error,
-            "Unable to query number of CL_MEM_OBJECT_IMAGE3D image formats");
-
-        total3DImageWriteFormats += numImageFormats;
-    }
-
-    bool supports_cl_khr_3d_image_writes =
-        is_extension_available(deviceID, "cl_khr_3d_image_writes");
-
-    if (total3DImageWriteFormats == 0)
-    {
-        // clGetDeviceInfo, passing CL_DEVICE_EXTENSIONS
-        // Will not describe support for the cl_khr_3d_image_writes extension if
-        // device does not support Writing to 3D Image Objects.
-        test_assert_error(supports_cl_khr_3d_image_writes == false,
-                          "Device does not support Writing to 3D Image Objects "
-                          "but does support cl_khr_3d_image_writes");
-    }
-    else
-    {
-        test_assert_error(supports_cl_khr_3d_image_writes,
-                          "Device supports Writing to 3D Image Objects but "
-                          "does not support cl_khr_3d_image_writes");
-    }
-
-    return TEST_PASS;
-}

diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp
index 9e981cd..4d90211 100644
--- a/test_conformance/api/test_api_min_max.cpp
+++ b/test_conformance/api/test_api_min_max.cpp

@@ -136,8 +136,7 @@
     }
 
     /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * 100, NULL, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error );
     if( streams[0] == NULL )
     {
         log_error("ERROR: Creating test array failed!\n");
@@ -322,8 +321,7 @@
     test_error( error, "Failed to create the program and kernel.");
     free( programSrc );
 
-    result = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float), NULL,
-                            &error);
+    result = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float), NULL, &error);
     test_error( error, "clCreateBufer failed");
 
     /* Create some I/O streams */
@@ -694,8 +692,7 @@
     PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID )
 
     /* Just get any ol format to test with */
-    error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
-                                   CL_MEM_READ_ONLY, 0, &image_format_desc);
+    error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, 0, &image_format_desc );
     test_error( error, "Unable to obtain suitable image format to test with!" );
 
     /* Get the max 2d image width */
@@ -751,8 +748,7 @@
     PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID )
 
     /* Just get any ol format to test with */
-    error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
-                                   CL_MEM_READ_ONLY, 0, &image_format_desc);
+    error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, 0, &image_format_desc );
     test_error( error, "Unable to obtain suitable image format to test with!" );
 
     /* Get the max 2d image width */
@@ -809,8 +805,7 @@
     PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID )
 
     /* Just get any ol format to test with */
-    error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
-                                   CL_MEM_READ_ONLY, 0, &image_format_desc);
+    error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, 0, &image_format_desc );
     test_error( error, "Unable to obtain suitable image format to test with!" );
 
     /* Get the max 2d image width */
@@ -996,7 +991,6 @@
     size_t decrement;
     cl_event event;
     cl_int event_status;
-    bool embeddedNoLong = gIsEmbedded && !gHasLong;
 
 
     /* Get the max param size */
@@ -1010,9 +1004,8 @@
         return -1;
     }
 
-    /* The embedded profile without cles_khr_int64 extension does not require
-     * longs, so use ints */
-    if (embeddedNoLong)
+    /* The embedded profile does not require longs, so use ints */
+    if(gIsEmbedded)
         numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_int);
     else
         numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_long);
@@ -1028,7 +1021,7 @@
         clMemWrapper mem;
         clKernelWrapper kernel;
 
-        if (embeddedNoLong)
+        if(gIsEmbedded)
         {
             log_info("Trying a kernel with %ld int arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n",
                      numberOfIntParametersToTry, sizeof(cl_int)*numberOfIntParametersToTry, sizeof(cl_mem),
@@ -1099,8 +1092,7 @@
         /* Try to set a large argument to the kernel */
         retVal = 0;
 
-        mem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long), NULL,
-                             &error);
+        mem = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_long), NULL, &error);
         test_error(error, "clCreateBuffer failed");
 
         for (i=0; i<(int)numberOfIntParametersToTry; i++) {
@@ -1254,8 +1246,7 @@
     clMemWrapper image = create_image_2d( context, CL_MEM_READ_WRITE, &format, 16, 16, 0, NULL, &error );
     test_error( error, "Unable to create a test image" );
 
-    clMemWrapper stream =
-        clCreateBuffer(context, CL_MEM_READ_WRITE, 16, NULL, &error);
+    clMemWrapper stream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), 16, NULL, &error );
     test_error( error, "Unable to create test buffer" );
 
     error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &image );
@@ -1356,11 +1347,9 @@
             constantData[i] = (int)genrand_int32(d);
 
         clMemWrapper streams[3];
-        streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                    sizeToAllocate, constantData, &error);
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, constantData, &error);
         test_error( error, "Creating test array failed" );
-        streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
-                                    NULL, &error);
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeToAllocate, NULL, &error);
         test_error( error, "Creating test array failed" );
 
 
@@ -1524,8 +1513,7 @@
     streams = new clMemWrapper[ maxArgs + 1 ];
     for( i = 0; i < maxArgs + 1; i++ )
     {
-        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    individualBufferSize, NULL, &error);
+        streams[i] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), individualBufferSize, NULL, &error);
         test_error( error, "Creating test array failed" );
     }
 
@@ -1670,7 +1658,8 @@
     size_t    threads[1], localThreads[1];
     cl_int *localData, *resultData;
     cl_ulong maxSize, kernelLocalUsage, min_max_local_mem_size;
-    Version device_version;
+    cl_char buffer[ 4098 ];
+    size_t length;
     int i;
     int err = 0;
     MTdata d;
@@ -1679,33 +1668,31 @@
     error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( maxSize ), &maxSize, 0 );
     test_error( error, "Unable to get max local buffer size" );
 
-    try
-    {
-        device_version = get_device_cl_version(deviceID);
-    } catch (const std::runtime_error &e)
-    {
-        log_error("%s", e.what());
-        return -1;
-    }
-
+    // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*"
+    error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length );
+    test_error( error, "Unable to get device version string" );
     if (!gIsEmbedded)
     {
-        if (device_version == Version(1, 0))
+        if( memcmp( buffer, "OpenCL 2.0", strlen( "OpenCL 2.0" ) ) == 0 )
             min_max_local_mem_size = 16L * 1024L;
-        else
+        else if( memcmp( buffer, "OpenCL 2.1", strlen( "OpenCL 2.1" ) ) != 0 )
+            min_max_local_mem_size = 16L * 1024L;
+        else if( memcmp( buffer, "OpenCL 1.2", strlen( "OpenCL 1.2" ) ) != 0 )
+            min_max_local_mem_size = 16L * 1024L;
+        else if( memcmp( buffer, "OpenCL 1.1", strlen( "OpenCL 1.1" ) ) != 0 )
+            min_max_local_mem_size = 16L * 1024L;
+        else if ( memcmp( buffer, "OpenCL 1.0", strlen( "OpenCL 1.0" ) ) != 0 )
             min_max_local_mem_size = 32L * 1024L;
-    }
-    else
-    {
-        min_max_local_mem_size = 1L * 1024L;
+        else
+        {
+            log_error( "ERROR: device version string does not match required format! (returned: %s)\n", (char *)buffer );
+            return -1;
+        }
     }
 
-    if (maxSize < min_max_local_mem_size)
+    if( maxSize < (gIsEmbedded ? 1L * 1024L : min_max_local_mem_size) )
     {
-        const std::string version_as_string = device_version.to_string();
-        log_error("ERROR: Reported local mem size less than required by OpenCL "
-                  "%s (reported %d KB)\n",
-                  version_as_string.c_str(), (int)(maxSize / 1024L));
+        log_error( "ERROR: Reported local mem size less than required by OpenCL 1.1 (reported %dKb)\n", (int)( maxSize / 1024L ) );
         return -1;
     }
 
@@ -1734,11 +1721,9 @@
         localData[i] = (int)genrand_int32(d);
     free_mtdata(d); d = NULL;
 
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate,
-                                localData, &error);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, localData, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
-                                NULL, &error);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeToAllocate, NULL, &error);
     test_error( error, "Creating test array failed" );
 
 

diff --git a/test_conformance/api/test_clone_kernel.cpp b/test_conformance/api/test_clone_kernel.cpp
index 1a7e67a..1f22781 100644
--- a/test_conformance/api/test_clone_kernel.cpp
+++ b/test_conformance/api/test_clone_kernel.cpp

@@ -113,16 +113,15 @@
     clSamplerWrapper sampler;
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNSIGNED_INT8;
-    cl_image_desc imageDesc;
-    memset(&imageDesc, 0x0, sizeof(cl_image_desc));
+	cl_image_desc imageDesc;
+	memset(&imageDesc, 0x0, sizeof(cl_image_desc));
     imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
     imageDesc.image_width = 512;
     imageDesc.image_height = 512;
 
     cl_uint color[4] = {1,3,5,7};
 
-    clProgramWrapper program_read;
-    clProgramWrapper program_write;
+    clProgramWrapper program;
     clKernelWrapper kernel_read;
     clKernelWrapper kernel_write;
     clKernelWrapper kernel_cloned;
@@ -130,16 +129,12 @@
 
     clMemWrapper img;
 
-    if (create_single_kernel_helper(context, &program_read, &kernel_read, 1,
-                                    clone_kernel_test_img, "img_read_kernel")
-        != 0)
+    if( create_single_kernel_helper( context, &program, &kernel_read, 1, clone_kernel_test_img, "img_read_kernel" ) != 0 )
     {
         return -1;
     }
 
-    if (create_single_kernel_helper(context, &program_write, &kernel_write, 1,
-                                    clone_kernel_test_img, "img_write_kernel")
-        != 0)
+    if( create_single_kernel_helper( context, &program, &kernel_write, 1, clone_kernel_test_img, "img_write_kernel" ) != 0 )
     {
         return -1;
     }
@@ -246,8 +241,6 @@
 {
     int error;
     clProgramWrapper program;
-    clProgramWrapper program_buf_read;
-    clProgramWrapper program_buf_write;
     clKernelWrapper kernel;
     clKernelWrapper kernel_pipe_read;
     clKernelWrapper kernel_buf_read;
@@ -279,18 +272,12 @@
         return -1;
     }
 
-    if (create_single_kernel_helper(context, &program_buf_read,
-                                    &kernel_buf_read, 1,
-                                    clone_kernel_test_kernel, "buf_read_kernel")
-        != 0)
+    if( create_single_kernel_helper( context, &program, &kernel_buf_read, 1, clone_kernel_test_kernel, "buf_read_kernel" ) != 0 )
     {
         return -1;
     }
 
-    if (create_single_kernel_helper(
-            context, &program_buf_write, &kernel_buf_write, 1,
-            clone_kernel_test_kernel, "buf_write_kernel")
-        != 0)
+    if( create_single_kernel_helper( context, &program, &kernel_buf_write, 1, clone_kernel_test_kernel, "buf_write_kernel" ) != 0 )
     {
         return -1;
     }

diff --git a/test_conformance/api/test_context_destructor_callback.cpp b/test_conformance/api/test_context_destructor_callback.cpp
deleted file mode 100644
index 1d73a3c..0000000
--- a/test_conformance/api/test_context_destructor_callback.cpp
+++ /dev/null

@@ -1,93 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-
-static volatile cl_int sDestructorIndex;
-
-void CL_CALLBACK context_destructor_callback(cl_context context, void *userData)
-{
-    int *userPtr = (int *)userData;
-
-    // ordering of callbacks is guaranteed, meaning we don't need to do atomic
-    // operation here
-    *userPtr = ++sDestructorIndex;
-}
-
-int test_context_destructor_callback(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements)
-{
-    cl_int error;
-    clContextWrapper localContext =
-        clCreateContext(NULL, 1, &deviceID, NULL, NULL, &error);
-    test_error(error, "Unable to create local context");
-
-    // Set up some variables to catch the order in which callbacks are called
-    volatile int callbackOrders[3] = { 0, 0, 0 };
-    sDestructorIndex = 0;
-
-    // Set up the callbacks
-    error = clSetContextDestructorCallback(
-        localContext, context_destructor_callback, (void *)&callbackOrders[0]);
-    test_error(error, "Unable to set destructor callback");
-
-    error = clSetContextDestructorCallback(
-        localContext, context_destructor_callback, (void *)&callbackOrders[1]);
-    test_error(error, "Unable to set destructor callback");
-
-    error = clSetContextDestructorCallback(
-        localContext, context_destructor_callback, (void *)&callbackOrders[2]);
-    test_error(error, "Unable to set destructor callback");
-
-    // Now release the context, which SHOULD call the callbacks
-    error = clReleaseContext(localContext);
-    test_error(error, "Unable to release local context");
-
-    // Note: since we manually released the context, we need to set it to NULL
-    // to prevent a double-release
-    localContext = NULL;
-
-    // At this point, all three callbacks should have already been called
-    int numErrors = 0;
-    for (int i = 0; i < 3; i++)
-    {
-        // Spin waiting for the release to finish.  If you don't call the
-        // context_destructor_callback, you will not pass the test.
-        log_info("\tWaiting for callback %d...\n", i);
-        int wait = 0;
-        while (0 == callbackOrders[i])
-        {
-            usleep(100000); // 1/10th second
-            if (++wait >= 10 * 10)
-            {
-                log_error("\tERROR: Callback %d was not called within 10 "
-                          "seconds!  Assuming failure.\n",
-                          i + 1);
-                numErrors++;
-                break;
-            }
-        }
-
-        if (callbackOrders[i] != 3 - i)
-        {
-            log_error("\tERROR: Callback %d was called in the wrong order! "
-                      "(Was called order %d, should have been order %d)\n",
-                      i + 1, callbackOrders[i], 3 - i);
-            numErrors++;
-        }
-    }
-
-    return (numErrors > 0) ? TEST_FAIL : TEST_PASS;
-}

diff --git a/test_conformance/api/test_create_context_from_type.cpp b/test_conformance/api/test_create_context_from_type.cpp
index b67041f..bbc2c86 100644
--- a/test_conformance/api/test_create_context_from_type.cpp
+++ b/test_conformance/api/test_create_context_from_type.cpp

@@ -80,11 +80,9 @@
     }
 
     /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context_to_test, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 10, NULL, &error);
+    streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 10, NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context_to_test, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * 10, NULL, &error);
+    streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
     test_error( error, "Creating test array failed" );
 
     /* Write some test data */

diff --git a/test_conformance/api/test_create_kernels.cpp b/test_conformance/api/test_create_kernels.cpp
index 568e84c..59f7f0a 100644
--- a/test_conformance/api/test_create_kernels.cpp
+++ b/test_conformance/api/test_create_kernels.cpp

@@ -456,8 +456,7 @@
 
     // Create args
     count = 100;
-    output = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * count,
-                            NULL, &error);
+    output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error );
     test_error( error, "Unable to create output buffer" );
 
     error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output );
@@ -525,10 +524,11 @@
         local_queue = clCreateCommandQueue(local_context, deviceID, 0, &error);
         test_error( error, "clCreateCommandQueue failed");
 
-        error = create_single_kernel_helper(
-            local_context, &local_program, &local_kernel, 1,
-            &repeate_test_kernel, "test_kernel");
-        test_error(error, "Unable to create kernel");
+        error = create_single_kernel_helper(local_context, &local_program, NULL, 1, &repeate_test_kernel, NULL);
+        test_error( error, "Unable to build test program" );
+
+        local_kernel = clCreateKernel(local_program, "test_kernel", &error);
+        test_error( error, "clCreateKernel failed");
 
         local_mem_in = clCreateBuffer(local_context, CL_MEM_READ_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
         test_error( error, "clCreateBuffer failed");

diff --git a/test_conformance/api/test_kernel_arg_changes.cpp b/test_conformance/api/test_kernel_arg_changes.cpp
index eb798a9..d85ae99 100644
--- a/test_conformance/api/test_kernel_arg_changes.cpp
+++ b/test_conformance/api/test_kernel_arg_changes.cpp

@@ -74,16 +74,14 @@
         sizes[ i ][ 0 ] = genrand_int32(seed) % (maxWidth/32) + 1;
         sizes[ i ][ 1 ] = genrand_int32(seed) % (maxHeight/32) + 1;
 
-        images[i] = create_image_2d(context, CL_MEM_READ_ONLY, &imageFormat,
-                                    sizes[i][0], sizes[i][1], 0, NULL, &error);
+        images[ i ] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY),
+                                     &imageFormat, sizes[ i ][ 0], sizes[ i ][ 1 ], 0, NULL, &error );
         if( images[i] == NULL )
         {
             log_error("Failed to create image %d of size %d x %d (%s).\n", i, (int)sizes[i][0], (int)sizes[i][1], IGetErrorString( error ));
             return -1;
         }
-        results[i] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_int) * threads[0] * 2, NULL, &error);
+        results[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof( cl_int ) * threads[0] * 2, NULL, &error );
         if( results[i] == NULL)
         {
             log_error("Failed to create array %d of size %d.\n", i, (int)threads[0]*2);

diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp
index 8073e0d..f1039ae 100644
--- a/test_conformance/api/test_kernel_arg_info.cpp
+++ b/test_conformance/api/test_kernel_arg_info.cpp

@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2021 The Khronos Group Inc.
-//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,992 +13,5931 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include <iostream>
-#include <vector>
 #include "testBase.h"
-#include "harness/errorHelpers.h"
-#include "harness/typeWrappers.h"
-#include "harness/kernelHelpers.h"
+#include <limits.h>
+#include <ctype.h>
+#ifndef _WIN32
+#include <unistd.h>
+#endif
 
-#define MINIMUM_OPENCL_PIPE_VERSION Version(2, 0)
+#define ARG_INFO_FIELD_COUNT        5
 
-static constexpr size_t CL_VERSION_LENGTH = 128;
-static constexpr size_t KERNEL_ARGUMENT_LENGTH = 128;
-static constexpr char KERNEL_ARGUMENT_NAME[] = "argument";
-static constexpr size_t KERNEL_ARGUMENT_NAME_LENGTH =
-    sizeof(KERNEL_ARGUMENT_NAME) + 1;
-static constexpr int SINGLE_KERNEL_ARG_NUMBER = 0;
-static constexpr int MAX_NUMBER_OF_KERNEL_ARGS = 128;
+#define ARG_INFO_ADDR_OFFSET        1
+#define ARG_INFO_ACCESS_OFFSET        2
+#define ARG_INFO_TYPE_QUAL_OFFSET    3
+#define ARG_INFO_TYPE_NAME_OFFSET    4
+#define ARG_INFO_ARG_NAME_OFFSET    5
 
-static const std::vector<cl_kernel_arg_address_qualifier> address_qualifiers = {
-    CL_KERNEL_ARG_ADDRESS_GLOBAL, CL_KERNEL_ARG_ADDRESS_LOCAL,
-    CL_KERNEL_ARG_ADDRESS_CONSTANT, CL_KERNEL_ARG_ADDRESS_PRIVATE
+
+typedef char const * kernel_args_t[];
+
+kernel_args_t required_kernel_args = {
+    "typedef float4 typedef_type;\n"
+    "\n"
+    "typedef struct struct_type {\n"
+    "    float4 float4d;\n"
+    "    int intd;\n"
+    "} typedef_struct_type;\n"
+    "\n"
+    "typedef union union_type {\n"
+    "    float4 float4d;\n"
+    "    uint4 uint4d;\n"
+    "} typedef_union_type;\n"
+    "\n"
+    "typedef enum enum_type {\n"
+    "    enum_type_zero,\n"
+    "    enum_type_one,\n"
+    "    enum_type_two\n"
+    "} typedef_enum_type;\n"
+    "\n"
+    "kernel void constant_scalar_p0(constant void*constantvoidp,\n"
+    "                              constant char *constantcharp,\n"
+    "                              constant uchar* constantucharp,\n"
+    "                              constant unsigned char * constantunsignedcharp)\n"
+  "{}\n",
+    "kernel void constant_scalar_p1(constant short*constantshortp,\n"
+    "                              constant ushort *constantushortp,\n"
+    "                              constant unsigned short* constantunsignedshortp,\n"
+    "                              constant int * constantintp)\n"
+  "{}\n",
+    "kernel void constant_scalar_p2(constant uint*constantuintp,\n"
+    "                              constant unsigned int *constantunsignedintp)\n"
+  "{}\n",
+    "kernel void constant_scalar_p3(constant float *constantfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_scalar_restrict_p0(constant void* restrict constantvoidrestrictp,\n"
+    "                                       constant char * restrict constantcharrestrictp,\n"
+    "                                       constant uchar*restrict constantucharrestrictp,\n"
+    "                                       constant unsigned char *restrict constantunsignedcharrestrictp)\n"
+    "{}\n",
+    "kernel void constant_scalar_restrict_p1(constant short* restrict constantshortrestrictp,\n"
+    "                                       constant ushort * restrict constantushortrestrictp,\n"
+    "                                       constant unsigned short*restrict constantunsignedshortrestrictp,\n"
+    "                                       constant int *restrict constantintrestrictp)\n"
+    "{}\n",
+    "kernel void constant_scalar_restrict_p2(constant uint* restrict constantuintrestrictp,\n"
+    "                                       constant unsigned int * restrict constantunsignedintrestrictp)\n"
+    "{}\n",
+    "kernel void constant_scalar_restrict_p3(constant float * restrict constantfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_scalar_p(global void*globalvoidp,\n"
+    "                            global char *globalcharp,\n"
+    "                            global uchar* globalucharp,\n"
+    "                            global unsigned char * globalunsignedcharp,\n"
+    "                            global short*globalshortp,\n"
+    "                            global ushort *globalushortp,\n"
+    "                            global unsigned short* globalunsignedshortp,\n"
+    "                            global int * globalintp,\n"
+    "                            global uint*globaluintp,\n"
+    "                            global unsigned int *globalunsignedintp,\n"
+    "                            global float *globalfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_scalar_restrict_p(global void* restrict globalvoidrestrictp,\n"
+    "                                     global char * restrict globalcharrestrictp,\n"
+    "                                     global uchar*restrict globalucharrestrictp,\n"
+    "                                     global unsigned char *restrict globalunsignedcharrestrictp,\n"
+    "                                     global short* restrict globalshortrestrictp,\n"
+    "                                     global ushort * restrict globalushortrestrictp,\n"
+    "                                     global unsigned short*restrict globalunsignedshortrestrictp,\n"
+    "                                     global int *restrict globalintrestrictp,\n"
+    "                                     global uint* restrict globaluintrestrictp,\n"
+    "                                     global unsigned int * restrict globalunsignedintrestrictp,\n"
+    "                                     global float * restrict globalfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_scalar_p(global const void*globalconstvoidp,\n"
+    "                                  global const char *globalconstcharp,\n"
+    "                                  global const uchar* globalconstucharp,\n"
+    "                                  global const unsigned char * globalconstunsignedcharp,\n"
+    "                                  global const short*globalconstshortp,\n"
+    "                                  global const ushort *globalconstushortp,\n"
+    "                                  global const unsigned short* globalconstunsignedshortp,\n"
+    "                                  global const int * globalconstintp,\n"
+    "                                  global const uint*globalconstuintp,\n"
+    "                                  global const unsigned int *globalconstunsignedintp,\n"
+    "                                  global const float *globalconstfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_scalar_restrict_p(global const void* restrict globalconstvoidrestrictp,\n"
+    "                                           global const char * restrict globalconstcharrestrictp,\n"
+    "                                           global const uchar*restrict globalconstucharrestrictp,\n"
+    "                                           global const unsigned char *restrict globalconstunsignedcharrestrictp,\n"
+    "                                           global const short* restrict globalconstshortrestrictp,\n"
+    "                                           global const ushort * restrict globalconstushortrestrictp,\n"
+    "                                           global const unsigned short*restrict globalconstunsignedshortrestrictp,\n"
+    "                                           global const int *restrict globalconstintrestrictp,\n"
+    "                                           global const uint* restrict globalconstuintrestrictp,\n"
+    "                                           global const unsigned int * restrict globalconstunsignedintrestrictp,\n"
+    "                                           global const float * restrict globalconstfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_scalar_p(global volatile void*globalvolatilevoidp,\n"
+    "                                     global volatile char *globalvolatilecharp,\n"
+    "                                     global volatile uchar* globalvolatileucharp,\n"
+    "                                     global volatile unsigned char * globalvolatileunsignedcharp,\n"
+    "                                     global volatile short*globalvolatileshortp,\n"
+    "                                     global volatile ushort *globalvolatileushortp,\n"
+    "                                     global volatile unsigned short* globalvolatileunsignedshortp,\n"
+    "                                     global volatile int * globalvolatileintp,\n"
+    "                                     global volatile uint*globalvolatileuintp,\n"
+    "                                     global volatile unsigned int *globalvolatileunsignedintp,\n"
+    "                                     global volatile float *globalvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_scalar_restrict_p(global volatile void* restrict globalvolatilevoidrestrictp,\n"
+    "                                              global volatile char * restrict globalvolatilecharrestrictp,\n"
+    "                                              global volatile uchar*restrict globalvolatileucharrestrictp,\n"
+    "                                              global volatile unsigned char *restrict globalvolatileunsignedcharrestrictp,\n"
+    "                                              global volatile short* restrict globalvolatileshortrestrictp,\n"
+    "                                              global volatile ushort * restrict globalvolatileushortrestrictp,\n"
+    "                                              global volatile unsigned short*restrict globalvolatileunsignedshortrestrictp,\n"
+    "                                              global volatile int *restrict globalvolatileintrestrictp,\n"
+    "                                              global volatile uint* restrict globalvolatileuintrestrictp,\n"
+    "                                              global volatile unsigned int * restrict globalvolatileunsignedintrestrictp,\n"
+    "                                              global volatile float * restrict globalvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_scalar_p(global const volatile void*globalconstvolatilevoidp,\n"
+    "                                           global const volatile char *globalconstvolatilecharp,\n"
+    "                                           global const volatile uchar* globalconstvolatileucharp,\n"
+    "                                           global const volatile unsigned char * globalconstvolatileunsignedcharp,\n"
+    "                                           global const volatile short*globalconstvolatileshortp,\n"
+    "                                           global const volatile ushort *globalconstvolatileushortp,\n"
+    "                                           global const volatile unsigned short* globalconstvolatileunsignedshortp,\n"
+    "                                           global const volatile int * globalconstvolatileintp,\n"
+    "                                           global const volatile uint*globalconstvolatileuintp,\n"
+    "                                           global const volatile unsigned int *globalconstvolatileunsignedintp,\n"
+    "                                           global const volatile float *globalconstvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_scalar_restrict_p(global const volatile void* restrict globalconstvolatilevoidrestrictp,\n"
+    "                                                    global const volatile char * restrict globalconstvolatilecharrestrictp,\n"
+    "                                                    global const volatile uchar*restrict globalconstvolatileucharrestrictp,\n"
+    "                                                    global const volatile unsigned char *restrict globalconstvolatileunsignedcharrestrictp,\n"
+    "                                                    global const volatile short* restrict globalconstvolatileshortrestrictp,\n"
+    "                                                    global const volatile ushort * restrict globalconstvolatileushortrestrictp,\n"
+    "                                                    global const volatile unsigned short*restrict globalconstvolatileunsignedshortrestrictp,\n"
+    "                                                    global const volatile int *restrict globalconstvolatileintrestrictp,\n"
+    "                                                    global const volatile uint* restrict globalconstvolatileuintrestrictp,\n"
+    "                                                    global const volatile unsigned int * restrict globalconstvolatileunsignedintrestrictp,\n"
+    "                                                    global const volatile float * restrict globalconstvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_scalar_p(local void*localvoidp,\n"
+    "                           local char *localcharp,\n"
+    "                           local uchar* localucharp,\n"
+    "                           local unsigned char * localunsignedcharp,\n"
+    "                           local short*localshortp,\n"
+    "                           local ushort *localushortp,\n"
+    "                           local unsigned short* localunsignedshortp,\n"
+    "                           local int * localintp,\n"
+    "                           local uint*localuintp,\n"
+    "                           local unsigned int *localunsignedintp,\n"
+    "                           local float *localfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_scalar_restrict_p(local void* restrict localvoidrestrictp,\n"
+    "                                    local char * restrict localcharrestrictp,\n"
+    "                                    local uchar*restrict localucharrestrictp,\n"
+    "                                    local unsigned char *restrict localunsignedcharrestrictp,\n"
+    "                                    local short* restrict localshortrestrictp,\n"
+    "                                    local ushort * restrict localushortrestrictp,\n"
+    "                                    local unsigned short*restrict localunsignedshortrestrictp,\n"
+    "                                    local int *restrict localintrestrictp,\n"
+    "                                    local uint* restrict localuintrestrictp,\n"
+    "                                    local unsigned int * restrict localunsignedintrestrictp,\n"
+    "                                    local float * restrict localfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_scalar_p(local const void*localconstvoidp,\n"
+    "                                 local const char *localconstcharp,\n"
+    "                                 local const uchar* localconstucharp,\n"
+    "                                 local const unsigned char * localconstunsignedcharp,\n"
+    "                                 local const short*localconstshortp,\n"
+    "                                 local const ushort *localconstushortp,\n"
+    "                                 local const unsigned short* localconstunsignedshortp,\n"
+    "                                 local const int * localconstintp,\n"
+    "                                 local const uint*localconstuintp,\n"
+    "                                 local const unsigned int *localconstunsignedintp,\n"
+    "                                 local const float *localconstfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_scalar_restrict_p(local const void* restrict localconstvoidrestrictp,\n"
+    "                                          local const char * restrict localconstcharrestrictp,\n"
+    "                                          local const uchar*restrict localconstucharrestrictp,\n"
+    "                                          local const unsigned char *restrict localconstunsignedcharrestrictp,\n"
+    "                                          local const short* restrict localconstshortrestrictp,\n"
+    "                                          local const ushort * restrict localconstushortrestrictp,\n"
+    "                                          local const unsigned short*restrict localconstunsignedshortrestrictp,\n"
+    "                                          local const int *restrict localconstintrestrictp,\n"
+    "                                          local const uint* restrict localconstuintrestrictp,\n"
+    "                                          local const unsigned int * restrict localconstunsignedintrestrictp,\n"
+    "                                          local const float * restrict localconstfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_scalar_p(local volatile void*localvolatilevoidp,\n"
+    "                                    local volatile char *localvolatilecharp,\n"
+    "                                    local volatile uchar* localvolatileucharp,\n"
+    "                                    local volatile unsigned char * localvolatileunsignedcharp,\n"
+    "                                    local volatile short*localvolatileshortp,\n"
+    "                                    local volatile ushort *localvolatileushortp,\n"
+    "                                    local volatile unsigned short* localvolatileunsignedshortp,\n"
+    "                                    local volatile int * localvolatileintp,\n"
+    "                                    local volatile uint*localvolatileuintp,\n"
+    "                                    local volatile unsigned int *localvolatileunsignedintp,\n"
+    "                                    local volatile float *localvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_scalar_restrict_p(local volatile void* restrict localvolatilevoidrestrictp,\n"
+    "                                             local volatile char * restrict localvolatilecharrestrictp,\n"
+    "                                             local volatile uchar*restrict localvolatileucharrestrictp,\n"
+    "                                             local volatile unsigned char *restrict localvolatileunsignedcharrestrictp,\n"
+    "                                             local volatile short* restrict localvolatileshortrestrictp,\n"
+    "                                             local volatile ushort * restrict localvolatileushortrestrictp,\n"
+    "                                             local volatile unsigned short*restrict localvolatileunsignedshortrestrictp,\n"
+    "                                             local volatile int *restrict localvolatileintrestrictp,\n"
+    "                                             local volatile uint* restrict localvolatileuintrestrictp,\n"
+    "                                             local volatile unsigned int * restrict localvolatileunsignedintrestrictp,\n"
+    "                                             local volatile float * restrict localvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_scalar_p(local const volatile void*localconstvolatilevoidp,\n"
+    "                                          local const volatile char *localconstvolatilecharp,\n"
+    "                                          local const volatile uchar* localconstvolatileucharp,\n"
+    "                                          local const volatile unsigned char * localconstvolatileunsignedcharp,\n"
+    "                                          local const volatile short*localconstvolatileshortp,\n"
+    "                                          local const volatile ushort *localconstvolatileushortp,\n"
+    "                                          local const volatile unsigned short* localconstvolatileunsignedshortp,\n"
+    "                                          local const volatile int * localconstvolatileintp,\n"
+    "                                          local const volatile uint*localconstvolatileuintp,\n"
+    "                                          local const volatile unsigned int *localconstvolatileunsignedintp,\n"
+    "                                          local const volatile float *localconstvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_scalar_restrict_p(local const volatile void* restrict localconstvolatilevoidrestrictp,\n"
+    "                                                   local const volatile char * restrict localconstvolatilecharrestrictp,\n"
+    "                                                   local const volatile uchar*restrict localconstvolatileucharrestrictp,\n"
+    "                                                   local const volatile unsigned char *restrict localconstvolatileunsignedcharrestrictp,\n"
+    "                                                   local const volatile short* restrict localconstvolatileshortrestrictp,\n"
+    "                                                   local const volatile ushort * restrict localconstvolatileushortrestrictp,\n"
+    "                                                   local const volatile unsigned short*restrict localconstvolatileunsignedshortrestrictp,\n"
+    "                                                   local const volatile int *restrict localconstvolatileintrestrictp,\n"
+    "                                                   local const volatile uint* restrict localconstvolatileuintrestrictp,\n"
+    "                                                   local const volatile unsigned int * restrict localconstvolatileunsignedintrestrictp,\n"
+    "                                                   local const volatile float * restrict localconstvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void scalar_d(char chard,\n"
+    "                     uchar uchard,\n"
+    "                     unsigned char unsignedchard,\n"
+    "                     short shortd,\n"
+    "                     ushort ushortd,\n"
+    "                     unsigned short unsignedshortd,\n"
+    "                     int intd,\n"
+    "                     uint uintd,\n"
+    "                     unsigned int unsignedintd,\n"
+    "                     float floatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_scalar_d(const char constchard,\n"
+    "                           const uchar constuchard,\n"
+    "                           const unsigned char constunsignedchard,\n"
+    "                           const short constshortd,\n"
+    "                           const ushort constushortd,\n"
+    "                           const unsigned short constunsignedshortd,\n"
+    "                           const int constintd,\n"
+    "                           const uint constuintd,\n"
+    "                           const unsigned int constunsignedintd,\n"
+    "                           const float constfloatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_scalar_d(private char privatechard,\n"
+    "                             private uchar privateuchard,\n"
+    "                             private unsigned char privateunsignedchard,\n"
+    "                             private short privateshortd,\n"
+    "                             private ushort privateushortd,\n"
+    "                             private unsigned short privateunsignedshortd,\n"
+    "                             private int privateintd,\n"
+    "                             private uint privateuintd,\n"
+    "                             private unsigned int privateunsignedintd,\n"
+    "                             private float privatefloatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_scalar_d(private const char privateconstchard,\n"
+    "                                   private const uchar privateconstuchard,\n"
+    "                                   private const unsigned char privateconstunsignedchard,\n"
+    "                                   private const short privateconstshortd,\n"
+    "                                   private const ushort privateconstushortd,\n"
+    "                                   private const unsigned short privateconstunsignedshortd,\n"
+    "                                   private const int privateconstintd,\n"
+    "                                   private const uint privateconstuintd,\n"
+    "                                   private const unsigned int privateconstunsignedintd,\n"
+    "                                   private const float privateconstfloatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector2_p0(constant char2*constantchar2p,\n"
+    "                               constant uchar2 *constantuchar2p,\n"
+    "                               constant short2* constantshort2p,\n"
+    "                               constant ushort2 * constantushort2p)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_p1(constant int2*constantint2p,\n"
+    "                               constant uint2 *constantuint2p)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_p2(constant float2*constantfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector2_restrict_p0(constant char2 *restrict constantchar2restrictp,\n"
+    "                                        constant uchar2* restrict constantuchar2restrictp,\n"
+    "                                        constant short2 * restrict constantshort2restrictp,\n"
+    "                                        constant ushort2*restrict constantushort2restrictp)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_restrict_p1(constant int2 *restrict constantint2restrictp,\n"
+    "                                        constant uint2* restrict constantuint2restrictp)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_restrict_p2(constant float2 *restrict constantfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector2_p(global char2*globalchar2p,\n"
+    "                             global uchar2 *globaluchar2p,\n"
+    "                             global short2* globalshort2p,\n"
+    "                             global ushort2 * globalushort2p,\n"
+    "                             global int2*globalint2p,\n"
+    "                             global uint2 *globaluint2p,\n"
+    "                             global float2*globalfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector2_restrict_p(global char2 *restrict globalchar2restrictp,\n"
+    "                                      global uchar2* restrict globaluchar2restrictp,\n"
+    "                                      global short2 * restrict globalshort2restrictp,\n"
+    "                                      global ushort2*restrict globalushort2restrictp,\n"
+    "                                      global int2 *restrict globalint2restrictp,\n"
+    "                                      global uint2* restrict globaluint2restrictp,\n"
+    "                                      global float2 *restrict globalfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector2_p(global const char2* globalconstchar2p,\n"
+    "                                   global const uchar2 * globalconstuchar2p,\n"
+    "                                   global const short2*globalconstshort2p,\n"
+    "                                   global const ushort2 *globalconstushort2p,\n"
+    "                                   global const int2* globalconstint2p,\n"
+    "                                   global const uint2 * globalconstuint2p,\n"
+    "                                   global const float2* globalconstfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector2_restrict_p(global const char2 * restrict globalconstchar2restrictp,\n"
+    "                                            global const uchar2*restrict globalconstuchar2restrictp,\n"
+    "                                            global const short2 *restrict globalconstshort2restrictp,\n"
+    "                                            global const ushort2* restrict globalconstushort2restrictp,\n"
+    "                                            global const int2 * restrict globalconstint2restrictp,\n"
+    "                                            global const uint2*restrict globalconstuint2restrictp,\n"
+    "                                            global const float2 * restrict globalconstfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector2_p(global volatile char2*globalvolatilechar2p,\n"
+    "                                      global volatile uchar2 *globalvolatileuchar2p,\n"
+    "                                      global volatile short2* globalvolatileshort2p,\n"
+    "                                      global volatile ushort2 * globalvolatileushort2p,\n"
+    "                                      global volatile int2*globalvolatileint2p,\n"
+    "                                      global volatile uint2 *globalvolatileuint2p,\n"
+    "                                      global volatile float2*globalvolatilefloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector2_restrict_p(global volatile char2 *restrict globalvolatilechar2restrictp,\n"
+    "                                               global volatile uchar2* restrict globalvolatileuchar2restrictp,\n"
+    "                                               global volatile short2 * restrict globalvolatileshort2restrictp,\n"
+    "                                               global volatile ushort2*restrict globalvolatileushort2restrictp,\n"
+    "                                               global volatile int2 *restrict globalvolatileint2restrictp,\n"
+    "                                               global volatile uint2* restrict globalvolatileuint2restrictp,\n"
+    "                                               global volatile float2 *restrict globalvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector2_p(global const volatile char2* globalconstvolatilechar2p,\n"
+    "                                            global const volatile uchar2 * globalconstvolatileuchar2p,\n"
+    "                                            global const volatile short2*globalconstvolatileshort2p,\n"
+    "                                            global const volatile ushort2 *globalconstvolatileushort2p,\n"
+    "                                            global const volatile int2* globalconstvolatileint2p,\n"
+    "                                            global const volatile uint2 * globalconstvolatileuint2p,\n"
+    "                                            global const volatile float2* globalconstvolatilefloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector2_restrict_p(global const volatile char2 * restrict globalconstvolatilechar2restrictp,\n"
+    "                                                     global const volatile uchar2*restrict globalconstvolatileuchar2restrictp,\n"
+    "                                                     global const volatile short2 *restrict globalconstvolatileshort2restrictp,\n"
+    "                                                     global const volatile ushort2* restrict globalconstvolatileushort2restrictp,\n"
+    "                                                     global const volatile int2 * restrict globalconstvolatileint2restrictp,\n"
+    "                                                     global const volatile uint2*restrict globalconstvolatileuint2restrictp,\n"
+    "                                                     global const volatile float2 * restrict globalconstvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector2_p(local char2*localchar2p,\n"
+    "                            local uchar2 *localuchar2p,\n"
+    "                            local short2* localshort2p,\n"
+    "                            local ushort2 * localushort2p,\n"
+    "                            local int2*localint2p,\n"
+    "                            local uint2 *localuint2p,\n"
+    "                            local float2*localfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector2_restrict_p(local char2 *restrict localchar2restrictp,\n"
+    "                                     local uchar2* restrict localuchar2restrictp,\n"
+    "                                     local short2 * restrict localshort2restrictp,\n"
+    "                                     local ushort2*restrict localushort2restrictp,\n"
+    "                                     local int2 *restrict localint2restrictp,\n"
+    "                                     local uint2* restrict localuint2restrictp,\n"
+    "                                     local float2 *restrict localfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector2_p(local const char2* localconstchar2p,\n"
+    "                                  local const uchar2 * localconstuchar2p,\n"
+    "                                  local const short2*localconstshort2p,\n"
+    "                                  local const ushort2 *localconstushort2p,\n"
+    "                                  local const int2* localconstint2p,\n"
+    "                                  local const uint2 * localconstuint2p,\n"
+    "                                  local const float2* localconstfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector2_restrict_p(local const char2 * restrict localconstchar2restrictp,\n"
+    "                                           local const uchar2*restrict localconstuchar2restrictp,\n"
+    "                                           local const short2 *restrict localconstshort2restrictp,\n"
+    "                                           local const ushort2* restrict localconstushort2restrictp,\n"
+    "                                           local const int2 * restrict localconstint2restrictp,\n"
+    "                                           local const uint2*restrict localconstuint2restrictp,\n"
+    "                                           local const float2 * restrict localconstfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector2_p(local volatile char2*localvolatilechar2p,\n"
+    "                                     local volatile uchar2 *localvolatileuchar2p,\n"
+    "                                     local volatile short2* localvolatileshort2p,\n"
+    "                                     local volatile ushort2 * localvolatileushort2p,\n"
+    "                                     local volatile int2*localvolatileint2p,\n"
+    "                                     local volatile uint2 *localvolatileuint2p,\n"
+    "                                     local volatile float2*localvolatilefloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector2_restrict_p(local volatile char2 *restrict localvolatilechar2restrictp,\n"
+    "                                              local volatile uchar2* restrict localvolatileuchar2restrictp,\n"
+    "                                              local volatile short2 * restrict localvolatileshort2restrictp,\n"
+    "                                              local volatile ushort2*restrict localvolatileushort2restrictp,\n"
+    "                                              local volatile int2 *restrict localvolatileint2restrictp,\n"
+    "                                              local volatile uint2* restrict localvolatileuint2restrictp,\n"
+    "                                              local volatile float2 *restrict localvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector2_p(local const volatile char2* localconstvolatilechar2p,\n"
+    "                                           local const volatile uchar2 * localconstvolatileuchar2p,\n"
+    "                                           local const volatile short2*localconstvolatileshort2p,\n"
+    "                                           local const volatile ushort2 *localconstvolatileushort2p,\n"
+    "                                           local const volatile int2* localconstvolatileint2p,\n"
+    "                                           local const volatile uint2 * localconstvolatileuint2p,\n"
+    "                                           local const volatile float2* localconstvolatilefloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector2_restrict_p(local const volatile char2 * restrict localconstvolatilechar2restrictp,\n"
+    "                                                    local const volatile uchar2*restrict localconstvolatileuchar2restrictp,\n"
+    "                                                    local const volatile short2 *restrict localconstvolatileshort2restrictp,\n"
+    "                                                    local const volatile ushort2* restrict localconstvolatileushort2restrictp,\n"
+    "                                                    local const volatile int2 * restrict localconstvolatileint2restrictp,\n"
+    "                                                    local const volatile uint2*restrict localconstvolatileuint2restrictp,\n"
+    "                                                    local const volatile float2 * restrict localconstvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector2_d(char2 char2d,\n"
+    "                      uchar2 uchar2d,\n"
+    "                      short2 short2d,\n"
+    "                      ushort2 ushort2d,\n"
+    "                      int2 int2d,\n"
+    "                      uint2 uint2d,\n"
+    "                      float2 float2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector2_d(const char2 constchar2d,\n"
+    "                            const uchar2 constuchar2d,\n"
+    "                            const short2 constshort2d,\n"
+    "                            const ushort2 constushort2d,\n"
+    "                            const int2 constint2d,\n"
+    "                            const uint2 constuint2d,\n"
+    "                            const float2 constfloat2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector2_d(private char2 privatechar2d,\n"
+    "                              private uchar2 privateuchar2d,\n"
+    "                              private short2 privateshort2d,\n"
+    "                              private ushort2 privateushort2d,\n"
+    "                              private int2 privateint2d,\n"
+    "                              private uint2 privateuint2d,\n"
+    "                              private float2 privatefloat2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector2_d(private const char2 privateconstchar2d,\n"
+    "                                    private const uchar2 privateconstuchar2d,\n"
+    "                                    private const short2 privateconstshort2d,\n"
+    "                                    private const ushort2 privateconstushort2d,\n"
+    "                                    private const int2 privateconstint2d,\n"
+    "                                    private const uint2 privateconstuint2d,\n"
+    "                                    private const float2 privateconstfloat2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_p0(constant char3*constantchar3p,\n"
+    "                               constant uchar3 *constantuchar3p,\n"
+    "                               constant short3* constantshort3p,\n"
+    "                               constant ushort3 * constantushort3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_p1(constant int3*constantint3p,\n"
+    "                               constant uint3 *constantuint3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_p2(constant float3*constantfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_restrict_p0(constant char3 *restrict constantchar3restrictp,\n"
+    "                                        constant uchar3* restrict constantuchar3restrictp,\n"
+    "                                        constant short3 * restrict constantshort3restrictp,\n"
+    "                                        constant ushort3*restrict constantushort3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_restrict_p1(constant int3 *restrict constantint3restrictp,\n"
+    "                                        constant uint3* restrict constantuint3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_restrict_p2(constant float3 *restrict constantfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector3_p(global char3*globalchar3p,\n"
+    "                             global uchar3 *globaluchar3p,\n"
+    "                             global short3* globalshort3p,\n"
+    "                             global ushort3 * globalushort3p,\n"
+    "                             global int3*globalint3p,\n"
+    "                             global uint3 *globaluint3p,\n"
+    "                             global float3*globalfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector3_restrict_p(global char3 *restrict globalchar3restrictp,\n"
+    "                                      global uchar3* restrict globaluchar3restrictp,\n"
+    "                                      global short3 * restrict globalshort3restrictp,\n"
+    "                                      global ushort3*restrict globalushort3restrictp,\n"
+    "                                      global int3 *restrict globalint3restrictp,\n"
+    "                                      global uint3* restrict globaluint3restrictp,\n"
+    "                                      global float3 *restrict globalfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector3_p(global const char3* globalconstchar3p,\n"
+    "                                   global const uchar3 * globalconstuchar3p,\n"
+    "                                   global const short3*globalconstshort3p,\n"
+    "                                   global const ushort3 *globalconstushort3p,\n"
+    "                                   global const int3* globalconstint3p,\n"
+    "                                   global const uint3 * globalconstuint3p,\n"
+    "                                   global const float3* globalconstfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector3_restrict_p(global const char3 * restrict globalconstchar3restrictp,\n"
+    "                                            global const uchar3*restrict globalconstuchar3restrictp,\n"
+    "                                            global const short3 *restrict globalconstshort3restrictp,\n"
+    "                                            global const ushort3* restrict globalconstushort3restrictp,\n"
+    "                                            global const int3 * restrict globalconstint3restrictp,\n"
+    "                                            global const uint3*restrict globalconstuint3restrictp,\n"
+    "                                            global const float3 * restrict globalconstfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector3_p(global volatile char3*globalvolatilechar3p,\n"
+    "                                      global volatile uchar3 *globalvolatileuchar3p,\n"
+    "                                      global volatile short3* globalvolatileshort3p,\n"
+    "                                      global volatile ushort3 * globalvolatileushort3p,\n"
+    "                                      global volatile int3*globalvolatileint3p,\n"
+    "                                      global volatile uint3 *globalvolatileuint3p,\n"
+    "                                      global volatile float3*globalvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector3_restrict_p(global volatile char3 *restrict globalvolatilechar3restrictp,\n"
+    "                                               global volatile uchar3* restrict globalvolatileuchar3restrictp,\n"
+    "                                               global volatile short3 * restrict globalvolatileshort3restrictp,\n"
+    "                                               global volatile ushort3*restrict globalvolatileushort3restrictp,\n"
+    "                                               global volatile int3 *restrict globalvolatileint3restrictp,\n"
+    "                                               global volatile uint3* restrict globalvolatileuint3restrictp,\n"
+    "                                               global volatile float3 *restrict globalvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector3_p(global const volatile char3* globalconstvolatilechar3p,\n"
+    "                                            global const volatile uchar3 * globalconstvolatileuchar3p,\n"
+    "                                            global const volatile short3*globalconstvolatileshort3p,\n"
+    "                                            global const volatile ushort3 *globalconstvolatileushort3p,\n"
+    "                                            global const volatile int3* globalconstvolatileint3p,\n"
+    "                                            global const volatile uint3 * globalconstvolatileuint3p,\n"
+    "                                            global const volatile float3* globalconstvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector3_restrict_p(global const volatile char3 * restrict globalconstvolatilechar3restrictp,\n"
+    "                                                     global const volatile uchar3*restrict globalconstvolatileuchar3restrictp,\n"
+    "                                                     global const volatile short3 *restrict globalconstvolatileshort3restrictp,\n"
+    "                                                     global const volatile ushort3* restrict globalconstvolatileushort3restrictp,\n"
+    "                                                     global const volatile int3 * restrict globalconstvolatileint3restrictp,\n"
+    "                                                     global const volatile uint3*restrict globalconstvolatileuint3restrictp,\n"
+    "                                                     global const volatile float3 * restrict globalconstvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector3_p(local char3*localchar3p,\n"
+    "                            local uchar3 *localuchar3p,\n"
+    "                            local short3* localshort3p,\n"
+    "                            local ushort3 * localushort3p,\n"
+    "                            local int3*localint3p,\n"
+    "                            local uint3 *localuint3p,\n"
+    "                            local float3*localfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector3_restrict_p(local char3 *restrict localchar3restrictp,\n"
+    "                                     local uchar3* restrict localuchar3restrictp,\n"
+    "                                     local short3 * restrict localshort3restrictp,\n"
+    "                                     local ushort3*restrict localushort3restrictp,\n"
+    "                                     local int3 *restrict localint3restrictp,\n"
+    "                                     local uint3* restrict localuint3restrictp,\n"
+    "                                     local float3 *restrict localfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector3_p(local const char3* localconstchar3p,\n"
+    "                                  local const uchar3 * localconstuchar3p,\n"
+    "                                  local const short3*localconstshort3p,\n"
+    "                                  local const ushort3 *localconstushort3p,\n"
+    "                                  local const int3* localconstint3p,\n"
+    "                                  local const uint3 * localconstuint3p,\n"
+    "                                  local const float3* localconstfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector3_restrict_p(local const char3 * restrict localconstchar3restrictp,\n"
+    "                                           local const uchar3*restrict localconstuchar3restrictp,\n"
+    "                                           local const short3 *restrict localconstshort3restrictp,\n"
+    "                                           local const ushort3* restrict localconstushort3restrictp,\n"
+    "                                           local const int3 * restrict localconstint3restrictp,\n"
+    "                                           local const uint3*restrict localconstuint3restrictp,\n"
+    "                                           local const float3 * restrict localconstfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector3_p(local volatile char3*localvolatilechar3p,\n"
+    "                                     local volatile uchar3 *localvolatileuchar3p,\n"
+    "                                     local volatile short3* localvolatileshort3p,\n"
+    "                                     local volatile ushort3 * localvolatileushort3p,\n"
+    "                                     local volatile int3*localvolatileint3p,\n"
+    "                                     local volatile uint3 *localvolatileuint3p,\n"
+    "                                     local volatile float3*localvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector3_restrict_p(local volatile char3 *restrict localvolatilechar3restrictp,\n"
+    "                                              local volatile uchar3* restrict localvolatileuchar3restrictp,\n"
+    "                                              local volatile short3 * restrict localvolatileshort3restrictp,\n"
+    "                                              local volatile ushort3*restrict localvolatileushort3restrictp,\n"
+    "                                              local volatile int3 *restrict localvolatileint3restrictp,\n"
+    "                                              local volatile uint3* restrict localvolatileuint3restrictp,\n"
+    "                                              local volatile float3 *restrict localvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector3_p(local const volatile char3* localconstvolatilechar3p,\n"
+    "                                           local const volatile uchar3 * localconstvolatileuchar3p,\n"
+    "                                           local const volatile short3*localconstvolatileshort3p,\n"
+    "                                           local const volatile ushort3 *localconstvolatileushort3p,\n"
+    "                                           local const volatile int3* localconstvolatileint3p,\n"
+    "                                           local const volatile uint3 * localconstvolatileuint3p,\n"
+    "                                           local const volatile float3* localconstvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector3_restrict_p(local const volatile char3 * restrict localconstvolatilechar3restrictp,\n"
+    "                                                    local const volatile uchar3*restrict localconstvolatileuchar3restrictp,\n"
+    "                                                    local const volatile short3 *restrict localconstvolatileshort3restrictp,\n"
+    "                                                    local const volatile ushort3* restrict localconstvolatileushort3restrictp,\n"
+    "                                                    local const volatile int3 * restrict localconstvolatileint3restrictp,\n"
+    "                                                    local const volatile uint3*restrict localconstvolatileuint3restrictp,\n"
+    "                                                    local const volatile float3 * restrict localconstvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector3_d(char3 char3d,\n"
+    "                      uchar3 uchar3d,\n"
+    "                      short3 short3d,\n"
+    "                      ushort3 ushort3d,\n"
+    "                      int3 int3d,\n"
+    "                      uint3 uint3d,\n"
+    "                      float3 float3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector3_d(const char3 constchar3d,\n"
+    "                            const uchar3 constuchar3d,\n"
+    "                            const short3 constshort3d,\n"
+    "                            const ushort3 constushort3d,\n"
+    "                            const int3 constint3d,\n"
+    "                            const uint3 constuint3d,\n"
+    "                            const float3 constfloat3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector3_d(private char3 privatechar3d,\n"
+    "                              private uchar3 privateuchar3d,\n"
+    "                              private short3 privateshort3d,\n"
+    "                              private ushort3 privateushort3d,\n"
+    "                              private int3 privateint3d,\n"
+    "                              private uint3 privateuint3d,\n"
+    "                              private float3 privatefloat3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector3_d(private const char3 privateconstchar3d,\n"
+    "                                    private const uchar3 privateconstuchar3d,\n"
+    "                                    private const short3 privateconstshort3d,\n"
+    "                                    private const ushort3 privateconstushort3d,\n"
+    "                                    private const int3 privateconstint3d,\n"
+    "                                    private const uint3 privateconstuint3d,\n"
+    "                                    private const float3 privateconstfloat3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_p0(constant char4*constantchar4p,\n"
+    "                               constant uchar4 *constantuchar4p,\n"
+    "                               constant short4* constantshort4p,\n"
+    "                               constant ushort4 * constantushort4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_p1(constant int4*constantint4p,\n"
+    "                               constant uint4 *constantuint4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_p2(constant float4*constantfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_restrict_p0(constant char4 *restrict constantchar4restrictp,\n"
+    "                                        constant uchar4* restrict constantuchar4restrictp,\n"
+    "                                        constant short4 * restrict constantshort4restrictp,\n"
+    "                                        constant ushort4*restrict constantushort4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_restrict_p1(constant int4 *restrict constantint4restrictp,\n"
+    "                                        constant uint4* restrict constantuint4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_restrict_p2(constant float4 *restrict constantfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector4_p(global char4*globalchar4p,\n"
+    "                             global uchar4 *globaluchar4p,\n"
+    "                             global short4* globalshort4p,\n"
+    "                             global ushort4 * globalushort4p,\n"
+    "                             global int4*globalint4p,\n"
+    "                             global uint4 *globaluint4p,\n"
+    "                             global float4*globalfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector4_restrict_p(global char4 *restrict globalchar4restrictp,\n"
+    "                                      global uchar4* restrict globaluchar4restrictp,\n"
+    "                                      global short4 * restrict globalshort4restrictp,\n"
+    "                                      global ushort4*restrict globalushort4restrictp,\n"
+    "                                      global int4 *restrict globalint4restrictp,\n"
+    "                                      global uint4* restrict globaluint4restrictp,\n"
+    "                                      global float4 *restrict globalfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector4_p(global const char4* globalconstchar4p,\n"
+    "                                   global const uchar4 * globalconstuchar4p,\n"
+    "                                   global const short4*globalconstshort4p,\n"
+    "                                   global const ushort4 *globalconstushort4p,\n"
+    "                                   global const int4* globalconstint4p,\n"
+    "                                   global const uint4 * globalconstuint4p,\n"
+    "                                   global const float4* globalconstfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector4_restrict_p(global const char4 * restrict globalconstchar4restrictp,\n"
+    "                                            global const uchar4*restrict globalconstuchar4restrictp,\n"
+    "                                            global const short4 *restrict globalconstshort4restrictp,\n"
+    "                                            global const ushort4* restrict globalconstushort4restrictp,\n"
+    "                                            global const int4 * restrict globalconstint4restrictp,\n"
+    "                                            global const uint4*restrict globalconstuint4restrictp,\n"
+    "                                            global const float4 * restrict globalconstfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector4_p(global volatile char4*globalvolatilechar4p,\n"
+    "                                      global volatile uchar4 *globalvolatileuchar4p,\n"
+    "                                      global volatile short4* globalvolatileshort4p,\n"
+    "                                      global volatile ushort4 * globalvolatileushort4p,\n"
+    "                                      global volatile int4*globalvolatileint4p,\n"
+    "                                      global volatile uint4 *globalvolatileuint4p,\n"
+    "                                      global volatile float4*globalvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector4_restrict_p(global volatile char4 *restrict globalvolatilechar4restrictp,\n"
+    "                                               global volatile uchar4* restrict globalvolatileuchar4restrictp,\n"
+    "                                               global volatile short4 * restrict globalvolatileshort4restrictp,\n"
+    "                                               global volatile ushort4*restrict globalvolatileushort4restrictp,\n"
+    "                                               global volatile int4 *restrict globalvolatileint4restrictp,\n"
+    "                                               global volatile uint4* restrict globalvolatileuint4restrictp,\n"
+    "                                               global volatile float4 *restrict globalvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector4_p(global const volatile char4* globalconstvolatilechar4p,\n"
+    "                                            global const volatile uchar4 * globalconstvolatileuchar4p,\n"
+    "                                            global const volatile short4*globalconstvolatileshort4p,\n"
+    "                                            global const volatile ushort4 *globalconstvolatileushort4p,\n"
+    "                                            global const volatile int4* globalconstvolatileint4p,\n"
+    "                                            global const volatile uint4 * globalconstvolatileuint4p,\n"
+    "                                            global const volatile float4* globalconstvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector4_restrict_p(global const volatile char4 * restrict globalconstvolatilechar4restrictp,\n"
+    "                                                     global const volatile uchar4*restrict globalconstvolatileuchar4restrictp,\n"
+    "                                                     global const volatile short4 *restrict globalconstvolatileshort4restrictp,\n"
+    "                                                     global const volatile ushort4* restrict globalconstvolatileushort4restrictp,\n"
+    "                                                     global const volatile int4 * restrict globalconstvolatileint4restrictp,\n"
+    "                                                     global const volatile uint4*restrict globalconstvolatileuint4restrictp,\n"
+    "                                                     global const volatile float4 * restrict globalconstvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector4_p(local char4*localchar4p,\n"
+    "                            local uchar4 *localuchar4p,\n"
+    "                            local short4* localshort4p,\n"
+    "                            local ushort4 * localushort4p,\n"
+    "                            local int4*localint4p,\n"
+    "                            local uint4 *localuint4p,\n"
+    "                            local float4*localfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector4_restrict_p(local char4 *restrict localchar4restrictp,\n"
+    "                                     local uchar4* restrict localuchar4restrictp,\n"
+    "                                     local short4 * restrict localshort4restrictp,\n"
+    "                                     local ushort4*restrict localushort4restrictp,\n"
+    "                                     local int4 *restrict localint4restrictp,\n"
+    "                                     local uint4* restrict localuint4restrictp,\n"
+    "                                     local float4 *restrict localfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector4_p(local const char4* localconstchar4p,\n"
+    "                                  local const uchar4 * localconstuchar4p,\n"
+    "                                  local const short4*localconstshort4p,\n"
+    "                                  local const ushort4 *localconstushort4p,\n"
+    "                                  local const int4* localconstint4p,\n"
+    "                                  local const uint4 * localconstuint4p,\n"
+    "                                  local const float4* localconstfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector4_restrict_p(local const char4 * restrict localconstchar4restrictp,\n"
+    "                                           local const uchar4*restrict localconstuchar4restrictp,\n"
+    "                                           local const short4 *restrict localconstshort4restrictp,\n"
+    "                                           local const ushort4* restrict localconstushort4restrictp,\n"
+    "                                           local const int4 * restrict localconstint4restrictp,\n"
+    "                                           local const uint4*restrict localconstuint4restrictp,\n"
+    "                                           local const float4 * restrict localconstfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector4_p(local volatile char4*localvolatilechar4p,\n"
+    "                                     local volatile uchar4 *localvolatileuchar4p,\n"
+    "                                     local volatile short4* localvolatileshort4p,\n"
+    "                                     local volatile ushort4 * localvolatileushort4p,\n"
+    "                                     local volatile int4*localvolatileint4p,\n"
+    "                                     local volatile uint4 *localvolatileuint4p,\n"
+    "                                     local volatile float4*localvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector4_restrict_p(local volatile char4 *restrict localvolatilechar4restrictp,\n"
+    "                                              local volatile uchar4* restrict localvolatileuchar4restrictp,\n"
+    "                                              local volatile short4 * restrict localvolatileshort4restrictp,\n"
+    "                                              local volatile ushort4*restrict localvolatileushort4restrictp,\n"
+    "                                              local volatile int4 *restrict localvolatileint4restrictp,\n"
+    "                                              local volatile uint4* restrict localvolatileuint4restrictp,\n"
+    "                                              local volatile float4 *restrict localvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector4_p(local const volatile char4* localconstvolatilechar4p,\n"
+    "                                           local const volatile uchar4 * localconstvolatileuchar4p,\n"
+    "                                           local const volatile short4*localconstvolatileshort4p,\n"
+    "                                           local const volatile ushort4 *localconstvolatileushort4p,\n"
+    "                                           local const volatile int4* localconstvolatileint4p,\n"
+    "                                           local const volatile uint4 * localconstvolatileuint4p,\n"
+    "                                           local const volatile float4* localconstvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector4_restrict_p(local const volatile char4 * restrict localconstvolatilechar4restrictp,\n"
+    "                                                    local const volatile uchar4*restrict localconstvolatileuchar4restrictp,\n"
+    "                                                    local const volatile short4 *restrict localconstvolatileshort4restrictp,\n"
+    "                                                    local const volatile ushort4* restrict localconstvolatileushort4restrictp,\n"
+    "                                                    local const volatile int4 * restrict localconstvolatileint4restrictp,\n"
+    "                                                    local const volatile uint4*restrict localconstvolatileuint4restrictp,\n"
+    "                                                    local const volatile float4 * restrict localconstvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector4_d(char4 char4d,\n"
+    "                      uchar4 uchar4d,\n"
+    "                      short4 short4d,\n"
+    "                      ushort4 ushort4d,\n"
+    "                      int4 int4d,\n"
+    "                      uint4 uint4d,\n"
+    "                      float4 float4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector4_d(const char4 constchar4d,\n"
+    "                            const uchar4 constuchar4d,\n"
+    "                            const short4 constshort4d,\n"
+    "                            const ushort4 constushort4d,\n"
+    "                            const int4 constint4d,\n"
+    "                            const uint4 constuint4d,\n"
+    "                            const float4 constfloat4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector4_d(private char4 privatechar4d,\n"
+    "                              private uchar4 privateuchar4d,\n"
+    "                              private short4 privateshort4d,\n"
+    "                              private ushort4 privateushort4d,\n"
+    "                              private int4 privateint4d,\n"
+    "                              private uint4 privateuint4d,\n"
+    "                              private float4 privatefloat4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector4_d(private const char4 privateconstchar4d,\n"
+    "                                    private const uchar4 privateconstuchar4d,\n"
+    "                                    private const short4 privateconstshort4d,\n"
+    "                                    private const ushort4 privateconstushort4d,\n"
+    "                                    private const int4 privateconstint4d,\n"
+    "                                    private const uint4 privateconstuint4d,\n"
+    "                                    private const float4 privateconstfloat4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_p0(constant char8*constantchar8p,\n"
+    "                               constant uchar8 *constantuchar8p,\n"
+    "                               constant short8* constantshort8p,\n"
+    "                               constant ushort8 * constantushort8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_p1(constant int8*constantint8p,\n"
+    "                               constant uint8 *constantuint8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_p2(constant float8*constantfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_restrict_p0(constant char8 *restrict constantchar8restrictp,\n"
+    "                                        constant uchar8* restrict constantuchar8restrictp,\n"
+    "                                        constant short8 * restrict constantshort8restrictp,\n"
+    "                                        constant ushort8*restrict constantushort8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_restrict_p1(constant int8 *restrict constantint8restrictp,\n"
+    "                                        constant uint8* restrict constantuint8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_restrict_p2(constant float8 *restrict constantfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector8_p(global char8*globalchar8p,\n"
+    "                             global uchar8 *globaluchar8p,\n"
+    "                             global short8* globalshort8p,\n"
+    "                             global ushort8 * globalushort8p,\n"
+    "                             global int8*globalint8p,\n"
+    "                             global uint8 *globaluint8p,\n"
+    "                             global float8*globalfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector8_restrict_p(global char8 *restrict globalchar8restrictp,\n"
+    "                                      global uchar8* restrict globaluchar8restrictp,\n"
+    "                                      global short8 * restrict globalshort8restrictp,\n"
+    "                                      global ushort8*restrict globalushort8restrictp,\n"
+    "                                      global int8 *restrict globalint8restrictp,\n"
+    "                                      global uint8* restrict globaluint8restrictp,\n"
+    "                                      global float8 *restrict globalfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector8_p(global const char8* globalconstchar8p,\n"
+    "                                   global const uchar8 * globalconstuchar8p,\n"
+    "                                   global const short8*globalconstshort8p,\n"
+    "                                   global const ushort8 *globalconstushort8p,\n"
+    "                                   global const int8* globalconstint8p,\n"
+    "                                   global const uint8 * globalconstuint8p,\n"
+    "                                   global const float8* globalconstfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector8_restrict_p(global const char8 * restrict globalconstchar8restrictp,\n"
+    "                                            global const uchar8*restrict globalconstuchar8restrictp,\n"
+    "                                            global const short8 *restrict globalconstshort8restrictp,\n"
+    "                                            global const ushort8* restrict globalconstushort8restrictp,\n"
+    "                                            global const int8 * restrict globalconstint8restrictp,\n"
+    "                                            global const uint8*restrict globalconstuint8restrictp,\n"
+    "                                            global const float8 * restrict globalconstfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector8_p(global volatile char8*globalvolatilechar8p,\n"
+    "                                      global volatile uchar8 *globalvolatileuchar8p,\n"
+    "                                      global volatile short8* globalvolatileshort8p,\n"
+    "                                      global volatile ushort8 * globalvolatileushort8p,\n"
+    "                                      global volatile int8*globalvolatileint8p,\n"
+    "                                      global volatile uint8 *globalvolatileuint8p,\n"
+    "                                      global volatile float8*globalvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector8_restrict_p(global volatile char8 *restrict globalvolatilechar8restrictp,\n"
+    "                                               global volatile uchar8* restrict globalvolatileuchar8restrictp,\n"
+    "                                               global volatile short8 * restrict globalvolatileshort8restrictp,\n"
+    "                                               global volatile ushort8*restrict globalvolatileushort8restrictp,\n"
+    "                                               global volatile int8 *restrict globalvolatileint8restrictp,\n"
+    "                                               global volatile uint8* restrict globalvolatileuint8restrictp,\n"
+    "                                               global volatile float8 *restrict globalvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector8_p(global const volatile char8* globalconstvolatilechar8p,\n"
+    "                                            global const volatile uchar8 * globalconstvolatileuchar8p,\n"
+    "                                            global const volatile short8*globalconstvolatileshort8p,\n"
+    "                                            global const volatile ushort8 *globalconstvolatileushort8p,\n"
+    "                                            global const volatile int8* globalconstvolatileint8p,\n"
+    "                                            global const volatile uint8 * globalconstvolatileuint8p,\n"
+    "                                            global const volatile float8* globalconstvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector8_restrict_p(global const volatile char8 * restrict globalconstvolatilechar8restrictp,\n"
+    "                                                     global const volatile uchar8*restrict globalconstvolatileuchar8restrictp,\n"
+    "                                                     global const volatile short8 *restrict globalconstvolatileshort8restrictp,\n"
+    "                                                     global const volatile ushort8* restrict globalconstvolatileushort8restrictp,\n"
+    "                                                     global const volatile int8 * restrict globalconstvolatileint8restrictp,\n"
+    "                                                     global const volatile uint8*restrict globalconstvolatileuint8restrictp,\n"
+    "                                                     global const volatile float8 * restrict globalconstvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector8_p(local char8*localchar8p,\n"
+    "                            local uchar8 *localuchar8p,\n"
+    "                            local short8* localshort8p,\n"
+    "                            local ushort8 * localushort8p,\n"
+    "                            local int8*localint8p,\n"
+    "                            local uint8 *localuint8p,\n"
+    "                            local float8*localfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector8_restrict_p(local char8 *restrict localchar8restrictp,\n"
+    "                                     local uchar8* restrict localuchar8restrictp,\n"
+    "                                     local short8 * restrict localshort8restrictp,\n"
+    "                                     local ushort8*restrict localushort8restrictp,\n"
+    "                                     local int8 *restrict localint8restrictp,\n"
+    "                                     local uint8* restrict localuint8restrictp,\n"
+    "                                     local float8 *restrict localfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector8_p(local const char8* localconstchar8p,\n"
+    "                                  local const uchar8 * localconstuchar8p,\n"
+    "                                  local const short8*localconstshort8p,\n"
+    "                                  local const ushort8 *localconstushort8p,\n"
+    "                                  local const int8* localconstint8p,\n"
+    "                                  local const uint8 * localconstuint8p,\n"
+    "                                  local const float8* localconstfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector8_restrict_p(local const char8 * restrict localconstchar8restrictp,\n"
+    "                                           local const uchar8*restrict localconstuchar8restrictp,\n"
+    "                                           local const short8 *restrict localconstshort8restrictp,\n"
+    "                                           local const ushort8* restrict localconstushort8restrictp,\n"
+    "                                           local const int8 * restrict localconstint8restrictp,\n"
+    "                                           local const uint8*restrict localconstuint8restrictp,\n"
+    "                                           local const float8 * restrict localconstfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector8_p(local volatile char8*localvolatilechar8p,\n"
+    "                                     local volatile uchar8 *localvolatileuchar8p,\n"
+    "                                     local volatile short8* localvolatileshort8p,\n"
+    "                                     local volatile ushort8 * localvolatileushort8p,\n"
+    "                                     local volatile int8*localvolatileint8p,\n"
+    "                                     local volatile uint8 *localvolatileuint8p,\n"
+    "                                     local volatile float8*localvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector8_restrict_p(local volatile char8 *restrict localvolatilechar8restrictp,\n"
+    "                                              local volatile uchar8* restrict localvolatileuchar8restrictp,\n"
+    "                                              local volatile short8 * restrict localvolatileshort8restrictp,\n"
+    "                                              local volatile ushort8*restrict localvolatileushort8restrictp,\n"
+    "                                              local volatile int8 *restrict localvolatileint8restrictp,\n"
+    "                                              local volatile uint8* restrict localvolatileuint8restrictp,\n"
+    "                                              local volatile float8 *restrict localvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector8_p(local const volatile char8* localconstvolatilechar8p,\n"
+    "                                           local const volatile uchar8 * localconstvolatileuchar8p,\n"
+    "                                           local const volatile short8*localconstvolatileshort8p,\n"
+    "                                           local const volatile ushort8 *localconstvolatileushort8p,\n"
+    "                                           local const volatile int8* localconstvolatileint8p,\n"
+    "                                           local const volatile uint8 * localconstvolatileuint8p,\n"
+    "                                           local const volatile float8* localconstvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector8_restrict_p(local const volatile char8 * restrict localconstvolatilechar8restrictp,\n"
+    "                                                    local const volatile uchar8*restrict localconstvolatileuchar8restrictp,\n"
+    "                                                    local const volatile short8 *restrict localconstvolatileshort8restrictp,\n"
+    "                                                    local const volatile ushort8* restrict localconstvolatileushort8restrictp,\n"
+    "                                                    local const volatile int8 * restrict localconstvolatileint8restrictp,\n"
+    "                                                    local const volatile uint8*restrict localconstvolatileuint8restrictp,\n"
+    "                                                    local const volatile float8 * restrict localconstvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector8_d(char8 char8d,\n"
+    "                      uchar8 uchar8d,\n"
+    "                      short8 short8d,\n"
+    "                      ushort8 ushort8d,\n"
+    "                      int8 int8d,\n"
+    "                      uint8 uint8d,\n"
+    "                      float8 float8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector8_d(const char8 constchar8d,\n"
+    "                            const uchar8 constuchar8d,\n"
+    "                            const short8 constshort8d,\n"
+    "                            const ushort8 constushort8d,\n"
+    "                            const int8 constint8d,\n"
+    "                            const uint8 constuint8d,\n"
+    "                            const float8 constfloat8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector8_d(private char8 privatechar8d,\n"
+    "                              private uchar8 privateuchar8d,\n"
+    "                              private short8 privateshort8d,\n"
+    "                              private ushort8 privateushort8d,\n"
+    "                              private int8 privateint8d,\n"
+    "                              private uint8 privateuint8d,\n"
+    "                              private float8 privatefloat8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector8_d(private const char8 privateconstchar8d,\n"
+    "                                    private const uchar8 privateconstuchar8d,\n"
+    "                                    private const short8 privateconstshort8d,\n"
+    "                                    private const ushort8 privateconstushort8d,\n"
+    "                                    private const int8 privateconstint8d,\n"
+    "                                    private const uint8 privateconstuint8d,\n"
+    "                                    private const float8 privateconstfloat8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_p0(constant char16*constantchar16p,\n"
+    "                                constant uchar16 *constantuchar16p,\n"
+    "                                constant short16* constantshort16p,\n"
+    "                                constant ushort16 * constantushort16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_p1(constant int16*constantint16p,\n"
+    "                                constant uint16 *constantuint16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_p2(constant float16*constantfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_restrict_p0(constant char16 *restrict constantchar16restrictp,\n"
+    "                                         constant uchar16* restrict constantuchar16restrictp,\n"
+    "                                         constant short16 * restrict constantshort16restrictp,\n"
+    "                                         constant ushort16*restrict constantushort16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_restrict_p1(constant int16 *restrict constantint16restrictp,\n"
+    "                                         constant uint16* restrict constantuint16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_restrict_p2(constant float16 *restrict constantfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector16_p(global char16*globalchar16p,\n"
+    "                              global uchar16 *globaluchar16p,\n"
+    "                              global short16* globalshort16p,\n"
+    "                              global ushort16 * globalushort16p,\n"
+    "                              global int16*globalint16p,\n"
+    "                              global uint16 *globaluint16p,\n"
+    "                              global float16*globalfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector16_restrict_p(global char16 *restrict globalchar16restrictp,\n"
+    "                                       global uchar16* restrict globaluchar16restrictp,\n"
+    "                                       global short16 * restrict globalshort16restrictp,\n"
+    "                                       global ushort16*restrict globalushort16restrictp,\n"
+    "                                       global int16 *restrict globalint16restrictp,\n"
+    "                                       global uint16* restrict globaluint16restrictp,\n"
+    "                                       global float16 *restrict globalfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector16_p(global const char16* globalconstchar16p,\n"
+    "                                    global const uchar16 * globalconstuchar16p,\n"
+    "                                    global const short16*globalconstshort16p,\n"
+    "                                    global const ushort16 *globalconstushort16p,\n"
+    "                                    global const int16* globalconstint16p,\n"
+    "                                    global const uint16 * globalconstuint16p,\n"
+    "                                    global const float16* globalconstfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector16_restrict_p(global const char16 * restrict globalconstchar16restrictp,\n"
+    "                                             global const uchar16*restrict globalconstuchar16restrictp,\n"
+    "                                             global const short16 *restrict globalconstshort16restrictp,\n"
+    "                                             global const ushort16* restrict globalconstushort16restrictp,\n"
+    "                                             global const int16 * restrict globalconstint16restrictp,\n"
+    "                                             global const uint16*restrict globalconstuint16restrictp,\n"
+    "                                             global const float16 * restrict globalconstfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector16_p(global volatile char16*globalvolatilechar16p,\n"
+    "                                       global volatile uchar16 *globalvolatileuchar16p,\n"
+    "                                       global volatile short16* globalvolatileshort16p,\n"
+    "                                       global volatile ushort16 * globalvolatileushort16p,\n"
+    "                                       global volatile int16*globalvolatileint16p,\n"
+    "                                       global volatile uint16 *globalvolatileuint16p,\n"
+    "                                       global volatile float16*globalvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector16_restrict_p(global volatile char16 *restrict globalvolatilechar16restrictp,\n"
+    "                                                global volatile uchar16* restrict globalvolatileuchar16restrictp,\n"
+    "                                                global volatile short16 * restrict globalvolatileshort16restrictp,\n"
+    "                                                global volatile ushort16*restrict globalvolatileushort16restrictp,\n"
+    "                                                global volatile int16 *restrict globalvolatileint16restrictp,\n"
+    "                                                global volatile uint16* restrict globalvolatileuint16restrictp,\n"
+    "                                                global volatile float16 *restrict globalvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector16_p(global const volatile char16* globalconstvolatilechar16p,\n"
+    "                                             global const volatile uchar16 * globalconstvolatileuchar16p,\n"
+    "                                             global const volatile short16*globalconstvolatileshort16p,\n"
+    "                                             global const volatile ushort16 *globalconstvolatileushort16p,\n"
+    "                                             global const volatile int16* globalconstvolatileint16p,\n"
+    "                                             global const volatile uint16 * globalconstvolatileuint16p,\n"
+    "                                             global const volatile float16* globalconstvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector16_restrict_p(global const volatile char16 * restrict globalconstvolatilechar16restrictp,\n"
+    "                                                      global const volatile uchar16*restrict globalconstvolatileuchar16restrictp,\n"
+    "                                                      global const volatile short16 *restrict globalconstvolatileshort16restrictp,\n"
+    "                                                      global const volatile ushort16* restrict globalconstvolatileushort16restrictp,\n"
+    "                                                      global const volatile int16 * restrict globalconstvolatileint16restrictp,\n"
+    "                                                      global const volatile uint16*restrict globalconstvolatileuint16restrictp,\n"
+    "                                                      global const volatile float16 * restrict globalconstvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector16_p(local char16*localchar16p,\n"
+    "                             local uchar16 *localuchar16p,\n"
+    "                             local short16* localshort16p,\n"
+    "                             local ushort16 * localushort16p,\n"
+    "                             local int16*localint16p,\n"
+    "                             local uint16 *localuint16p,\n"
+    "                             local float16*localfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector16_restrict_p(local char16 *restrict localchar16restrictp,\n"
+    "                                      local uchar16* restrict localuchar16restrictp,\n"
+    "                                      local short16 * restrict localshort16restrictp,\n"
+    "                                      local ushort16*restrict localushort16restrictp,\n"
+    "                                      local int16 *restrict localint16restrictp,\n"
+    "                                      local uint16* restrict localuint16restrictp,\n"
+    "                                      local float16 *restrict localfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector16_p(local const char16* localconstchar16p,\n"
+    "                                   local const uchar16 * localconstuchar16p,\n"
+    "                                   local const short16*localconstshort16p,\n"
+    "                                   local const ushort16 *localconstushort16p,\n"
+    "                                   local const int16* localconstint16p,\n"
+    "                                   local const uint16 * localconstuint16p,\n"
+    "                                   local const float16* localconstfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector16_restrict_p(local const char16 * restrict localconstchar16restrictp,\n"
+    "                                            local const uchar16*restrict localconstuchar16restrictp,\n"
+    "                                            local const short16 *restrict localconstshort16restrictp,\n"
+    "                                            local const ushort16* restrict localconstushort16restrictp,\n"
+    "                                            local const int16 * restrict localconstint16restrictp,\n"
+    "                                            local const uint16*restrict localconstuint16restrictp,\n"
+    "                                            local const float16 * restrict localconstfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector16_p(local volatile char16*localvolatilechar16p,\n"
+    "                                      local volatile uchar16 *localvolatileuchar16p,\n"
+    "                                      local volatile short16* localvolatileshort16p,\n"
+    "                                      local volatile ushort16 * localvolatileushort16p,\n"
+    "                                      local volatile int16*localvolatileint16p,\n"
+    "                                      local volatile uint16 *localvolatileuint16p,\n"
+    "                                      local volatile float16*localvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector16_restrict_p(local volatile char16 *restrict localvolatilechar16restrictp,\n"
+    "                                               local volatile uchar16* restrict localvolatileuchar16restrictp,\n"
+    "                                               local volatile short16 * restrict localvolatileshort16restrictp,\n"
+    "                                               local volatile ushort16*restrict localvolatileushort16restrictp,\n"
+    "                                               local volatile int16 *restrict localvolatileint16restrictp,\n"
+    "                                               local volatile uint16* restrict localvolatileuint16restrictp,\n"
+    "                                               local volatile float16 *restrict localvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector16_p(local const volatile char16* localconstvolatilechar16p,\n"
+    "                                            local const volatile uchar16 * localconstvolatileuchar16p,\n"
+    "                                            local const volatile short16*localconstvolatileshort16p,\n"
+    "                                            local const volatile ushort16 *localconstvolatileushort16p,\n"
+    "                                            local const volatile int16* localconstvolatileint16p,\n"
+    "                                            local const volatile uint16 * localconstvolatileuint16p,\n"
+    "                                            local const volatile float16* localconstvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector16_restrict_p(local const volatile char16 * restrict localconstvolatilechar16restrictp,\n"
+    "                                                     local const volatile uchar16*restrict localconstvolatileuchar16restrictp,\n"
+    "                                                     local const volatile short16 *restrict localconstvolatileshort16restrictp,\n"
+    "                                                     local const volatile ushort16* restrict localconstvolatileushort16restrictp,\n"
+    "                                                     local const volatile int16 * restrict localconstvolatileint16restrictp,\n"
+    "                                                     local const volatile uint16*restrict localconstvolatileuint16restrictp,\n"
+    "                                                     local const volatile float16 * restrict localconstvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector16_d(char16 char16d,\n"
+    "                       uchar16 uchar16d,\n"
+    "                       short16 short16d,\n"
+    "                       ushort16 ushort16d,\n"
+    "                       int16 int16d,\n"
+    "                       uint16 uint16d,\n"
+    "                       float16 float16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector16_d(const char16 constchar16d,\n"
+    "                             const uchar16 constuchar16d,\n"
+    "                             const short16 constshort16d,\n"
+    "                             const ushort16 constushort16d,\n"
+    "                             const int16 constint16d,\n"
+    "                             const uint16 constuint16d,\n"
+    "                             const float16 constfloat16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector16_d(private char16 privatechar16d,\n"
+    "                               private uchar16 privateuchar16d,\n"
+    "                               private short16 privateshort16d,\n"
+    "                               private ushort16 privateushort16d,\n"
+    "                               private int16 privateint16d,\n"
+    "                               private uint16 privateuint16d,\n"
+    "                               private float16 privatefloat16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector16_d(private const char16 privateconstchar16d,\n"
+    "                                     private const uchar16 privateconstuchar16d,\n"
+    "                                     private const short16 privateconstshort16d,\n"
+    "                                     private const ushort16 privateconstushort16d,\n"
+    "                                     private const int16 privateconstint16d,\n"
+    "                                     private const uint16 privateconstuint16d,\n"
+    "                                     private const float16 privateconstfloat16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_p0(constant typedef_type*constanttypedef_typep,\n"
+    "                               constant struct struct_type *constantstructstruct_typep,\n"
+    "                               constant typedef_struct_type* constanttypedef_struct_typep,\n"
+    "                               constant union union_type * constantunionunion_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_p1(constant typedef_union_type*constanttypedef_union_typep,\n"
+    "                               constant enum enum_type *constantenumenum_typep,\n"
+    "                               constant typedef_enum_type* constanttypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_restrict_p0(constant typedef_type * restrict constanttypedef_typerestrictp,\n"
+    "                                        constant struct struct_type*restrict constantstructstruct_typerestrictp,\n"
+    "                                        constant typedef_struct_type *restrict constanttypedef_struct_typerestrictp,\n"
+    "                                        constant union union_type* restrict constantunionunion_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_restrict_p1(constant typedef_union_type * restrict constanttypedef_union_typerestrictp,\n"
+    "                                        constant enum enum_type*restrict constantenumenum_typerestrictp,\n"
+    "                                        constant typedef_enum_type *restrict constanttypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_derived_p(global typedef_type*globaltypedef_typep,\n"
+    "                             global struct struct_type *globalstructstruct_typep,\n"
+    "                             global typedef_struct_type* globaltypedef_struct_typep,\n"
+    "                             global union union_type * globalunionunion_typep,\n"
+    "                             global typedef_union_type*globaltypedef_union_typep,\n"
+    "                             global enum enum_type *globalenumenum_typep,\n"
+    "                             global typedef_enum_type* globaltypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_derived_restrict_p(global typedef_type * restrict globaltypedef_typerestrictp,\n"
+    "                                      global struct struct_type*restrict globalstructstruct_typerestrictp,\n"
+    "                                      global typedef_struct_type *restrict globaltypedef_struct_typerestrictp,\n"
+    "                                      global union union_type* restrict globalunionunion_typerestrictp,\n"
+    "                                      global typedef_union_type * restrict globaltypedef_union_typerestrictp,\n"
+    "                                      global enum enum_type*restrict globalenumenum_typerestrictp,\n"
+    "                                      global typedef_enum_type *restrict globaltypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_derived_p(global const typedef_type* globalconsttypedef_typep,\n"
+    "                                   global const struct struct_type * globalconststructstruct_typep,\n"
+    "                                   global const typedef_struct_type*globalconsttypedef_struct_typep,\n"
+    "                                   global const union union_type *globalconstunionunion_typep,\n"
+    "                                   global const typedef_union_type* globalconsttypedef_union_typep,\n"
+    "                                   global const enum enum_type * globalconstenumenum_typep,\n"
+    "                                   global const typedef_enum_type*globalconsttypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_derived_restrict_p(global const typedef_type *restrict globalconsttypedef_typerestrictp,\n"
+    "                                            global const struct struct_type* restrict globalconststructstruct_typerestrictp,\n"
+    "                                            global const typedef_struct_type * restrict globalconsttypedef_struct_typerestrictp,\n"
+    "                                            global const union union_type*restrict globalconstunionunion_typerestrictp,\n"
+    "                                            global const typedef_union_type *restrict globalconsttypedef_union_typerestrictp,\n"
+    "                                            global const enum enum_type* restrict globalconstenumenum_typerestrictp,\n"
+    "                                            global const typedef_enum_type * restrict globalconsttypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_derived_p(global volatile typedef_type*globalvolatiletypedef_typep,\n"
+    "                                      global volatile struct struct_type *globalvolatilestructstruct_typep,\n"
+    "                                      global volatile typedef_struct_type* globalvolatiletypedef_struct_typep,\n"
+    "                                      global volatile union union_type * globalvolatileunionunion_typep,\n"
+    "                                      global volatile typedef_union_type*globalvolatiletypedef_union_typep,\n"
+    "                                      global volatile enum enum_type *globalvolatileenumenum_typep,\n"
+    "                                      global volatile typedef_enum_type* globalvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_derived_restrict_p(global volatile typedef_type * restrict globalvolatiletypedef_typerestrictp,\n"
+    "                                               global volatile struct struct_type*restrict globalvolatilestructstruct_typerestrictp,\n"
+    "                                               global volatile typedef_struct_type *restrict globalvolatiletypedef_struct_typerestrictp,\n"
+    "                                               global volatile union union_type* restrict globalvolatileunionunion_typerestrictp,\n"
+    "                                               global volatile typedef_union_type * restrict globalvolatiletypedef_union_typerestrictp,\n"
+    "                                               global volatile enum enum_type*restrict globalvolatileenumenum_typerestrictp,\n"
+    "                                               global volatile typedef_enum_type *restrict globalvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_derived_p(global const volatile typedef_type* globalconstvolatiletypedef_typep,\n"
+    "                                            global const volatile struct struct_type * globalconstvolatilestructstruct_typep,\n"
+    "                                            global const volatile typedef_struct_type*globalconstvolatiletypedef_struct_typep,\n"
+    "                                            global const volatile union union_type *globalconstvolatileunionunion_typep,\n"
+    "                                            global const volatile typedef_union_type* globalconstvolatiletypedef_union_typep,\n"
+    "                                            global const volatile enum enum_type * globalconstvolatileenumenum_typep,\n"
+    "                                            global const volatile typedef_enum_type*globalconstvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_derived_restrict_p(global const volatile typedef_type *restrict globalconstvolatiletypedef_typerestrictp,\n"
+    "                                                     global const volatile struct struct_type* restrict globalconstvolatilestructstruct_typerestrictp,\n"
+    "                                                     global const volatile typedef_struct_type * restrict globalconstvolatiletypedef_struct_typerestrictp,\n"
+    "                                                     global const volatile union union_type*restrict globalconstvolatileunionunion_typerestrictp,\n"
+    "                                                     global const volatile typedef_union_type *restrict globalconstvolatiletypedef_union_typerestrictp,\n"
+    "                                                     global const volatile enum enum_type* restrict globalconstvolatileenumenum_typerestrictp,\n"
+    "                                                     global const volatile typedef_enum_type * restrict globalconstvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_derived_p(local typedef_type*localtypedef_typep,\n"
+    "                            local struct struct_type *localstructstruct_typep,\n"
+    "                            local typedef_struct_type* localtypedef_struct_typep,\n"
+    "                            local union union_type * localunionunion_typep,\n"
+    "                            local typedef_union_type*localtypedef_union_typep,\n"
+    "                            local enum enum_type *localenumenum_typep,\n"
+    "                            local typedef_enum_type* localtypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_derived_restrict_p(local typedef_type * restrict localtypedef_typerestrictp,\n"
+    "                                     local struct struct_type*restrict localstructstruct_typerestrictp,\n"
+    "                                     local typedef_struct_type *restrict localtypedef_struct_typerestrictp,\n"
+    "                                     local union union_type* restrict localunionunion_typerestrictp,\n"
+    "                                     local typedef_union_type * restrict localtypedef_union_typerestrictp,\n"
+    "                                     local enum enum_type*restrict localenumenum_typerestrictp,\n"
+    "                                     local typedef_enum_type *restrict localtypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_derived_p(local const typedef_type* localconsttypedef_typep,\n"
+    "                                  local const struct struct_type * localconststructstruct_typep,\n"
+    "                                  local const typedef_struct_type*localconsttypedef_struct_typep,\n"
+    "                                  local const union union_type *localconstunionunion_typep,\n"
+    "                                  local const typedef_union_type* localconsttypedef_union_typep,\n"
+    "                                  local const enum enum_type * localconstenumenum_typep,\n"
+    "                                  local const typedef_enum_type*localconsttypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_derived_restrict_p(local const typedef_type *restrict localconsttypedef_typerestrictp,\n"
+    "                                           local const struct struct_type* restrict localconststructstruct_typerestrictp,\n"
+    "                                           local const typedef_struct_type * restrict localconsttypedef_struct_typerestrictp,\n"
+    "                                           local const union union_type*restrict localconstunionunion_typerestrictp,\n"
+    "                                           local const typedef_union_type *restrict localconsttypedef_union_typerestrictp,\n"
+    "                                           local const enum enum_type* restrict localconstenumenum_typerestrictp,\n"
+    "                                           local const typedef_enum_type * restrict localconsttypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_derived_p(local volatile typedef_type*localvolatiletypedef_typep,\n"
+    "                                     local volatile struct struct_type *localvolatilestructstruct_typep,\n"
+    "                                     local volatile typedef_struct_type* localvolatiletypedef_struct_typep,\n"
+    "                                     local volatile union union_type * localvolatileunionunion_typep,\n"
+    "                                     local volatile typedef_union_type*localvolatiletypedef_union_typep,\n"
+    "                                     local volatile enum enum_type *localvolatileenumenum_typep,\n"
+    "                                     local volatile typedef_enum_type* localvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_derived_restrict_p(local volatile typedef_type * restrict localvolatiletypedef_typerestrictp,\n"
+    "                                              local volatile struct struct_type*restrict localvolatilestructstruct_typerestrictp,\n"
+    "                                              local volatile typedef_struct_type *restrict localvolatiletypedef_struct_typerestrictp,\n"
+    "                                              local volatile union union_type* restrict localvolatileunionunion_typerestrictp,\n"
+    "                                              local volatile typedef_union_type * restrict localvolatiletypedef_union_typerestrictp,\n"
+    "                                              local volatile enum enum_type*restrict localvolatileenumenum_typerestrictp,\n"
+    "                                              local volatile typedef_enum_type *restrict localvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_derived_p(local const volatile typedef_type* localconstvolatiletypedef_typep,\n"
+    "                                           local const volatile struct struct_type * localconstvolatilestructstruct_typep,\n"
+    "                                           local const volatile typedef_struct_type*localconstvolatiletypedef_struct_typep,\n"
+    "                                           local const volatile union union_type *localconstvolatileunionunion_typep,\n"
+    "                                           local const volatile typedef_union_type* localconstvolatiletypedef_union_typep,\n"
+    "                                           local const volatile enum enum_type * localconstvolatileenumenum_typep,\n"
+    "                                           local const volatile typedef_enum_type*localconstvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_derived_restrict_p(local const volatile typedef_type *restrict localconstvolatiletypedef_typerestrictp,\n"
+    "                                                    local const volatile struct struct_type* restrict localconstvolatilestructstruct_typerestrictp,\n"
+    "                                                    local const volatile typedef_struct_type * restrict localconstvolatiletypedef_struct_typerestrictp,\n"
+    "                                                    local const volatile union union_type*restrict localconstvolatileunionunion_typerestrictp,\n"
+    "                                                    local const volatile typedef_union_type *restrict localconstvolatiletypedef_union_typerestrictp,\n"
+    "                                                    local const volatile enum enum_type* restrict localconstvolatileenumenum_typerestrictp,\n"
+    "                                                    local const volatile typedef_enum_type * restrict localconstvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void derived_d(typedef_type typedef_typed,\n"
+    "                      struct struct_type structstruct_typed,\n"
+    "                      typedef_struct_type typedef_struct_typed,\n"
+    "                      union union_type unionunion_typed,\n"
+    "                      typedef_union_type typedef_union_typed,\n"
+    "                      enum enum_type enumenum_typed,\n"
+    "                      typedef_enum_type typedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_derived_d(const typedef_type consttypedef_typed,\n"
+    "                            const struct struct_type conststructstruct_typed,\n"
+    "                            const typedef_struct_type consttypedef_struct_typed,\n"
+    "                            const union union_type constunionunion_typed,\n"
+    "                            const typedef_union_type consttypedef_union_typed,\n"
+    "                            const enum enum_type constenumenum_typed,\n"
+    "                            const typedef_enum_type consttypedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_derived_d(private typedef_type privatetypedef_typed,\n"
+    "                              private struct struct_type privatestructstruct_typed,\n"
+    "                              private typedef_struct_type privatetypedef_struct_typed,\n"
+    "                              private union union_type privateunionunion_typed,\n"
+    "                              private typedef_union_type privatetypedef_union_typed,\n"
+    "                              private enum enum_type privateenumenum_typed,\n"
+    "                              private typedef_enum_type privatetypedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_derived_d(private const typedef_type privateconsttypedef_typed,\n"
+    "                                    private const struct struct_type privateconststructstruct_typed,\n"
+    "                                    private const typedef_struct_type privateconsttypedef_struct_typed,\n"
+    "                                    private const union union_type privateconstunionunion_typed,\n"
+    "                                    private const typedef_union_type privateconsttypedef_union_typed,\n"
+    "                                    private const enum enum_type privateconstenumenum_typed,\n"
+    "                                    private const typedef_enum_type privateconsttypedef_enum_typed)\n"
+    "{}\n",
+    "\n"
 };
 
-static const std::vector<std::string> image_arguments = {
-    "image2d_t", "image3d_t",        "image2d_array_t",
-    "image1d_t", "image1d_buffer_t", "image1d_array_t"
+const char * required_arg_info[][72] = {
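+    // The minimum value of CL_DEVICE_MAX_CONSTANT_ARGS is 4 (embedded profile), so the constant-address-space kernels are split into variants taking at most 4 arguments each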
+    {
+        "constant_scalar_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "constantvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "constantcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantunsignedcharp",
+        NULL
+  },
+  {
+        "constant_scalar_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "constantshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "constantintp",
+        NULL
+  },
+  {
+        "constant_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantunsignedintp",
+        NULL
+  },
+  {
+        "constant_scalar_p3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "constantfloatp",
+        NULL
+    },
+    {
+        "constant_scalar_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "constantvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "constantcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantunsignedcharrestrictp",
+        NULL
+  },
+  {
+        "constant_scalar_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "constantshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "constantintrestrictp",
+        NULL
+  },
+  {
+        "constant_scalar_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantunsignedintrestrictp",
+        NULL
+  },
+  {
+        "constant_scalar_restrict_p3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "constantfloatrestrictp",
+        NULL
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "global_scalar_p", /* 11 records, all global address space, no access qualifier, CL_KERNEL_ARG_TYPE_NONE; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "globalvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "globalcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "globalshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "globalintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globaluintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globalunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "globalfloatp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "global_scalar_restrict_p", /* 11 records, all global address space, no access qualifier, restrict only; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globaluintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalfloatrestrictp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "global_const_scalar_p", /* 11 records, all global address space, no access qualifier, const only; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "globalconstvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "globalconstcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "globalconstshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "globalconstintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "globalconstfloatp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "global_const_scalar_restrict_p", /* 11 records, all global address space, no access qualifier, const|restrict; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstfloatrestrictp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "global_volatile_scalar_p", /* 11 records, all global address space, no access qualifier, volatile only; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalvolatilefloatp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "global_volatile_scalar_restrict_p", /* 11 records, all global address space, no access qualifier, volatile|restrict; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalvolatilefloatrestrictp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "global_const_volatile_scalar_p", /* 11 records, all global address space, no access qualifier, const|volatile; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalconstvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalconstvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalconstvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalconstvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalconstvolatilefloatp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "global_const_volatile_scalar_restrict_p", /* 11 records, all global address space, no access qualifier, const|volatile|restrict; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstvolatilefloatrestrictp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "local_scalar_p", /* 11 records, all local address space, no access qualifier, CL_KERNEL_ARG_TYPE_NONE; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "localvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "localcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "localshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "localintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "localfloatp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "local_scalar_restrict_p", /* 11 records, all local address space, no access qualifier, restrict only; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localfloatrestrictp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "local_const_scalar_p", /* 11 records, all local address space, no access qualifier, const only; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "localconstvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "localconstcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "localconstshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "localconstintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "localconstfloatp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "local_const_scalar_restrict_p", /* 11 records, all local address space, no access qualifier, const|restrict; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstfloatrestrictp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "local_volatile_scalar_p", /* 11 records, all local address space, no access qualifier, volatile only; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localvolatilefloatp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "local_volatile_scalar_restrict_p", /* 11 records, all local address space, no access qualifier, volatile|restrict; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localvolatilefloatrestrictp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "local_const_volatile_scalar_p", /* 11 records, all local address space, no access qualifier, const|volatile; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localconstvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localconstvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localconstvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localconstvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localconstvolatilefloatp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    { /* Table entry layout: entry name, then records of five fields each (address qualifier, access qualifier, type-qualifier bits, type string, and a final string that is presumably the argument name), closed by NULL. Qualifier constants are cast to const char * so they sit in the same initializer list as the strings. */
+        "local_const_volatile_scalar_restrict_p", /* 11 records, all local address space, no access qualifier, const|volatile|restrict; the uchar, ushort and uint pointer types each appear twice with differing final strings (u... vs unsigned...) */
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstvolatilefloatrestrictp",
+        NULL /* NULL ends the entry; presumably the sentinel its consumer stops on -- confirm */
+    },
+    {
+        "scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "chard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "uchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "unsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "shortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "ushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "unsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "intd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "uintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "unsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "floatd",
+        NULL
+    },
+    {
+        "const_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "constchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constuchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constunsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "constshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constunsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "constintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constuintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constunsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "constfloatd",
+        NULL
+    },
+    {
+        "private_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privatechard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateuchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateunsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateunsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateuintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateunsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privatefloatd",
+        NULL
+    },
+    {
+        "private_const_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privateconstchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstuchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstunsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateconstshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstunsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateconstintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstuintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstunsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privateconstfloatd",
+        NULL
+    },
+    {
+        "constant_vector2_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "constantchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "constantuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "constantshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "constantushort2p",
+        NULL
+    },
+    {
+        "constant_vector2_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "constantint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "constantuint2p",
+        NULL
+    },
+    {
+        "constant_vector2_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "constantfloat2p",
+        NULL
+    },
+    {
+        "constant_vector2_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "constantchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "constantuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "constantshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "constantushort2restrictp",
+        NULL
+    },
+    {
+        "constant_vector2_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "constantint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "constantuint2restrictp",
+        NULL
+    },
+    {
+        "constant_vector2_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "constantfloat2restrictp",
+        NULL
+    },
+    {
+        "global_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "globalchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "globaluchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "globalshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "globalushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "globalint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "globaluint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "globalfloat2p",
+        NULL
+    },
+    {
+        "global_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globaluchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globaluint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalfloat2restrictp",
+        NULL
+    },
+    {
+        "global_const_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "globalconstchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "globalconstuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "globalconstshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "globalconstushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "globalconstint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "globalconstuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "globalconstfloat2p",
+        NULL
+    },
+    {
+        "global_const_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstfloat2restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalvolatilefloat2p",
+        NULL
+    },
+    {
+        "global_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalconstvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalconstvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalconstvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalconstvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalconstvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalconstvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalconstvolatilefloat2p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "local_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "localchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "localuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "localshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "localushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "localint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "localuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "localfloat2p",
+        NULL
+    },
+    {
+        "local_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localfloat2restrictp",
+        NULL
+    },
+    {
+        "local_const_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "localconstchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "localconstuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "localconstshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "localconstushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "localconstint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "localconstuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "localconstfloat2p",
+        NULL
+    },
+    {
+        "local_const_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstfloat2restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localvolatilefloat2p",
+        NULL
+    },
+    {
+        "local_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localconstvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localconstvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localconstvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localconstvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localconstvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localconstvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localconstvolatilefloat2p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "char2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "uchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "short2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "ushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "int2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "uint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "float2d",
+        NULL
+    },
+    {
+        "const_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "constchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "constuchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "constshort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "constushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "constint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "constuint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "constfloat2d",
+        NULL
+    },
+    {
+        "private_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privatechar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateuchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateshort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateuint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privatefloat2d",
+        NULL
+    },
+    {
+        "private_const_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privateconstchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateconstuchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateconstshort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateconstushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateconstint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateconstuint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privateconstfloat2d",
+        NULL
+    },
+    {
+        "constant_vector3_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "constantchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "constantuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "constantshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "constantushort3p",
+        NULL
+    },
+    {
+        "constant_vector3_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "constantint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "constantuint3p",
+        NULL
+    },
+    {
+        "constant_vector3_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "constantfloat3p",
+        NULL
+    },
+    {
+        "constant_vector3_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "constantchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "constantuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "constantshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "constantushort3restrictp",
+        NULL
+    },
+    {
+        "constant_vector3_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "constantint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "constantuint3restrictp",
+        NULL
+    },
+    {
+        "constant_vector3_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "constantfloat3restrictp",
+        NULL
+    },
+    {
+        "global_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "globalchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "globaluchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "globalshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "globalushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "globalint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "globaluint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "globalfloat3p",
+        NULL
+    },
+    {
+        "global_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globaluchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globaluint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalfloat3restrictp",
+        NULL
+    },
+    {
+        "global_const_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "globalconstchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "globalconstuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "globalconstshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "globalconstushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "globalconstint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "globalconstuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "globalconstfloat3p",
+        NULL
+    },
+    {
+        "global_const_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstfloat3restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalvolatilefloat3p",
+        NULL
+    },
+    {
+        "global_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalconstvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalconstvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalconstvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalconstvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalconstvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalconstvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalconstvolatilefloat3p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "local_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "localchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "localuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "localshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "localushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "localint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "localuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "localfloat3p",
+        NULL
+    },
+    {
+        "local_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localfloat3restrictp",
+        NULL
+    },
+    {
+        "local_const_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "localconstchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "localconstuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "localconstshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "localconstushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "localconstint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "localconstuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "localconstfloat3p",
+        NULL
+    },
+    {
+        "local_const_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstfloat3restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localvolatilefloat3p",
+        NULL
+    },
+    {
+        "local_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localconstvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localconstvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localconstvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localconstvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localconstvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localconstvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localconstvolatilefloat3p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "char3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "uchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "short3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "ushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "int3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "uint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "float3d",
+        NULL
+    },
+    {
+        "const_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "constchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "constuchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "constshort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "constushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "constint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "constuint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "constfloat3d",
+        NULL
+    },
+    {
+        "private_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privatechar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateuchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateshort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateuint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privatefloat3d",
+        NULL
+    },
+    {
+        "private_const_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privateconstchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateconstuchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateconstshort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateconstushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateconstint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateconstuint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privateconstfloat3d",
+        NULL
+    },
+    {
+        "constant_vector4_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "constantchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "constantuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "constantshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "constantushort4p",
+        NULL
+    },
+    {
+        "constant_vector4_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "constantint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "constantuint4p",
+        NULL
+    },
+    {
+        "constant_vector4_p2",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "constantfloat4p",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "constantchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "constantuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "constantshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "constantushort4restrictp",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "constantint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "constantuint4restrictp",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "constantfloat4restrictp",
+        NULL
+    },
+    {
+        "global_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "globalchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "globaluchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "globalshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "globalushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "globalint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "globaluint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "globalfloat4p",
+        NULL
+    },
+    {
+        "global_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globaluchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globaluint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalfloat4restrictp",
+        NULL
+    },
+    {
+        "global_const_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "globalconstchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "globalconstuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "globalconstshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "globalconstushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "globalconstint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "globalconstuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "globalconstfloat4p",
+        NULL
+    },
+    {
+        "global_const_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstfloat4restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalvolatilefloat4p",
+        NULL
+    },
+    {
+        "global_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalconstvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalconstvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalconstvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalconstvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalconstvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalconstvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalconstvolatilefloat4p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "local_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "localchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "localuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "localshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "localushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "localint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "localuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "localfloat4p",
+        NULL
+    },
+    {
+        "local_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localfloat4restrictp",
+        NULL
+    },
+    {
+        "local_const_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "localconstchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "localconstuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "localconstshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "localconstushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "localconstint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "localconstuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "localconstfloat4p",
+        NULL
+    },
+    {
+        "local_const_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstfloat4restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localvolatilefloat4p",
+        NULL
+    },
+    {
+        "local_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localconstvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localconstvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localconstvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localconstvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localconstvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localconstvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localconstvolatilefloat4p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "char4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "uchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "short4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "ushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "int4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "uint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "float4d",
+        NULL
+    },
+    {
+        "const_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "constchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "constuchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "constshort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "constushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "constint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "constuint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "constfloat4d",
+        NULL
+    },
+    {
+        "private_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privatechar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateuchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateshort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateuint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privatefloat4d",
+        NULL
+    },
+    {
+        "private_const_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privateconstchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateconstuchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateconstshort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateconstushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateconstint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateconstuint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privateconstfloat4d",
+        NULL
+    },
+    {
+        "constant_vector8_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "constantchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "constantuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "constantshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "constantushort8p",
+        NULL
+    },
+    {
+        "constant_vector8_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "constantint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "constantuint8p",
+    NULL
+    },
+    {
+        "constant_vector8_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "constantfloat8p",
+        NULL
+    },
+    {
+        "constant_vector8_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "constantchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "constantuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "constantshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "constantushort8restrictp",
+        NULL
+    },
+    {
+        "constant_vector8_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "constantint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "constantuint8restrictp",
+    NULL
+    },
+    {
+        "constant_vector8_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "constantfloat8restrictp",
+        NULL
+    },
+    {
+        "global_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "globalchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "globaluchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "globalshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "globalushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "globalint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "globaluint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "globalfloat8p",
+        NULL
+    },
+    {
+        "global_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globaluchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globaluint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalfloat8restrictp",
+        NULL
+    },
+    {
+        "global_const_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "globalconstchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "globalconstuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "globalconstshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "globalconstushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "globalconstint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "globalconstuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "globalconstfloat8p",
+        NULL
+    },
+    {
+        "global_const_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstfloat8restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalvolatilefloat8p",
+        NULL
+    },
+    {
+        "global_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalconstvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalconstvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalconstvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalconstvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalconstvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalconstvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalconstvolatilefloat8p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "local_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "localchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "localuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "localshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "localushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "localint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "localuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "localfloat8p",
+        NULL
+    },
+    {
+        "local_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localfloat8restrictp",
+        NULL
+    },
+    {
+        "local_const_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "localconstchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "localconstuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "localconstshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "localconstushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "localconstint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "localconstuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "localconstfloat8p",
+        NULL
+    },
+    {
+        "local_const_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstfloat8restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localvolatilefloat8p",
+        NULL
+    },
+    {
+        "local_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localconstvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localconstvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localconstvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localconstvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localconstvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localconstvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localconstvolatilefloat8p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "char8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "uchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "short8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "ushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "int8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "uint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "float8d",
+        NULL
+    },
+    {
+        "const_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "constchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "constuchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "constshort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "constushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "constint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "constuint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "constfloat8d",
+        NULL
+    },
+    {
+        "private_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privatechar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateuchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateshort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateuint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privatefloat8d",
+        NULL
+    },
+    {
+        "private_const_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privateconstchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateconstuchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateconstshort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateconstushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateconstint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateconstuint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privateconstfloat8d",
+        NULL
+    },
+    {
+        "constant_vector16_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "constantchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "constantuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "constantshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "constantushort16p",
+        NULL
+    },
+    {
+        "constant_vector16_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "constantint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "constantuint16p",
+        NULL
+    },
+    {
+        "constant_vector16_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "constantfloat16p",
+        NULL
+    },
+    {
+        "constant_vector16_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "constantchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "constantuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "constantshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "constantushort16restrictp",
+        NULL
+    },
+    {
+        "constant_vector16_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "constantint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "constantuint16restrictp",
+        NULL
+    },
+    {
+        "constant_vector16_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "constantfloat16restrictp",
+        NULL
+    },
+    {
+        "global_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "globalchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "globaluchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "globalshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "globalushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "globalint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "globaluint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "globalfloat16p",
+        NULL
+    },
+    {
+        "global_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globaluchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globaluint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalfloat16restrictp",
+        NULL
+    },
+    {
+        "global_const_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "globalconstchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "globalconstuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "globalconstshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "globalconstushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "globalconstint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "globalconstuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "globalconstfloat16p",
+        NULL
+    },
+    {
+        "global_const_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstfloat16restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalvolatilefloat16p",
+        NULL
+    },
+    {
+        "global_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalconstvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalconstvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalconstvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalconstvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalconstvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalconstvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalconstvolatilefloat16p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "local_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "localchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "localuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "localshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "localushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "localint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "localuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "localfloat16p",
+        NULL
+    },
+    {
+        "local_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localfloat16restrictp",
+        NULL
+    },
+    {
+        "local_const_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "localconstchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "localconstuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "localconstshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "localconstushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "localconstint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "localconstuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "localconstfloat16p",
+        NULL
+    },
+    {
+        "local_const_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstfloat16restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localvolatilefloat16p",
+        NULL
+    },
+    {
+        "local_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localconstvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localconstvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localconstvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localconstvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localconstvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localconstvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localconstvolatilefloat16p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "char16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "uchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "short16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "ushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "int16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "uint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "float16d",
+        NULL
+    },
+    {
+        "const_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "constchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "constuchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "constshort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "constushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "constint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "constuint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "constfloat16d",
+        NULL
+    },
+    {
+        "private_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privatechar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateuchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateshort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateuint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privatefloat16d",
+        NULL
+    },
+    {
+        "private_const_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privateconstchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateconstuchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateconstshort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateconstushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateconstint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateconstuint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privateconstfloat16d",
+        NULL
+    },
+    {
+        "constant_derived_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "constanttypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "constantstructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "constanttypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "constantunionunion_typep",
+        NULL
+    },
+    {
+        "constant_derived_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "constanttypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "constantenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "constanttypedef_enum_typep",
+        NULL
+    },
+    {
+        "constant_derived_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "constanttypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "constantstructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "constanttypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "constantunionunion_typerestrictp",
+        NULL
+    },
+    {
+        "constant_derived_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "constanttypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "constantenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "constanttypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "globaltypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "globalstructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "globaltypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", "globalunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "globaltypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "globalenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "globaltypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globaltypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalstructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globaltypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globaltypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globaltypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_const_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "globalconsttypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "globalconststructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "globalconsttypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "globalconstunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "globalconsttypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "globalconstenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "globalconsttypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_const_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconsttypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconststructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconsttypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconsttypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconsttypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalconstvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalconstvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalconstvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalconstvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalconstvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_const_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconstvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconstvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconstvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "localtypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "localstructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "localtypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", "localunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "localtypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "localenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "localtypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localtypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localstructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localtypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localtypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localtypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_const_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "localconsttypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "localconststructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "localconsttypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "localconstunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "localconsttypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "localconstenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "localconsttypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_const_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconsttypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconststructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconsttypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconsttypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconsttypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localconstvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localconstvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localconstvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localconstvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localconstvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localconstvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localconstvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_const_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconstvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconstvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconstvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconstvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconstvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "typedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "structstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "typedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "unionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "typedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "enumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "typedef_enum_typed",
+        NULL
+    },
+    {
+        "const_derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "consttypedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "conststructstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "consttypedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "constunionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "consttypedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "constenumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "consttypedef_enum_typed",
+        NULL
+    },
+    {
+        "private_derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privatetypedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privatestructstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privatetypedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateunionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privatetypedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateenumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privatetypedef_enum_typed",
+        NULL
+    },
+    {
+        "private_const_derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privateconsttypedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privateconststructstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privateconsttypedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateconstunionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privateconsttypedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateconstenumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privateconsttypedef_enum_typed",
+        NULL
+    },
 };
 
-static const std::vector<cl_kernel_arg_access_qualifier> access_qualifiers = {
-    CL_KERNEL_ARG_ACCESS_READ_WRITE, CL_KERNEL_ARG_ACCESS_READ_ONLY,
-    CL_KERNEL_ARG_ACCESS_WRITE_ONLY
+// Support for optional image data type: element 0 is the OpenCL C source of kernel "image_d" (12 image arguments as read_only/write_only pairs of six image types, plus one sampler_t); element 1 is a lone newline string.
+const char * image_kernel_args[] = {
+    "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable \n" // presumably required for the write_only image3d_t argument below -- confirm against the OpenCL C spec
+    "kernel void image_d(read_only image2d_t image2d_td0,\n" // argument names are the type name followed by "d" and the argument index
+    "                    write_only image2d_t image2d_td1,\n"
+    "                    read_only image3d_t image3d_td2,\n"
+    "                    write_only image3d_t image3d_td3,\n"
+    "                    read_only image2d_array_t image2d_array_td4,\n"
+    "                    write_only image2d_array_t image2d_array_td5,\n"
+    "                    read_only image1d_t image1d_td6,\n"
+    "                    write_only image1d_t image1d_td7,\n"
+    "                    read_only image1d_buffer_t image1d_buffer_td8,\n"
+    "                    write_only image1d_buffer_t image1d_buffer_td9,\n"
+    "                    read_only image1d_array_t image1d_array_td10,\n"
+    "                    write_only image1d_array_t image1d_array_td11,\n"
+    "                    sampler_t sampler_td12)\n"
+    "{}\n", // empty kernel body; the comma ends array element 0
+    "\n" // array element 1: just a newline
 };
 
-static const std::vector<cl_kernel_arg_type_qualifier> type_qualifiers = {
-    CL_KERNEL_ARG_TYPE_NONE,
-    CL_KERNEL_ARG_TYPE_CONST,
-    CL_KERNEL_ARG_TYPE_VOLATILE,
-    CL_KERNEL_ARG_TYPE_RESTRICT,
-    CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_VOLATILE,
-    CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_RESTRICT,
-    CL_KERNEL_ARG_TYPE_VOLATILE | CL_KERNEL_ARG_TYPE_RESTRICT,
-    CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_VOLATILE
-        | CL_KERNEL_ARG_TYPE_RESTRICT,
+const char * image_arg_info[][67] = { // one row per kernel; 67 slots = 1 kernel name + 13 arguments * 5 fields + 1 NULL terminator
+    {
+        "image_d", // kernel name; same name as the kernel declared in image_kernel_args
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td0", // fields per argument: address, access and type qualifier constants (cast to const char *), then type name and argument name strings
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td4",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td5",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td6",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td7",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td8",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td9",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td10",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td11",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "sampler_t", "sampler_td12", // the sampler is the only entry listed as private with no access qualifier
+        NULL // end-of-row marker
+    },
 };
 
-static const std::vector<cl_kernel_arg_type_qualifier> pipe_qualifiers = {
-    CL_KERNEL_ARG_TYPE_PIPE,
-    CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_PIPE,
-    CL_KERNEL_ARG_TYPE_VOLATILE | CL_KERNEL_ARG_TYPE_PIPE,
-    CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_VOLATILE
-        | CL_KERNEL_ARG_TYPE_PIPE,
+// Support for optional double data type: each array element is the OpenCL C source of one kernel; there are three kernels (_p, _p2, _d) for scalar double and for each of double2, double3, double4, double8 and double16.
+const char * double_kernel_args[] = { // NOTE(review): no cl_khr_fp64 pragma appears in these sources -- confirm the caller only builds them where double is supported
+    "kernel void double_scalar_p(constant double*constantdoublep,\n" // _p kernels: constant and global pointer arguments with const/volatile/restrict combinations
+    "                            constant double *restrict constantdoublerestrictp,\n"
+    "                            global double*globaldoublep,\n"
+    "                            global double *restrict globaldoublerestrictp,\n"
+    "                            global const double* globalconstdoublep,\n"
+    "                            global const double * restrict globalconstdoublerestrictp,\n"
+    "                            global volatile double*globalvolatiledoublep,\n"
+    "                            global volatile double *restrict globalvolatiledoublerestrictp,\n"
+    "                            global const volatile double* globalconstvolatiledoublep)\n"
+    "{}\n",
+    "\n" // no comma after this literal: it is concatenated with the next kernel's text, so every later element starts with a newline
+    "kernel void double_scalar_p2(global const volatile double * restrict globalconstvolatiledoublerestrictp,\n" // _p2 kernels: one global const volatile restrict pointer, then local pointer arguments
+    "                             local double*localdoublep,\n"
+    "                             local double *restrict localdoublerestrictp,\n"
+    "                             local const double* localconstdoublep,\n"
+    "                             local const double * restrict localconstdoublerestrictp,\n"
+    "                             local volatile double*localvolatiledoublep,\n"
+    "                             local volatile double *restrict localvolatiledoublerestrictp,\n"
+    "                             local const volatile double* localconstvolatiledoublep,\n"
+    "                             local const volatile double * restrict localconstvolatiledoublerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_scalar_d(double doubled,\n" // _d kernels: by-value arguments, with and without the private and const keywords
+    "                            const double constdoubled,\n"
+    "                            private double privatedoubled,\n"
+    "                            private const double privateconstdoubled)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector2_p(constant double2*constantdouble2p,\n"
+    "                             constant double2 *restrict constantdouble2restrictp,\n"
+    "                             global double2*globaldouble2p,\n"
+    "                             global double2 *restrict globaldouble2restrictp,\n"
+    "                             global const double2* globalconstdouble2p,\n"
+    "                             global const double2 * restrict globalconstdouble2restrictp,\n"
+    "                             global volatile double2*globalvolatiledouble2p,\n"
+    "                             global volatile double2 *restrict globalvolatiledouble2restrictp,\n"
+    "                             global const volatile double2* globalconstvolatiledouble2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector2_p2(global const volatile double2 * restrict globalconstvolatiledouble2restrictp,\n"
+    "                              local double2*localdouble2p,\n"
+    "                              local double2 *restrict localdouble2restrictp,\n"
+    "                              local const double2* localconstdouble2p,\n"
+    "                              local const double2 * restrict localconstdouble2restrictp,\n"
+    "                              local volatile double2*localvolatiledouble2p,\n"
+    "                              local volatile double2 *restrict localvolatiledouble2restrictp,\n"
+    "                              local const volatile double2* localconstvolatiledouble2p,\n"
+    "                              local const volatile double2 * restrict localconstvolatiledouble2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector2_d(double2 double2d,\n"
+    "                             const double2 constdouble2d,\n"
+    "                             private double2 privatedouble2d,\n"
+    "                             private const double2 privateconstdouble2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector3_p(constant double3*constantdouble3p,\n"
+    "                             constant double3 *restrict constantdouble3restrictp,\n"
+    "                             global double3*globaldouble3p,\n"
+    "                             global double3 *restrict globaldouble3restrictp,\n"
+    "                             global const double3* globalconstdouble3p,\n"
+    "                             global const double3 * restrict globalconstdouble3restrictp,\n"
+    "                             global volatile double3*globalvolatiledouble3p,\n"
+    "                             global volatile double3 *restrict globalvolatiledouble3restrictp,\n"
+    "                             global const volatile double3* globalconstvolatiledouble3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector3_p2(global const volatile double3 * restrict globalconstvolatiledouble3restrictp,\n"
+    "                              local double3*localdouble3p,\n"
+    "                              local double3 *restrict localdouble3restrictp,\n"
+    "                              local const double3* localconstdouble3p,\n"
+    "                              local const double3 * restrict localconstdouble3restrictp,\n"
+    "                              local volatile double3*localvolatiledouble3p,\n"
+    "                              local volatile double3 *restrict localvolatiledouble3restrictp,\n"
+    "                              local const volatile double3* localconstvolatiledouble3p,\n"
+    "                              local const volatile double3 * restrict localconstvolatiledouble3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector3_d(double3 double3d,\n"
+    "                             const double3 constdouble3d,\n"
+    "                             private double3 privatedouble3d,\n"
+    "                             private const double3 privateconstdouble3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector4_p(constant double4*constantdouble4p,\n"
+    "                             constant double4 *restrict constantdouble4restrictp,\n"
+    "                             global double4*globaldouble4p,\n"
+    "                             global double4 *restrict globaldouble4restrictp,\n"
+    "                             global const double4* globalconstdouble4p,\n"
+    "                             global const double4 * restrict globalconstdouble4restrictp,\n"
+    "                             global volatile double4*globalvolatiledouble4p,\n"
+    "                             global volatile double4 *restrict globalvolatiledouble4restrictp,\n"
+    "                             global const volatile double4* globalconstvolatiledouble4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector4_p2(global const volatile double4 * restrict globalconstvolatiledouble4restrictp,\n"
+    "                              local double4*localdouble4p,\n"
+    "                              local double4 *restrict localdouble4restrictp,\n"
+    "                              local const double4* localconstdouble4p,\n"
+    "                              local const double4 * restrict localconstdouble4restrictp,\n"
+    "                              local volatile double4*localvolatiledouble4p,\n"
+    "                              local volatile double4 *restrict localvolatiledouble4restrictp,\n"
+    "                              local const volatile double4* localconstvolatiledouble4p,\n"
+    "                              local const volatile double4 * restrict localconstvolatiledouble4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector4_d(double4 double4d,\n"
+    "                             const double4 constdouble4d,\n"
+    "                             private double4 privatedouble4d,\n"
+    "                             private const double4 privateconstdouble4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector8_p(constant double8*constantdouble8p,\n"
+    "                             constant double8 *restrict constantdouble8restrictp,\n"
+    "                             global double8*globaldouble8p,\n"
+    "                             global double8 *restrict globaldouble8restrictp,\n"
+    "                             global const double8* globalconstdouble8p,\n"
+    "                             global const double8 * restrict globalconstdouble8restrictp,\n"
+    "                             global volatile double8*globalvolatiledouble8p,\n"
+    "                             global volatile double8 *restrict globalvolatiledouble8restrictp,\n"
+    "                             global const volatile double8* globalconstvolatiledouble8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector8_p2(global const volatile double8 * restrict globalconstvolatiledouble8restrictp,\n"
+    "                              local double8*localdouble8p,\n"
+    "                              local double8 *restrict localdouble8restrictp,\n"
+    "                              local const double8* localconstdouble8p,\n"
+    "                              local const double8 * restrict localconstdouble8restrictp,\n"
+    "                              local volatile double8*localvolatiledouble8p,\n"
+    "                              local volatile double8 *restrict localvolatiledouble8restrictp,\n"
+    "                              local const volatile double8* localconstvolatiledouble8p,\n"
+    "                              local const volatile double8 * restrict localconstvolatiledouble8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector8_d(double8 double8d,\n"
+    "                             const double8 constdouble8d,\n"
+    "                             private double8 privatedouble8d,\n"
+    "                             private const double8 privateconstdouble8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector16_p(constant double16*constantdouble16p,\n"
+    "                              constant double16 *restrict constantdouble16restrictp,\n"
+    "                              global double16*globaldouble16p,\n"
+    "                              global double16 *restrict globaldouble16restrictp,\n"
+    "                              global const double16* globalconstdouble16p,\n"
+    "                              global const double16 * restrict globalconstdouble16restrictp,\n"
+    "                              global volatile double16*globalvolatiledouble16p,\n"
+    "                              global volatile double16 *restrict globalvolatiledouble16restrictp,\n"
+    "                              global const volatile double16* globalconstvolatiledouble16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector16_p2(global const volatile double16 * restrict globalconstvolatiledouble16restrictp,\n"
+    "                               local double16*localdouble16p,\n"
+    "                               local double16 *restrict localdouble16restrictp,\n"
+    "                               local const double16* localconstdouble16p,\n"
+    "                               local const double16 * restrict localconstdouble16restrictp,\n"
+    "                               local volatile double16*localvolatiledouble16p,\n"
+    "                               local volatile double16 *restrict localvolatiledouble16restrictp,\n"
+    "                               local const volatile double16* localconstvolatiledouble16p,\n"
+    "                               local const volatile double16 * restrict localconstvolatiledouble16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector16_d(double16 double16d,\n"
+    "                              const double16 constdouble16d,\n"
+    "                              private double16 privatedouble16d,\n"
+    "                              private const double16 privateconstdouble16d)\n"
+    "{}\n",
+    "\n" // final array element: just a newline, no kernel follows
 };
 
-static std::string
-get_address_qualifier(cl_kernel_arg_address_qualifier address_qualifier)
-{
-    std::string ret;
-    if (address_qualifier == CL_KERNEL_ARG_ADDRESS_GLOBAL)
-        ret = "global";
-    else if (address_qualifier == CL_KERNEL_ARG_ADDRESS_CONSTANT)
-        ret = "constant";
-    else if (address_qualifier == CL_KERNEL_ARG_ADDRESS_LOCAL)
-        ret = "local";
-    else if (address_qualifier == CL_KERNEL_ARG_ADDRESS_PRIVATE)
-        ret = "private";
-    return ret;
-}
-
-static std::string
-get_access_qualifier(cl_kernel_arg_access_qualifier qualifier)
-{
-    std::string ret;
-    if (qualifier == CL_KERNEL_ARG_ACCESS_READ_ONLY) ret = "read_only";
-    if (qualifier == CL_KERNEL_ARG_ACCESS_WRITE_ONLY) ret = "write_only";
-    if (qualifier == CL_KERNEL_ARG_ACCESS_READ_WRITE) ret = "read_write";
-    return ret;
-}
-
-static std::string
-get_type_qualifier_prefix(cl_kernel_arg_type_qualifier type_qualifier)
-{
-    std::string ret;
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_CONST) ret += "const ";
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_VOLATILE) ret += "volatile ";
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_PIPE) ret += "pipe ";
-    return ret;
-}
-
-static std::string
-get_type_qualifier_postfix(cl_kernel_arg_type_qualifier type_qualifier)
-{
-    std::string ret;
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_RESTRICT) ret = "restrict";
-    return ret;
-}
-
-class KernelArgInfo {
-public:
-    KernelArgInfo(cl_kernel_arg_address_qualifier input_address_qualifier,
-                  cl_kernel_arg_access_qualifier input_access_qualifier,
-                  cl_kernel_arg_type_qualifier input_type_qualifier,
-                  const std::string& input_arg_type, const int argument_number,
-                  const std::string& input_arg_string = "")
-        : address_qualifier(input_address_qualifier),
-          access_qualifier(input_access_qualifier),
-          type_qualifier(input_type_qualifier), arg_string(input_arg_string)
-    {
-        strcpy(arg_type, input_arg_type.c_str());
-        std::string input_arg_name =
-            KERNEL_ARGUMENT_NAME + std::to_string(argument_number);
-        strcpy(arg_name, input_arg_name.c_str());
-    };
-    KernelArgInfo() = default;
-    cl_kernel_arg_address_qualifier address_qualifier;
-    cl_kernel_arg_access_qualifier access_qualifier;
-    cl_kernel_arg_type_qualifier type_qualifier;
-    char arg_type[KERNEL_ARGUMENT_LENGTH];
-    char arg_name[KERNEL_ARGUMENT_LENGTH];
-    std::string arg_string;
+// Support for optional 3D image data type: element 0 is the source of a kernel named "image_d" that takes only a read_only and a write_only image3d_t argument; element 1 is a lone newline string.
+const char * image_3D_kernel_args[] = {
+    "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable \n" // presumably required for the write_only image3d_t argument -- confirm against the OpenCL C spec
+    "kernel void image_d(read_only image3d_t image3d_td2,\n" // argument names keep the d2/d3 suffixes even though these are the kernel's first and second arguments
+    "                    write_only image3d_t image3d_td3)\n"
+    "{}\n", // empty kernel body; the comma ends array element 0
+    "\n" // array element 1: just a newline
 };
 
-static std::string generate_argument(const KernelArgInfo& kernel_arg)
-{
-    std::string ret;
-
-    const bool is_image = strstr(kernel_arg.arg_type, "image")
-        || strstr(kernel_arg.arg_type, "sampler");
-    std::string address_qualifier = "";
-    // Image Objects are always allocated from the global address space so the
-    // qualifier should not be specified
-    if (!is_image)
+const char * image_3D_arg_info[][67] = { // row layout: kernel name, five fields per argument, NULL terminator; only 12 of the 67 slots are written here, the rest are zero-initialized
     {
-        address_qualifier = get_address_qualifier(kernel_arg.address_qualifier);
+        "image_d", // kernel name; same name as the kernel declared in image_3D_kernel_args
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td2", // fields: address, access and type qualifier constants (cast to const char *), then type name and argument name strings
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td3",
+        NULL // end-of-row marker
+    },
+};
+
+const char * double_arg_info[][77] = {
+    {
+        "double_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "constantdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "constantdoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "globaldoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globaldoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "globalconstdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstdoublerestrictp",
+    (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalvolatiledoublep",
+    (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalvolatiledoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalconstvolatiledoublep",
+        NULL
+    },
+    {
+        "double_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstvolatiledoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "localdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localdoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "localconstdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstdoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localvolatiledoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localvolatiledoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localconstvolatiledoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstvolatiledoublerestrictp",
+        NULL
+    },
+    {
+        "double_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "doubled",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "constdoubled",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privatedoubled",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privateconstdoubled",
+        NULL
+    },
+    {
+        "double_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "constantdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "constantdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "globaldouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globaldouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "globalconstdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalvolatiledouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalvolatiledouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalconstvolatiledouble2p",
+        NULL
+    },
+    {
+        "double_vector2_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstvolatiledouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "localdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "localconstdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localvolatiledouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localvolatiledouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localconstvolatiledouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstvolatiledouble2restrictp",
+        NULL
+    },
+    {
+        "double_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "double2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "constdouble2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privatedouble2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privateconstdouble2d",
+        NULL
+    },
+    {
+        "double_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "constantdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "constantdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "globaldouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globaldouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "globalconstdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalvolatiledouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalvolatiledouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalconstvolatiledouble3p",
+        NULL
+    },
+    {
+        "double_vector3_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstvolatiledouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "localdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "localconstdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localvolatiledouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localvolatiledouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localconstvolatiledouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstvolatiledouble3restrictp",
+        NULL
+    },
+    {
+        "double_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "double3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "constdouble3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privatedouble3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privateconstdouble3d",
+        NULL
+    },
+    {
+        "double_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "constantdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "constantdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "globaldouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globaldouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "globalconstdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalvolatiledouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalvolatiledouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalconstvolatiledouble4p",
+        NULL
+    },
+    {
+        "double_vector4_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstvolatiledouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "localdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "localconstdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localvolatiledouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localvolatiledouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localconstvolatiledouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstvolatiledouble4restrictp",
+        NULL
+    },
+    {
+        "double_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "double4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "constdouble4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privatedouble4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privateconstdouble4d",
+        NULL
+    },
+    {
+        "double_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "constantdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "constantdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "globaldouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globaldouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "globalconstdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalvolatiledouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalvolatiledouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalconstvolatiledouble8p",
+        NULL
+    },
+    {
+        "double_vector8_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstvolatiledouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "localdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "localconstdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localvolatiledouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localvolatiledouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localconstvolatiledouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstvolatiledouble8restrictp",
+        NULL
+    },
+    {
+        "double_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "double8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "constdouble8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privatedouble8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privateconstdouble8d",
+        NULL
+    },
+    {
+        "double_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "constantdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "constantdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "globaldouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globaldouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "globalconstdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalvolatiledouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalvolatiledouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalconstvolatiledouble16p",
+        NULL
+    },
+    {
+        "double_vector16_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstvolatiledouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "localdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "localconstdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localvolatiledouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localvolatiledouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localconstvolatiledouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstvolatiledouble16restrictp",
+        NULL
+    },
+    {
+        "double_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "double16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "constdouble16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privatedouble16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privateconstdouble16d",
+        NULL
+    },
+};
+
+
+// Support for optional half data type: OpenCL C source strings for empty-bodied kernels whose arguments are half, half2, half3, half4, half8 and half16 pointers.
+const char * half_kernel_args[] = { // adjacent literals concatenate; each comma after "{}\n" closes one array element
+    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" // the pragma appears only here, at the start of the first element
+    "\n"
+    "kernel void half_scalar_p(constant half*constanthalfp,\n"
+    "                          constant half *restrict constanthalfrestrictp,\n"
+    "                          global half*globalhalfp,\n"
+    "                          global half *restrict globalhalfrestrictp,\n"
+    "                          global const half* globalconsthalfp,\n"
+    "                          global const half * restrict globalconsthalfrestrictp,\n"
+    "                          global volatile half*globalvolatilehalfp,\n"
+    "                          global volatile half *restrict globalvolatilehalfrestrictp,\n"
+    "                          global const volatile half* globalconstvolatilehalfp)\n"
+    "{}\n", // end of element 0: pragma + half_scalar_p (constant and global pointers)
+    "\n"
+    "kernel void half_scalar_p2(global const volatile half * restrict globalconstvolatilehalfrestrictp,\n"
+    "                           local half*localhalfp,\n"
+    "                           local half *restrict localhalfrestrictp,\n"
+    "                           local const half* localconsthalfp,\n"
+    "                           local const half * restrict localconsthalfrestrictp,\n"
+    "                           local volatile half*localvolatilehalfp,\n"
+    "                           local volatile half *restrict localvolatilehalfrestrictp,\n"
+    "                           local const volatile half* localconstvolatilehalfp,\n"
+    "                           local const volatile half * restrict localconstvolatilehalfrestrictp)\n"
+    "{}\n", // end of element 1: half_scalar_p2 (one global pointer, then local pointers)
+    "\n"
+    "kernel void half_vector2_p(constant half2*constanthalf2p,\n"
+    "                           constant half2 *restrict constanthalf2restrictp,\n"
+    "                           global half2*globalhalf2p,\n"
+    "                           global half2 *restrict globalhalf2restrictp,\n"
+    "                           global const half2* globalconsthalf2p,\n"
+    "                           global const half2 * restrict globalconsthalf2restrictp,\n"
+    "                           global volatile half2*globalvolatilehalf2p,\n"
+    "                           global volatile half2 *restrict globalvolatilehalf2restrictp,\n"
+    "                           global const volatile half2* globalconstvolatilehalf2p)\n"
+    "{}\n", // end of element 2: half_vector2_p
+    "\n"
+    "kernel void half_vector2_p2(global const volatile half2 * restrict globalconstvolatilehalf2restrictp,\n"
+    "                            local half2*localhalf2p,\n"
+    "                            local half2 *restrict localhalf2restrictp,\n"
+    "                            local const half2* localconsthalf2p,\n"
+    "                            local const half2 * restrict localconsthalf2restrictp,\n"
+    "                            local volatile half2*localvolatilehalf2p,\n"
+    "                            local volatile half2 *restrict localvolatilehalf2restrictp,\n"
+    "                            local const volatile half2* localconstvolatilehalf2p,\n"
+    "                            local const volatile half2 * restrict localconstvolatilehalf2restrictp)\n"
+    "{}\n", // end of element 3: half_vector2_p2
+    "\n"
+    "kernel void half_vector3_p(constant half3*constanthalf3p,\n"
+    "                           constant half3 *restrict constanthalf3restrictp,\n"
+    "                           global half3*globalhalf3p,\n"
+    "                           global half3 *restrict globalhalf3restrictp,\n"
+    "                           global const half3* globalconsthalf3p,\n"
+    "                           global const half3 * restrict globalconsthalf3restrictp,\n"
+    "                           global volatile half3*globalvolatilehalf3p,\n"
+    "                           global volatile half3 *restrict globalvolatilehalf3restrictp,\n"
+    "                           global const volatile half3* globalconstvolatilehalf3p)\n"
+    "{}\n", // end of element 4: half_vector3_p
+    "\n"
+    "kernel void half_vector3_p2(global const volatile half3 * restrict globalconstvolatilehalf3restrictp,\n"
+    "                            local half3*localhalf3p,\n"
+    "                            local half3 *restrict localhalf3restrictp,\n"
+    "                            local const half3* localconsthalf3p,\n"
+    "                            local const half3 * restrict localconsthalf3restrictp,\n"
+    "                            local volatile half3*localvolatilehalf3p,\n"
+    "                            local volatile half3 *restrict localvolatilehalf3restrictp,\n"
+    "                            local const volatile half3* localconstvolatilehalf3p,\n"
+    "                            local const volatile half3 * restrict localconstvolatilehalf3restrictp)\n"
+    "{}\n", // end of element 5: half_vector3_p2
+    "\n"
+    "kernel void half_vector4_p(constant half4*constanthalf4p,\n"
+    "                           constant half4 *restrict constanthalf4restrictp,\n"
+    "                           global half4*globalhalf4p,\n"
+    "                           global half4 *restrict globalhalf4restrictp,\n"
+    "                           global const half4* globalconsthalf4p,\n"
+    "                           global const half4 * restrict globalconsthalf4restrictp,\n"
+    "                           global volatile half4*globalvolatilehalf4p,\n"
+    "                           global volatile half4 *restrict globalvolatilehalf4restrictp,\n"
+    "                           global const volatile half4* globalconstvolatilehalf4p)\n"
+    "{}\n", // end of element 6: half_vector4_p
+    "\n"
+    "kernel void half_vector4_p2(global const volatile half4 * restrict globalconstvolatilehalf4restrictp,\n"
+    "                            local half4*localhalf4p,\n"
+    "                            local half4 *restrict localhalf4restrictp,\n"
+    "                            local const half4* localconsthalf4p,\n"
+    "                            local const half4 * restrict localconsthalf4restrictp,\n"
+    "                            local volatile half4*localvolatilehalf4p,\n"
+    "                            local volatile half4 *restrict localvolatilehalf4restrictp,\n"
+    "                            local const volatile half4* localconstvolatilehalf4p,\n"
+    "                            local const volatile half4 * restrict localconstvolatilehalf4restrictp)\n"
+    "{}\n", // end of element 7: half_vector4_p2
+    "\n"
+    "kernel void half_vector8_p(constant half8*constanthalf8p,\n"
+    "                           constant half8 *restrict constanthalf8restrictp,\n"
+    "                           global half8*globalhalf8p,\n"
+    "                           global half8 *restrict globalhalf8restrictp,\n"
+    "                           global const half8* globalconsthalf8p,\n"
+    "                           global const half8 * restrict globalconsthalf8restrictp,\n"
+    "                           global volatile half8*globalvolatilehalf8p,\n"
+    "                           global volatile half8 *restrict globalvolatilehalf8restrictp,\n"
+    "                           global const volatile half8* globalconstvolatilehalf8p)\n"
+    "{}\n", // end of element 8: half_vector8_p
+    "\n"
+    "kernel void half_vector8_p2(global const volatile half8 * restrict globalconstvolatilehalf8restrictp,\n"
+    "                            local half8*localhalf8p,\n"
+    "                            local half8 *restrict localhalf8restrictp,\n"
+    "                            local const half8* localconsthalf8p,\n"
+    "                            local const half8 * restrict localconsthalf8restrictp,\n"
+    "                            local volatile half8*localvolatilehalf8p,\n"
+    "                            local volatile half8 *restrict localvolatilehalf8restrictp,\n"
+    "                            local const volatile half8* localconstvolatilehalf8p,\n"
+    "                            local const volatile half8 * restrict localconstvolatilehalf8restrictp)\n"
+    "{}\n", // end of element 9: half_vector8_p2
+    "\n"
+    "kernel void half_vector16_p(constant half16*constanthalf16p,\n"
+    "                            constant half16 *restrict constanthalf16restrictp,\n"
+    "                            global half16*globalhalf16p,\n"
+    "                            global half16 *restrict globalhalf16restrictp,\n"
+    "                            global const half16* globalconsthalf16p,\n"
+    "                            global const half16 * restrict globalconsthalf16restrictp,\n"
+    "                            global volatile half16*globalvolatilehalf16p,\n"
+    "                            global volatile half16 *restrict globalvolatilehalf16restrictp,\n"
+    "                            global const volatile half16* globalconstvolatilehalf16p)\n"
+    "{}\n", // end of element 10: half_vector16_p
+    "\n"
+    "kernel void half_vector16_p2(global const volatile half16 * restrict globalconstvolatilehalf16restrictp,\n"
+    "                             local half16*localhalf16p,\n"
+    "                             local half16 *restrict localhalf16restrictp,\n"
+    "                             local const half16* localconsthalf16p,\n"
+    "                             local const half16 * restrict localconsthalf16restrictp,\n"
+    "                             local volatile half16*localvolatilehalf16p,\n"
+    "                             local volatile half16 *restrict localvolatilehalf16restrictp,\n"
+    "                             local const volatile half16* localconstvolatilehalf16p,\n"
+    "                             local const volatile half16 * restrict localconstvolatilehalf16restrictp)\n"
+    "{}\n", // end of element 11: half_vector16_p2
+    "\n" // element 12: a lone newline, so the array holds 13 strings (12 kernels plus this one)
+};
+
+const char * half_arg_info[][77] = {
+    {
+        "half_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "constanthalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "constanthalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "globalhalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalhalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "globalconsthalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconsthalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalvolatilehalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalvolatilehalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalconstvolatilehalfp",
+        NULL
+    },
+    {
+        "half_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconstvolatilehalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "localhalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localhalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "localconsthalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconsthalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localvolatilehalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localvolatilehalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localconstvolatilehalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconstvolatilehalfrestrictp",
+        NULL
+    },
+    {
+        "half_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "constanthalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "constanthalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "globalhalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalhalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "globalconsthalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconsthalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalvolatilehalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalvolatilehalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalconstvolatilehalf2p",
+        NULL
+    },
+    {
+        "half_vector2_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconstvolatilehalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "localhalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localhalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "localconsthalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconsthalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localvolatilehalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localvolatilehalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localconstvolatilehalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconstvolatilehalf2restrictp",
+        NULL
+    },
+    {
+        "half_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "constanthalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "constanthalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "globalhalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalhalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "globalconsthalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconsthalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalvolatilehalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalvolatilehalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalconstvolatilehalf3p",
+        NULL
+    },
+    {
+        "half_vector3_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconstvolatilehalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "localhalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localhalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "localconsthalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconsthalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localvolatilehalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localvolatilehalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localconstvolatilehalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconstvolatilehalf3restrictp",
+        NULL
+    },
+    {
+        "half_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "constanthalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "constanthalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "globalhalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalhalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "globalconsthalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconsthalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalvolatilehalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalvolatilehalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalconstvolatilehalf4p",
+        NULL
+    },
+    {
+        "half_vector4_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconstvolatilehalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "localhalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localhalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "localconsthalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconsthalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localvolatilehalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localvolatilehalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localconstvolatilehalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconstvolatilehalf4restrictp",
+        NULL
+    },
+    {
+        "half_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "constanthalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "constanthalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "globalhalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalhalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "globalconsthalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconsthalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalvolatilehalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalvolatilehalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalconstvolatilehalf8p",
+        NULL
+    },
+    {
+        "half_vector8_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconstvolatilehalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "localhalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localhalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "localconsthalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconsthalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localvolatilehalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localvolatilehalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localconstvolatilehalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconstvolatilehalf8restrictp",
+        NULL
+    },
+    {
+        "half_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "constanthalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "constanthalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "globalhalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalhalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "globalconsthalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconsthalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalvolatilehalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalvolatilehalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalconstvolatilehalf16p",
+        NULL
+    },
+    {
+        "half_vector16_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconstvolatilehalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "localhalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localhalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "localconsthalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconsthalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localvolatilehalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localvolatilehalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localconstvolatilehalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconstvolatilehalf16restrictp",
+        NULL
+    },
+};
+
+const char * long_kernel_args[] = {
+        "kernel void constant_scalar_p2(constant long* constantlongp,\n"
+        "                              constant ulong * constantulongp)\n"
+      "{}\n",
+        "kernel void constant_scalar_p3(constant unsigned long*constantunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_scalar_restrict_p2(constant long*restrict constantlongrestrictp,\n"
+        "                                       constant ulong *restrict constantulongrestrictp)\n"
+        "{}\n",
+        "kernel void constant_scalar_restrict_p3(constant unsigned long* restrict constantunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_scalar_p(global long* globallongp,\n"
+        "                            global ulong * globalulongp,\n"
+        "                            global unsigned long*globalunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_scalar_restrict_p(global long*restrict globallongrestrictp,\n"
+        "                                     global ulong *restrict globalulongrestrictp,\n"
+        "                                     global unsigned long* restrict globalunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_scalar_p(global const long* globalconstlongp,\n"
+        "                                  global const ulong * globalconstulongp,\n"
+        "                                  global const unsigned long*globalconstunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_scalar_restrict_p(global const long*restrict globalconstlongrestrictp,\n"
+        "                                           global const ulong *restrict globalconstulongrestrictp,\n"
+        "                                           global const unsigned long* restrict globalconstunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_scalar_p(global volatile long* globalvolatilelongp,\n"
+        "                                     global volatile ulong * globalvolatileulongp,\n"
+        "                                     global volatile unsigned long*globalvolatileunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_scalar_restrict_p(global volatile long*restrict globalvolatilelongrestrictp,\n"
+        "                                              global volatile ulong *restrict globalvolatileulongrestrictp,\n"
+        "                                              global volatile unsigned long* restrict globalvolatileunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_scalar_p(global const volatile long* globalconstvolatilelongp,\n"
+        "                                           global const volatile ulong * globalconstvolatileulongp,\n"
+        "                                           global const volatile unsigned long*globalconstvolatileunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_scalar_restrict_p(global const volatile long*restrict globalconstvolatilelongrestrictp,\n"
+        "                                                    global const volatile ulong *restrict globalconstvolatileulongrestrictp,\n"
+        "                                                    global const volatile unsigned long* restrict globalconstvolatileunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_scalar_p(local long* locallongp,\n"
+        "                           local ulong * localulongp,\n"
+        "                           local unsigned long*localunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_scalar_restrict_p(local long*restrict locallongrestrictp,\n"
+        "                                    local ulong *restrict localulongrestrictp,\n"
+        "                                    local unsigned long* restrict localunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_scalar_p(local const long* localconstlongp,\n"
+        "                                 local const ulong * localconstulongp,\n"
+        "                                 local const unsigned long*localconstunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_scalar_restrict_p(local const long*restrict localconstlongrestrictp,\n"
+        "                                          local const ulong *restrict localconstulongrestrictp,\n"
+        "                                          local const unsigned long* restrict localconstunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_scalar_p(local volatile long* localvolatilelongp,\n"
+        "                                    local volatile ulong * localvolatileulongp,\n"
+        "                                    local volatile unsigned long*localvolatileunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_scalar_restrict_p(local volatile long*restrict localvolatilelongrestrictp,\n"
+        "                                             local volatile ulong *restrict localvolatileulongrestrictp,\n"
+        "                                             local volatile unsigned long* restrict localvolatileunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_scalar_p(local const volatile long* localconstvolatilelongp,\n"
+        "                                          local const volatile ulong * localconstvolatileulongp,\n"
+        "                                          local const volatile unsigned long*localconstvolatileunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_scalar_restrict_p(local const volatile long*restrict localconstvolatilelongrestrictp,\n"
+        "                                                   local const volatile ulong *restrict localconstvolatileulongrestrictp,\n"
+        "                                                   local const volatile unsigned long* restrict localconstvolatileunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void scalar_d(long longd,\n"
+        "                     ulong ulongd,\n"
+        "                     unsigned long unsignedlongd)\n"
+        "{}\n",
+        "\n"
+        "kernel void const_scalar_d(const long constlongd,\n"
+        "                           const ulong constulongd,\n"
+        "                           const unsigned long constunsignedlongd)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_scalar_d(private long privatelongd,\n"
+        "                             private ulong privateulongd,\n"
+        "                             private unsigned long privateunsignedlongd)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_const_scalar_d(private const long privateconstlongd,\n"
+        "                                   private const ulong privateconstulongd,\n"
+        "                                   private const unsigned long privateconstunsignedlongd)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector2_p1(constant long2* constantlong2p,\n"
+        "                               constant ulong2 * constantulong2p)\n"
+      "{}\n",
+        "\n"
+        "kernel void constant_vector2_restrict_p1(constant long2 * restrict constantlong2restrictp,\n"
+        "                                        constant ulong2*restrict constantulong2restrictp)\n"
+      "{}\n",
+        "\n"
+        "kernel void global_vector2_p(global long2* globallong2p,\n"
+        "                             global ulong2 * globalulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector2_restrict_p(global long2 * restrict globallong2restrictp,\n"
+        "                                      global ulong2*restrict globalulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector2_p(global const long2*globalconstlong2p,\n"
+        "                                   global const ulong2 *globalconstulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector2_restrict_p(global const long2 *restrict globalconstlong2restrictp,\n"
+        "                                            global const ulong2* restrict globalconstulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector2_p(global volatile long2* globalvolatilelong2p,\n"
+        "                                      global volatile ulong2 * globalvolatileulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector2_restrict_p(global volatile long2 * restrict globalvolatilelong2restrictp,\n"
+        "                                               global volatile ulong2*restrict globalvolatileulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector2_p(global const volatile long2*globalconstvolatilelong2p,\n"
+        "                                            global const volatile ulong2 *globalconstvolatileulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector2_restrict_p(global const volatile long2 *restrict globalconstvolatilelong2restrictp,\n"
+        "                                                     global const volatile ulong2* restrict globalconstvolatileulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector2_p(local long2* locallong2p,\n"
+        "                            local ulong2 * localulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector2_restrict_p(local long2 * restrict locallong2restrictp,\n"
+        "                                     local ulong2*restrict localulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector2_p(local const long2*localconstlong2p,\n"
+        "                                  local const ulong2 *localconstulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector2_restrict_p(local const long2 *restrict localconstlong2restrictp,\n"
+        "                                           local const ulong2* restrict localconstulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector2_p(local volatile long2* localvolatilelong2p,\n"
+        "                                     local volatile ulong2 * localvolatileulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector2_restrict_p(local volatile long2 * restrict localvolatilelong2restrictp,\n"
+        "                                              local volatile ulong2*restrict localvolatileulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector2_p(local const volatile long2*localconstvolatilelong2p,\n"
+        "                                           local const volatile ulong2 *localconstvolatileulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector2_restrict_p(local const volatile long2 *restrict localconstvolatilelong2restrictp,\n"
+        "                                                    local const volatile ulong2* restrict localconstvolatileulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void vector2_d(long2 long2d,\n"
+        "                      ulong2 ulong2d)\n"
+        "{}\n",
+        "\n"
+        "kernel void const_vector2_d(const long2 constlong2d,\n"
+        "                            const ulong2 constulong2d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_vector2_d(private long2 privatelong2d,\n"
+        "                              private ulong2 privateulong2d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_const_vector2_d(private const long2 privateconstlong2d,\n"
+        "                                    private const ulong2 privateconstulong2d)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector3_p1(constant long3* constantlong3p,\n"
+        "                               constant ulong3 * constantulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector3_restrict_p1(constant long3 * restrict constantlong3restrictp,\n"
+        "                                        constant ulong3*restrict constantulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector3_p(global long3* globallong3p,\n"
+        "                             global ulong3 * globalulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector3_restrict_p(global long3 * restrict globallong3restrictp,\n"
+        "                                      global ulong3*restrict globalulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector3_p(global const long3*globalconstlong3p,\n"
+        "                                   global const ulong3 *globalconstulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector3_restrict_p(global const long3 *restrict globalconstlong3restrictp,\n"
+        "                                            global const ulong3* restrict globalconstulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector3_p(global volatile long3* globalvolatilelong3p,\n"
+        "                                      global volatile ulong3 * globalvolatileulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector3_restrict_p(global volatile long3 * restrict globalvolatilelong3restrictp,\n"
+        "                                               global volatile ulong3*restrict globalvolatileulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector3_p(global const volatile long3*globalconstvolatilelong3p,\n"
+        "                                            global const volatile ulong3 *globalconstvolatileulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector3_restrict_p(global const volatile long3 *restrict globalconstvolatilelong3restrictp,\n"
+        "                                                     global const volatile ulong3* restrict globalconstvolatileulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector3_p(local long3* locallong3p,\n"
+        "                            local ulong3 * localulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector3_restrict_p(local long3 * restrict locallong3restrictp,\n"
+        "                                     local ulong3*restrict localulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector3_p(local const long3*localconstlong3p,\n"
+        "                                  local const ulong3 *localconstulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector3_restrict_p(local const long3 *restrict localconstlong3restrictp,\n"
+        "                                           local const ulong3* restrict localconstulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector3_p(local volatile long3* localvolatilelong3p,\n"
+        "                                     local volatile ulong3 * localvolatileulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector3_restrict_p(local volatile long3 * restrict localvolatilelong3restrictp,\n"
+        "                                              local volatile ulong3*restrict localvolatileulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector3_p(local const volatile long3*localconstvolatilelong3p,\n"
+        "                                           local const volatile ulong3 *localconstvolatileulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector3_restrict_p(local const volatile long3 *restrict localconstvolatilelong3restrictp,\n"
+        "                                                    local const volatile ulong3* restrict localconstvolatileulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void vector3_d(long3 long3d,\n"
+        "                      ulong3 ulong3d)\n"
+        "{}\n",
+        "\n"
+        "kernel void const_vector3_d(const long3 constlong3d,\n"
+        "                            const ulong3 constulong3d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_vector3_d(private long3 privatelong3d,\n"
+        "                              private ulong3 privateulong3d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_const_vector3_d(private const long3 privateconstlong3d,\n"
+        "                                    private const ulong3 privateconstulong3d)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector4_p1(constant long4* constantlong4p,\n"
+        "                               constant ulong4 * constantulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector4_restrict_p1(constant long4 * restrict constantlong4restrictp,\n"
+        "                                        constant ulong4*restrict constantulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector4_p(global long4* globallong4p,\n"
+        "                             global ulong4 * globalulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector4_restrict_p(global long4 * restrict globallong4restrictp,\n"
+        "                                      global ulong4*restrict globalulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector4_p(global const long4*globalconstlong4p,\n"
+        "                                   global const ulong4 *globalconstulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector4_restrict_p(global const long4 *restrict globalconstlong4restrictp,\n"
+        "                                            global const ulong4* restrict globalconstulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector4_p(global volatile long4* globalvolatilelong4p,\n"
+        "                                      global volatile ulong4 * globalvolatileulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector4_restrict_p(global volatile long4 * restrict globalvolatilelong4restrictp,\n"
+        "                                               global volatile ulong4*restrict globalvolatileulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector4_p(global const volatile long4*globalconstvolatilelong4p,\n"
+        "                                            global const volatile ulong4 *globalconstvolatileulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector4_restrict_p(global const volatile long4 *restrict globalconstvolatilelong4restrictp,\n"
+        "                                                     global const volatile ulong4* restrict globalconstvolatileulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector4_p(local long4* locallong4p,\n"
+        "                            local ulong4 * localulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector4_restrict_p(local long4 * restrict locallong4restrictp,\n"
+        "                                     local ulong4*restrict localulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector4_p(local const long4*localconstlong4p,\n"
+        "                                  local const ulong4 *localconstulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector4_restrict_p(local const long4 *restrict localconstlong4restrictp,\n"
+        "                                           local const ulong4* restrict localconstulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector4_p(local volatile long4* localvolatilelong4p,\n"
+        "                                     local volatile ulong4 * localvolatileulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector4_restrict_p(local volatile long4 * restrict localvolatilelong4restrictp,\n"
+        "                                              local volatile ulong4*restrict localvolatileulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector4_p(local const volatile long4*localconstvolatilelong4p,\n"
+        "                                           local const volatile ulong4 *localconstvolatileulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector4_restrict_p(local const volatile long4 *restrict localconstvolatilelong4restrictp,\n"
+        "                                                    local const volatile ulong4* restrict localconstvolatileulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void vector4_d(long4 long4d,\n"
+        "                      ulong4 ulong4d)\n"
+        "{}\n",
+        "\n"
+        "kernel void const_vector4_d(const long4 constlong4d,\n"
+        "                            const ulong4 constulong4d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_vector4_d(private long4 privatelong4d,\n"
+        "                              private ulong4 privateulong4d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_const_vector4_d(private const long4 privateconstlong4d,\n"
+        "                                    private const ulong4 privateconstulong4d)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector8_p1(constant long8* constantlong8p,\n"
+        "                               constant ulong8 * constantulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector8_restrict_p1(constant long8 * restrict constantlong8restrictp,\n"
+        "                                        constant ulong8*restrict constantulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector8_p(global long8* globallong8p,\n"
+        "                             global ulong8 * globalulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector8_restrict_p(global long8 * restrict globallong8restrictp,\n"
+        "                                      global ulong8*restrict globalulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector8_p(global const long8*globalconstlong8p,\n"
+        "                                   global const ulong8 *globalconstulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector8_restrict_p(global const long8 *restrict globalconstlong8restrictp,\n"
+        "                                            global const ulong8* restrict globalconstulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector8_p(global volatile long8* globalvolatilelong8p,\n"
+        "                                      global volatile ulong8 * globalvolatileulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector8_restrict_p(global volatile long8 * restrict globalvolatilelong8restrictp,\n"
+        "                                               global volatile ulong8*restrict globalvolatileulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector8_p(global const volatile long8*globalconstvolatilelong8p,\n"
+        "                                            global const volatile ulong8 *globalconstvolatileulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector8_restrict_p(global const volatile long8 *restrict globalconstvolatilelong8restrictp,\n"
+        "                                                     global const volatile ulong8* restrict globalconstvolatileulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector8_p(local long8* locallong8p,\n"
+        "                            local ulong8 * localulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector8_restrict_p(local long8 * restrict locallong8restrictp,\n"
+        "                                     local ulong8*restrict localulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector8_p(local const long8*localconstlong8p,\n"
+        "                                  local const ulong8 *localconstulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector8_restrict_p(local const long8 *restrict localconstlong8restrictp,\n"
+        "                                           local const ulong8* restrict localconstulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector8_p(local volatile long8* localvolatilelong8p,\n"
+        "                                     local volatile ulong8 * localvolatileulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector8_restrict_p(local volatile long8 * restrict localvolatilelong8restrictp,\n"
+        "                                              local volatile ulong8*restrict localvolatileulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector8_p(local const volatile long8*localconstvolatilelong8p,\n"
+        "                                           local const volatile ulong8 *localconstvolatileulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector8_restrict_p(local const volatile long8 *restrict localconstvolatilelong8restrictp,\n"
+        "                                                    local const volatile ulong8* restrict localconstvolatileulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void vector8_d(long8 long8d,\n"
+        "                      ulong8 ulong8d)\n"
+        "{}\n",
+        "\n"
+        "kernel void const_vector8_d(const long8 constlong8d,\n"
+        "                            const ulong8 constulong8d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_vector8_d(private long8 privatelong8d,\n"
+        "                              private ulong8 privateulong8d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_const_vector8_d(private const long8 privateconstlong8d,\n"
+        "                                    private const ulong8 privateconstulong8d)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector16_p1(constant long16* constantlong16p,\n"
+        "                                constant ulong16 * constantulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector16_restrict_p1(constant long16 * restrict constantlong16restrictp,\n"
+        "                                         constant ulong16*restrict constantulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector16_p(global long16* globallong16p,\n"
+        "                              global ulong16 * globalulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector16_restrict_p(global long16 * restrict globallong16restrictp,\n"
+        "                                       global ulong16*restrict globalulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector16_p(global const long16*globalconstlong16p,\n"
+        "                                    global const ulong16 *globalconstulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector16_restrict_p(global const long16 *restrict globalconstlong16restrictp,\n"
+        "                                             global const ulong16* restrict globalconstulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector16_p(global volatile long16* globalvolatilelong16p,\n"
+        "                                       global volatile ulong16 * globalvolatileulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector16_restrict_p(global volatile long16 * restrict globalvolatilelong16restrictp,\n"
+        "                                                global volatile ulong16*restrict globalvolatileulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector16_p(global const volatile long16*globalconstvolatilelong16p,\n"
+        "                                             global const volatile ulong16 *globalconstvolatileulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector16_restrict_p(global const volatile long16 *restrict globalconstvolatilelong16restrictp,\n"
+        "                                                      global const volatile ulong16* restrict globalconstvolatileulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector16_p(local long16* locallong16p,\n"
+        "                             local ulong16 * localulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector16_restrict_p(local long16 * restrict locallong16restrictp,\n"
+        "                                      local ulong16*restrict localulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector16_p(local const long16*localconstlong16p,\n"
+        "                                   local const ulong16 *localconstulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector16_restrict_p(local const long16 *restrict localconstlong16restrictp,\n"
+        "                                            local const ulong16* restrict localconstulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector16_p(local volatile long16* localvolatilelong16p,\n"
+        "                                      local volatile ulong16 * localvolatileulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector16_restrict_p(local volatile long16 * restrict localvolatilelong16restrictp,\n"
+        "                                               local volatile ulong16*restrict localvolatileulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector16_p(local const volatile long16*localconstvolatilelong16p,\n"
+        "                                            local const volatile ulong16 *localconstvolatileulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector16_restrict_p(local const volatile long16 *restrict localconstvolatilelong16restrictp,\n"
+        "                                                     local const volatile ulong16* restrict localconstvolatileulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void vector16_d(long16 long16d,\n"
+        "                       ulong16 ulong16d)\n"
+        "{}\n",
+        "\n"
+        "kernel void const_vector16_d(const long16 constlong16d,\n"
+        "                             const ulong16 constulong16d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_vector16_d(private long16 privatelong16d,\n"
+        "                               private ulong16 privateulong16d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_const_vector16_d(private const long16 privateconstlong16d,\n"
+        "                                     private const ulong16 privateconstulong16d)\n"
+        "{}\n",
+        "\n"
+};
+
+const char * long_arg_info[][72] = {
+  // The minimum value of CL_DEVICE_MAX_CONSTANT_ARGS is 4
+  {
+        "constant_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "constantlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantulongp",
+        NULL
+  },
+  {
+        "constant_scalar_p3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantunsignedlongp",
+        NULL
+    },
+  {
+        "constant_scalar_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "constantlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantulongrestrictp",
+        NULL
+  },
+  {
+        "constant_scalar_restrict_p3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "global_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "globallongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalunsignedlongp",
+        NULL
+    },
+    {
+        "global_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globallongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "global_const_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "globalconstlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstunsignedlongp",
+        NULL
+    },
+    {
+        "global_const_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "global_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileunsignedlongp",
+        NULL
+    },
+    {
+        "global_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalconstvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileunsignedlongp",
+        NULL
+    },
+    {
+        "global_const_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "local_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "locallongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localunsignedlongp",
+        NULL
+    },
+    {
+        "local_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "locallongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "local_const_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "localconstlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstunsignedlongp",
+        NULL
+    },
+    {
+        "local_const_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "local_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileunsignedlongp",
+        NULL
+    },
+    {
+        "local_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localconstvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileunsignedlongp",
+        NULL
+    },
+    {
+        "local_const_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "longd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "ulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "unsignedlongd",
+        NULL
+    },
+    {
+        "const_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "constlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constunsignedlongd",
+        NULL
+    },
+    {
+        "private_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privatelongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateunsignedlongd",
+        NULL
+    },
+    {
+        "private_const_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privateconstlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstunsignedlongd",
+        NULL
+    },
+    {
+        "constant_vector2_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "constantlong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "constantulong2p",
+        NULL
+    },
+    {
+        "constant_vector2_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "constantlong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "constantulong2restrictp",
+        NULL
+    },
+    {
+        "global_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "globallong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "globalulong2p",
+        NULL
+    },
+    {
+        "global_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globallong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalulong2restrictp",
+        NULL
+    },
+    {
+        "global_const_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "globalconstlong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "globalconstulong2p",
+        NULL
+    },
+    {
+        "global_const_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstlong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstulong2restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalvolatileulong2p",
+        NULL
+    },
+    {
+        "global_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalvolatileulong2restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalconstvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalconstvolatileulong2p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstvolatileulong2restrictp",
+        NULL
+    },
+    {
+        "local_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "locallong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "localulong2p",
+        NULL
+    },
+    {
+        "local_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "locallong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localulong2restrictp",
+        NULL
+    },
+    {
+        "local_const_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "localconstlong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "localconstulong2p",
+        NULL
+    },
+    {
+        "local_const_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstlong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstulong2restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localvolatileulong2p",
+        NULL
+    },
+    {
+        "local_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localvolatileulong2restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localconstvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localconstvolatileulong2p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstvolatileulong2restrictp",
+        NULL
+    },
+    {
+        "vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "long2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "ulong2d",
+        NULL
+    },
+    {
+        "const_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "constlong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "constulong2d",
+        NULL
+    },
+    {
+        "private_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privatelong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateulong2d",
+        NULL
+    },
+    {
+        "private_const_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privateconstlong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateconstulong2d",
+        NULL
+    },
+    {
+        "constant_vector3_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "constantlong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "constantulong3p",
+        NULL
+    },
+    {
+        "constant_vector3_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "constantlong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "constantulong3restrictp",
+        NULL
+    },
+    {
+        "global_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "globallong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "globalulong3p",
+        NULL
+    },
+    {
+        "global_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globallong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalulong3restrictp",
+        NULL
+    },
+    {
+        "global_const_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "globalconstlong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "globalconstulong3p",
+        NULL
+    },
+    {
+        "global_const_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstlong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstulong3restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalvolatileulong3p",
+        NULL
+    },
+    {
+        "global_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalvolatileulong3restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalconstvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalconstvolatileulong3p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstvolatileulong3restrictp",
+        NULL
+    },
+    {
+        "local_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "locallong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "localulong3p",
+        NULL
+    },
+    {
+        "local_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "locallong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localulong3restrictp",
+        NULL
+    },
+    {
+        "local_const_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "localconstlong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "localconstulong3p",
+        NULL
+    },
+    {
+        "local_const_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstlong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstulong3restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localvolatileulong3p",
+        NULL
+    },
+    {
+        "local_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localvolatileulong3restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localconstvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localconstvolatileulong3p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstvolatileulong3restrictp",
+        NULL
+    },
+    {
+        "vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "long3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "ulong3d",
+        NULL
+    },
+    {
+        "const_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "constlong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "constulong3d",
+        NULL
+    },
+    {
+        "private_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privatelong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateulong3d",
+        NULL
+    },
+    {
+        "private_const_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privateconstlong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateconstulong3d",
+        NULL
+    },
+    {
+        "constant_vector4_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "constantlong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "constantulong4p",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "constantlong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "constantulong4restrictp",
+        NULL
+    },
+    {
+        "global_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "globallong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "globalulong4p",
+        NULL
+    },
+    {
+        "global_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globallong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalulong4restrictp",
+        NULL
+    },
+    {
+        "global_const_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "globalconstlong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "globalconstulong4p",
+        NULL
+    },
+    {
+        "global_const_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstlong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstulong4restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalvolatileulong4p",
+        NULL
+    },
+    {
+        "global_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalvolatileulong4restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalconstvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalconstvolatileulong4p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstvolatileulong4restrictp",
+        NULL
+    },
+    {
+        "local_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "locallong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "localulong4p",
+        NULL
+    },
+    {
+        "local_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "locallong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localulong4restrictp",
+        NULL
+    },
+    {
+        "local_const_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "localconstlong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "localconstulong4p",
+        NULL
+    },
+    {
+        "local_const_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstlong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstulong4restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localvolatileulong4p",
+        NULL
+    },
+    {
+        "local_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localvolatileulong4restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localconstvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localconstvolatileulong4p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstvolatileulong4restrictp",
+        NULL
+    },
+    {
+        "vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "long4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "ulong4d",
+        NULL
+    },
+    {
+        "const_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "constlong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "constulong4d",
+        NULL
+    },
+    {
+        "private_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privatelong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateulong4d",
+        NULL
+    },
+    {
+        "private_const_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privateconstlong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateconstulong4d",
+        NULL
+    },
+    {
+        "constant_vector8_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "constantlong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "constantulong8p",
+        NULL
+    },
+    {
+        "constant_vector8_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "constantlong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "constantulong8restrictp",
+        NULL
+    },
+    {
+        "global_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "globallong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "globalulong8p",
+        NULL
+    },
+    {
+        "global_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globallong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalulong8restrictp",
+        NULL
+    },
+    {
+        "global_const_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "globalconstlong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "globalconstulong8p",
+        NULL
+    },
+    {
+        "global_const_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstlong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstulong8restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalvolatileulong8p",
+        NULL
+    },
+    {
+        "global_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalvolatileulong8restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalconstvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalconstvolatileulong8p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstvolatileulong8restrictp",
+        NULL
+    },
+    {
+        "local_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "locallong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "localulong8p",
+        NULL
+    },
+    {
+        "local_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "locallong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localulong8restrictp",
+        NULL
+    },
+    {
+        "local_const_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "localconstlong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "localconstulong8p",
+        NULL
+    },
+    {
+        "local_const_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstlong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstulong8restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localvolatileulong8p",
+        NULL
+    },
+    {
+        "local_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localvolatileulong8restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localconstvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localconstvolatileulong8p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstvolatileulong8restrictp",
+        NULL
+    },
+    {
+        "vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "long8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "ulong8d",
+        NULL
+    },
+    {
+        "const_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "constlong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "constulong8d",
+        NULL
+    },
+    {
+        "private_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privatelong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateulong8d",
+        NULL
+    },
+    {
+        "private_const_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privateconstlong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateconstulong8d",
+        NULL
+    },
+    {
+        "constant_vector16_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "constantlong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "constantulong16p",
+        NULL
+    },
+    {
+        "constant_vector16_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "constantlong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "constantulong16restrictp",
+        NULL
+    },
+    {
+        "global_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "globallong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "globalulong16p",
+        NULL
+    },
+    {
+        "global_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globallong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalulong16restrictp",
+        NULL
+    },
+    {
+        "global_const_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "globalconstlong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "globalconstulong16p",
+        NULL
+    },
+    {
+        "global_const_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstlong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstulong16restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalvolatileulong16p",
+        NULL
+    },
+    {
+        "global_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalvolatileulong16restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalconstvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalconstvolatileulong16p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstvolatileulong16restrictp",
+        NULL
+    },
+    {
+        "local_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "locallong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "localulong16p",
+        NULL
+    },
+    {
+        "local_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "locallong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localulong16restrictp",
+        NULL
+    },
+    {
+        "local_const_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "localconstlong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "localconstulong16p",
+        NULL
+    },
+    {
+        "local_const_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstlong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstulong16restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localvolatileulong16p",
+        NULL
+    },
+    {
+        "local_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localvolatileulong16restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localconstvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localconstvolatileulong16p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstvolatileulong16restrictp",
+        NULL
+    },
+    {
+        "vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "long16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "ulong16d",
+        NULL
+    },
+    {
+        "const_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "constlong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "constulong16d",
+        NULL
+    },
+    {
+        "private_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privatelong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateulong16d",
+        NULL
+    },
+    {
+        "private_const_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privateconstlong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateconstulong16d",
+        NULL
+    },
+};
+
+template<typename arg_info_t>
+int test(cl_device_id deviceID, cl_context context, kernel_args_t kernel_args, cl_uint lines_count, arg_info_t arg_info, size_t total_kernels_in_program) {
+
+    const size_t max_name_len = 512;
+    cl_char name[ max_name_len ];
+    cl_uint arg_count, numArgs;
+    size_t i, j, size;
+    int error;
+
+    clProgramWrapper program =
+    clCreateProgramWithSource(context, lines_count, kernel_args, NULL, &error);
+    if ( program == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to create required arguments kernel program" );
+        return -1;
     }
 
-    std::string access_qualifier =
-        get_access_qualifier(kernel_arg.access_qualifier);
-    std::string type_qualifier_prefix =
-        get_type_qualifier_prefix(kernel_arg.type_qualifier);
-    std::string type_qualifier_postfix =
-        get_type_qualifier_postfix(kernel_arg.type_qualifier);
+    // Compile the program
+    log_info( "Building kernels...\n" );
+    clBuildProgram( program, 1, &deviceID, "-cl-kernel-arg-info", NULL, NULL );
 
-    ret += address_qualifier + " ";
-    ret += access_qualifier + " ";
-    ret += type_qualifier_prefix + " ";
-    ret += kernel_arg.arg_type;
-    ret += " ";
-    ret += type_qualifier_postfix + " ";
-    ret += kernel_arg.arg_name;
-    return ret;
-}
+    // check for build errors and exit if things didn't work
+    size_t size_ret;
+    cl_build_status build_status;
+    error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof(build_status), &build_status, &size_ret);
+    test_error( error, "Unable to query build status" );
+    if (build_status == CL_BUILD_ERROR) {
+        printf("CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);
+        error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
+        test_error( error, "Unable to get build log size" );
+        char *build_log = (char *)malloc(size_ret);
+        error = clGetProgramBuildInfo(program,deviceID, CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret);
+        test_error( error, "Unable to get build log" );
+        printf("CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
+        printf("CL_BUILD_ERROR. exiting\n");
+        free(build_log);
+        return -1;
+    }
 
-/* This function generates a kernel source and allows for multiple arguments to
- * be passed in and subsequently queried. */
-static std::string generate_kernel(const std::vector<KernelArgInfo>& all_args,
-                                   const bool supports_3d_image_writes = false)
-{
+    // Lookup the number of kernels in the program.
+    log_info( "Testing kernels...\n" );
+    size_t total_kernels = 0;
+    error = clGetProgramInfo( program, CL_PROGRAM_NUM_KERNELS, sizeof( size_t ), &total_kernels, NULL );
+    test_error( error, "Unable to get program info num kernels" );
 
-    std::string ret;
-    if (supports_3d_image_writes)
+    if ( total_kernels != total_kernels_in_program )
     {
-        ret += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable\n";
+        print_error( error, "Program did not build all kernels" );
+        return -1;
     }
-    ret += "kernel void get_kernel_arg_info(\n";
-    for (int i = 0; i < all_args.size(); ++i)
-    {
-        const KernelArgInfo& arg = all_args[i];
-        ret += generate_argument(all_args[i]);
-        if (i == all_args.size() - 1)
-        {
-            ret += "\n";
-        }
-        else
-        {
-            ret += ",\n";
-        }
-    }
-    ret += "){}";
-    return ret;
-}
 
-static const char* get_kernel_arg_address_qualifier(
-    cl_kernel_arg_address_qualifier address_qualifier)
-{
-    switch (address_qualifier)
-    {
-        case CL_KERNEL_ARG_ADDRESS_GLOBAL: {
-            return "GLOBAL";
-        }
-        case CL_KERNEL_ARG_ADDRESS_LOCAL: {
-            return "LOCAL";
-        }
-        case CL_KERNEL_ARG_ADDRESS_CONSTANT: {
-            return "CONSTANT";
-        }
-        default: {
-            return "PRIVATE";
-        }
-    }
-}
+    // Lookup the kernel names.
+    size_t kernel_names_len = 0;
+    error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, 0, NULL, &kernel_names_len );
+    test_error( error, "Unable to get length of kernel names list." );
 
-static const char*
-get_kernel_arg_access_qualifier(cl_kernel_arg_access_qualifier access_qualifier)
-{
-    switch (access_qualifier)
+    size_t expected_kernel_names_len = 0;
+    for ( i = 0; i < total_kernels; ++i )
     {
-        case CL_KERNEL_ARG_ACCESS_READ_ONLY: {
-            return "READ_ONLY";
-        }
-        case CL_KERNEL_ARG_ACCESS_WRITE_ONLY: {
-            return "WRITE_ONLY";
-        }
-        case CL_KERNEL_ARG_ACCESS_READ_WRITE: {
-            return "READ_WRITE";
-        }
-        default: {
-            return "NONE";
-        }
+        expected_kernel_names_len += 1 + strlen( arg_info[ i ][ 0 ] );
     }
-}
+    if ( kernel_names_len != expected_kernel_names_len )
+    {
+        log_error( "Kernel names string is not the right length, expected %d, got %d\n", (int) expected_kernel_names_len, (int) kernel_names_len );
+        return -1;
+    }
 
-std::string
-get_kernel_arg_type_qualifier(cl_kernel_arg_type_qualifier type_qualifier)
-{
-    std::string ret;
+    const size_t len = ( kernel_names_len + 1 ) * sizeof( char );
+    char* kernel_names = (char*) malloc( len );
+    error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, len, kernel_names, &kernel_names_len );
+    test_error( error, "Unable to get kernel names list." );
 
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_CONST) ret += "CONST ";
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_RESTRICT) ret += "RESTRICT ";
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_VOLATILE) ret += "VOLATILE ";
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_PIPE) ret += "PIPE";
+    // Check to see if the kernel name array is null terminated.
+    if ( kernel_names[ kernel_names_len - 1 ] != '\0' )
+    {
+        free( kernel_names );
+        print_error( error, "Kernel name list was not null terminated" );
+        return -1;
+    }
 
-    return ret;
-}
+    // Check to see if the correct kernel name string was returned.
+    // Does the string contain each expected kernel name?
+    for ( i = 0; i < total_kernels; ++i )
+        if ( !strstr( kernel_names, arg_info[ i ][ 0 ] ) )
+            break;
+    if ( i != total_kernels )
+    {
+        log_error( "Kernel names string is missing \"%s\"\n", arg_info[ i ][ 0 ] );
+        free( kernel_names );
+        return -1;
+    }
 
-static void output_difference(const KernelArgInfo& expected,
-                              const KernelArgInfo& actual)
-{
-    if (actual.address_qualifier != expected.address_qualifier)
+    // Are the kernel names delimited by ';'?
+    if ( !strtok( kernel_names, ";" ) )
     {
-        log_error("Address Qualifier: Expected: %s\t Actual: %s\n",
-                  get_kernel_arg_address_qualifier(expected.address_qualifier),
-                  get_kernel_arg_address_qualifier(actual.address_qualifier));
-    }
-    if (actual.access_qualifier != expected.access_qualifier)
-    {
-        log_error("Access Qualifier: Expected: %s\t Actual: %s\n",
-                  get_kernel_arg_access_qualifier(expected.access_qualifier),
-                  get_kernel_arg_access_qualifier(actual.access_qualifier));
-    }
-    if (actual.type_qualifier != expected.type_qualifier)
-    {
-        log_error(
-            "Type Qualifier: Expected: %s\t Actual: %s\n",
-            get_kernel_arg_type_qualifier(expected.type_qualifier).c_str(),
-            get_kernel_arg_type_qualifier(actual.type_qualifier).c_str());
-    }
-    if (strcmp(actual.arg_type, expected.arg_type) != 0)
-    {
-        log_error("Arg Type: Expected: %s\t Actual: %s\n", expected.arg_type,
-                  actual.arg_type);
-    }
-    if (strcmp(actual.arg_name, expected.arg_name) != 0)
-    {
-        log_error("Arg Name: Expected: %s\t Actual: %s\n", expected.arg_name,
-                  actual.arg_name);
-    }
-    log_error("Argument in Kernel Source Reported as:\n%s\n",
-              expected.arg_string.c_str());
-}
-static int compare_expected_actual(const KernelArgInfo& expected,
-                                   const KernelArgInfo& actual)
-{
-    ++gTestCount;
-    int ret = TEST_PASS;
-    if ((actual.address_qualifier != expected.address_qualifier)
-        || (actual.access_qualifier != expected.access_qualifier)
-        || (actual.type_qualifier != expected.type_qualifier)
-        || (strcmp(actual.arg_type, expected.arg_type) != 0)
-        || (strcmp(actual.arg_name, expected.arg_name) != 0))
-    {
-        ret = TEST_FAIL;
-        output_difference(expected, actual);
-        ++gFailCount;
-    }
-    return ret;
-}
-
-static bool device_supports_pipes(cl_device_id deviceID)
-{
-    auto version = get_device_cl_version(deviceID);
-    if (version < MINIMUM_OPENCL_PIPE_VERSION)
-    {
-        return false;
-    }
-    cl_uint max_packet_size = 0;
-    cl_int err =
-        clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_MAX_PACKET_SIZE,
-                        sizeof(max_packet_size), &max_packet_size, nullptr);
-    test_error_ret(err, "clGetDeviceInfo", false);
-    if ((max_packet_size == 0) && (version >= Version(3, 0)))
-    {
-        return false;
-    }
-    return true;
-}
-
-static std::string get_build_options(cl_device_id deviceID)
-{
-    std::string ret = "-cl-kernel-arg-info";
-    if (get_device_cl_version(deviceID) >= MINIMUM_OPENCL_PIPE_VERSION)
-    {
-        if (device_supports_pipes(deviceID))
-        {
-            if (get_device_cl_version(deviceID) >= Version(3, 0))
-            {
-                ret += " -cl-std=CL3.0";
-            }
-            else
-            {
-                ret += " -cl-std=CL2.0";
-            }
-        }
-    }
-    return ret;
-}
-
-static std::string get_expected_arg_type(const std::string& type_string,
-                                         const bool is_pointer)
-{
-    bool is_unsigned = false;
-    std::istringstream type_stream(type_string);
-    std::string base_type = "";
-    std::string ret = "";
-    /* Signed and Unsigned on their own represent an int */
-    if (type_string == "signed" || type_string == "signed*")
-    {
-        base_type = "int";
-    }
-    else if (type_string == "unsigned" || type_string == "unsigned*")
-    {
-        base_type = "int";
-        is_unsigned = true;
+        error = -1;
     }
     else
     {
-        std::string token;
-        /* Iterate through the argument type to determine what the type is and
-         * whether or not it is signed */
-        while (std::getline(type_stream, token, ' '))
+        for ( i = 1; i < total_kernels; ++i )
         {
-            if (token.find("unsigned") != std::string::npos)
+            if ( !strtok( NULL, ";" ) )
             {
-                is_unsigned = true;
-            }
-            if (token.find("signed") == std::string::npos)
-            {
-                base_type = token;
+                error = -1;
             }
         }
     }
-    ret = base_type;
-    if (is_unsigned)
+    if ( error )
     {
-        ret.insert(0, "u");
+        log_error( "Kernel names string was not properly delimited by ';'\n" );
+        free( kernel_names );
+        return -1;
     }
-    /* Ensure that the data type is a pointer if it is not already when
-     * necessary */
-    if (is_pointer && ret.back() != '*')
-    {
-        ret += "*";
-    }
-    return ret;
-}
+    free( kernel_names );
 
-static KernelArgInfo
-create_expected_arg_info(const KernelArgInfo& kernel_argument, bool is_pointer)
-{
-    KernelArgInfo ret = kernel_argument;
-    const std::string arg_string = generate_argument(kernel_argument);
-    ret.arg_string = arg_string;
-
-    std::string type_string(kernel_argument.arg_type);
-    /* We only need to modify the expected return values for scalar types */
-    if ((is_pointer && !isdigit(type_string.back() - 1))
-        || !isdigit(type_string.back()))
+    // Create kernel objects and query them.
+    int rc = 0;
+    for ( i = 0; i < total_kernels; ++i )
     {
-        std::string expected_arg_type =
-            get_expected_arg_type(type_string, is_pointer);
-
-        /* Reset the Contents of expected arg_type char[] and then assign it to
-         * the expected value */
-        memset(ret.arg_type, 0, sizeof(ret.arg_type));
-        strcpy(ret.arg_type, expected_arg_type.c_str());
-    }
-
-    /* Any values passed by reference has TYPE_NONE */
-    if (!is_pointer)
-    {
-        ret.type_qualifier = CL_KERNEL_ARG_TYPE_NONE;
-    }
-
-    /* If the address qualifier is CONSTANT we expect to see the TYPE_CONST
-     * qualifier*/
-    if (kernel_argument.address_qualifier == CL_KERNEL_ARG_ADDRESS_CONSTANT)
-    {
-        ret.type_qualifier |= CL_KERNEL_ARG_TYPE_CONST;
-    }
-
-    /* The PIPE qualifier is special. It can only be used in a global scope. It
-     * also ignores any other qualifiers */
-    if (kernel_argument.type_qualifier & CL_KERNEL_ARG_TYPE_PIPE)
-    {
-        ret.address_qualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL;
-        ret.type_qualifier = CL_KERNEL_ARG_TYPE_PIPE;
-    }
-
-    return ret;
-}
-
-/* There are too many vector arguments for it to be worth writing down
- * statically and are instead generated here and combined with all of the scalar
- * and unsigned scalar types in a single data structure */
-static std::vector<std::string>
-generate_all_type_arguments(cl_device_id deviceID)
-{
-    std::vector<std::string> ret = {
-        "char",           "short",        "int",           "float",
-        "void",           "uchar",        "unsigned char", "ushort",
-        "unsigned short", "uint",         "unsigned int",  "char unsigned",
-        "short unsigned", "int unsigned", "signed short",  "signed int",
-        "signed long",    "short signed", "int signed",    "signed",
-        "unsigned"
-    };
-
-    std::vector<std::string> vector_types = { "char",   "uchar", "short",
-                                              "ushort", "int",   "uint",
-                                              "float" };
-    if (gHasLong)
-    {
-        ret.push_back("long");
-        ret.push_back("ulong");
-        ret.push_back("unsigned long");
-        ret.push_back("long unsigned");
-        ret.push_back("long signed");
-        vector_types.push_back("long");
-        vector_types.push_back("ulong");
-    }
-    if (device_supports_half(deviceID))
-    {
-        vector_types.push_back("half");
-    }
-    if (device_supports_double(deviceID))
-    {
-        vector_types.push_back("double");
-    }
-    static const std::vector<std::string> vector_values = { "2", "3", "4", "8",
-                                                            "16" };
-    for (auto vector_type : vector_types)
-    {
-        for (auto vector_value : vector_values)
+        int kernel_rc = 0;
+        const char* kernel_name = arg_info[ i ][ 0 ];
+        clKernelWrapper kernel = clCreateKernel(program, kernel_name, &error);
+        if( kernel == NULL || error != CL_SUCCESS )
         {
-            ret.push_back(vector_type + vector_value);
+            log_error( "ERROR: Could not get kernel: %s\n", kernel_name );
+            kernel_rc = -1;
         }
-    }
-    return ret;
-}
 
-static int
-compare_kernel_with_expected(cl_context context, cl_device_id deviceID,
-                             const char* kernel_src,
-                             const std::vector<KernelArgInfo>& expected_args)
-{
-    int failed_tests = 0;
-    clKernelWrapper kernel;
-    clProgramWrapper program;
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
-        get_build_options(deviceID).c_str());
-    test_error(err, "create_single_kernel_helper_with_build_options");
-    for (int i = 0; i < expected_args.size(); ++i)
-    {
-        KernelArgInfo actual;
-        err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_ADDRESS_QUALIFIER,
-                                 sizeof(actual.address_qualifier),
-                                 &(actual.address_qualifier), nullptr);
-        test_error(err, "clGetKernelArgInfo");
-
-        err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_ACCESS_QUALIFIER,
-                                 sizeof(actual.access_qualifier),
-                                 &(actual.access_qualifier), nullptr);
-        test_error(err, "clGetKernelArgInfo");
-
-        err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_QUALIFIER,
-                                 sizeof(actual.type_qualifier),
-                                 &(actual.type_qualifier), nullptr);
-        test_error(err, "clGetKernelArgInfo");
-
-        err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_NAME,
-                                 sizeof(actual.arg_type), &(actual.arg_type),
-                                 nullptr);
-        test_error(err, "clGetKernelArgInfo");
-
-        err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME,
-                                 sizeof(actual.arg_name), &(actual.arg_name),
-                                 nullptr);
-        test_error(err, "clGetKernelArgInfo");
-
-        failed_tests += compare_expected_actual(expected_args[i], actual);
-    }
-    return failed_tests;
-}
-
-size_t get_param_size(const std::string& arg_type, cl_device_id deviceID,
-                      bool is_pipe)
-{
-    if (is_pipe)
-    {
-        return (sizeof(int*));
-    }
-    if (arg_type.find("*") != std::string::npos)
-    {
-        cl_uint device_address_bits = 0;
-        cl_int err = clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS,
-                                     sizeof(device_address_bits),
-                                     &device_address_bits, NULL);
-        return (device_address_bits / 8);
-    }
-
-    size_t ret(0);
-    if (arg_type.find("char") != std::string::npos)
-    {
-        ret += sizeof(cl_char);
-    }
-    if (arg_type.find("short") != std::string::npos)
-    {
-        ret += sizeof(cl_short);
-    }
-    if (arg_type.find("half") != std::string::npos)
-    {
-        ret += sizeof(cl_half);
-    }
-    if (arg_type.find("int") != std::string::npos)
-    {
-        ret += sizeof(cl_int);
-    }
-    if (arg_type.find("long") != std::string::npos)
-    {
-        ret += sizeof(cl_long);
-    }
-    if (arg_type.find("float") != std::string::npos)
-    {
-        ret += sizeof(cl_float);
-    }
-    if (arg_type.find("double") != std::string::npos)
-    {
-        ret += sizeof(cl_double);
-    }
-    if (arg_type.back() == '2')
-    {
-        ret *= 2;
-    }
-    if (arg_type.back() == '3')
-    {
-        ret *= 4;
-    }
-    if (arg_type.back() == '4')
-    {
-        ret *= 4;
-    }
-    if (arg_type.back() == '8')
-    {
-        ret *= 8;
-    }
-    // If the last character is a 6 it represents a vector of 16
-    if (arg_type.back() == '6')
-    {
-        ret *= 16;
-    }
-    return ret;
-}
-
-static int run_scalar_vector_tests(cl_context context, cl_device_id deviceID)
-{
-    int failed_tests = 0;
-
-    std::vector<std::string> type_arguments =
-        generate_all_type_arguments(deviceID);
-
-    const std::vector<cl_kernel_arg_access_qualifier> access_qualifiers = {
-        CL_KERNEL_ARG_ACCESS_NONE, CL_KERNEL_ARG_ACCESS_READ_ONLY,
-        CL_KERNEL_ARG_ACCESS_WRITE_ONLY
-    };
-
-    std::vector<KernelArgInfo> all_args, expected_args;
-    size_t max_param_size = get_max_param_size(deviceID);
-    size_t total_param_size(0);
-    for (auto address_qualifier : address_qualifiers)
-    {
-        bool is_private = (address_qualifier == CL_KERNEL_ARG_ADDRESS_PRIVATE);
-
-        /* OpenCL kernels cannot take "private" pointers and only "private"
-         * variables can take values */
-        bool is_pointer = !is_private;
-
-        for (auto type_qualifier : type_qualifiers)
+        if(kernel_rc == 0)
         {
-            bool is_pipe = (type_qualifier & CL_KERNEL_ARG_TYPE_PIPE);
-            bool is_restrict = (type_qualifier & CL_KERNEL_ARG_TYPE_RESTRICT);
+            // Determine the expected number of arguments.
+            arg_count = 0;
+            while (arg_info[ i ][ (ARG_INFO_FIELD_COUNT * arg_count) + 1 ] != NULL)
+                ++arg_count;
 
-            for (auto access_qualifier : access_qualifiers)
+            // Try to get the number of arguments.
+            error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &size );
+            test_error( error, "Unable to get kernel arg count param size" );
+            if( size != sizeof( numArgs ) )
             {
-                bool has_access_qualifier =
-                    (access_qualifier != CL_KERNEL_ARG_ACCESS_NONE);
+                log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d) for kernel: %s\n", (int)sizeof( numArgs ), (int)size, kernel_name );
+                kernel_rc = -1;
+            }
+        }
 
-                /*Only images and pipes can have an access qualifier,
-                 * otherwise it should be ACCESS_NONE */
-                if (!is_pipe && has_access_qualifier)
+
+        if(kernel_rc == 0)
+        {
+            error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
+            test_error( error, "Unable to get kernel arg count" );
+            if( numArgs != arg_count )
+            {
+                log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d) for kernel: %s\n", arg_count, numArgs, kernel_name );
+                kernel_rc = -1;
+            }
+        }
+
+        if(kernel_rc == 0)
+        {
+            for ( j = 0; j < numArgs; ++j )
+            {
+
+                int arg_rc = 0;
+                cl_kernel_arg_address_qualifier expected_address_qualifier = (cl_kernel_arg_address_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ADDR_OFFSET ];
+                cl_kernel_arg_access_qualifier expected_access_qualifier =  (cl_kernel_arg_access_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ACCESS_OFFSET ];
+                cl_kernel_arg_type_qualifier expected_type_qualifier = (cl_kernel_arg_type_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_QUAL_OFFSET ];
+                const char* expected_type_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_NAME_OFFSET ];
+                const char* expected_arg_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ARG_NAME_OFFSET ];
+
+                // Try to get the address qualifier of each argument.
+                cl_kernel_arg_address_qualifier address_qualifier = 0;
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof address_qualifier, &address_qualifier, &size );
+                test_error( error, "Unable to get argument address qualifier" );
+                error = (address_qualifier != expected_address_qualifier);
+                if ( error )
                 {
-                    continue;
+                    log_error( "ERROR: Bad address qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_address_qualifier, (unsigned int)address_qualifier );
+                    arg_rc = -1;
                 }
 
-                /* If the type is a pipe, then either the specified or
-                 * default access qualifier is returned and so "NONE" will
-                 * never be returned */
-                if (is_pipe && !has_access_qualifier)
+                // Try to get the access qualifier of each argument.
+                cl_kernel_arg_access_qualifier access_qualifier = 0;
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof access_qualifier, &access_qualifier, &size );
+                test_error( error, "Unable to get argument access qualifier" );
+                error = (access_qualifier != expected_access_qualifier);
+                if ( error )
                 {
-                    continue;
+                    log_error( "ERROR: Bad access qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_access_qualifier, (unsigned int)access_qualifier );
+                    arg_rc = -1;
                 }
 
-                /* The "restrict" type qualifier can only apply to
-                 * pointers
-                 */
-                if (is_restrict && !is_pointer)
+                // Try to get the type qualifier of each argument.
+                cl_kernel_arg_type_qualifier arg_type_qualifier = 0;
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof arg_type_qualifier, &arg_type_qualifier, &size );
+                test_error( error, "Unable to get argument type qualifier" );
+                error = (arg_type_qualifier != expected_type_qualifier);
+                if ( error )
                 {
-                    continue;
+                    log_error( "ERROR: Bad type qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_type_qualifier, (unsigned int)arg_type_qualifier );
+                    arg_rc = -1;
                 }
 
-                /* We cannot have pipe pointers */
-                if (is_pipe && is_pointer)
+                // Try to get the type of each argument.
+                memset( name, 0, max_name_len );
+                error = clGetKernelArgInfo(kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_NAME, max_name_len, name, &size );
+                test_error( error, "Unable to get argument type name" );
+                error = strcmp( (const char*) name, expected_type_name );
+                if ( error )
                 {
-                    continue;
+                    log_error( "ERROR: Bad argument type name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_type_name, name );
+                    arg_rc = -1;
                 }
 
-
-                for (auto arg_type : type_arguments)
+                // Try to get the name of each argument.
+                memset( name, 0, max_name_len );
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_NAME, max_name_len, name, &size );
+                test_error( error, "Unable to get argument name" );
+                error = strcmp( (const char*) name, expected_arg_name );
+                if ( error )
                 {
-                    /* Void Types cannot be private */
-                    if (is_private && arg_type == "void")
-                    {
-                        continue;
-                    }
+                    log_error( "ERROR: Bad argument name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_arg_name, name );
+                    arg_rc = -1;
+                }
 
-                    if (is_pointer)
-                    {
-                        arg_type += "*";
-                    }
-                    size_t param_size =
-                        get_param_size(arg_type, deviceID, is_pipe);
-                    if (param_size + total_param_size >= max_param_size
-                        || all_args.size() == MAX_NUMBER_OF_KERNEL_ARGS)
-                    {
-                        const std::string kernel_src =
-                            generate_kernel(all_args);
-                        failed_tests += compare_kernel_with_expected(
-                            context, deviceID, kernel_src.c_str(),
-                            expected_args);
-                        all_args.clear();
-                        expected_args.clear();
-                        total_param_size = 0;
-                    }
-                    total_param_size += param_size;
-
-                    KernelArgInfo kernel_argument(
-                        address_qualifier, access_qualifier, type_qualifier,
-                        arg_type, all_args.size());
-
-                    expected_args.push_back(
-                        create_expected_arg_info(kernel_argument, is_pointer));
-
-                    all_args.push_back(kernel_argument);
+                if(arg_rc != 0) {
+                    kernel_rc = -1;
                 }
             }
         }
-    }
-    const std::string kernel_src = generate_kernel(all_args);
-    failed_tests += compare_kernel_with_expected(
-        context, deviceID, kernel_src.c_str(), expected_args);
-    return failed_tests;
-}
 
-static cl_uint get_max_number_of_pipes(cl_device_id deviceID, cl_int& err)
-{
-    cl_uint ret(0);
-    err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PIPE_ARGS, sizeof(ret), &ret,
-                          nullptr);
-    return ret;
-}
-
-static int run_pipe_tests(cl_context context, cl_device_id deviceID)
-{
-    int failed_tests = 0;
-
-    cl_kernel_arg_address_qualifier address_qualifier =
-        CL_KERNEL_ARG_ADDRESS_PRIVATE;
-    std::vector<std::string> type_arguments =
-        generate_all_type_arguments(deviceID);
-    const std::vector<cl_kernel_arg_access_qualifier> access_qualifiers = {
-        CL_KERNEL_ARG_ACCESS_READ_ONLY, CL_KERNEL_ARG_ACCESS_WRITE_ONLY
-    };
-    std::vector<KernelArgInfo> all_args, expected_args;
-    size_t max_param_size = get_max_param_size(deviceID);
-    size_t total_param_size(0);
-    cl_int err = CL_SUCCESS;
-    cl_uint max_number_of_pipes = get_max_number_of_pipes(deviceID, err);
-    test_error_ret(err, "get_max_number_of_pipes", TEST_FAIL);
-    cl_uint number_of_pipes(0);
-
-    const bool is_pointer = false;
-    const bool is_pipe = true;
-
-    for (auto type_qualifier : pipe_qualifiers)
-    {
-        for (auto access_qualifier : access_qualifiers)
-        {
-            for (auto arg_type : type_arguments)
-            {
-                /* We cannot have void pipes */
-                if (arg_type == "void")
-                {
-                    continue;
-                }
-
-                size_t param_size = get_param_size(arg_type, deviceID, is_pipe);
-                if (param_size + total_param_size >= max_param_size
-                    || number_of_pipes == max_number_of_pipes)
-                {
-                    const std::string kernel_src = generate_kernel(all_args);
-                    failed_tests += compare_kernel_with_expected(
-                        context, deviceID, kernel_src.c_str(), expected_args);
-                    all_args.clear();
-                    expected_args.clear();
-                    total_param_size = 0;
-                    number_of_pipes = 0;
-                }
-                total_param_size += param_size;
-                number_of_pipes++;
-
-                KernelArgInfo kernel_argument(address_qualifier,
-                                              access_qualifier, type_qualifier,
-                                              arg_type, all_args.size());
-
-                expected_args.push_back(
-                    create_expected_arg_info(kernel_argument, is_pointer));
-
-                all_args.push_back(kernel_argument);
-            }
+        //log_info( "%s ... %s\n",arg_info[i][0],kernel_rc == 0 ? "passed" : "failed" );
+        if(kernel_rc != 0) {
+            rc = -1;
         }
     }
-    const std::string kernel_src = generate_kernel(all_args);
-    failed_tests += compare_kernel_with_expected(
-        context, deviceID, kernel_src.c_str(), expected_args);
-    return failed_tests;
+  return rc;
 }
 
-static int run_sampler_test(cl_context context, cl_device_id deviceID)
-{
-    cl_kernel_arg_address_qualifier address_qualifier =
-        CL_KERNEL_ARG_ADDRESS_PRIVATE;
-    cl_kernel_arg_type_qualifier type_qualifier = CL_KERNEL_ARG_TYPE_NONE;
-    cl_kernel_arg_access_qualifier access_qualifier = CL_KERNEL_ARG_ACCESS_NONE;
-    std::string image_type = "sampler_t";
-    bool is_pointer = false;
 
-    KernelArgInfo kernel_argument(address_qualifier, access_qualifier,
-                                  type_qualifier, image_type,
-                                  SINGLE_KERNEL_ARG_NUMBER);
-
-    KernelArgInfo expected =
-        create_expected_arg_info(kernel_argument, is_pointer);
-
-    const std::string kernel_src = generate_kernel({ kernel_argument });
-
-    return compare_kernel_with_expected(context, deviceID, kernel_src.c_str(),
-                                        { expected });
-}
-
-static int run_image_tests(cl_context context, cl_device_id deviceID)
-{
-    int failed_tests = 0;
-    bool supports_3d_image_writes =
-        is_extension_available(deviceID, "cl_khr_3d_image_writes");
-    bool is_pointer = false;
-    cl_kernel_arg_type_qualifier type_qualifier = CL_KERNEL_ARG_TYPE_NONE;
-    cl_kernel_arg_address_qualifier address_qualifier =
-        CL_KERNEL_ARG_ADDRESS_GLOBAL;
-
-    for (auto access_qualifier : access_qualifiers)
-    {
-        bool is_write =
-            (access_qualifier == CL_KERNEL_ARG_ACCESS_WRITE_ONLY
-             || access_qualifier == CL_KERNEL_ARG_ACCESS_READ_WRITE);
-        for (auto image_type : image_arguments)
-        {
-            bool is_3d_image = image_type == "image3d_t";
-            /* We can only test 3d image writes if our device supports it */
-            if (is_3d_image && is_write)
-            {
-                if (!supports_3d_image_writes)
-                {
-                    continue;
-                }
-            }
-            KernelArgInfo kernel_argument(address_qualifier, access_qualifier,
-                                          type_qualifier, image_type,
-                                          SINGLE_KERNEL_ARG_NUMBER);
-            KernelArgInfo expected =
-                create_expected_arg_info(kernel_argument, is_pointer);
-            const std::string kernel_src =
-                generate_kernel({ kernel_argument }, supports_3d_image_writes);
-
-            failed_tests += compare_kernel_with_expected(
-                context, deviceID, kernel_src.c_str(), { expected });
-        }
-    }
-    failed_tests += run_sampler_test(context, deviceID);
-    return failed_tests;
-}
-
-/* Ensure clGetKernelArgInfo returns successfully when param_value is
- * set to null */
-static int test_null_param(cl_context context, cl_device_id deviceID,
-                           char const* kernel_src)
-{
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
-        get_build_options(deviceID).c_str());
-    test_error_ret(err, "create_single_kernel_helper_with_build_options",
-                   TEST_FAIL);
-
-    err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
-                             CL_KERNEL_ARG_ADDRESS_QUALIFIER, 0, nullptr,
-                             nullptr);
-    test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL);
-
-    err =
-        clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
-                           CL_KERNEL_ARG_ACCESS_QUALIFIER, 0, nullptr, nullptr);
-    test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL);
-
-    err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
-                             CL_KERNEL_ARG_TYPE_QUALIFIER, 0, nullptr, nullptr);
-    test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL);
-
-    err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
-                             CL_KERNEL_ARG_TYPE_NAME, 0, nullptr, nullptr);
-    test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL);
-
-    err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
-                             CL_KERNEL_ARG_NAME, 0, nullptr, nullptr);
-    test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL);
-
-    return TEST_PASS;
-}
-
-/* Ensure clGetKernelArgInfo returns the correct size in bytes for the
- * kernel arg name */
-static int test_arg_name_size(cl_context context, cl_device_id deviceID,
-                              char const* kernel_src)
+int    test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
 {
     size_t size;
-    /* We are adding +1 because the argument used in this kernel is argument0
-     * which has 1 extra character than just the base argument name */
-    char arg_return[sizeof(KERNEL_ARGUMENT_NAME) + 1];
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
-        get_build_options(deviceID).c_str());
+    int error;
 
-    test_error_ret(err, "create_single_kernel_helper_with_build_options",
-                   TEST_FAIL);
+    cl_bool supports_double = 0; // assume not
+    cl_bool supports_half = 0; // assume not
+    cl_bool supports_images = 0; // assume not
+    cl_bool supports_long = 0; // assume not
+    cl_bool supports_3D_images = 0; // assume not
 
-    err =
-        clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, CL_KERNEL_ARG_NAME,
-                           sizeof(arg_return), &arg_return, &size);
-    test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL);
-    if (size == sizeof(KERNEL_ARGUMENT_NAME) + 1)
-    {
-        return TEST_PASS;
-    }
-    else
-    {
-        return TEST_FAIL;
-    }
-}
+    // Check if this device supports images
+  error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof supports_images, &supports_images, NULL);
+  test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
 
-static int run_boundary_tests(cl_context context, cl_device_id deviceID)
-{
-    int failed_tests = 0;
+  if (supports_images) {
+    log_info(" o Device supports images\n");
+    log_info(" o Expecting SUCCESS when testing image kernel arguments.\n");
+  }
+  else {
+    log_info(" o Device lacks image support\n");
+    log_info(" o Not testing image kernel arguments.\n");
+  }
 
-    cl_kernel_arg_address_qualifier address_qualifier =
-        CL_KERNEL_ARG_ADDRESS_GLOBAL;
-    cl_kernel_arg_access_qualifier access_qualifier = CL_KERNEL_ARG_ACCESS_NONE;
-    cl_kernel_arg_type_qualifier type_qualifier = CL_KERNEL_ARG_TYPE_NONE;
-    std::string arg_type = "int*";
-    KernelArgInfo arg_info(address_qualifier, access_qualifier, type_qualifier,
-                           arg_type, SINGLE_KERNEL_ARG_NUMBER);
-    const std::string kernel_src = generate_kernel({ arg_info });
-
-    failed_tests += test_arg_name_size(context, deviceID, kernel_src.c_str());
-
-    if (test_null_param(context, deviceID, kernel_src.c_str()) != TEST_PASS)
-    {
-        failed_tests++;
-    }
-
-    return failed_tests;
-}
-
-static int run_all_tests(cl_context context, cl_device_id deviceID)
-{
-
-    int failed_scalar_tests = run_scalar_vector_tests(context, deviceID);
-    if (failed_scalar_tests == 0)
-    {
-        log_info("All Data Type Tests Passed\n");
-    }
-    else
-    {
-        log_error("%d Data Type Test(s) Failed\n", failed_scalar_tests);
-    }
-
-    int failed_image_tests = 0;
-    if (checkForImageSupport(deviceID) == 0)
-    {
-        failed_image_tests = run_image_tests(context, deviceID);
-        if (failed_image_tests == 0)
-        {
-            log_info("All Image Tests Passed\n");
-        }
-        else
-        {
-            log_error("%d Image Test(s) Failed\n", failed_image_tests);
-        }
-    }
-    int failed_pipe_tests = 0;
-    // TODO https://github.com/KhronosGroup/OpenCL-CTS/issues/1244
-    if (false)
-    {
-        failed_pipe_tests = run_pipe_tests(context, deviceID);
-        if (failed_pipe_tests == 0)
-        {
-            log_info("All Pipe Tests Passed\n");
-        }
-        else
-        {
-            log_error("%d Pipe Test(s) Failed\n", failed_pipe_tests);
+    if (is_extension_available(deviceID, "cl_khr_fp64")) {
+        log_info(" o Device claims extension 'cl_khr_fp64'\n");
+        log_info(" o Expecting SUCCESS when testing double kernel arguments.\n");
+        supports_double = 1;
+    } else {
+        cl_device_fp_config double_fp_config;
+        error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(double_fp_config), &double_fp_config, NULL);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed");
+        if (double_fp_config != 0)
+            supports_double = 1;
+        else {
+            log_info(" o Device lacks extension 'cl_khr_fp64'\n");
+            log_info(" o Not testing double kernel arguments.\n");
+            supports_double = 0;
         }
     }
 
-    int failed_boundary_tests = run_boundary_tests(context, deviceID);
-    if (failed_boundary_tests == 0)
-    {
-        log_info("All Edge Case Tests Passed\n");
-    }
-    else
-    {
-        log_error("%d Edge Case Test(s) Failed\n", failed_boundary_tests);
+    if (is_extension_available(deviceID, "cl_khr_fp16")) {
+        log_info(" o Device claims extension 'cl_khr_fp16'\n");
+        log_info(" o Expecting SUCCESS when testing halfn* kernel arguments.\n");
+        supports_half = 1;
+    } else {
+        log_info(" o Device lacks extension 'cl_khr_fp16'\n");
+        log_info(" o Not testing halfn* kernel arguments.\n");
+        supports_half = 0;
     }
 
-    return (failed_scalar_tests + failed_image_tests + failed_pipe_tests
-            + failed_boundary_tests);
-}
+    if (is_extension_available(deviceID, "cl_khr_int64"))
+    {
+        log_info(" o Device claims extension 'cl_khr_int64'\n");
+        log_info(" o Expecting SUCCESS when testing long kernel arguments.\n");
+        supports_long = 1;
+    } else
+    {
+        log_info(" o Device lacks extension 'cl_khr_int64'\n");
+        log_info(" o Not testing long kernel arguments.\n");
+        supports_long = 0;
+    }
 
-int test_get_kernel_arg_info(cl_device_id deviceID, cl_context context,
-                             cl_command_queue queue, int num_elements)
-{
-    int failed_tests = run_all_tests(context, deviceID);
-    if (failed_tests != 0)
+    error = checkFor3DImageSupport(deviceID);
+    if (error != CL_IMAGE_FORMAT_NOT_SUPPORTED)
     {
-        log_error("%d Test(s) Failed\n", failed_tests);
-        return TEST_FAIL;
-    }
-    else
+        log_info(" o Device supports 3D images\n");
+        log_info(" o Expecting SUCCESS when testing 3D image kernel arguments.\n");
+        supports_3D_images = 1;
+    } else
     {
-        return TEST_PASS;
+        log_info(" o Device lacks 3D image support\n");
+        log_info(" o Not testing 3D image kernel arguments.\n");
+        supports_3D_images = 0;
     }
+
+    int test_failed = 0;
+
+    // Now create a test program using required arguments
+    log_info("Testing required kernel arguments...\n");
+    error = test(deviceID, context, required_kernel_args, sizeof(required_kernel_args)/sizeof(required_kernel_args[0]), required_arg_info, sizeof(required_arg_info)/sizeof(required_arg_info[0]));
+    test_failed = (error) ? -1 : test_failed;
+
+    if ( supports_images )
+    {
+        log_info("Testing optional image arguments...\n");
+        error = test(deviceID, context, image_kernel_args, sizeof(image_kernel_args)/sizeof(image_kernel_args[0]), image_arg_info, sizeof(image_arg_info)/sizeof(image_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+
+    if ( supports_double )
+    {
+        log_info("Testing optional double arguments...\n");
+        error = test(deviceID, context, double_kernel_args, sizeof(double_kernel_args)/sizeof(double_kernel_args[0]), double_arg_info, sizeof(double_arg_info)/sizeof(double_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+
+    if ( supports_half )
+    {
+        log_info("Testing optional half arguments...\n");
+        error = test(deviceID, context, half_kernel_args, sizeof(half_kernel_args)/sizeof(half_kernel_args[0]), half_arg_info, sizeof(half_arg_info)/sizeof(half_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+
+    if ( supports_long )
+    {
+        log_info("Testing optional long arguments...\n");
+        error = test(deviceID, context, long_kernel_args, sizeof(long_kernel_args)/sizeof(long_kernel_args[0]), long_arg_info, sizeof(long_arg_info)/sizeof(long_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+
+    if ( supports_3D_images )
+    {
+        log_info("Testing optional 3D image arguments...\n");
+        error = test(deviceID, context, image_3D_kernel_args, sizeof(image_3D_kernel_args)/sizeof(image_3D_kernel_args[0]), image_3D_arg_info, sizeof(image_3D_arg_info)/sizeof(image_3D_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+
+    return test_failed;
 }

diff --git a/test_conformance/api/test_kernel_arg_info_compatibility.cpp b/test_conformance/api/test_kernel_arg_info_compatibility.cpp
new file mode 100644
index 0000000..a6b60c2
--- /dev/null
+++ b/test_conformance/api/test_kernel_arg_info_compatibility.cpp

@@ -0,0 +1,5159 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+#include <limits.h>
+#include <ctype.h>
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#define ARG_INFO_FIELD_COUNT        5 // NOTE(review): presumably the number of fields recorded per kernel argument in the *_arg_info tables -- confirm at use sites
+
+#define ARG_INFO_ADDR_OFFSET        1 // NOTE(review): offsets 1..5 look like per-argument field positions; this one presumably the address qualifier -- confirm
+#define ARG_INFO_ACCESS_OFFSET        2 // presumably the access qualifier field -- confirm
+#define ARG_INFO_TYPE_QUAL_OFFSET    3 // presumably the type qualifier field -- confirm
+#define ARG_INFO_TYPE_NAME_OFFSET    4 // presumably the type name field -- confirm
+#define ARG_INFO_ARG_NAME_OFFSET    5 // presumably the argument name field; equals ARG_INFO_FIELD_COUNT, so offsets appear to be 1-based -- confirm
+
+typedef char const * kernel_args_t[]; // Array of unspecified bound of pointers to const char; the tables of OpenCL C source strings below (e.g. required_kernel_args) are declared with this type.
+
+static kernel_args_t required_kernel_args = {
+    "typedef float4 typedef_type;\n"
+    "\n"
+    "typedef struct struct_type {\n"
+    "    float4 float4d;\n"
+    "    int intd;\n"
+    "} typedef_struct_type;\n"
+    "\n"
+    "typedef union union_type {\n"
+    "    float4 float4d;\n"
+    "    uint4 uint4d;\n"
+    "} typedef_union_type;\n"
+    "\n"
+    "typedef enum enum_type {\n"
+    "    enum_type_zero,\n"
+    "    enum_type_one,\n"
+    "    enum_type_two\n"
+    "} typedef_enum_type;\n"
+    "\n"
+    "kernel void constant_scalar_p0(constant void*constantvoidp,\n"
+    "                              constant char *constantcharp,\n"
+    "                              constant uchar* constantucharp,\n"
+    "                              constant unsigned char * constantunsignedcharp)\n"
+  "{}\n",
+    "kernel void constant_scalar_p1(constant short*constantshortp,\n"
+    "                              constant ushort *constantushortp,\n"
+    "                              constant unsigned short* constantunsignedshortp,\n"
+    "                              constant int * constantintp)\n"
+  "{}\n",
+    "kernel void constant_scalar_p2(constant uint*constantuintp,\n"
+    "                              constant unsigned int *constantunsignedintp,\n"
+    "                              constant long* constantlongp,\n"
+    "                              constant ulong * constantulongp)\n"
+  "{}\n",
+    "kernel void constant_scalar_p3(constant unsigned long*constantunsignedlongp,\n"
+    "                              constant float *constantfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_scalar_restrict_p0(constant void* restrict constantvoidrestrictp,\n"
+    "                                       constant char * restrict constantcharrestrictp,\n"
+    "                                       constant uchar*restrict constantucharrestrictp,\n"
+    "                                       constant unsigned char *restrict constantunsignedcharrestrictp)\n"
+    "{}\n",
+    "kernel void constant_scalar_restrict_p1(constant short* restrict constantshortrestrictp,\n"
+    "                                       constant ushort * restrict constantushortrestrictp,\n"
+    "                                       constant unsigned short*restrict constantunsignedshortrestrictp,\n"
+    "                                       constant int *restrict constantintrestrictp)\n"
+    "{}\n",
+    "kernel void constant_scalar_restrict_p2(constant uint* restrict constantuintrestrictp,\n"
+    "                                       constant unsigned int * restrict constantunsignedintrestrictp,\n"
+    "                                       constant long*restrict constantlongrestrictp,\n"
+    "                                       constant ulong *restrict constantulongrestrictp)\n"
+    "{}\n",
+    "kernel void constant_scalar_restrict_p3(constant unsigned long* restrict constantunsignedlongrestrictp,\n"
+    "                                       constant float * restrict constantfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_scalar_p(global void*globalvoidp,\n"
+    "                            global char *globalcharp,\n"
+    "                            global uchar* globalucharp,\n"
+    "                            global unsigned char * globalunsignedcharp,\n"
+    "                            global short*globalshortp,\n"
+    "                            global ushort *globalushortp,\n"
+    "                            global unsigned short* globalunsignedshortp,\n"
+    "                            global int * globalintp,\n"
+    "                            global uint*globaluintp,\n"
+    "                            global unsigned int *globalunsignedintp,\n"
+    "                            global long* globallongp,\n"
+    "                            global ulong * globalulongp,\n"
+    "                            global unsigned long*globalunsignedlongp,\n"
+    "                            global float *globalfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_scalar_restrict_p(global void* restrict globalvoidrestrictp,\n"
+    "                                     global char * restrict globalcharrestrictp,\n"
+    "                                     global uchar*restrict globalucharrestrictp,\n"
+    "                                     global unsigned char *restrict globalunsignedcharrestrictp,\n"
+    "                                     global short* restrict globalshortrestrictp,\n"
+    "                                     global ushort * restrict globalushortrestrictp,\n"
+    "                                     global unsigned short*restrict globalunsignedshortrestrictp,\n"
+    "                                     global int *restrict globalintrestrictp,\n"
+    "                                     global uint* restrict globaluintrestrictp,\n"
+    "                                     global unsigned int * restrict globalunsignedintrestrictp,\n"
+    "                                     global long*restrict globallongrestrictp,\n"
+    "                                     global ulong *restrict globalulongrestrictp,\n"
+    "                                     global unsigned long* restrict globalunsignedlongrestrictp,\n"
+    "                                     global float * restrict globalfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_scalar_p(global const void*globalconstvoidp,\n"
+    "                                  global const char *globalconstcharp,\n"
+    "                                  global const uchar* globalconstucharp,\n"
+    "                                  global const unsigned char * globalconstunsignedcharp,\n"
+    "                                  global const short*globalconstshortp,\n"
+    "                                  global const ushort *globalconstushortp,\n"
+    "                                  global const unsigned short* globalconstunsignedshortp,\n"
+    "                                  global const int * globalconstintp,\n"
+    "                                  global const uint*globalconstuintp,\n"
+    "                                  global const unsigned int *globalconstunsignedintp,\n"
+    "                                  global const long* globalconstlongp,\n"
+    "                                  global const ulong * globalconstulongp,\n"
+    "                                  global const unsigned long*globalconstunsignedlongp,\n"
+    "                                  global const float *globalconstfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_scalar_restrict_p(global const void* restrict globalconstvoidrestrictp,\n"
+    "                                           global const char * restrict globalconstcharrestrictp,\n"
+    "                                           global const uchar*restrict globalconstucharrestrictp,\n"
+    "                                           global const unsigned char *restrict globalconstunsignedcharrestrictp,\n"
+    "                                           global const short* restrict globalconstshortrestrictp,\n"
+    "                                           global const ushort * restrict globalconstushortrestrictp,\n"
+    "                                           global const unsigned short*restrict globalconstunsignedshortrestrictp,\n"
+    "                                           global const int *restrict globalconstintrestrictp,\n"
+    "                                           global const uint* restrict globalconstuintrestrictp,\n"
+    "                                           global const unsigned int * restrict globalconstunsignedintrestrictp,\n"
+    "                                           global const long*restrict globalconstlongrestrictp,\n"
+    "                                           global const ulong *restrict globalconstulongrestrictp,\n"
+    "                                           global const unsigned long* restrict globalconstunsignedlongrestrictp,\n"
+    "                                           global const float * restrict globalconstfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_scalar_p(global volatile void*globalvolatilevoidp,\n"
+    "                                     global volatile char *globalvolatilecharp,\n"
+    "                                     global volatile uchar* globalvolatileucharp,\n"
+    "                                     global volatile unsigned char * globalvolatileunsignedcharp,\n"
+    "                                     global volatile short*globalvolatileshortp,\n"
+    "                                     global volatile ushort *globalvolatileushortp,\n"
+    "                                     global volatile unsigned short* globalvolatileunsignedshortp,\n"
+    "                                     global volatile int * globalvolatileintp,\n"
+    "                                     global volatile uint*globalvolatileuintp,\n"
+    "                                     global volatile unsigned int *globalvolatileunsignedintp,\n"
+    "                                     global volatile long* globalvolatilelongp,\n"
+    "                                     global volatile ulong * globalvolatileulongp,\n"
+    "                                     global volatile unsigned long*globalvolatileunsignedlongp,\n"
+    "                                     global volatile float *globalvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_scalar_restrict_p(global volatile void* restrict globalvolatilevoidrestrictp,\n"
+    "                                              global volatile char * restrict globalvolatilecharrestrictp,\n"
+    "                                              global volatile uchar*restrict globalvolatileucharrestrictp,\n"
+    "                                              global volatile unsigned char *restrict globalvolatileunsignedcharrestrictp,\n"
+    "                                              global volatile short* restrict globalvolatileshortrestrictp,\n"
+    "                                              global volatile ushort * restrict globalvolatileushortrestrictp,\n"
+    "                                              global volatile unsigned short*restrict globalvolatileunsignedshortrestrictp,\n"
+    "                                              global volatile int *restrict globalvolatileintrestrictp,\n"
+    "                                              global volatile uint* restrict globalvolatileuintrestrictp,\n"
+    "                                              global volatile unsigned int * restrict globalvolatileunsignedintrestrictp,\n"
+    "                                              global volatile long*restrict globalvolatilelongrestrictp,\n"
+    "                                              global volatile ulong *restrict globalvolatileulongrestrictp,\n"
+    "                                              global volatile unsigned long* restrict globalvolatileunsignedlongrestrictp,\n"
+    "                                              global volatile float * restrict globalvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_scalar_p(global const volatile void*globalconstvolatilevoidp,\n"
+    "                                           global const volatile char *globalconstvolatilecharp,\n"
+    "                                           global const volatile uchar* globalconstvolatileucharp,\n"
+    "                                           global const volatile unsigned char * globalconstvolatileunsignedcharp,\n"
+    "                                           global const volatile short*globalconstvolatileshortp,\n"
+    "                                           global const volatile ushort *globalconstvolatileushortp,\n"
+    "                                           global const volatile unsigned short* globalconstvolatileunsignedshortp,\n"
+    "                                           global const volatile int * globalconstvolatileintp,\n"
+    "                                           global const volatile uint*globalconstvolatileuintp,\n"
+    "                                           global const volatile unsigned int *globalconstvolatileunsignedintp,\n"
+    "                                           global const volatile long* globalconstvolatilelongp,\n"
+    "                                           global const volatile ulong * globalconstvolatileulongp,\n"
+    "                                           global const volatile unsigned long*globalconstvolatileunsignedlongp,\n"
+    "                                           global const volatile float *globalconstvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_scalar_restrict_p(global const volatile void* restrict globalconstvolatilevoidrestrictp,\n"
+    "                                                    global const volatile char * restrict globalconstvolatilecharrestrictp,\n"
+    "                                                    global const volatile uchar*restrict globalconstvolatileucharrestrictp,\n"
+    "                                                    global const volatile unsigned char *restrict globalconstvolatileunsignedcharrestrictp,\n"
+    "                                                    global const volatile short* restrict globalconstvolatileshortrestrictp,\n"
+    "                                                    global const volatile ushort * restrict globalconstvolatileushortrestrictp,\n"
+    "                                                    global const volatile unsigned short*restrict globalconstvolatileunsignedshortrestrictp,\n"
+    "                                                    global const volatile int *restrict globalconstvolatileintrestrictp,\n"
+    "                                                    global const volatile uint* restrict globalconstvolatileuintrestrictp,\n"
+    "                                                    global const volatile unsigned int * restrict globalconstvolatileunsignedintrestrictp,\n"
+    "                                                    global const volatile long*restrict globalconstvolatilelongrestrictp,\n"
+    "                                                    global const volatile ulong *restrict globalconstvolatileulongrestrictp,\n"
+    "                                                    global const volatile unsigned long* restrict globalconstvolatileunsignedlongrestrictp,\n"
+    "                                                    global const volatile float * restrict globalconstvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_scalar_p(local void*localvoidp,\n"
+    "                           local char *localcharp,\n"
+    "                           local uchar* localucharp,\n"
+    "                           local unsigned char * localunsignedcharp,\n"
+    "                           local short*localshortp,\n"
+    "                           local ushort *localushortp,\n"
+    "                           local unsigned short* localunsignedshortp,\n"
+    "                           local int * localintp,\n"
+    "                           local uint*localuintp,\n"
+    "                           local unsigned int *localunsignedintp,\n"
+    "                           local long* locallongp,\n"
+    "                           local ulong * localulongp,\n"
+    "                           local unsigned long*localunsignedlongp,\n"
+    "                           local float *localfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_scalar_restrict_p(local void* restrict localvoidrestrictp,\n"
+    "                                    local char * restrict localcharrestrictp,\n"
+    "                                    local uchar*restrict localucharrestrictp,\n"
+    "                                    local unsigned char *restrict localunsignedcharrestrictp,\n"
+    "                                    local short* restrict localshortrestrictp,\n"
+    "                                    local ushort * restrict localushortrestrictp,\n"
+    "                                    local unsigned short*restrict localunsignedshortrestrictp,\n"
+    "                                    local int *restrict localintrestrictp,\n"
+    "                                    local uint* restrict localuintrestrictp,\n"
+    "                                    local unsigned int * restrict localunsignedintrestrictp,\n"
+    "                                    local long*restrict locallongrestrictp,\n"
+    "                                    local ulong *restrict localulongrestrictp,\n"
+    "                                    local unsigned long* restrict localunsignedlongrestrictp,\n"
+    "                                    local float * restrict localfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_scalar_p(local const void*localconstvoidp,\n"
+    "                                 local const char *localconstcharp,\n"
+    "                                 local const uchar* localconstucharp,\n"
+    "                                 local const unsigned char * localconstunsignedcharp,\n"
+    "                                 local const short*localconstshortp,\n"
+    "                                 local const ushort *localconstushortp,\n"
+    "                                 local const unsigned short* localconstunsignedshortp,\n"
+    "                                 local const int * localconstintp,\n"
+    "                                 local const uint*localconstuintp,\n"
+    "                                 local const unsigned int *localconstunsignedintp,\n"
+    "                                 local const long* localconstlongp,\n"
+    "                                 local const ulong * localconstulongp,\n"
+    "                                 local const unsigned long*localconstunsignedlongp,\n"
+    "                                 local const float *localconstfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_scalar_restrict_p(local const void* restrict localconstvoidrestrictp,\n"
+    "                                          local const char * restrict localconstcharrestrictp,\n"
+    "                                          local const uchar*restrict localconstucharrestrictp,\n"
+    "                                          local const unsigned char *restrict localconstunsignedcharrestrictp,\n"
+    "                                          local const short* restrict localconstshortrestrictp,\n"
+    "                                          local const ushort * restrict localconstushortrestrictp,\n"
+    "                                          local const unsigned short*restrict localconstunsignedshortrestrictp,\n"
+    "                                          local const int *restrict localconstintrestrictp,\n"
+    "                                          local const uint* restrict localconstuintrestrictp,\n"
+    "                                          local const unsigned int * restrict localconstunsignedintrestrictp,\n"
+    "                                          local const long*restrict localconstlongrestrictp,\n"
+    "                                          local const ulong *restrict localconstulongrestrictp,\n"
+    "                                          local const unsigned long* restrict localconstunsignedlongrestrictp,\n"
+    "                                          local const float * restrict localconstfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_scalar_p(local volatile void*localvolatilevoidp,\n"
+    "                                    local volatile char *localvolatilecharp,\n"
+    "                                    local volatile uchar* localvolatileucharp,\n"
+    "                                    local volatile unsigned char * localvolatileunsignedcharp,\n"
+    "                                    local volatile short*localvolatileshortp,\n"
+    "                                    local volatile ushort *localvolatileushortp,\n"
+    "                                    local volatile unsigned short* localvolatileunsignedshortp,\n"
+    "                                    local volatile int * localvolatileintp,\n"
+    "                                    local volatile uint*localvolatileuintp,\n"
+    "                                    local volatile unsigned int *localvolatileunsignedintp,\n"
+    "                                    local volatile long* localvolatilelongp,\n"
+    "                                    local volatile ulong * localvolatileulongp,\n"
+    "                                    local volatile unsigned long*localvolatileunsignedlongp,\n"
+    "                                    local volatile float *localvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_scalar_restrict_p(local volatile void* restrict localvolatilevoidrestrictp,\n"
+    "                                             local volatile char * restrict localvolatilecharrestrictp,\n"
+    "                                             local volatile uchar*restrict localvolatileucharrestrictp,\n"
+    "                                             local volatile unsigned char *restrict localvolatileunsignedcharrestrictp,\n"
+    "                                             local volatile short* restrict localvolatileshortrestrictp,\n"
+    "                                             local volatile ushort * restrict localvolatileushortrestrictp,\n"
+    "                                             local volatile unsigned short*restrict localvolatileunsignedshortrestrictp,\n"
+    "                                             local volatile int *restrict localvolatileintrestrictp,\n"
+    "                                             local volatile uint* restrict localvolatileuintrestrictp,\n"
+    "                                             local volatile unsigned int * restrict localvolatileunsignedintrestrictp,\n"
+    "                                             local volatile long*restrict localvolatilelongrestrictp,\n"
+    "                                             local volatile ulong *restrict localvolatileulongrestrictp,\n"
+    "                                             local volatile unsigned long* restrict localvolatileunsignedlongrestrictp,\n"
+    "                                             local volatile float * restrict localvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_scalar_p(local const volatile void*localconstvolatilevoidp,\n"
+    "                                          local const volatile char *localconstvolatilecharp,\n"
+    "                                          local const volatile uchar* localconstvolatileucharp,\n"
+    "                                          local const volatile unsigned char * localconstvolatileunsignedcharp,\n"
+    "                                          local const volatile short*localconstvolatileshortp,\n"
+    "                                          local const volatile ushort *localconstvolatileushortp,\n"
+    "                                          local const volatile unsigned short* localconstvolatileunsignedshortp,\n"
+    "                                          local const volatile int * localconstvolatileintp,\n"
+    "                                          local const volatile uint*localconstvolatileuintp,\n"
+    "                                          local const volatile unsigned int *localconstvolatileunsignedintp,\n"
+    "                                          local const volatile long* localconstvolatilelongp,\n"
+    "                                          local const volatile ulong * localconstvolatileulongp,\n"
+    "                                          local const volatile unsigned long*localconstvolatileunsignedlongp,\n"
+    "                                          local const volatile float *localconstvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_scalar_restrict_p(local const volatile void* restrict localconstvolatilevoidrestrictp,\n"
+    "                                                   local const volatile char * restrict localconstvolatilecharrestrictp,\n"
+    "                                                   local const volatile uchar*restrict localconstvolatileucharrestrictp,\n"
+    "                                                   local const volatile unsigned char *restrict localconstvolatileunsignedcharrestrictp,\n"
+    "                                                   local const volatile short* restrict localconstvolatileshortrestrictp,\n"
+    "                                                   local const volatile ushort * restrict localconstvolatileushortrestrictp,\n"
+    "                                                   local const volatile unsigned short*restrict localconstvolatileunsignedshortrestrictp,\n"
+    "                                                   local const volatile int *restrict localconstvolatileintrestrictp,\n"
+    "                                                   local const volatile uint* restrict localconstvolatileuintrestrictp,\n"
+    "                                                   local const volatile unsigned int * restrict localconstvolatileunsignedintrestrictp,\n"
+    "                                                   local const volatile long*restrict localconstvolatilelongrestrictp,\n"
+    "                                                   local const volatile ulong *restrict localconstvolatileulongrestrictp,\n"
+    "                                                   local const volatile unsigned long* restrict localconstvolatileunsignedlongrestrictp,\n"
+    "                                                   local const volatile float * restrict localconstvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void scalar_d(char chard,\n"
+    "                     uchar uchard,\n"
+    "                     unsigned char unsignedchard,\n"
+    "                     short shortd,\n"
+    "                     ushort ushortd,\n"
+    "                     unsigned short unsignedshortd,\n"
+    "                     int intd,\n"
+    "                     uint uintd,\n"
+    "                     unsigned int unsignedintd,\n"
+    "                     long longd,\n"
+    "                     ulong ulongd,\n"
+    "                     unsigned long unsignedlongd,\n"
+    "                     float floatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_scalar_d(const char constchard,\n"
+    "                           const uchar constuchard,\n"
+    "                           const unsigned char constunsignedchard,\n"
+    "                           const short constshortd,\n"
+    "                           const ushort constushortd,\n"
+    "                           const unsigned short constunsignedshortd,\n"
+    "                           const int constintd,\n"
+    "                           const uint constuintd,\n"
+    "                           const unsigned int constunsignedintd,\n"
+    "                           const long constlongd,\n"
+    "                           const ulong constulongd,\n"
+    "                           const unsigned long constunsignedlongd,\n"
+    "                           const float constfloatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_scalar_d(private char privatechard,\n"
+    "                             private uchar privateuchard,\n"
+    "                             private unsigned char privateunsignedchard,\n"
+    "                             private short privateshortd,\n"
+    "                             private ushort privateushortd,\n"
+    "                             private unsigned short privateunsignedshortd,\n"
+    "                             private int privateintd,\n"
+    "                             private uint privateuintd,\n"
+    "                             private unsigned int privateunsignedintd,\n"
+    "                             private long privatelongd,\n"
+    "                             private ulong privateulongd,\n"
+    "                             private unsigned long privateunsignedlongd,\n"
+    "                             private float privatefloatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_scalar_d(private const char privateconstchard,\n"
+    "                                   private const uchar privateconstuchard,\n"
+    "                                   private const unsigned char privateconstunsignedchard,\n"
+    "                                   private const short privateconstshortd,\n"
+    "                                   private const ushort privateconstushortd,\n"
+    "                                   private const unsigned short privateconstunsignedshortd,\n"
+    "                                   private const int privateconstintd,\n"
+    "                                   private const uint privateconstuintd,\n"
+    "                                   private const unsigned int privateconstunsignedintd,\n"
+    "                                   private const long privateconstlongd,\n"
+    "                                   private const ulong privateconstulongd,\n"
+    "                                   private const unsigned long privateconstunsignedlongd,\n"
+    "                                   private const float privateconstfloatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector2_p0(constant char2*constantchar2p,\n"
+    "                               constant uchar2 *constantuchar2p,\n"
+    "                               constant short2* constantshort2p,\n"
+    "                               constant ushort2 * constantushort2p)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_p1(constant int2*constantint2p,\n"
+    "                               constant uint2 *constantuint2p,\n"
+    "                               constant long2* constantlong2p,\n"
+    "                               constant ulong2 * constantulong2p)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_p2(constant float2*constantfloat2p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified constant-space pointers to char2/uchar2/short2/ushort2; spacing around '*' and 'restrict' differs per parameter.
+    "kernel void constant_vector2_restrict_p0(constant char2 *restrict constantchar2restrictp,\n"
+    "                                        constant uchar2* restrict constantuchar2restrictp,\n"
+    "                                        constant short2 * restrict constantshort2restrictp,\n"
+    "                                        constant ushort2*restrict constantushort2restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified constant-space pointers to int2/uint2/long2/ulong2; spacing around '*' and 'restrict' differs per parameter.
+    "kernel void constant_vector2_restrict_p1(constant int2 *restrict constantint2restrictp,\n"
+    "                                        constant uint2* restrict constantuint2restrictp,\n"
+    "                                        constant long2 * restrict constantlong2restrictp,\n"
+    "                                        constant ulong2*restrict constantulong2restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking a single restrict-qualified constant-space pointer to float2.
+    "kernel void constant_vector2_restrict_p2(constant float2 *restrict constantfloat2restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking global pointers to nine 2-component vector types (char2 ... float2); spacing around '*' differs per parameter.
+    "kernel void global_vector2_p(global char2*globalchar2p,\n"
+    "                             global uchar2 *globaluchar2p,\n"
+    "                             global short2* globalshort2p,\n"
+    "                             global ushort2 * globalushort2p,\n"
+    "                             global int2*globalint2p,\n"
+    "                             global uint2 *globaluint2p,\n"
+    "                             global long2* globallong2p,\n"
+    "                             global ulong2 * globalulong2p,\n"
+    "                             global float2*globalfloat2p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified global pointers to nine 2-component vector types (char2 ... float2); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void global_vector2_restrict_p(global char2 *restrict globalchar2restrictp,\n"
+    "                                      global uchar2* restrict globaluchar2restrictp,\n"
+    "                                      global short2 * restrict globalshort2restrictp,\n"
+    "                                      global ushort2*restrict globalushort2restrictp,\n"
+    "                                      global int2 *restrict globalint2restrictp,\n"
+    "                                      global uint2* restrict globaluint2restrictp,\n"
+    "                                      global long2 * restrict globallong2restrictp,\n"
+    "                                      global ulong2*restrict globalulong2restrictp,\n"
+    "                                      global float2 *restrict globalfloat2restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking global pointers to const nine 2-component vector types (char2 ... float2); spacing around '*' differs per parameter.
+    "kernel void global_const_vector2_p(global const char2* globalconstchar2p,\n"
+    "                                   global const uchar2 * globalconstuchar2p,\n"
+    "                                   global const short2*globalconstshort2p,\n"
+    "                                   global const ushort2 *globalconstushort2p,\n"
+    "                                   global const int2* globalconstint2p,\n"
+    "                                   global const uint2 * globalconstuint2p,\n"
+    "                                   global const long2*globalconstlong2p,\n"
+    "                                   global const ulong2 *globalconstulong2p,\n"
+    "                                   global const float2* globalconstfloat2p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified global pointers to const nine 2-component vector types (char2 ... float2); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void global_const_vector2_restrict_p(global const char2 * restrict globalconstchar2restrictp,\n"
+    "                                            global const uchar2*restrict globalconstuchar2restrictp,\n"
+    "                                            global const short2 *restrict globalconstshort2restrictp,\n"
+    "                                            global const ushort2* restrict globalconstushort2restrictp,\n"
+    "                                            global const int2 * restrict globalconstint2restrictp,\n"
+    "                                            global const uint2*restrict globalconstuint2restrictp,\n"
+    "                                            global const long2 *restrict globalconstlong2restrictp,\n"
+    "                                            global const ulong2* restrict globalconstulong2restrictp,\n"
+    "                                            global const float2 * restrict globalconstfloat2restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking global pointers to volatile nine 2-component vector types (char2 ... float2); spacing around '*' differs per parameter.
+    "kernel void global_volatile_vector2_p(global volatile char2*globalvolatilechar2p,\n"
+    "                                      global volatile uchar2 *globalvolatileuchar2p,\n"
+    "                                      global volatile short2* globalvolatileshort2p,\n"
+    "                                      global volatile ushort2 * globalvolatileushort2p,\n"
+    "                                      global volatile int2*globalvolatileint2p,\n"
+    "                                      global volatile uint2 *globalvolatileuint2p,\n"
+    "                                      global volatile long2* globalvolatilelong2p,\n"
+    "                                      global volatile ulong2 * globalvolatileulong2p,\n"
+    "                                      global volatile float2*globalvolatilefloat2p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified global pointers to volatile nine 2-component vector types (char2 ... float2); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void global_volatile_vector2_restrict_p(global volatile char2 *restrict globalvolatilechar2restrictp,\n"
+    "                                               global volatile uchar2* restrict globalvolatileuchar2restrictp,\n"
+    "                                               global volatile short2 * restrict globalvolatileshort2restrictp,\n"
+    "                                               global volatile ushort2*restrict globalvolatileushort2restrictp,\n"
+    "                                               global volatile int2 *restrict globalvolatileint2restrictp,\n"
+    "                                               global volatile uint2* restrict globalvolatileuint2restrictp,\n"
+    "                                               global volatile long2 * restrict globalvolatilelong2restrictp,\n"
+    "                                               global volatile ulong2*restrict globalvolatileulong2restrictp,\n"
+    "                                               global volatile float2 *restrict globalvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking global pointers to const volatile nine 2-component vector types (char2 ... float2); spacing around '*' differs per parameter.
+    "kernel void global_const_volatile_vector2_p(global const volatile char2* globalconstvolatilechar2p,\n"
+    "                                            global const volatile uchar2 * globalconstvolatileuchar2p,\n"
+    "                                            global const volatile short2*globalconstvolatileshort2p,\n"
+    "                                            global const volatile ushort2 *globalconstvolatileushort2p,\n"
+    "                                            global const volatile int2* globalconstvolatileint2p,\n"
+    "                                            global const volatile uint2 * globalconstvolatileuint2p,\n"
+    "                                            global const volatile long2*globalconstvolatilelong2p,\n"
+    "                                            global const volatile ulong2 *globalconstvolatileulong2p,\n"
+    "                                            global const volatile float2* globalconstvolatilefloat2p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified global pointers to const volatile nine 2-component vector types (char2 ... float2); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void global_const_volatile_vector2_restrict_p(global const volatile char2 * restrict globalconstvolatilechar2restrictp,\n"
+    "                                                     global const volatile uchar2*restrict globalconstvolatileuchar2restrictp,\n"
+    "                                                     global const volatile short2 *restrict globalconstvolatileshort2restrictp,\n"
+    "                                                     global const volatile ushort2* restrict globalconstvolatileushort2restrictp,\n"
+    "                                                     global const volatile int2 * restrict globalconstvolatileint2restrictp,\n"
+    "                                                     global const volatile uint2*restrict globalconstvolatileuint2restrictp,\n"
+    "                                                     global const volatile long2 *restrict globalconstvolatilelong2restrictp,\n"
+    "                                                     global const volatile ulong2* restrict globalconstvolatileulong2restrictp,\n"
+    "                                                     global const volatile float2 * restrict globalconstvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking local pointers to nine 2-component vector types (char2 ... float2); spacing around '*' differs per parameter.
+    "kernel void local_vector2_p(local char2*localchar2p,\n"
+    "                            local uchar2 *localuchar2p,\n"
+    "                            local short2* localshort2p,\n"
+    "                            local ushort2 * localushort2p,\n"
+    "                            local int2*localint2p,\n"
+    "                            local uint2 *localuint2p,\n"
+    "                            local long2* locallong2p,\n"
+    "                            local ulong2 * localulong2p,\n"
+    "                            local float2*localfloat2p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified local pointers to nine 2-component vector types (char2 ... float2); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void local_vector2_restrict_p(local char2 *restrict localchar2restrictp,\n"
+    "                                     local uchar2* restrict localuchar2restrictp,\n"
+    "                                     local short2 * restrict localshort2restrictp,\n"
+    "                                     local ushort2*restrict localushort2restrictp,\n"
+    "                                     local int2 *restrict localint2restrictp,\n"
+    "                                     local uint2* restrict localuint2restrictp,\n"
+    "                                     local long2 * restrict locallong2restrictp,\n"
+    "                                     local ulong2*restrict localulong2restrictp,\n"
+    "                                     local float2 *restrict localfloat2restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking local pointers to const nine 2-component vector types (char2 ... float2); spacing around '*' differs per parameter.
+    "kernel void local_const_vector2_p(local const char2* localconstchar2p,\n"
+    "                                  local const uchar2 * localconstuchar2p,\n"
+    "                                  local const short2*localconstshort2p,\n"
+    "                                  local const ushort2 *localconstushort2p,\n"
+    "                                  local const int2* localconstint2p,\n"
+    "                                  local const uint2 * localconstuint2p,\n"
+    "                                  local const long2*localconstlong2p,\n"
+    "                                  local const ulong2 *localconstulong2p,\n"
+    "                                  local const float2* localconstfloat2p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified local pointers to const nine 2-component vector types (char2 ... float2); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void local_const_vector2_restrict_p(local const char2 * restrict localconstchar2restrictp,\n"
+    "                                           local const uchar2*restrict localconstuchar2restrictp,\n"
+    "                                           local const short2 *restrict localconstshort2restrictp,\n"
+    "                                           local const ushort2* restrict localconstushort2restrictp,\n"
+    "                                           local const int2 * restrict localconstint2restrictp,\n"
+    "                                           local const uint2*restrict localconstuint2restrictp,\n"
+    "                                           local const long2 *restrict localconstlong2restrictp,\n"
+    "                                           local const ulong2* restrict localconstulong2restrictp,\n"
+    "                                           local const float2 * restrict localconstfloat2restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking local pointers to volatile nine 2-component vector types (char2 ... float2); spacing around '*' differs per parameter.
+    "kernel void local_volatile_vector2_p(local volatile char2*localvolatilechar2p,\n"
+    "                                     local volatile uchar2 *localvolatileuchar2p,\n"
+    "                                     local volatile short2* localvolatileshort2p,\n"
+    "                                     local volatile ushort2 * localvolatileushort2p,\n"
+    "                                     local volatile int2*localvolatileint2p,\n"
+    "                                     local volatile uint2 *localvolatileuint2p,\n"
+    "                                     local volatile long2* localvolatilelong2p,\n"
+    "                                     local volatile ulong2 * localvolatileulong2p,\n"
+    "                                     local volatile float2*localvolatilefloat2p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified local pointers to volatile nine 2-component vector types (char2 ... float2); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void local_volatile_vector2_restrict_p(local volatile char2 *restrict localvolatilechar2restrictp,\n"
+    "                                              local volatile uchar2* restrict localvolatileuchar2restrictp,\n"
+    "                                              local volatile short2 * restrict localvolatileshort2restrictp,\n"
+    "                                              local volatile ushort2*restrict localvolatileushort2restrictp,\n"
+    "                                              local volatile int2 *restrict localvolatileint2restrictp,\n"
+    "                                              local volatile uint2* restrict localvolatileuint2restrictp,\n"
+    "                                              local volatile long2 * restrict localvolatilelong2restrictp,\n"
+    "                                              local volatile ulong2*restrict localvolatileulong2restrictp,\n"
+    "                                              local volatile float2 *restrict localvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking local pointers to const volatile nine 2-component vector types (char2 ... float2); spacing around '*' differs per parameter.
+    "kernel void local_const_volatile_vector2_p(local const volatile char2* localconstvolatilechar2p,\n"
+    "                                           local const volatile uchar2 * localconstvolatileuchar2p,\n"
+    "                                           local const volatile short2*localconstvolatileshort2p,\n"
+    "                                           local const volatile ushort2 *localconstvolatileushort2p,\n"
+    "                                           local const volatile int2* localconstvolatileint2p,\n"
+    "                                           local const volatile uint2 * localconstvolatileuint2p,\n"
+    "                                           local const volatile long2*localconstvolatilelong2p,\n"
+    "                                           local const volatile ulong2 *localconstvolatileulong2p,\n"
+    "                                           local const volatile float2* localconstvolatilefloat2p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified local pointers to const volatile nine 2-component vector types (char2 ... float2); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void local_const_volatile_vector2_restrict_p(local const volatile char2 * restrict localconstvolatilechar2restrictp,\n"
+    "                                                    local const volatile uchar2*restrict localconstvolatileuchar2restrictp,\n"
+    "                                                    local const volatile short2 *restrict localconstvolatileshort2restrictp,\n"
+    "                                                    local const volatile ushort2* restrict localconstvolatileushort2restrictp,\n"
+    "                                                    local const volatile int2 * restrict localconstvolatileint2restrictp,\n"
+    "                                                    local const volatile uint2*restrict localconstvolatileuint2restrictp,\n"
+    "                                                    local const volatile long2 *restrict localconstvolatilelong2restrictp,\n"
+    "                                                    local const volatile ulong2* restrict localconstvolatileulong2restrictp,\n"
+    "                                                    local const volatile float2 * restrict localconstvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking nine 2-component vector types (char2 ... float2) by value, with no qualifiers.
+    "kernel void vector2_d(char2 char2d,\n"
+    "                      uchar2 uchar2d,\n"
+    "                      short2 short2d,\n"
+    "                      ushort2 ushort2d,\n"
+    "                      int2 int2d,\n"
+    "                      uint2 uint2d,\n"
+    "                      long2 long2d,\n"
+    "                      ulong2 ulong2d,\n"
+    "                      float2 float2d)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking nine const-qualified 2-component vector types (char2 ... float2) by value.
+    "kernel void const_vector2_d(const char2 constchar2d,\n"
+    "                            const uchar2 constuchar2d,\n"
+    "                            const short2 constshort2d,\n"
+    "                            const ushort2 constushort2d,\n"
+    "                            const int2 constint2d,\n"
+    "                            const uint2 constuint2d,\n"
+    "                            const long2 constlong2d,\n"
+    "                            const ulong2 constulong2d,\n"
+    "                            const float2 constfloat2d)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking nine 2-component vector types (char2 ... float2) by value, each spelled with an explicit 'private' qualifier.
+    "kernel void private_vector2_d(private char2 privatechar2d,\n"
+    "                              private uchar2 privateuchar2d,\n"
+    "                              private short2 privateshort2d,\n"
+    "                              private ushort2 privateushort2d,\n"
+    "                              private int2 privateint2d,\n"
+    "                              private uint2 privateuint2d,\n"
+    "                              private long2 privatelong2d,\n"
+    "                              private ulong2 privateulong2d,\n"
+    "                              private float2 privatefloat2d)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking nine 2-component vector types (char2 ... float2) by value, each spelled 'private const'.
+    "kernel void private_const_vector2_d(private const char2 privateconstchar2d,\n"
+    "                                    private const uchar2 privateconstuchar2d,\n"
+    "                                    private const short2 privateconstshort2d,\n"
+    "                                    private const ushort2 privateconstushort2d,\n"
+    "                                    private const int2 privateconstint2d,\n"
+    "                                    private const uint2 privateconstuint2d,\n"
+    "                                    private const long2 privateconstlong2d,\n"
+    "                                    private const ulong2 privateconstulong2d,\n"
+    "                                    private const float2 privateconstfloat2d)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking constant-space pointers to char3/uchar3/short3/ushort3; spacing around '*' differs per parameter.
+    "kernel void constant_vector3_p0(constant char3*constantchar3p,\n"
+    "                               constant uchar3 *constantuchar3p,\n"
+    "                               constant short3* constantshort3p,\n"
+    "                               constant ushort3 * constantushort3p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking constant-space pointers to int3/uint3/long3/ulong3; spacing around '*' differs per parameter.
+    "kernel void constant_vector3_p1(constant int3*constantint3p,\n"
+    "                               constant uint3 *constantuint3p,\n"
+    "                               constant long3* constantlong3p,\n"
+    "                               constant ulong3 * constantulong3p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking a single constant-space pointer to float3.
+    "kernel void constant_vector3_p2(constant float3*constantfloat3p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified constant-space pointers to char3/uchar3/short3/ushort3; spacing around '*' and 'restrict' differs per parameter.
+    "kernel void constant_vector3_restrict_p0(constant char3 *restrict constantchar3restrictp,\n"
+    "                                        constant uchar3* restrict constantuchar3restrictp,\n"
+    "                                        constant short3 * restrict constantshort3restrictp,\n"
+    "                                        constant ushort3*restrict constantushort3restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified constant-space pointers to int3/uint3/long3/ulong3; spacing around '*' and 'restrict' differs per parameter.
+    "kernel void constant_vector3_restrict_p1(constant int3 *restrict constantint3restrictp,\n"
+    "                                        constant uint3* restrict constantuint3restrictp,\n"
+    "                                        constant long3 * restrict constantlong3restrictp,\n"
+    "                                        constant ulong3*restrict constantulong3restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking a single restrict-qualified constant-space pointer to float3.
+    "kernel void constant_vector3_restrict_p2(constant float3 *restrict constantfloat3restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking global pointers to nine 3-component vector types (char3 ... float3); spacing around '*' differs per parameter.
+    "kernel void global_vector3_p(global char3*globalchar3p,\n"
+    "                             global uchar3 *globaluchar3p,\n"
+    "                             global short3* globalshort3p,\n"
+    "                             global ushort3 * globalushort3p,\n"
+    "                             global int3*globalint3p,\n"
+    "                             global uint3 *globaluint3p,\n"
+    "                             global long3* globallong3p,\n"
+    "                             global ulong3 * globalulong3p,\n"
+    "                             global float3*globalfloat3p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified global pointers to nine 3-component vector types (char3 ... float3); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void global_vector3_restrict_p(global char3 *restrict globalchar3restrictp,\n"
+    "                                      global uchar3* restrict globaluchar3restrictp,\n"
+    "                                      global short3 * restrict globalshort3restrictp,\n"
+    "                                      global ushort3*restrict globalushort3restrictp,\n"
+    "                                      global int3 *restrict globalint3restrictp,\n"
+    "                                      global uint3* restrict globaluint3restrictp,\n"
+    "                                      global long3 * restrict globallong3restrictp,\n"
+    "                                      global ulong3*restrict globalulong3restrictp,\n"
+    "                                      global float3 *restrict globalfloat3restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking global pointers to const nine 3-component vector types (char3 ... float3); spacing around '*' differs per parameter.
+    "kernel void global_const_vector3_p(global const char3* globalconstchar3p,\n"
+    "                                   global const uchar3 * globalconstuchar3p,\n"
+    "                                   global const short3*globalconstshort3p,\n"
+    "                                   global const ushort3 *globalconstushort3p,\n"
+    "                                   global const int3* globalconstint3p,\n"
+    "                                   global const uint3 * globalconstuint3p,\n"
+    "                                   global const long3*globalconstlong3p,\n"
+    "                                   global const ulong3 *globalconstulong3p,\n"
+    "                                   global const float3* globalconstfloat3p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified global pointers to const nine 3-component vector types (char3 ... float3); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void global_const_vector3_restrict_p(global const char3 * restrict globalconstchar3restrictp,\n"
+    "                                            global const uchar3*restrict globalconstuchar3restrictp,\n"
+    "                                            global const short3 *restrict globalconstshort3restrictp,\n"
+    "                                            global const ushort3* restrict globalconstushort3restrictp,\n"
+    "                                            global const int3 * restrict globalconstint3restrictp,\n"
+    "                                            global const uint3*restrict globalconstuint3restrictp,\n"
+    "                                            global const long3 *restrict globalconstlong3restrictp,\n"
+    "                                            global const ulong3* restrict globalconstulong3restrictp,\n"
+    "                                            global const float3 * restrict globalconstfloat3restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking global pointers to volatile nine 3-component vector types (char3 ... float3); spacing around '*' differs per parameter.
+    "kernel void global_volatile_vector3_p(global volatile char3*globalvolatilechar3p,\n"
+    "                                      global volatile uchar3 *globalvolatileuchar3p,\n"
+    "                                      global volatile short3* globalvolatileshort3p,\n"
+    "                                      global volatile ushort3 * globalvolatileushort3p,\n"
+    "                                      global volatile int3*globalvolatileint3p,\n"
+    "                                      global volatile uint3 *globalvolatileuint3p,\n"
+    "                                      global volatile long3* globalvolatilelong3p,\n"
+    "                                      global volatile ulong3 * globalvolatileulong3p,\n"
+    "                                      global volatile float3*globalvolatilefloat3p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified global pointers to volatile nine 3-component vector types (char3 ... float3); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void global_volatile_vector3_restrict_p(global volatile char3 *restrict globalvolatilechar3restrictp,\n"
+    "                                               global volatile uchar3* restrict globalvolatileuchar3restrictp,\n"
+    "                                               global volatile short3 * restrict globalvolatileshort3restrictp,\n"
+    "                                               global volatile ushort3*restrict globalvolatileushort3restrictp,\n"
+    "                                               global volatile int3 *restrict globalvolatileint3restrictp,\n"
+    "                                               global volatile uint3* restrict globalvolatileuint3restrictp,\n"
+    "                                               global volatile long3 * restrict globalvolatilelong3restrictp,\n"
+    "                                               global volatile ulong3*restrict globalvolatileulong3restrictp,\n"
+    "                                               global volatile float3 *restrict globalvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking global pointers to const volatile nine 3-component vector types (char3 ... float3); spacing around '*' differs per parameter.
+    "kernel void global_const_volatile_vector3_p(global const volatile char3* globalconstvolatilechar3p,\n"
+    "                                            global const volatile uchar3 * globalconstvolatileuchar3p,\n"
+    "                                            global const volatile short3*globalconstvolatileshort3p,\n"
+    "                                            global const volatile ushort3 *globalconstvolatileushort3p,\n"
+    "                                            global const volatile int3* globalconstvolatileint3p,\n"
+    "                                            global const volatile uint3 * globalconstvolatileuint3p,\n"
+    "                                            global const volatile long3*globalconstvolatilelong3p,\n"
+    "                                            global const volatile ulong3 *globalconstvolatileulong3p,\n"
+    "                                            global const volatile float3* globalconstvolatilefloat3p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified global pointers to const volatile nine 3-component vector types (char3 ... float3); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void global_const_volatile_vector3_restrict_p(global const volatile char3 * restrict globalconstvolatilechar3restrictp,\n"
+    "                                                     global const volatile uchar3*restrict globalconstvolatileuchar3restrictp,\n"
+    "                                                     global const volatile short3 *restrict globalconstvolatileshort3restrictp,\n"
+    "                                                     global const volatile ushort3* restrict globalconstvolatileushort3restrictp,\n"
+    "                                                     global const volatile int3 * restrict globalconstvolatileint3restrictp,\n"
+    "                                                     global const volatile uint3*restrict globalconstvolatileuint3restrictp,\n"
+    "                                                     global const volatile long3 *restrict globalconstvolatilelong3restrictp,\n"
+    "                                                     global const volatile ulong3* restrict globalconstvolatileulong3restrictp,\n"
+    "                                                     global const volatile float3 * restrict globalconstvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking local pointers to nine 3-component vector types (char3 ... float3); spacing around '*' differs per parameter.
+    "kernel void local_vector3_p(local char3*localchar3p,\n"
+    "                            local uchar3 *localuchar3p,\n"
+    "                            local short3* localshort3p,\n"
+    "                            local ushort3 * localushort3p,\n"
+    "                            local int3*localint3p,\n"
+    "                            local uint3 *localuint3p,\n"
+    "                            local long3* locallong3p,\n"
+    "                            local ulong3 * localulong3p,\n"
+    "                            local float3*localfloat3p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified local pointers to nine 3-component vector types (char3 ... float3); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void local_vector3_restrict_p(local char3 *restrict localchar3restrictp,\n"
+    "                                     local uchar3* restrict localuchar3restrictp,\n"
+    "                                     local short3 * restrict localshort3restrictp,\n"
+    "                                     local ushort3*restrict localushort3restrictp,\n"
+    "                                     local int3 *restrict localint3restrictp,\n"
+    "                                     local uint3* restrict localuint3restrictp,\n"
+    "                                     local long3 * restrict locallong3restrictp,\n"
+    "                                     local ulong3*restrict localulong3restrictp,\n"
+    "                                     local float3 *restrict localfloat3restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking local pointers to const nine 3-component vector types (char3 ... float3); spacing around '*' differs per parameter.
+    "kernel void local_const_vector3_p(local const char3* localconstchar3p,\n"
+    "                                  local const uchar3 * localconstuchar3p,\n"
+    "                                  local const short3*localconstshort3p,\n"
+    "                                  local const ushort3 *localconstushort3p,\n"
+    "                                  local const int3* localconstint3p,\n"
+    "                                  local const uint3 * localconstuint3p,\n"
+    "                                  local const long3*localconstlong3p,\n"
+    "                                  local const ulong3 *localconstulong3p,\n"
+    "                                  local const float3* localconstfloat3p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified local pointers to const nine 3-component vector types (char3 ... float3); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void local_const_vector3_restrict_p(local const char3 * restrict localconstchar3restrictp,\n"
+    "                                           local const uchar3*restrict localconstuchar3restrictp,\n"
+    "                                           local const short3 *restrict localconstshort3restrictp,\n"
+    "                                           local const ushort3* restrict localconstushort3restrictp,\n"
+    "                                           local const int3 * restrict localconstint3restrictp,\n"
+    "                                           local const uint3*restrict localconstuint3restrictp,\n"
+    "                                           local const long3 *restrict localconstlong3restrictp,\n"
+    "                                           local const ulong3* restrict localconstulong3restrictp,\n"
+    "                                           local const float3 * restrict localconstfloat3restrictp)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking local pointers to volatile nine 3-component vector types (char3 ... float3); spacing around '*' differs per parameter.
+    "kernel void local_volatile_vector3_p(local volatile char3*localvolatilechar3p,\n"
+    "                                     local volatile uchar3 *localvolatileuchar3p,\n"
+    "                                     local volatile short3* localvolatileshort3p,\n"
+    "                                     local volatile ushort3 * localvolatileushort3p,\n"
+    "                                     local volatile int3*localvolatileint3p,\n"
+    "                                     local volatile uint3 *localvolatileuint3p,\n"
+    "                                     local volatile long3* localvolatilelong3p,\n"
+    "                                     local volatile ulong3 * localvolatileulong3p,\n"
+    "                                     local volatile float3*localvolatilefloat3p)\n"
+    "{}\n",
+    "\n"  // One array element: empty kernel taking restrict-qualified local pointers to volatile nine 3-component vector types (char3 ... float3); spacing around '*' and 'restrict' differs per parameter.
+    "kernel void local_volatile_vector3_restrict_p(local volatile char3 *restrict localvolatilechar3restrictp,\n"
+    "                                              local volatile uchar3* restrict localvolatileuchar3restrictp,\n"
+    "                                              local volatile short3 * restrict localvolatileshort3restrictp,\n"
+    "                                              local volatile ushort3*restrict localvolatileushort3restrictp,\n"
+    "                                              local volatile int3 *restrict localvolatileint3restrictp,\n"
+    "                                              local volatile uint3* restrict localvolatileuint3restrictp,\n"
+    "                                              local volatile long3 * restrict localvolatilelong3restrictp,\n"
+    "                                              local volatile ulong3*restrict localvolatileulong3restrictp,\n"
+    "                                              local volatile float3 *restrict localvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector3_p(local const volatile char3* localconstvolatilechar3p,\n"
+    "                                           local const volatile uchar3 * localconstvolatileuchar3p,\n"
+    "                                           local const volatile short3*localconstvolatileshort3p,\n"
+    "                                           local const volatile ushort3 *localconstvolatileushort3p,\n"
+    "                                           local const volatile int3* localconstvolatileint3p,\n"
+    "                                           local const volatile uint3 * localconstvolatileuint3p,\n"
+    "                                           local const volatile long3*localconstvolatilelong3p,\n"
+    "                                           local const volatile ulong3 *localconstvolatileulong3p,\n"
+    "                                           local const volatile float3* localconstvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector3_restrict_p(local const volatile char3 * restrict localconstvolatilechar3restrictp,\n"
+    "                                                    local const volatile uchar3*restrict localconstvolatileuchar3restrictp,\n"
+    "                                                    local const volatile short3 *restrict localconstvolatileshort3restrictp,\n"
+    "                                                    local const volatile ushort3* restrict localconstvolatileushort3restrictp,\n"
+    "                                                    local const volatile int3 * restrict localconstvolatileint3restrictp,\n"
+    "                                                    local const volatile uint3*restrict localconstvolatileuint3restrictp,\n"
+    "                                                    local const volatile long3 *restrict localconstvolatilelong3restrictp,\n"
+    "                                                    local const volatile ulong3* restrict localconstvolatileulong3restrictp,\n"
+    "                                                    local const volatile float3 * restrict localconstvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector3_d(char3 char3d,\n"
+    "                      uchar3 uchar3d,\n"
+    "                      short3 short3d,\n"
+    "                      ushort3 ushort3d,\n"
+    "                      int3 int3d,\n"
+    "                      uint3 uint3d,\n"
+    "                      long3 long3d,\n"
+    "                      ulong3 ulong3d,\n"
+    "                      float3 float3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector3_d(const char3 constchar3d,\n"
+    "                            const uchar3 constuchar3d,\n"
+    "                            const short3 constshort3d,\n"
+    "                            const ushort3 constushort3d,\n"
+    "                            const int3 constint3d,\n"
+    "                            const uint3 constuint3d,\n"
+    "                            const long3 constlong3d,\n"
+    "                            const ulong3 constulong3d,\n"
+    "                            const float3 constfloat3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector3_d(private char3 privatechar3d,\n"
+    "                              private uchar3 privateuchar3d,\n"
+    "                              private short3 privateshort3d,\n"
+    "                              private ushort3 privateushort3d,\n"
+    "                              private int3 privateint3d,\n"
+    "                              private uint3 privateuint3d,\n"
+    "                              private long3 privatelong3d,\n"
+    "                              private ulong3 privateulong3d,\n"
+    "                              private float3 privatefloat3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector3_d(private const char3 privateconstchar3d,\n"
+    "                                    private const uchar3 privateconstuchar3d,\n"
+    "                                    private const short3 privateconstshort3d,\n"
+    "                                    private const ushort3 privateconstushort3d,\n"
+    "                                    private const int3 privateconstint3d,\n"
+    "                                    private const uint3 privateconstuint3d,\n"
+    "                                    private const long3 privateconstlong3d,\n"
+    "                                    private const ulong3 privateconstulong3d,\n"
+    "                                    private const float3 privateconstfloat3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_p0(constant char4*constantchar4p,\n"
+    "                               constant uchar4 *constantuchar4p,\n"
+    "                               constant short4* constantshort4p,\n"
+    "                               constant ushort4 * constantushort4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_p1(constant int4*constantint4p,\n"
+    "                               constant uint4 *constantuint4p,\n"
+    "                               constant long4* constantlong4p,\n"
+    "                               constant ulong4 * constantulong4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_p2(constant float4*constantfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_restrict_p0(constant char4 *restrict constantchar4restrictp,\n"
+    "                                        constant uchar4* restrict constantuchar4restrictp,\n"
+    "                                        constant short4 * restrict constantshort4restrictp,\n"
+    "                                        constant ushort4*restrict constantushort4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_restrict_p1(constant int4 *restrict constantint4restrictp,\n"
+    "                                        constant uint4* restrict constantuint4restrictp,\n"
+    "                                        constant long4 * restrict constantlong4restrictp,\n"
+    "                                        constant ulong4*restrict constantulong4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_restrict_p2(constant float4 *restrict constantfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector4_p(global char4*globalchar4p,\n"
+    "                             global uchar4 *globaluchar4p,\n"
+    "                             global short4* globalshort4p,\n"
+    "                             global ushort4 * globalushort4p,\n"
+    "                             global int4*globalint4p,\n"
+    "                             global uint4 *globaluint4p,\n"
+    "                             global long4* globallong4p,\n"
+    "                             global ulong4 * globalulong4p,\n"
+    "                             global float4*globalfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector4_restrict_p(global char4 *restrict globalchar4restrictp,\n"
+    "                                      global uchar4* restrict globaluchar4restrictp,\n"
+    "                                      global short4 * restrict globalshort4restrictp,\n"
+    "                                      global ushort4*restrict globalushort4restrictp,\n"
+    "                                      global int4 *restrict globalint4restrictp,\n"
+    "                                      global uint4* restrict globaluint4restrictp,\n"
+    "                                      global long4 * restrict globallong4restrictp,\n"
+    "                                      global ulong4*restrict globalulong4restrictp,\n"
+    "                                      global float4 *restrict globalfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector4_p(global const char4* globalconstchar4p,\n"
+    "                                   global const uchar4 * globalconstuchar4p,\n"
+    "                                   global const short4*globalconstshort4p,\n"
+    "                                   global const ushort4 *globalconstushort4p,\n"
+    "                                   global const int4* globalconstint4p,\n"
+    "                                   global const uint4 * globalconstuint4p,\n"
+    "                                   global const long4*globalconstlong4p,\n"
+    "                                   global const ulong4 *globalconstulong4p,\n"
+    "                                   global const float4* globalconstfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector4_restrict_p(global const char4 * restrict globalconstchar4restrictp,\n"
+    "                                            global const uchar4*restrict globalconstuchar4restrictp,\n"
+    "                                            global const short4 *restrict globalconstshort4restrictp,\n"
+    "                                            global const ushort4* restrict globalconstushort4restrictp,\n"
+    "                                            global const int4 * restrict globalconstint4restrictp,\n"
+    "                                            global const uint4*restrict globalconstuint4restrictp,\n"
+    "                                            global const long4 *restrict globalconstlong4restrictp,\n"
+    "                                            global const ulong4* restrict globalconstulong4restrictp,\n"
+    "                                            global const float4 * restrict globalconstfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector4_p(global volatile char4*globalvolatilechar4p,\n"
+    "                                      global volatile uchar4 *globalvolatileuchar4p,\n"
+    "                                      global volatile short4* globalvolatileshort4p,\n"
+    "                                      global volatile ushort4 * globalvolatileushort4p,\n"
+    "                                      global volatile int4*globalvolatileint4p,\n"
+    "                                      global volatile uint4 *globalvolatileuint4p,\n"
+    "                                      global volatile long4* globalvolatilelong4p,\n"
+    "                                      global volatile ulong4 * globalvolatileulong4p,\n"
+    "                                      global volatile float4*globalvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector4_restrict_p(global volatile char4 *restrict globalvolatilechar4restrictp,\n"
+    "                                               global volatile uchar4* restrict globalvolatileuchar4restrictp,\n"
+    "                                               global volatile short4 * restrict globalvolatileshort4restrictp,\n"
+    "                                               global volatile ushort4*restrict globalvolatileushort4restrictp,\n"
+    "                                               global volatile int4 *restrict globalvolatileint4restrictp,\n"
+    "                                               global volatile uint4* restrict globalvolatileuint4restrictp,\n"
+    "                                               global volatile long4 * restrict globalvolatilelong4restrictp,\n"
+    "                                               global volatile ulong4*restrict globalvolatileulong4restrictp,\n"
+    "                                               global volatile float4 *restrict globalvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector4_p(global const volatile char4* globalconstvolatilechar4p,\n"
+    "                                            global const volatile uchar4 * globalconstvolatileuchar4p,\n"
+    "                                            global const volatile short4*globalconstvolatileshort4p,\n"
+    "                                            global const volatile ushort4 *globalconstvolatileushort4p,\n"
+    "                                            global const volatile int4* globalconstvolatileint4p,\n"
+    "                                            global const volatile uint4 * globalconstvolatileuint4p,\n"
+    "                                            global const volatile long4*globalconstvolatilelong4p,\n"
+    "                                            global const volatile ulong4 *globalconstvolatileulong4p,\n"
+    "                                            global const volatile float4* globalconstvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector4_restrict_p(global const volatile char4 * restrict globalconstvolatilechar4restrictp,\n"
+    "                                                     global const volatile uchar4*restrict globalconstvolatileuchar4restrictp,\n"
+    "                                                     global const volatile short4 *restrict globalconstvolatileshort4restrictp,\n"
+    "                                                     global const volatile ushort4* restrict globalconstvolatileushort4restrictp,\n"
+    "                                                     global const volatile int4 * restrict globalconstvolatileint4restrictp,\n"
+    "                                                     global const volatile uint4*restrict globalconstvolatileuint4restrictp,\n"
+    "                                                     global const volatile long4 *restrict globalconstvolatilelong4restrictp,\n"
+    "                                                     global const volatile ulong4* restrict globalconstvolatileulong4restrictp,\n"
+    "                                                     global const volatile float4 * restrict globalconstvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector4_p(local char4*localchar4p,\n"
+    "                            local uchar4 *localuchar4p,\n"
+    "                            local short4* localshort4p,\n"
+    "                            local ushort4 * localushort4p,\n"
+    "                            local int4*localint4p,\n"
+    "                            local uint4 *localuint4p,\n"
+    "                            local long4* locallong4p,\n"
+    "                            local ulong4 * localulong4p,\n"
+    "                            local float4*localfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector4_restrict_p(local char4 *restrict localchar4restrictp,\n"
+    "                                     local uchar4* restrict localuchar4restrictp,\n"
+    "                                     local short4 * restrict localshort4restrictp,\n"
+    "                                     local ushort4*restrict localushort4restrictp,\n"
+    "                                     local int4 *restrict localint4restrictp,\n"
+    "                                     local uint4* restrict localuint4restrictp,\n"
+    "                                     local long4 * restrict locallong4restrictp,\n"
+    "                                     local ulong4*restrict localulong4restrictp,\n"
+    "                                     local float4 *restrict localfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector4_p(local const char4* localconstchar4p,\n"
+    "                                  local const uchar4 * localconstuchar4p,\n"
+    "                                  local const short4*localconstshort4p,\n"
+    "                                  local const ushort4 *localconstushort4p,\n"
+    "                                  local const int4* localconstint4p,\n"
+    "                                  local const uint4 * localconstuint4p,\n"
+    "                                  local const long4*localconstlong4p,\n"
+    "                                  local const ulong4 *localconstulong4p,\n"
+    "                                  local const float4* localconstfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector4_restrict_p(local const char4 * restrict localconstchar4restrictp,\n"
+    "                                           local const uchar4*restrict localconstuchar4restrictp,\n"
+    "                                           local const short4 *restrict localconstshort4restrictp,\n"
+    "                                           local const ushort4* restrict localconstushort4restrictp,\n"
+    "                                           local const int4 * restrict localconstint4restrictp,\n"
+    "                                           local const uint4*restrict localconstuint4restrictp,\n"
+    "                                           local const long4 *restrict localconstlong4restrictp,\n"
+    "                                           local const ulong4* restrict localconstulong4restrictp,\n"
+    "                                           local const float4 * restrict localconstfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector4_p(local volatile char4*localvolatilechar4p,\n"
+    "                                     local volatile uchar4 *localvolatileuchar4p,\n"
+    "                                     local volatile short4* localvolatileshort4p,\n"
+    "                                     local volatile ushort4 * localvolatileushort4p,\n"
+    "                                     local volatile int4*localvolatileint4p,\n"
+    "                                     local volatile uint4 *localvolatileuint4p,\n"
+    "                                     local volatile long4* localvolatilelong4p,\n"
+    "                                     local volatile ulong4 * localvolatileulong4p,\n"
+    "                                     local volatile float4*localvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector4_restrict_p(local volatile char4 *restrict localvolatilechar4restrictp,\n"
+    "                                              local volatile uchar4* restrict localvolatileuchar4restrictp,\n"
+    "                                              local volatile short4 * restrict localvolatileshort4restrictp,\n"
+    "                                              local volatile ushort4*restrict localvolatileushort4restrictp,\n"
+    "                                              local volatile int4 *restrict localvolatileint4restrictp,\n"
+    "                                              local volatile uint4* restrict localvolatileuint4restrictp,\n"
+    "                                              local volatile long4 * restrict localvolatilelong4restrictp,\n"
+    "                                              local volatile ulong4*restrict localvolatileulong4restrictp,\n"
+    "                                              local volatile float4 *restrict localvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector4_p(local const volatile char4* localconstvolatilechar4p,\n"
+    "                                           local const volatile uchar4 * localconstvolatileuchar4p,\n"
+    "                                           local const volatile short4*localconstvolatileshort4p,\n"
+    "                                           local const volatile ushort4 *localconstvolatileushort4p,\n"
+    "                                           local const volatile int4* localconstvolatileint4p,\n"
+    "                                           local const volatile uint4 * localconstvolatileuint4p,\n"
+    "                                           local const volatile long4*localconstvolatilelong4p,\n"
+    "                                           local const volatile ulong4 *localconstvolatileulong4p,\n"
+    "                                           local const volatile float4* localconstvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector4_restrict_p(local const volatile char4 * restrict localconstvolatilechar4restrictp,\n"
+    "                                                    local const volatile uchar4*restrict localconstvolatileuchar4restrictp,\n"
+    "                                                    local const volatile short4 *restrict localconstvolatileshort4restrictp,\n"
+    "                                                    local const volatile ushort4* restrict localconstvolatileushort4restrictp,\n"
+    "                                                    local const volatile int4 * restrict localconstvolatileint4restrictp,\n"
+    "                                                    local const volatile uint4*restrict localconstvolatileuint4restrictp,\n"
+    "                                                    local const volatile long4 *restrict localconstvolatilelong4restrictp,\n"
+    "                                                    local const volatile ulong4* restrict localconstvolatileulong4restrictp,\n"
+    "                                                    local const volatile float4 * restrict localconstvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector4_d(char4 char4d,\n"
+    "                      uchar4 uchar4d,\n"
+    "                      short4 short4d,\n"
+    "                      ushort4 ushort4d,\n"
+    "                      int4 int4d,\n"
+    "                      uint4 uint4d,\n"
+    "                      long4 long4d,\n"
+    "                      ulong4 ulong4d,\n"
+    "                      float4 float4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector4_d(const char4 constchar4d,\n"
+    "                            const uchar4 constuchar4d,\n"
+    "                            const short4 constshort4d,\n"
+    "                            const ushort4 constushort4d,\n"
+    "                            const int4 constint4d,\n"
+    "                            const uint4 constuint4d,\n"
+    "                            const long4 constlong4d,\n"
+    "                            const ulong4 constulong4d,\n"
+    "                            const float4 constfloat4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector4_d(private char4 privatechar4d,\n"
+    "                              private uchar4 privateuchar4d,\n"
+    "                              private short4 privateshort4d,\n"
+    "                              private ushort4 privateushort4d,\n"
+    "                              private int4 privateint4d,\n"
+    "                              private uint4 privateuint4d,\n"
+    "                              private long4 privatelong4d,\n"
+    "                              private ulong4 privateulong4d,\n"
+    "                              private float4 privatefloat4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector4_d(private const char4 privateconstchar4d,\n"
+    "                                    private const uchar4 privateconstuchar4d,\n"
+    "                                    private const short4 privateconstshort4d,\n"
+    "                                    private const ushort4 privateconstushort4d,\n"
+    "                                    private const int4 privateconstint4d,\n"
+    "                                    private const uint4 privateconstuint4d,\n"
+    "                                    private const long4 privateconstlong4d,\n"
+    "                                    private const ulong4 privateconstulong4d,\n"
+    "                                    private const float4 privateconstfloat4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_p0(constant char8*constantchar8p,\n"
+    "                               constant uchar8 *constantuchar8p,\n"
+    "                               constant short8* constantshort8p,\n"
+    "                               constant ushort8 * constantushort8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_p1(constant int8*constantint8p,\n"
+    "                               constant uint8 *constantuint8p,\n"
+    "                               constant long8* constantlong8p,\n"
+    "                               constant ulong8 * constantulong8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_p2(constant float8*constantfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_restrict_p0(constant char8 *restrict constantchar8restrictp,\n"
+    "                                        constant uchar8* restrict constantuchar8restrictp,\n"
+    "                                        constant short8 * restrict constantshort8restrictp,\n"
+    "                                        constant ushort8*restrict constantushort8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_restrict_p1(constant int8 *restrict constantint8restrictp,\n"
+    "                                        constant uint8* restrict constantuint8restrictp,\n"
+    "                                        constant long8 * restrict constantlong8restrictp,\n"
+    "                                        constant ulong8*restrict constantulong8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_restrict_p2(constant float8 *restrict constantfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector8_p(global char8*globalchar8p,\n"
+    "                             global uchar8 *globaluchar8p,\n"
+    "                             global short8* globalshort8p,\n"
+    "                             global ushort8 * globalushort8p,\n"
+    "                             global int8*globalint8p,\n"
+    "                             global uint8 *globaluint8p,\n"
+    "                             global long8* globallong8p,\n"
+    "                             global ulong8 * globalulong8p,\n"
+    "                             global float8*globalfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector8_restrict_p(global char8 *restrict globalchar8restrictp,\n"
+    "                                      global uchar8* restrict globaluchar8restrictp,\n"
+    "                                      global short8 * restrict globalshort8restrictp,\n"
+    "                                      global ushort8*restrict globalushort8restrictp,\n"
+    "                                      global int8 *restrict globalint8restrictp,\n"
+    "                                      global uint8* restrict globaluint8restrictp,\n"
+    "                                      global long8 * restrict globallong8restrictp,\n"
+    "                                      global ulong8*restrict globalulong8restrictp,\n"
+    "                                      global float8 *restrict globalfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector8_p(global const char8* globalconstchar8p,\n"
+    "                                   global const uchar8 * globalconstuchar8p,\n"
+    "                                   global const short8*globalconstshort8p,\n"
+    "                                   global const ushort8 *globalconstushort8p,\n"
+    "                                   global const int8* globalconstint8p,\n"
+    "                                   global const uint8 * globalconstuint8p,\n"
+    "                                   global const long8*globalconstlong8p,\n"
+    "                                   global const ulong8 *globalconstulong8p,\n"
+    "                                   global const float8* globalconstfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector8_restrict_p(global const char8 * restrict globalconstchar8restrictp,\n"
+    "                                            global const uchar8*restrict globalconstuchar8restrictp,\n"
+    "                                            global const short8 *restrict globalconstshort8restrictp,\n"
+    "                                            global const ushort8* restrict globalconstushort8restrictp,\n"
+    "                                            global const int8 * restrict globalconstint8restrictp,\n"
+    "                                            global const uint8*restrict globalconstuint8restrictp,\n"
+    "                                            global const long8 *restrict globalconstlong8restrictp,\n"
+    "                                            global const ulong8* restrict globalconstulong8restrictp,\n"
+    "                                            global const float8 * restrict globalconstfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector8_p(global volatile char8*globalvolatilechar8p,\n"
+    "                                      global volatile uchar8 *globalvolatileuchar8p,\n"
+    "                                      global volatile short8* globalvolatileshort8p,\n"
+    "                                      global volatile ushort8 * globalvolatileushort8p,\n"
+    "                                      global volatile int8*globalvolatileint8p,\n"
+    "                                      global volatile uint8 *globalvolatileuint8p,\n"
+    "                                      global volatile long8* globalvolatilelong8p,\n"
+    "                                      global volatile ulong8 * globalvolatileulong8p,\n"
+    "                                      global volatile float8*globalvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector8_restrict_p(global volatile char8 *restrict globalvolatilechar8restrictp,\n"
+    "                                               global volatile uchar8* restrict globalvolatileuchar8restrictp,\n"
+    "                                               global volatile short8 * restrict globalvolatileshort8restrictp,\n"
+    "                                               global volatile ushort8*restrict globalvolatileushort8restrictp,\n"
+    "                                               global volatile int8 *restrict globalvolatileint8restrictp,\n"
+    "                                               global volatile uint8* restrict globalvolatileuint8restrictp,\n"
+    "                                               global volatile long8 * restrict globalvolatilelong8restrictp,\n"
+    "                                               global volatile ulong8*restrict globalvolatileulong8restrictp,\n"
+    "                                               global volatile float8 *restrict globalvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector8_p(global const volatile char8* globalconstvolatilechar8p,\n"
+    "                                            global const volatile uchar8 * globalconstvolatileuchar8p,\n"
+    "                                            global const volatile short8*globalconstvolatileshort8p,\n"
+    "                                            global const volatile ushort8 *globalconstvolatileushort8p,\n"
+    "                                            global const volatile int8* globalconstvolatileint8p,\n"
+    "                                            global const volatile uint8 * globalconstvolatileuint8p,\n"
+    "                                            global const volatile long8*globalconstvolatilelong8p,\n"
+    "                                            global const volatile ulong8 *globalconstvolatileulong8p,\n"
+    "                                            global const volatile float8* globalconstvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector8_restrict_p(global const volatile char8 * restrict globalconstvolatilechar8restrictp,\n"
+    "                                                     global const volatile uchar8*restrict globalconstvolatileuchar8restrictp,\n"
+    "                                                     global const volatile short8 *restrict globalconstvolatileshort8restrictp,\n"
+    "                                                     global const volatile ushort8* restrict globalconstvolatileushort8restrictp,\n"
+    "                                                     global const volatile int8 * restrict globalconstvolatileint8restrictp,\n"
+    "                                                     global const volatile uint8*restrict globalconstvolatileuint8restrictp,\n"
+    "                                                     global const volatile long8 *restrict globalconstvolatilelong8restrictp,\n"
+    "                                                     global const volatile ulong8* restrict globalconstvolatileulong8restrictp,\n"
+    "                                                     global const volatile float8 * restrict globalconstvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector8_p(local char8*localchar8p,\n"
+    "                            local uchar8 *localuchar8p,\n"
+    "                            local short8* localshort8p,\n"
+    "                            local ushort8 * localushort8p,\n"
+    "                            local int8*localint8p,\n"
+    "                            local uint8 *localuint8p,\n"
+    "                            local long8* locallong8p,\n"
+    "                            local ulong8 * localulong8p,\n"
+    "                            local float8*localfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector8_restrict_p(local char8 *restrict localchar8restrictp,\n"
+    "                                     local uchar8* restrict localuchar8restrictp,\n"
+    "                                     local short8 * restrict localshort8restrictp,\n"
+    "                                     local ushort8*restrict localushort8restrictp,\n"
+    "                                     local int8 *restrict localint8restrictp,\n"
+    "                                     local uint8* restrict localuint8restrictp,\n"
+    "                                     local long8 * restrict locallong8restrictp,\n"
+    "                                     local ulong8*restrict localulong8restrictp,\n"
+    "                                     local float8 *restrict localfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector8_p(local const char8* localconstchar8p,\n"
+    "                                  local const uchar8 * localconstuchar8p,\n"
+    "                                  local const short8*localconstshort8p,\n"
+    "                                  local const ushort8 *localconstushort8p,\n"
+    "                                  local const int8* localconstint8p,\n"
+    "                                  local const uint8 * localconstuint8p,\n"
+    "                                  local const long8*localconstlong8p,\n"
+    "                                  local const ulong8 *localconstulong8p,\n"
+    "                                  local const float8* localconstfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector8_restrict_p(local const char8 * restrict localconstchar8restrictp,\n"
+    "                                           local const uchar8*restrict localconstuchar8restrictp,\n"
+    "                                           local const short8 *restrict localconstshort8restrictp,\n"
+    "                                           local const ushort8* restrict localconstushort8restrictp,\n"
+    "                                           local const int8 * restrict localconstint8restrictp,\n"
+    "                                           local const uint8*restrict localconstuint8restrictp,\n"
+    "                                           local const long8 *restrict localconstlong8restrictp,\n"
+    "                                           local const ulong8* restrict localconstulong8restrictp,\n"
+    "                                           local const float8 * restrict localconstfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector8_p(local volatile char8*localvolatilechar8p,\n"
+    "                                     local volatile uchar8 *localvolatileuchar8p,\n"
+    "                                     local volatile short8* localvolatileshort8p,\n"
+    "                                     local volatile ushort8 * localvolatileushort8p,\n"
+    "                                     local volatile int8*localvolatileint8p,\n"
+    "                                     local volatile uint8 *localvolatileuint8p,\n"
+    "                                     local volatile long8* localvolatilelong8p,\n"
+    "                                     local volatile ulong8 * localvolatileulong8p,\n"
+    "                                     local volatile float8*localvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector8_restrict_p(local volatile char8 *restrict localvolatilechar8restrictp,\n"
+    "                                              local volatile uchar8* restrict localvolatileuchar8restrictp,\n"
+    "                                              local volatile short8 * restrict localvolatileshort8restrictp,\n"
+    "                                              local volatile ushort8*restrict localvolatileushort8restrictp,\n"
+    "                                              local volatile int8 *restrict localvolatileint8restrictp,\n"
+    "                                              local volatile uint8* restrict localvolatileuint8restrictp,\n"
+    "                                              local volatile long8 * restrict localvolatilelong8restrictp,\n"
+    "                                              local volatile ulong8*restrict localvolatileulong8restrictp,\n"
+    "                                              local volatile float8 *restrict localvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector8_p(local const volatile char8* localconstvolatilechar8p,\n"
+    "                                           local const volatile uchar8 * localconstvolatileuchar8p,\n"
+    "                                           local const volatile short8*localconstvolatileshort8p,\n"
+    "                                           local const volatile ushort8 *localconstvolatileushort8p,\n"
+    "                                           local const volatile int8* localconstvolatileint8p,\n"
+    "                                           local const volatile uint8 * localconstvolatileuint8p,\n"
+    "                                           local const volatile long8*localconstvolatilelong8p,\n"
+    "                                           local const volatile ulong8 *localconstvolatileulong8p,\n"
+    "                                           local const volatile float8* localconstvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector8_restrict_p(local const volatile char8 * restrict localconstvolatilechar8restrictp,\n"
+    "                                                    local const volatile uchar8*restrict localconstvolatileuchar8restrictp,\n"
+    "                                                    local const volatile short8 *restrict localconstvolatileshort8restrictp,\n"
+    "                                                    local const volatile ushort8* restrict localconstvolatileushort8restrictp,\n"
+    "                                                    local const volatile int8 * restrict localconstvolatileint8restrictp,\n"
+    "                                                    local const volatile uint8*restrict localconstvolatileuint8restrictp,\n"
+    "                                                    local const volatile long8 *restrict localconstvolatilelong8restrictp,\n"
+    "                                                    local const volatile ulong8* restrict localconstvolatileulong8restrictp,\n"
+    "                                                    local const volatile float8 * restrict localconstvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector8_d(char8 char8d,\n"
+    "                      uchar8 uchar8d,\n"
+    "                      short8 short8d,\n"
+    "                      ushort8 ushort8d,\n"
+    "                      int8 int8d,\n"
+    "                      uint8 uint8d,\n"
+    "                      long8 long8d,\n"
+    "                      ulong8 ulong8d,\n"
+    "                      float8 float8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector8_d(const char8 constchar8d,\n"
+    "                            const uchar8 constuchar8d,\n"
+    "                            const short8 constshort8d,\n"
+    "                            const ushort8 constushort8d,\n"
+    "                            const int8 constint8d,\n"
+    "                            const uint8 constuint8d,\n"
+    "                            const long8 constlong8d,\n"
+    "                            const ulong8 constulong8d,\n"
+    "                            const float8 constfloat8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector8_d(private char8 privatechar8d,\n"
+    "                              private uchar8 privateuchar8d,\n"
+    "                              private short8 privateshort8d,\n"
+    "                              private ushort8 privateushort8d,\n"
+    "                              private int8 privateint8d,\n"
+    "                              private uint8 privateuint8d,\n"
+    "                              private long8 privatelong8d,\n"
+    "                              private ulong8 privateulong8d,\n"
+    "                              private float8 privatefloat8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector8_d(private const char8 privateconstchar8d,\n"
+    "                                    private const uchar8 privateconstuchar8d,\n"
+    "                                    private const short8 privateconstshort8d,\n"
+    "                                    private const ushort8 privateconstushort8d,\n"
+    "                                    private const int8 privateconstint8d,\n"
+    "                                    private const uint8 privateconstuint8d,\n"
+    "                                    private const long8 privateconstlong8d,\n"
+    "                                    private const ulong8 privateconstulong8d,\n"
+    "                                    private const float8 privateconstfloat8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_p0(constant char16*constantchar16p,\n"
+    "                                constant uchar16 *constantuchar16p,\n"
+    "                                constant short16* constantshort16p,\n"
+    "                                constant ushort16 * constantushort16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_p1(constant int16*constantint16p,\n"
+    "                                constant uint16 *constantuint16p,\n"
+    "                                constant long16* constantlong16p,\n"
+    "                                constant ulong16 * constantulong16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_p2(constant float16*constantfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_restrict_p0(constant char16 *restrict constantchar16restrictp,\n"
+    "                                         constant uchar16* restrict constantuchar16restrictp,\n"
+    "                                         constant short16 * restrict constantshort16restrictp,\n"
+    "                                         constant ushort16*restrict constantushort16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_restrict_p1(constant int16 *restrict constantint16restrictp,\n"
+    "                                         constant uint16* restrict constantuint16restrictp,\n"
+    "                                         constant long16 * restrict constantlong16restrictp,\n"
+    "                                         constant ulong16*restrict constantulong16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_restrict_p2(constant float16 *restrict constantfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector16_p(global char16*globalchar16p,\n"
+    "                              global uchar16 *globaluchar16p,\n"
+    "                              global short16* globalshort16p,\n"
+    "                              global ushort16 * globalushort16p,\n"
+    "                              global int16*globalint16p,\n"
+    "                              global uint16 *globaluint16p,\n"
+    "                              global long16* globallong16p,\n"
+    "                              global ulong16 * globalulong16p,\n"
+    "                              global float16*globalfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector16_restrict_p(global char16 *restrict globalchar16restrictp,\n"
+    "                                       global uchar16* restrict globaluchar16restrictp,\n"
+    "                                       global short16 * restrict globalshort16restrictp,\n"
+    "                                       global ushort16*restrict globalushort16restrictp,\n"
+    "                                       global int16 *restrict globalint16restrictp,\n"
+    "                                       global uint16* restrict globaluint16restrictp,\n"
+    "                                       global long16 * restrict globallong16restrictp,\n"
+    "                                       global ulong16*restrict globalulong16restrictp,\n"
+    "                                       global float16 *restrict globalfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector16_p(global const char16* globalconstchar16p,\n"
+    "                                    global const uchar16 * globalconstuchar16p,\n"
+    "                                    global const short16*globalconstshort16p,\n"
+    "                                    global const ushort16 *globalconstushort16p,\n"
+    "                                    global const int16* globalconstint16p,\n"
+    "                                    global const uint16 * globalconstuint16p,\n"
+    "                                    global const long16*globalconstlong16p,\n"
+    "                                    global const ulong16 *globalconstulong16p,\n"
+    "                                    global const float16* globalconstfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector16_restrict_p(global const char16 * restrict globalconstchar16restrictp,\n"
+    "                                             global const uchar16*restrict globalconstuchar16restrictp,\n"
+    "                                             global const short16 *restrict globalconstshort16restrictp,\n"
+    "                                             global const ushort16* restrict globalconstushort16restrictp,\n"
+    "                                             global const int16 * restrict globalconstint16restrictp,\n"
+    "                                             global const uint16*restrict globalconstuint16restrictp,\n"
+    "                                             global const long16 *restrict globalconstlong16restrictp,\n"
+    "                                             global const ulong16* restrict globalconstulong16restrictp,\n"
+    "                                             global const float16 * restrict globalconstfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector16_p(global volatile char16*globalvolatilechar16p,\n"
+    "                                       global volatile uchar16 *globalvolatileuchar16p,\n"
+    "                                       global volatile short16* globalvolatileshort16p,\n"
+    "                                       global volatile ushort16 * globalvolatileushort16p,\n"
+    "                                       global volatile int16*globalvolatileint16p,\n"
+    "                                       global volatile uint16 *globalvolatileuint16p,\n"
+    "                                       global volatile long16* globalvolatilelong16p,\n"
+    "                                       global volatile ulong16 * globalvolatileulong16p,\n"
+    "                                       global volatile float16*globalvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector16_restrict_p(global volatile char16 *restrict globalvolatilechar16restrictp,\n"
+    "                                                global volatile uchar16* restrict globalvolatileuchar16restrictp,\n"
+    "                                                global volatile short16 * restrict globalvolatileshort16restrictp,\n"
+    "                                                global volatile ushort16*restrict globalvolatileushort16restrictp,\n"
+    "                                                global volatile int16 *restrict globalvolatileint16restrictp,\n"
+    "                                                global volatile uint16* restrict globalvolatileuint16restrictp,\n"
+    "                                                global volatile long16 * restrict globalvolatilelong16restrictp,\n"
+    "                                                global volatile ulong16*restrict globalvolatileulong16restrictp,\n"
+    "                                                global volatile float16 *restrict globalvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector16_p(global const volatile char16* globalconstvolatilechar16p,\n"
+    "                                             global const volatile uchar16 * globalconstvolatileuchar16p,\n"
+    "                                             global const volatile short16*globalconstvolatileshort16p,\n"
+    "                                             global const volatile ushort16 *globalconstvolatileushort16p,\n"
+    "                                             global const volatile int16* globalconstvolatileint16p,\n"
+    "                                             global const volatile uint16 * globalconstvolatileuint16p,\n"
+    "                                             global const volatile long16*globalconstvolatilelong16p,\n"
+    "                                             global const volatile ulong16 *globalconstvolatileulong16p,\n"
+    "                                             global const volatile float16* globalconstvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector16_restrict_p(global const volatile char16 * restrict globalconstvolatilechar16restrictp,\n"
+    "                                                      global const volatile uchar16*restrict globalconstvolatileuchar16restrictp,\n"
+    "                                                      global const volatile short16 *restrict globalconstvolatileshort16restrictp,\n"
+    "                                                      global const volatile ushort16* restrict globalconstvolatileushort16restrictp,\n"
+    "                                                      global const volatile int16 * restrict globalconstvolatileint16restrictp,\n"
+    "                                                      global const volatile uint16*restrict globalconstvolatileuint16restrictp,\n"
+    "                                                      global const volatile long16 *restrict globalconstvolatilelong16restrictp,\n"
+    "                                                      global const volatile ulong16* restrict globalconstvolatileulong16restrictp,\n"
+    "                                                      global const volatile float16 * restrict globalconstvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector16_p(local char16*localchar16p,\n"
+    "                             local uchar16 *localuchar16p,\n"
+    "                             local short16* localshort16p,\n"
+    "                             local ushort16 * localushort16p,\n"
+    "                             local int16*localint16p,\n"
+    "                             local uint16 *localuint16p,\n"
+    "                             local long16* locallong16p,\n"
+    "                             local ulong16 * localulong16p,\n"
+    "                             local float16*localfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector16_restrict_p(local char16 *restrict localchar16restrictp,\n"
+    "                                      local uchar16* restrict localuchar16restrictp,\n"
+    "                                      local short16 * restrict localshort16restrictp,\n"
+    "                                      local ushort16*restrict localushort16restrictp,\n"
+    "                                      local int16 *restrict localint16restrictp,\n"
+    "                                      local uint16* restrict localuint16restrictp,\n"
+    "                                      local long16 * restrict locallong16restrictp,\n"
+    "                                      local ulong16*restrict localulong16restrictp,\n"
+    "                                      local float16 *restrict localfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector16_p(local const char16* localconstchar16p,\n"
+    "                                   local const uchar16 * localconstuchar16p,\n"
+    "                                   local const short16*localconstshort16p,\n"
+    "                                   local const ushort16 *localconstushort16p,\n"
+    "                                   local const int16* localconstint16p,\n"
+    "                                   local const uint16 * localconstuint16p,\n"
+    "                                   local const long16*localconstlong16p,\n"
+    "                                   local const ulong16 *localconstulong16p,\n"
+    "                                   local const float16* localconstfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector16_restrict_p(local const char16 * restrict localconstchar16restrictp,\n"
+    "                                            local const uchar16*restrict localconstuchar16restrictp,\n"
+    "                                            local const short16 *restrict localconstshort16restrictp,\n"
+    "                                            local const ushort16* restrict localconstushort16restrictp,\n"
+    "                                            local const int16 * restrict localconstint16restrictp,\n"
+    "                                            local const uint16*restrict localconstuint16restrictp,\n"
+    "                                            local const long16 *restrict localconstlong16restrictp,\n"
+    "                                            local const ulong16* restrict localconstulong16restrictp,\n"
+    "                                            local const float16 * restrict localconstfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector16_p(local volatile char16*localvolatilechar16p,\n"
+    "                                      local volatile uchar16 *localvolatileuchar16p,\n"
+    "                                      local volatile short16* localvolatileshort16p,\n"
+    "                                      local volatile ushort16 * localvolatileushort16p,\n"
+    "                                      local volatile int16*localvolatileint16p,\n"
+    "                                      local volatile uint16 *localvolatileuint16p,\n"
+    "                                      local volatile long16* localvolatilelong16p,\n"
+    "                                      local volatile ulong16 * localvolatileulong16p,\n"
+    "                                      local volatile float16*localvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector16_restrict_p(local volatile char16 *restrict localvolatilechar16restrictp,\n"
+    "                                               local volatile uchar16* restrict localvolatileuchar16restrictp,\n"
+    "                                               local volatile short16 * restrict localvolatileshort16restrictp,\n"
+    "                                               local volatile ushort16*restrict localvolatileushort16restrictp,\n"
+    "                                               local volatile int16 *restrict localvolatileint16restrictp,\n"
+    "                                               local volatile uint16* restrict localvolatileuint16restrictp,\n"
+    "                                               local volatile long16 * restrict localvolatilelong16restrictp,\n"
+    "                                               local volatile ulong16*restrict localvolatileulong16restrictp,\n"
+    "                                               local volatile float16 *restrict localvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector16_p(local const volatile char16* localconstvolatilechar16p,\n"
+    "                                            local const volatile uchar16 * localconstvolatileuchar16p,\n"
+    "                                            local const volatile short16*localconstvolatileshort16p,\n"
+    "                                            local const volatile ushort16 *localconstvolatileushort16p,\n"
+    "                                            local const volatile int16* localconstvolatileint16p,\n"
+    "                                            local const volatile uint16 * localconstvolatileuint16p,\n"
+    "                                            local const volatile long16*localconstvolatilelong16p,\n"
+    "                                            local const volatile ulong16 *localconstvolatileulong16p,\n"
+    "                                            local const volatile float16* localconstvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector16_restrict_p(local const volatile char16 * restrict localconstvolatilechar16restrictp,\n"
+    "                                                     local const volatile uchar16*restrict localconstvolatileuchar16restrictp,\n"
+    "                                                     local const volatile short16 *restrict localconstvolatileshort16restrictp,\n"
+    "                                                     local const volatile ushort16* restrict localconstvolatileushort16restrictp,\n"
+    "                                                     local const volatile int16 * restrict localconstvolatileint16restrictp,\n"
+    "                                                     local const volatile uint16*restrict localconstvolatileuint16restrictp,\n"
+    "                                                     local const volatile long16 *restrict localconstvolatilelong16restrictp,\n"
+    "                                                     local const volatile ulong16* restrict localconstvolatileulong16restrictp,\n"
+    "                                                     local const volatile float16 * restrict localconstvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector16_d(char16 char16d,\n"
+    "                       uchar16 uchar16d,\n"
+    "                       short16 short16d,\n"
+    "                       ushort16 ushort16d,\n"
+    "                       int16 int16d,\n"
+    "                       uint16 uint16d,\n"
+    "                       long16 long16d,\n"
+    "                       ulong16 ulong16d,\n"
+    "                       float16 float16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector16_d(const char16 constchar16d,\n"
+    "                             const uchar16 constuchar16d,\n"
+    "                             const short16 constshort16d,\n"
+    "                             const ushort16 constushort16d,\n"
+    "                             const int16 constint16d,\n"
+    "                             const uint16 constuint16d,\n"
+    "                             const long16 constlong16d,\n"
+    "                             const ulong16 constulong16d,\n"
+    "                             const float16 constfloat16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector16_d(private char16 privatechar16d,\n"
+    "                               private uchar16 privateuchar16d,\n"
+    "                               private short16 privateshort16d,\n"
+    "                               private ushort16 privateushort16d,\n"
+    "                               private int16 privateint16d,\n"
+    "                               private uint16 privateuint16d,\n"
+    "                               private long16 privatelong16d,\n"
+    "                               private ulong16 privateulong16d,\n"
+    "                               private float16 privatefloat16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector16_d(private const char16 privateconstchar16d,\n"
+    "                                     private const uchar16 privateconstuchar16d,\n"
+    "                                     private const short16 privateconstshort16d,\n"
+    "                                     private const ushort16 privateconstushort16d,\n"
+    "                                     private const int16 privateconstint16d,\n"
+    "                                     private const uint16 privateconstuint16d,\n"
+    "                                     private const long16 privateconstlong16d,\n"
+    "                                     private const ulong16 privateconstulong16d,\n"
+    "                                     private const float16 privateconstfloat16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_p0(constant typedef_type*constanttypedef_typep,\n"
+    "                               constant struct struct_type *constantstructstruct_typep,\n"
+    "                               constant typedef_struct_type* constanttypedef_struct_typep,\n"
+    "                               constant union union_type * constantunionunion_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_p1(constant typedef_union_type*constanttypedef_union_typep,\n"
+    "                               constant enum enum_type *constantenumenum_typep,\n"
+    "                               constant typedef_enum_type* constanttypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_restrict_p0(constant typedef_type * restrict constanttypedef_typerestrictp,\n"
+    "                                        constant struct struct_type*restrict constantstructstruct_typerestrictp,\n"
+    "                                        constant typedef_struct_type *restrict constanttypedef_struct_typerestrictp,\n"
+    "                                        constant union union_type* restrict constantunionunion_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_restrict_p1(constant typedef_union_type * restrict constanttypedef_union_typerestrictp,\n"
+    "                                        constant enum enum_type*restrict constantenumenum_typerestrictp,\n"
+    "                                        constant typedef_enum_type *restrict constanttypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_derived_p(global typedef_type*globaltypedef_typep,\n"
+    "                             global struct struct_type *globalstructstruct_typep,\n"
+    "                             global typedef_struct_type* globaltypedef_struct_typep,\n"
+    "                             global union union_type * globalunionunion_typep,\n"
+    "                             global typedef_union_type*globaltypedef_union_typep,\n"
+    "                             global enum enum_type *globalenumenum_typep,\n"
+    "                             global typedef_enum_type* globaltypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_derived_restrict_p(global typedef_type * restrict globaltypedef_typerestrictp,\n"
+    "                                      global struct struct_type*restrict globalstructstruct_typerestrictp,\n"
+    "                                      global typedef_struct_type *restrict globaltypedef_struct_typerestrictp,\n"
+    "                                      global union union_type* restrict globalunionunion_typerestrictp,\n"
+    "                                      global typedef_union_type * restrict globaltypedef_union_typerestrictp,\n"
+    "                                      global enum enum_type*restrict globalenumenum_typerestrictp,\n"
+    "                                      global typedef_enum_type *restrict globaltypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_derived_p(global const typedef_type* globalconsttypedef_typep,\n"
+    "                                   global const struct struct_type * globalconststructstruct_typep,\n"
+    "                                   global const typedef_struct_type*globalconsttypedef_struct_typep,\n"
+    "                                   global const union union_type *globalconstunionunion_typep,\n"
+    "                                   global const typedef_union_type* globalconsttypedef_union_typep,\n"
+    "                                   global const enum enum_type * globalconstenumenum_typep,\n"
+    "                                   global const typedef_enum_type*globalconsttypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_derived_restrict_p(global const typedef_type *restrict globalconsttypedef_typerestrictp,\n"
+    "                                            global const struct struct_type* restrict globalconststructstruct_typerestrictp,\n"
+    "                                            global const typedef_struct_type * restrict globalconsttypedef_struct_typerestrictp,\n"
+    "                                            global const union union_type*restrict globalconstunionunion_typerestrictp,\n"
+    "                                            global const typedef_union_type *restrict globalconsttypedef_union_typerestrictp,\n"
+    "                                            global const enum enum_type* restrict globalconstenumenum_typerestrictp,\n"
+    "                                            global const typedef_enum_type * restrict globalconsttypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_derived_p(global volatile typedef_type*globalvolatiletypedef_typep,\n"
+    "                                      global volatile struct struct_type *globalvolatilestructstruct_typep,\n"
+    "                                      global volatile typedef_struct_type* globalvolatiletypedef_struct_typep,\n"
+    "                                      global volatile union union_type * globalvolatileunionunion_typep,\n"
+    "                                      global volatile typedef_union_type*globalvolatiletypedef_union_typep,\n"
+    "                                      global volatile enum enum_type *globalvolatileenumenum_typep,\n"
+    "                                      global volatile typedef_enum_type* globalvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_derived_restrict_p(global volatile typedef_type * restrict globalvolatiletypedef_typerestrictp,\n"
+    "                                               global volatile struct struct_type*restrict globalvolatilestructstruct_typerestrictp,\n"
+    "                                               global volatile typedef_struct_type *restrict globalvolatiletypedef_struct_typerestrictp,\n"
+    "                                               global volatile union union_type* restrict globalvolatileunionunion_typerestrictp,\n"
+    "                                               global volatile typedef_union_type * restrict globalvolatiletypedef_union_typerestrictp,\n"
+    "                                               global volatile enum enum_type*restrict globalvolatileenumenum_typerestrictp,\n"
+    "                                               global volatile typedef_enum_type *restrict globalvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_derived_p(global const volatile typedef_type* globalconstvolatiletypedef_typep,\n"
+    "                                            global const volatile struct struct_type * globalconstvolatilestructstruct_typep,\n"
+    "                                            global const volatile typedef_struct_type*globalconstvolatiletypedef_struct_typep,\n"
+    "                                            global const volatile union union_type *globalconstvolatileunionunion_typep,\n"
+    "                                            global const volatile typedef_union_type* globalconstvolatiletypedef_union_typep,\n"
+    "                                            global const volatile enum enum_type * globalconstvolatileenumenum_typep,\n"
+    "                                            global const volatile typedef_enum_type*globalconstvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_derived_restrict_p(global const volatile typedef_type *restrict globalconstvolatiletypedef_typerestrictp,\n"
+    "                                                     global const volatile struct struct_type* restrict globalconstvolatilestructstruct_typerestrictp,\n"
+    "                                                     global const volatile typedef_struct_type * restrict globalconstvolatiletypedef_struct_typerestrictp,\n"
+    "                                                     global const volatile union union_type*restrict globalconstvolatileunionunion_typerestrictp,\n"
+    "                                                     global const volatile typedef_union_type *restrict globalconstvolatiletypedef_union_typerestrictp,\n"
+    "                                                     global const volatile enum enum_type* restrict globalconstvolatileenumenum_typerestrictp,\n"
+    "                                                     global const volatile typedef_enum_type * restrict globalconstvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_derived_p(local typedef_type*localtypedef_typep,\n"
+    "                            local struct struct_type *localstructstruct_typep,\n"
+    "                            local typedef_struct_type* localtypedef_struct_typep,\n"
+    "                            local union union_type * localunionunion_typep,\n"
+    "                            local typedef_union_type*localtypedef_union_typep,\n"
+    "                            local enum enum_type *localenumenum_typep,\n"
+    "                            local typedef_enum_type* localtypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_derived_restrict_p(local typedef_type * restrict localtypedef_typerestrictp,\n"
+    "                                     local struct struct_type*restrict localstructstruct_typerestrictp,\n"
+    "                                     local typedef_struct_type *restrict localtypedef_struct_typerestrictp,\n"
+    "                                     local union union_type* restrict localunionunion_typerestrictp,\n"
+    "                                     local typedef_union_type * restrict localtypedef_union_typerestrictp,\n"
+    "                                     local enum enum_type*restrict localenumenum_typerestrictp,\n"
+    "                                     local typedef_enum_type *restrict localtypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_derived_p(local const typedef_type* localconsttypedef_typep,\n"
+    "                                  local const struct struct_type * localconststructstruct_typep,\n"
+    "                                  local const typedef_struct_type*localconsttypedef_struct_typep,\n"
+    "                                  local const union union_type *localconstunionunion_typep,\n"
+    "                                  local const typedef_union_type* localconsttypedef_union_typep,\n"
+    "                                  local const enum enum_type * localconstenumenum_typep,\n"
+    "                                  local const typedef_enum_type*localconsttypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_derived_restrict_p(local const typedef_type *restrict localconsttypedef_typerestrictp,\n"
+    "                                           local const struct struct_type* restrict localconststructstruct_typerestrictp,\n"
+    "                                           local const typedef_struct_type * restrict localconsttypedef_struct_typerestrictp,\n"
+    "                                           local const union union_type*restrict localconstunionunion_typerestrictp,\n"
+    "                                           local const typedef_union_type *restrict localconsttypedef_union_typerestrictp,\n"
+    "                                           local const enum enum_type* restrict localconstenumenum_typerestrictp,\n"
+    "                                           local const typedef_enum_type * restrict localconsttypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_derived_p(local volatile typedef_type*localvolatiletypedef_typep,\n"
+    "                                     local volatile struct struct_type *localvolatilestructstruct_typep,\n"
+    "                                     local volatile typedef_struct_type* localvolatiletypedef_struct_typep,\n"
+    "                                     local volatile union union_type * localvolatileunionunion_typep,\n"
+    "                                     local volatile typedef_union_type*localvolatiletypedef_union_typep,\n"
+    "                                     local volatile enum enum_type *localvolatileenumenum_typep,\n"
+    "                                     local volatile typedef_enum_type* localvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_derived_restrict_p(local volatile typedef_type * restrict localvolatiletypedef_typerestrictp,\n"
+    "                                              local volatile struct struct_type*restrict localvolatilestructstruct_typerestrictp,\n"
+    "                                              local volatile typedef_struct_type *restrict localvolatiletypedef_struct_typerestrictp,\n"
+    "                                              local volatile union union_type* restrict localvolatileunionunion_typerestrictp,\n"
+    "                                              local volatile typedef_union_type * restrict localvolatiletypedef_union_typerestrictp,\n"
+    "                                              local volatile enum enum_type*restrict localvolatileenumenum_typerestrictp,\n"
+    "                                              local volatile typedef_enum_type *restrict localvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_derived_p(local const volatile typedef_type* localconstvolatiletypedef_typep,\n"
+    "                                           local const volatile struct struct_type * localconstvolatilestructstruct_typep,\n"
+    "                                           local const volatile typedef_struct_type*localconstvolatiletypedef_struct_typep,\n"
+    "                                           local const volatile union union_type *localconstvolatileunionunion_typep,\n"
+    "                                           local const volatile typedef_union_type* localconstvolatiletypedef_union_typep,\n"
+    "                                           local const volatile enum enum_type * localconstvolatileenumenum_typep,\n"
+    "                                           local const volatile typedef_enum_type*localconstvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_derived_restrict_p(local const volatile typedef_type *restrict localconstvolatiletypedef_typerestrictp,\n"
+    "                                                    local const volatile struct struct_type* restrict localconstvolatilestructstruct_typerestrictp,\n"
+    "                                                    local const volatile typedef_struct_type * restrict localconstvolatiletypedef_struct_typerestrictp,\n"
+    "                                                    local const volatile union union_type*restrict localconstvolatileunionunion_typerestrictp,\n"
+    "                                                    local const volatile typedef_union_type *restrict localconstvolatiletypedef_union_typerestrictp,\n"
+    "                                                    local const volatile enum enum_type* restrict localconstvolatileenumenum_typerestrictp,\n"
+    "                                                    local const volatile typedef_enum_type * restrict localconstvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void derived_d(typedef_type typedef_typed,\n"
+    "                      struct struct_type structstruct_typed,\n"
+    "                      typedef_struct_type typedef_struct_typed,\n"
+    "                      union union_type unionunion_typed,\n"
+    "                      typedef_union_type typedef_union_typed,\n"
+    "                      enum enum_type enumenum_typed,\n"
+    "                      typedef_enum_type typedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_derived_d(const typedef_type consttypedef_typed,\n"
+    "                            const struct struct_type conststructstruct_typed,\n"
+    "                            const typedef_struct_type consttypedef_struct_typed,\n"
+    "                            const union union_type constunionunion_typed,\n"
+    "                            const typedef_union_type consttypedef_union_typed,\n"
+    "                            const enum enum_type constenumenum_typed,\n"
+    "                            const typedef_enum_type consttypedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_derived_d(private typedef_type privatetypedef_typed,\n"
+    "                              private struct struct_type privatestructstruct_typed,\n"
+    "                              private typedef_struct_type privatetypedef_struct_typed,\n"
+    "                              private union union_type privateunionunion_typed,\n"
+    "                              private typedef_union_type privatetypedef_union_typed,\n"
+    "                              private enum enum_type privateenumenum_typed,\n"
+    "                              private typedef_enum_type privatetypedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_derived_d(private const typedef_type privateconsttypedef_typed,\n"
+    "                                    private const struct struct_type privateconststructstruct_typed,\n"
+    "                                    private const typedef_struct_type privateconsttypedef_struct_typed,\n"
+    "                                    private const union union_type privateconstunionunion_typed,\n"
+    "                                    private const typedef_union_type privateconsttypedef_union_typed,\n"
+    "                                    private const enum enum_type privateconstenumenum_typed,\n"
+    "                                    private const typedef_enum_type privateconsttypedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+};
+
+static const char * required_arg_info[][72] = {
+  // The minimum value of CL_DEVICE_MAX_CONSTANT_ARGS is 4, so the constant_* kernels are split into groups of at most 4 arguments each.
+    {
+        "constant_scalar_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "constantvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "constantcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantunsignedcharp",
+    NULL
+  },
+  {
+    "constant_scalar_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "constantshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "constantintp",
+    NULL
+  },
+  {
+    "constant_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "constantlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantulongp",
+    NULL
+  },
+  {
+    "constant_scalar_p3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "constantfloatp",
+        NULL
+    },
+    {
+        "constant_scalar_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "constantvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "constantcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantunsignedcharrestrictp",
+    NULL
+  },
+  {
+    "constant_scalar_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "constantshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "constantintrestrictp",
+    NULL
+  },
+  {
+    "constant_scalar_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "constantlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantulongrestrictp",
+    NULL
+  },
+  {
+    "constant_scalar_restrict_p3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "constantfloatrestrictp",
+        NULL
+    },
+    {
+        "global_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "globalvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "globalcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "globalshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "globalintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globaluintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globalunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "globallongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "globalfloatp",
+        NULL
+    },
+    {
+        "global_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globaluintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globallongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalfloatrestrictp",
+        NULL
+    },
+    {
+        "global_const_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "globalconstvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "globalconstcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "globalconstshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "globalconstintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "globalconstlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "globalconstfloatp",
+        NULL
+    },
+    {
+        "global_const_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstfloatrestrictp",
+        NULL
+    },
+    {
+        "global_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalvolatilefloatp",
+        NULL
+    },
+    {
+        "global_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalvolatilefloatrestrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalconstvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalconstvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalconstvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalconstvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalconstvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalconstvolatilefloatp",
+        NULL
+    },
+    {
+        "global_const_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstvolatilefloatrestrictp",
+        NULL
+    },
+    {
+        "local_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "localvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "localcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "localshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "localintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "locallongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "localfloatp",
+        NULL
+    },
+    {
+        "local_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "locallongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localfloatrestrictp",
+        NULL
+    },
+    {
+        "local_const_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "localconstvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "localconstcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "localconstshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "localconstintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "localconstlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "localconstfloatp",
+        NULL
+    },
+    {
+        "local_const_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstfloatrestrictp",
+        NULL
+    },
+    {
+        "local_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localvolatilefloatp",
+        NULL
+    },
+    {
+        "local_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localvolatilefloatrestrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localconstvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localconstvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localconstvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localconstvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localconstvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localconstvolatilefloatp",
+        NULL
+    },
+    {
+        "local_const_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstvolatilefloatrestrictp",
+        NULL
+    },
+    {
+        "scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "chard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "uchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "unsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "shortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "ushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "unsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "intd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "uintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "unsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "longd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "ulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "unsignedlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "floatd",
+        NULL
+    },
+    {
+        "const_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "constchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constuchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constunsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "constshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constunsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "constintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constuintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constunsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "constlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constunsignedlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "constfloatd",
+        NULL
+    },
+    {
+        "private_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privatechard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateuchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateunsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateunsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateuintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateunsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privatelongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateunsignedlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privatefloatd",
+        NULL
+    },
+    {
+        "private_const_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privateconstchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstuchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstunsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateconstshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstunsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateconstintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstuintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstunsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privateconstlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstunsignedlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privateconstfloatd",
+        NULL
+    },
+    {
+        "constant_vector2_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "constantchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "constantuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "constantshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "constantushort2p",
+    NULL
+    },
+    {
+        "constant_vector2_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "constantint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "constantuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "constantlong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "constantulong2p",
+    NULL
+    },
+    {
+        "constant_vector2_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "constantfloat2p",
+        NULL
+    },
+    {
+        "constant_vector2_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "constantchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "constantuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "constantshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "constantushort2restrictp",
+    NULL
+    },
+    {
+        "constant_vector2_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "constantint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "constantuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "constantlong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "constantulong2restrictp",
+    NULL
+    },
+    {
+        "constant_vector2_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "constantfloat2restrictp",
+        NULL
+    },
+    {
+        "global_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "globalchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "globaluchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "globalshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "globalushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "globalint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "globaluint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "globallong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "globalulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "globalfloat2p",
+        NULL
+    },
+    {
+        "global_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globaluchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globaluint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globallong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalfloat2restrictp",
+        NULL
+    },
+    {
+        "global_const_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "globalconstchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "globalconstuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "globalconstshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "globalconstushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "globalconstint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "globalconstuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "globalconstlong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "globalconstulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "globalconstfloat2p",
+        NULL
+    },
+    {
+        "global_const_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstlong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstfloat2restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalvolatileulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalvolatilefloat2p",
+        NULL
+    },
+    {
+        "global_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalvolatileulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalconstvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalconstvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalconstvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalconstvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalconstvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalconstvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalconstvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalconstvolatileulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalconstvolatilefloat2p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstvolatileulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "local_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "localchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "localuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "localshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "localushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "localint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "localuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "locallong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "localulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "localfloat2p",
+        NULL
+    },
+    {
+        "local_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "locallong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localfloat2restrictp",
+        NULL
+    },
+    {
+        "local_const_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "localconstchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "localconstuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "localconstshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "localconstushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "localconstint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "localconstuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "localconstlong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "localconstulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "localconstfloat2p",
+        NULL
+    },
+    {
+        "local_const_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstlong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstfloat2restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localvolatileulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localvolatilefloat2p",
+        NULL
+    },
+    {
+        "local_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localvolatileulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localconstvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localconstvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localconstvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localconstvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localconstvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localconstvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localconstvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localconstvolatileulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localconstvolatilefloat2p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstvolatileulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "char2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "uchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "short2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "ushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "int2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "uint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "long2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "ulong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "float2d",
+        NULL
+    },
+    {
+        "const_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "constchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "constuchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "constshort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "constushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "constint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "constuint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "constlong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "constulong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "constfloat2d",
+        NULL
+    },
+    {
+        "private_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privatechar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateuchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateshort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateuint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privatelong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateulong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privatefloat2d",
+        NULL
+    },
+    {
+        "private_const_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privateconstchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateconstuchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateconstshort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateconstushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateconstint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateconstuint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privateconstlong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateconstulong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privateconstfloat2d",
+        NULL
+    },
+    {
+        "constant_vector3_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "constantchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "constantuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "constantshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "constantushort3p",
+        NULL
+    },
+    {
+        "constant_vector3_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "constantint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "constantuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "constantlong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "constantulong3p",
+    NULL
+    },
+    {
+        "constant_vector3_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "constantfloat3p",
+        NULL
+    },
+    {
+        "constant_vector3_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "constantchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "constantuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "constantshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "constantushort3restrictp",
+        NULL
+    },
+    {
+        "constant_vector3_restrict_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "constantint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "constantuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "constantlong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "constantulong3restrictp",
+    NULL
+    },
+    {
+        "constant_vector3_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "constantfloat3restrictp",
+        NULL
+    },
+    {
+        "global_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "globalchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "globaluchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "globalshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "globalushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "globalint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "globaluint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "globallong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "globalulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "globalfloat3p",
+        NULL
+    },
+    {
+        "global_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globaluchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globaluint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globallong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalfloat3restrictp",
+        NULL
+    },
+    {
+        "global_const_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "globalconstchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "globalconstuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "globalconstshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "globalconstushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "globalconstint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "globalconstuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "globalconstlong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "globalconstulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "globalconstfloat3p",
+        NULL
+    },
+    {
+        "global_const_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstlong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstfloat3restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalvolatileulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalvolatilefloat3p",
+        NULL
+    },
+    {
+        "global_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalvolatileulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalconstvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalconstvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalconstvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalconstvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalconstvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalconstvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalconstvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalconstvolatileulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalconstvolatilefloat3p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstvolatileulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "local_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "localchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "localuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "localshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "localushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "localint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "localuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "locallong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "localulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "localfloat3p",
+        NULL
+    },
+    {
+        "local_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "locallong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localfloat3restrictp",
+        NULL
+    },
+    {
+        "local_const_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "localconstchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "localconstuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "localconstshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "localconstushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "localconstint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "localconstuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "localconstlong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "localconstulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "localconstfloat3p",
+        NULL
+    },
+    {
+        "local_const_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstlong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstfloat3restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localvolatileulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localvolatilefloat3p",
+        NULL
+    },
+    {
+        "local_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localvolatileulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localconstvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localconstvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localconstvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localconstvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localconstvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localconstvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localconstvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localconstvolatileulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localconstvolatilefloat3p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstvolatileulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "char3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "uchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "short3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "ushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "int3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "uint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "long3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "ulong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "float3d",
+        NULL
+    },
+    {
+        "const_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "constchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "constuchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "constshort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "constushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "constint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "constuint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "constlong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "constulong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "constfloat3d",
+        NULL
+    },
+    {
+        "private_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privatechar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateuchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateshort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateuint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privatelong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateulong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privatefloat3d",
+        NULL
+    },
+    {
+        "private_const_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privateconstchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateconstuchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateconstshort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateconstushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateconstint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateconstuint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privateconstlong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateconstulong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privateconstfloat3d",
+        NULL
+    },
+    {
+        "constant_vector4_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "constantchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "constantuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "constantshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "constantushort4p",
+        NULL
+    },
+    {
+        "constant_vector4_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "constantint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "constantuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "constantlong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "constantulong4p",
+        NULL
+    },
+    {
+        "constant_vector4_p2",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "constantfloat4p",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "constantchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "constantuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "constantshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "constantushort4restrictp",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "constantint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "constantuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "constantlong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "constantulong4restrictp",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p2",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "constantfloat4restrictp",
+        NULL
+    },
+    {
+        "global_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "globalchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "globaluchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "globalshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "globalushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "globalint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "globaluint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "globallong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "globalulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "globalfloat4p",
+        NULL
+    },
+    {
+        "global_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globaluchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globaluint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globallong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalfloat4restrictp",
+        NULL
+    },
+    {
+        "global_const_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "globalconstchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "globalconstuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "globalconstshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "globalconstushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "globalconstint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "globalconstuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "globalconstlong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "globalconstulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "globalconstfloat4p",
+        NULL
+    },
+    {
+        "global_const_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstlong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstfloat4restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalvolatileulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalvolatilefloat4p",
+        NULL
+    },
+    {
+        "global_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalvolatileulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalconstvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalconstvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalconstvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalconstvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalconstvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalconstvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalconstvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalconstvolatileulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalconstvolatilefloat4p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstvolatileulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "local_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "localchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "localuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "localshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "localushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "localint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "localuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "locallong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "localulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "localfloat4p",
+        NULL
+    },
+    {
+        "local_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "locallong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localfloat4restrictp",
+        NULL
+    },
+    {
+        "local_const_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "localconstchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "localconstuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "localconstshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "localconstushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "localconstint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "localconstuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "localconstlong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "localconstulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "localconstfloat4p",
+        NULL
+    },
+    {
+        "local_const_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstlong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstfloat4restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localvolatileulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localvolatilefloat4p",
+        NULL
+    },
+    {
+        "local_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localvolatileulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localconstvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localconstvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localconstvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localconstvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localconstvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localconstvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localconstvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localconstvolatileulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localconstvolatilefloat4p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstvolatileulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "char4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "uchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "short4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "ushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "int4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "uint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "long4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "ulong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "float4d",
+        NULL
+    },
+    {
+        "const_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "constchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "constuchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "constshort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "constushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "constint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "constuint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "constlong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "constulong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "constfloat4d",
+        NULL
+    },
+    {
+        "private_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privatechar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateuchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateshort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateuint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privatelong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateulong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privatefloat4d",
+        NULL
+    },
+    {
+        "private_const_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privateconstchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateconstuchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateconstshort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateconstushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateconstint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateconstuint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privateconstlong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateconstulong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privateconstfloat4d",
+        NULL
+    },
+    {
+        "constant_vector8_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "constantchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "constantuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "constantshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "constantushort8p",
+        NULL
+    },
+    {
+        "constant_vector8_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "constantint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "constantuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "constantlong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "constantulong8p",
+    NULL
+    },
+    {
+        "constant_vector8_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "constantfloat8p",
+        NULL
+    },
+    {
+        "constant_vector8_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "constantchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "constantuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "constantshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "constantushort8restrictp",
+        NULL
+    },
+    {
+        "constant_vector8_restrict_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "constantint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "constantuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "constantlong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "constantulong8restrictp",
+    NULL
+    },
+    {
+        "constant_vector8_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "constantfloat8restrictp",
+        NULL
+    },
+    {
+        "global_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "globalchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "globaluchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "globalshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "globalushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "globalint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "globaluint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "globallong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "globalulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "globalfloat8p",
+        NULL
+    },
+    {
+        "global_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globaluchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globaluint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globallong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalfloat8restrictp",
+        NULL
+    },
+    {
+        "global_const_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "globalconstchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "globalconstuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "globalconstshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "globalconstushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "globalconstint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "globalconstuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "globalconstlong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "globalconstulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "globalconstfloat8p",
+        NULL
+    },
+    {
+        "global_const_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstlong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstfloat8restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalvolatileulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalvolatilefloat8p",
+        NULL
+    },
+    {
+        "global_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalvolatileulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalconstvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalconstvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalconstvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalconstvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalconstvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalconstvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalconstvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalconstvolatileulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalconstvolatilefloat8p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstvolatileulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "local_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "localchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "localuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "localshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "localushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "localint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "localuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "locallong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "localulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "localfloat8p",
+        NULL
+    },
+    {
+        "local_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "locallong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localfloat8restrictp",
+        NULL
+    },
+    {
+        "local_const_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "localconstchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "localconstuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "localconstshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "localconstushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "localconstint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "localconstuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "localconstlong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "localconstulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "localconstfloat8p",
+        NULL
+    },
+    {
+        "local_const_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstlong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstfloat8restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localvolatileulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localvolatilefloat8p",
+        NULL
+    },
+    {
+        "local_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localvolatileulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localconstvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localconstvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localconstvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localconstvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localconstvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localconstvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localconstvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localconstvolatileulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localconstvolatilefloat8p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstvolatileulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "char8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "uchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "short8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "ushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "int8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "uint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "long8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "ulong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "float8d",
+        NULL
+    },
+    {
+        "const_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "constchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "constuchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "constshort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "constushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "constint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "constuint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "constlong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "constulong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "constfloat8d",
+        NULL
+    },
+    {
+        "private_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privatechar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateuchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateshort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateuint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privatelong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateulong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privatefloat8d",
+        NULL
+    },
+    {
+        "private_const_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privateconstchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateconstuchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateconstshort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateconstushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateconstint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateconstuint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privateconstlong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateconstulong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privateconstfloat8d",
+        NULL
+    },
+    {
+        "constant_vector16_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "constantchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "constantuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "constantshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "constantushort16p",
+        NULL
+    },
+    {
+        "constant_vector16_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "constantint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "constantuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "constantlong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "constantulong16p",
+    NULL
+    },
+    {
+        "constant_vector16_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "constantfloat16p",
+        NULL
+    },
+    {
+        "constant_vector16_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "constantchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "constantuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "constantshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "constantushort16restrictp",
+        NULL
+    },
+    {
+        "constant_vector16_restrict_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "constantint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "constantuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "constantlong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "constantulong16restrictp",
+    NULL
+    },
+    {
+        "constant_vector16_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "constantfloat16restrictp",
+        NULL
+    },
+    {
+        "global_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "globalchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "globaluchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "globalshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "globalushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "globalint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "globaluint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "globallong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "globalulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "globalfloat16p",
+        NULL
+    },
+    {
+        "global_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globaluchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globaluint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globallong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalfloat16restrictp",
+        NULL
+    },
+    {
+        "global_const_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "globalconstchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "globalconstuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "globalconstshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "globalconstushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "globalconstint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "globalconstuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "globalconstlong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "globalconstulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "globalconstfloat16p",
+        NULL
+    },
+    {
+        "global_const_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstlong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstfloat16restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalvolatileulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalvolatilefloat16p",
+        NULL
+    },
+    {
+        "global_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalvolatileulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalconstvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalconstvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalconstvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalconstvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalconstvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalconstvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalconstvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalconstvolatileulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalconstvolatilefloat16p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstvolatileulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "local_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "localchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "localuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "localshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "localushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "localint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "localuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "locallong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "localulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "localfloat16p",
+        NULL
+    },
+    {
+        "local_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "locallong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localfloat16restrictp",
+        NULL
+    },
+    {
+        "local_const_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "localconstchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "localconstuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "localconstshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "localconstushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "localconstint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "localconstuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "localconstlong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "localconstulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "localconstfloat16p",
+        NULL
+    },
+    {
+        "local_const_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstlong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstfloat16restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localvolatileulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localvolatilefloat16p",
+        NULL
+    },
+    {
+        "local_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localvolatileulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localconstvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localconstvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localconstvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localconstvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localconstvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localconstvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localconstvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localconstvolatileulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localconstvolatilefloat16p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstvolatileulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "char16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "uchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "short16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "ushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "int16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "uint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "long16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "ulong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "float16d",
+        NULL
+    },
+    {
+        "const_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "constchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "constuchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "constshort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "constushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "constint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "constuint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "constlong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "constulong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "constfloat16d",
+        NULL
+    },
+    {
+        "private_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privatechar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateuchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateshort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateuint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privatelong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateulong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privatefloat16d",
+        NULL
+    },
+    {
+        "private_const_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privateconstchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateconstuchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateconstshort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateconstushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateconstint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateconstuint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privateconstlong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateconstulong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privateconstfloat16d",
+        NULL
+    },
+    {
+        "constant_derived_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "constanttypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "constantstructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "constanttypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "constantunionunion_typep",
+        NULL
+    },
+    {
+        "constant_derived_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "constanttypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "constantenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "constanttypedef_enum_typep",
+        NULL
+    },
+    {
+        "constant_derived_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "constanttypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "constantstructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "constanttypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "constantunionunion_typerestrictp",
+        NULL
+    },
+    {
+        "constant_derived_restrict_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "constanttypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "constantenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "constanttypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "globaltypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "globalstructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "globaltypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", "globalunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "globaltypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "globalenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "globaltypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globaltypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalstructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globaltypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globaltypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globaltypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_const_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "globalconsttypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "globalconststructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "globalconsttypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "globalconstunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "globalconsttypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "globalconstenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "globalconsttypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_const_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconsttypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconststructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconsttypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconsttypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconsttypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalconstvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalconstvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalconstvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalconstvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalconstvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_const_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconstvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconstvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconstvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "localtypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "localstructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "localtypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", "localunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "localtypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "localenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "localtypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localtypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localstructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localtypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localtypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localtypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_const_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "localconsttypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "localconststructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "localconsttypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "localconstunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "localconsttypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "localconstenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "localconsttypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_const_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconsttypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconststructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconsttypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconsttypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconsttypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localconstvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localconstvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localconstvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localconstvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localconstvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localconstvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localconstvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_const_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconstvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconstvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconstvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconstvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconstvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "typedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "structstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "typedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "unionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "typedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "enumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "typedef_enum_typed",
+        NULL
+    },
+    {
+        "const_derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "consttypedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "conststructstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "consttypedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "constunionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "consttypedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "constenumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "consttypedef_enum_typed",
+        NULL
+    },
+    {
+        "private_derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privatetypedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privatestructstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privatetypedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateunionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privatetypedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateenumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privatetypedef_enum_typed",
+        NULL
+    },
+    {
+        "private_const_derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privateconsttypedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privateconststructstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privateconsttypedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateconstunionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privateconsttypedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateconstenumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privateconsttypedef_enum_typed",
+        NULL
+    },
+};
+
+// Optional image support: OpenCL C source for the empty `image_d` kernel, which takes a read_only and a write_only parameter of each image type plus one sampler_t.
+static const char * image_kernel_args[] = { // two elements: the kernel source (adjacent literals below are concatenated), then a lone "\n"
+    "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable\n" // the kernel below declares a write_only image3d_t parameter
+    "kernel void image_d(read_only image2d_t image2d_td0,\n" // each parameter name is its type name followed by `d` and its zero-based position
+    "                    write_only image2d_t image2d_td1,\n"
+    "                    read_only image3d_t image3d_td2,\n"
+    "                    write_only image3d_t image3d_td3,\n"
+    "                    read_only image2d_array_t image2d_array_td4,\n"
+    "                    write_only image2d_array_t image2d_array_td5,\n"
+    "                    read_only image1d_t image1d_td6,\n"
+    "                    write_only image1d_t image1d_td7,\n"
+    "                    read_only image1d_buffer_t image1d_buffer_td8,\n"
+    "                    write_only image1d_buffer_t image1d_buffer_td9,\n"
+    "                    read_only image1d_array_t image1d_array_td10,\n"
+    "                    write_only image1d_array_t image1d_array_td11,\n"
+    "                    sampler_t sampler_td12)\n" // 13th and last parameter; no access qualifier
+    "{}\n", // empty kernel body; this comma ends the first array element
+    "\n" // second, separate element containing only a newline
+};
+
+static const char * image_arg_info[][67] = { // row layout: kernel name, then 5 entries per argument, then NULL; 67 = 1 + 13*5 + 1 for image_d's 13 arguments
+    {
+        "image_d", // kernel name; matches the kernel declared in image_kernel_args
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td0", // per argument: address qualifier, access qualifier, type qualifier (CL constants cast into pointer slots, not real strings), type name, argument name
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td1", // NOTE(review): presumably these are the values expected back from clGetKernelArgInfo -- confirm against the consumer
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td4",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td5",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td6",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td7",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td8",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td9",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td10",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td11",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "sampler_t", "sampler_td12", // the sampler is listed as ADDRESS_PRIVATE / ACCESS_NONE, unlike the image arguments above
+        NULL // marks the end of this row's argument list
+    },
+};
+
+// Optional double support: OpenCL C sources for 18 empty kernels -- for scalar double and double2/3/4/8/16, a `_p` kernel (constant/global pointers), a `_p2` kernel (global/local pointers) and a `_d` kernel (by-value).
+static const char * double_kernel_args[] = { // NOTE(review): no cl_khr_fp64 pragma appears in these sources; presumably the consumer enables it or checks device support -- confirm
+    "kernel void double_scalar_p(constant double*constantdoublep,\n" // parameter names spell out the qualifiers and type, ending in p for pointer arguments
+    "                            constant double *restrict constantdoublerestrictp,\n"
+    "                            global double*globaldoublep,\n"
+    "                            global double *restrict globaldoublerestrictp,\n"
+    "                            global const double* globalconstdoublep,\n"
+    "                            global const double * restrict globalconstdoublerestrictp,\n"
+    "                            global volatile double*globalvolatiledoublep,\n"
+    "                            global volatile double *restrict globalvolatiledoublerestrictp,\n"
+    "                            global const volatile double* globalconstvolatiledoublep)\n"
+    "{}\n", // empty kernel body; this comma ends the array element
+    "\n" // no comma: this blank line is concatenated onto the front of the next kernel's source
+    "kernel void double_scalar_p2(global const volatile double * restrict globalconstvolatiledoublerestrictp,\n" // remaining global combination, then the local-pointer variants
+    "                             local double*localdoublep,\n"
+    "                             local double *restrict localdoublerestrictp,\n"
+    "                             local const double* localconstdoublep,\n"
+    "                             local const double * restrict localconstdoublerestrictp,\n"
+    "                             local volatile double*localvolatiledoublep,\n"
+    "                             local volatile double *restrict localvolatiledoublerestrictp,\n"
+    "                             local const volatile double* localconstvolatiledoublep,\n"
+    "                             local const volatile double * restrict localconstvolatiledoublerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_scalar_d(double doubled,\n" // by-value arguments; names end in d
+    "                            const double constdoubled,\n"
+    "                            private double privatedoubled,\n"
+    "                            private const double privateconstdoubled)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector2_p(constant double2*constantdouble2p,\n" // the same three-kernel pattern repeats below for double2, double3, double4, double8 and double16
+    "                             constant double2 *restrict constantdouble2restrictp,\n"
+    "                             global double2*globaldouble2p,\n"
+    "                             global double2 *restrict globaldouble2restrictp,\n"
+    "                             global const double2* globalconstdouble2p,\n"
+    "                             global const double2 * restrict globalconstdouble2restrictp,\n"
+    "                             global volatile double2*globalvolatiledouble2p,\n"
+    "                             global volatile double2 *restrict globalvolatiledouble2restrictp,\n"
+    "                             global const volatile double2* globalconstvolatiledouble2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector2_p2(global const volatile double2 * restrict globalconstvolatiledouble2restrictp,\n"
+    "                              local double2*localdouble2p,\n"
+    "                              local double2 *restrict localdouble2restrictp,\n"
+    "                              local const double2* localconstdouble2p,\n"
+    "                              local const double2 * restrict localconstdouble2restrictp,\n"
+    "                              local volatile double2*localvolatiledouble2p,\n"
+    "                              local volatile double2 *restrict localvolatiledouble2restrictp,\n"
+    "                              local const volatile double2* localconstvolatiledouble2p,\n"
+    "                              local const volatile double2 * restrict localconstvolatiledouble2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector2_d(double2 double2d,\n"
+    "                             const double2 constdouble2d,\n"
+    "                             private double2 privatedouble2d,\n"
+    "                             private const double2 privateconstdouble2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector3_p(constant double3*constantdouble3p,\n"
+    "                             constant double3 *restrict constantdouble3restrictp,\n"
+    "                             global double3*globaldouble3p,\n"
+    "                             global double3 *restrict globaldouble3restrictp,\n"
+    "                             global const double3* globalconstdouble3p,\n"
+    "                             global const double3 * restrict globalconstdouble3restrictp,\n"
+    "                             global volatile double3*globalvolatiledouble3p,\n"
+    "                             global volatile double3 *restrict globalvolatiledouble3restrictp,\n"
+    "                             global const volatile double3* globalconstvolatiledouble3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector3_p2(global const volatile double3 * restrict globalconstvolatiledouble3restrictp,\n"
+    "                              local double3*localdouble3p,\n"
+    "                              local double3 *restrict localdouble3restrictp,\n"
+    "                              local const double3* localconstdouble3p,\n"
+    "                              local const double3 * restrict localconstdouble3restrictp,\n"
+    "                              local volatile double3*localvolatiledouble3p,\n"
+    "                              local volatile double3 *restrict localvolatiledouble3restrictp,\n"
+    "                              local const volatile double3* localconstvolatiledouble3p,\n"
+    "                              local const volatile double3 * restrict localconstvolatiledouble3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector3_d(double3 double3d,\n"
+    "                             const double3 constdouble3d,\n"
+    "                             private double3 privatedouble3d,\n"
+    "                             private const double3 privateconstdouble3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector4_p(constant double4*constantdouble4p,\n"
+    "                             constant double4 *restrict constantdouble4restrictp,\n"
+    "                             global double4*globaldouble4p,\n"
+    "                             global double4 *restrict globaldouble4restrictp,\n"
+    "                             global const double4* globalconstdouble4p,\n"
+    "                             global const double4 * restrict globalconstdouble4restrictp,\n"
+    "                             global volatile double4*globalvolatiledouble4p,\n"
+    "                             global volatile double4 *restrict globalvolatiledouble4restrictp,\n"
+    "                             global const volatile double4* globalconstvolatiledouble4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector4_p2(global const volatile double4 * restrict globalconstvolatiledouble4restrictp,\n"
+    "                              local double4*localdouble4p,\n"
+    "                              local double4 *restrict localdouble4restrictp,\n"
+    "                              local const double4* localconstdouble4p,\n"
+    "                              local const double4 * restrict localconstdouble4restrictp,\n"
+    "                              local volatile double4*localvolatiledouble4p,\n"
+    "                              local volatile double4 *restrict localvolatiledouble4restrictp,\n"
+    "                              local const volatile double4* localconstvolatiledouble4p,\n"
+    "                              local const volatile double4 * restrict localconstvolatiledouble4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector4_d(double4 double4d,\n"
+    "                             const double4 constdouble4d,\n"
+    "                             private double4 privatedouble4d,\n"
+    "                             private const double4 privateconstdouble4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector8_p(constant double8*constantdouble8p,\n"
+    "                             constant double8 *restrict constantdouble8restrictp,\n"
+    "                             global double8*globaldouble8p,\n"
+    "                             global double8 *restrict globaldouble8restrictp,\n"
+    "                             global const double8* globalconstdouble8p,\n"
+    "                             global const double8 * restrict globalconstdouble8restrictp,\n"
+    "                             global volatile double8*globalvolatiledouble8p,\n"
+    "                             global volatile double8 *restrict globalvolatiledouble8restrictp,\n"
+    "                             global const volatile double8* globalconstvolatiledouble8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector8_p2(global const volatile double8 * restrict globalconstvolatiledouble8restrictp,\n"
+    "                              local double8*localdouble8p,\n"
+    "                              local double8 *restrict localdouble8restrictp,\n"
+    "                              local const double8* localconstdouble8p,\n"
+    "                              local const double8 * restrict localconstdouble8restrictp,\n"
+    "                              local volatile double8*localvolatiledouble8p,\n"
+    "                              local volatile double8 *restrict localvolatiledouble8restrictp,\n"
+    "                              local const volatile double8* localconstvolatiledouble8p,\n"
+    "                              local const volatile double8 * restrict localconstvolatiledouble8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector8_d(double8 double8d,\n"
+    "                             const double8 constdouble8d,\n"
+    "                             private double8 privatedouble8d,\n"
+    "                             private const double8 privateconstdouble8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector16_p(constant double16*constantdouble16p,\n"
+    "                              constant double16 *restrict constantdouble16restrictp,\n"
+    "                              global double16*globaldouble16p,\n"
+    "                              global double16 *restrict globaldouble16restrictp,\n"
+    "                              global const double16* globalconstdouble16p,\n"
+    "                              global const double16 * restrict globalconstdouble16restrictp,\n"
+    "                              global volatile double16*globalvolatiledouble16p,\n"
+    "                              global volatile double16 *restrict globalvolatiledouble16restrictp,\n"
+    "                              global const volatile double16* globalconstvolatiledouble16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector16_p2(global const volatile double16 * restrict globalconstvolatiledouble16restrictp,\n"
+    "                               local double16*localdouble16p,\n"
+    "                               local double16 *restrict localdouble16restrictp,\n"
+    "                               local const double16* localconstdouble16p,\n"
+    "                               local const double16 * restrict localconstdouble16restrictp,\n"
+    "                               local volatile double16*localvolatiledouble16p,\n"
+    "                               local volatile double16 *restrict localvolatiledouble16restrictp,\n"
+    "                               local const volatile double16* localconstvolatiledouble16p,\n"
+    "                               local const volatile double16 * restrict localconstvolatiledouble16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector16_d(double16 double16d,\n"
+    "                              const double16 constdouble16d,\n"
+    "                              private double16 privatedouble16d,\n"
+    "                              private const double16 privateconstdouble16d)\n"
+    "{}\n",
+    "\n" // final element holds only a newline; no kernel follows it
+};
+
+static const char * double_arg_info[][77] = {
+    {
+        "double_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "constantdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "constantdoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "globaldoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globaldoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "globalconstdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstdoublerestrictp",
+    (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalvolatiledoublep",
+    (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalvolatiledoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalconstvolatiledoublep",
+        NULL
+    },
+    {
+        "double_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstvolatiledoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "localdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localdoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "localconstdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstdoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localvolatiledoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localvolatiledoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localconstvolatiledoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstvolatiledoublerestrictp",
+        NULL
+    },
+    {
+        "double_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "doubled",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "constdoubled",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privatedoubled",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privateconstdoubled",
+        NULL
+    },
+    {
+        "double_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "constantdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "constantdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "globaldouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globaldouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "globalconstdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalvolatiledouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalvolatiledouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalconstvolatiledouble2p",
+        NULL
+    },
+    {
+        "double_vector2_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstvolatiledouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "localdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "localconstdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localvolatiledouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localvolatiledouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localconstvolatiledouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstvolatiledouble2restrictp",
+        NULL
+    },
+    {
+        "double_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "double2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "constdouble2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privatedouble2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privateconstdouble2d",
+        NULL
+    },
+    {
+        "double_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "constantdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "constantdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "globaldouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globaldouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "globalconstdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalvolatiledouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalvolatiledouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalconstvolatiledouble3p",
+        NULL
+    },
+    {
+        "double_vector3_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstvolatiledouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "localdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "localconstdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localvolatiledouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localvolatiledouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localconstvolatiledouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstvolatiledouble3restrictp",
+        NULL
+    },
+    {
+        "double_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "double3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "constdouble3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privatedouble3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privateconstdouble3d",
+        NULL
+    },
+    {
+        "double_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "constantdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "constantdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "globaldouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globaldouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "globalconstdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalvolatiledouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalvolatiledouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalconstvolatiledouble4p",
+        NULL
+    },
+    {
+        "double_vector4_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstvolatiledouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "localdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "localconstdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localvolatiledouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localvolatiledouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localconstvolatiledouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstvolatiledouble4restrictp",
+        NULL
+    },
+    {
+        "double_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "double4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "constdouble4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privatedouble4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privateconstdouble4d",
+        NULL
+    },
+    {
+        "double_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "constantdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "constantdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "globaldouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globaldouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "globalconstdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalvolatiledouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalvolatiledouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalconstvolatiledouble8p",
+        NULL
+    },
+    {
+        "double_vector8_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstvolatiledouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "localdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "localconstdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localvolatiledouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localvolatiledouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localconstvolatiledouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstvolatiledouble8restrictp",
+        NULL
+    },
+    {
+        "double_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "double8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "constdouble8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privatedouble8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privateconstdouble8d",
+        NULL
+    },
+    {
+        "double_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "constantdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "constantdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "globaldouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globaldouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "globalconstdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalvolatiledouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalvolatiledouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalconstvolatiledouble16p",
+        NULL
+    },
+    {
+        "double_vector16_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstvolatiledouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "localdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "localconstdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localvolatiledouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localvolatiledouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localconstvolatiledouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstvolatiledouble16restrictp",
+        NULL
+    },
+    {
+        "double_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "double16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "constdouble16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privatedouble16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privateconstdouble16d",
+        NULL
+    },
+};
+
+// Support for the optional half data type: OpenCL C kernel sources, one array element per kernel.
+// For half and half2/3/4/8/16: *_p (constant/global pointers), *_p2 (global/local pointers), *_d (by value).
+static const char * half_kernel_args[] = {
+    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" // pragma shares the first element with half_scalar_p
+    "\n"
+    "kernel void half_scalar_p(constant half*constanthalfp,\n"
+    "                          constant half *restrict constanthalfrestrictp,\n"
+    "                          global half*globalhalfp,\n"
+    "                          global half *restrict globalhalfrestrictp,\n"
+    "                          global const half* globalconsthalfp,\n"
+    "                          global const half * restrict globalconsthalfrestrictp,\n"
+    "                          global volatile half*globalvolatilehalfp,\n"
+    "                          global volatile half *restrict globalvolatilehalfrestrictp,\n"
+    "                          global const volatile half* globalconstvolatilehalfp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_scalar_p2(global const volatile half * restrict globalconstvolatilehalfrestrictp,\n"
+    "                           local half*localhalfp,\n"
+    "                           local half *restrict localhalfrestrictp,\n"
+    "                           local const half* localconsthalfp,\n"
+    "                           local const half * restrict localconsthalfrestrictp,\n"
+    "                           local volatile half*localvolatilehalfp,\n"
+    "                           local volatile half *restrict localvolatilehalfrestrictp,\n"
+    "                           local const volatile half* localconstvolatilehalfp,\n"
+    "                           local const volatile half * restrict localconstvolatilehalfrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_scalar_d(half halfd,\n"
+    "                          const half consthalfd,\n"
+    "                          private half privatehalfd,\n"
+    "                          private const half privateconsthalfd)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector2_p(constant half2*constanthalf2p,\n"
+    "                           constant half2 *restrict constanthalf2restrictp,\n"
+    "                           global half2*globalhalf2p,\n"
+    "                           global half2 *restrict globalhalf2restrictp,\n"
+    "                           global const half2* globalconsthalf2p,\n"
+    "                           global const half2 * restrict globalconsthalf2restrictp,\n"
+    "                           global volatile half2*globalvolatilehalf2p,\n"
+    "                           global volatile half2 *restrict globalvolatilehalf2restrictp,\n"
+    "                           global const volatile half2* globalconstvolatilehalf2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector2_p2(global const volatile half2 * restrict globalconstvolatilehalf2restrictp,\n"
+    "                            local half2*localhalf2p,\n"
+    "                            local half2 *restrict localhalf2restrictp,\n"
+    "                            local const half2* localconsthalf2p,\n"
+    "                            local const half2 * restrict localconsthalf2restrictp,\n"
+    "                            local volatile half2*localvolatilehalf2p,\n"
+    "                            local volatile half2 *restrict localvolatilehalf2restrictp,\n"
+    "                            local const volatile half2* localconstvolatilehalf2p,\n"
+    "                            local const volatile half2 * restrict localconstvolatilehalf2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector2_d(half2 half2d,\n"
+    "                           const half2 consthalf2d,\n"
+    "                           private half2 privatehalf2d,\n"
+    "                           private const half2 privateconsthalf2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector3_p(constant half3*constanthalf3p,\n"
+    "                           constant half3 *restrict constanthalf3restrictp,\n"
+    "                           global half3*globalhalf3p,\n"
+    "                           global half3 *restrict globalhalf3restrictp,\n"
+    "                           global const half3* globalconsthalf3p,\n"
+    "                           global const half3 * restrict globalconsthalf3restrictp,\n"
+    "                           global volatile half3*globalvolatilehalf3p,\n"
+    "                           global volatile half3 *restrict globalvolatilehalf3restrictp,\n"
+    "                           global const volatile half3* globalconstvolatilehalf3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector3_p2(global const volatile half3 * restrict globalconstvolatilehalf3restrictp,\n"
+    "                            local half3*localhalf3p,\n"
+    "                            local half3 *restrict localhalf3restrictp,\n"
+    "                            local const half3* localconsthalf3p,\n"
+    "                            local const half3 * restrict localconsthalf3restrictp,\n"
+    "                            local volatile half3*localvolatilehalf3p,\n"
+    "                            local volatile half3 *restrict localvolatilehalf3restrictp,\n"
+    "                            local const volatile half3* localconstvolatilehalf3p,\n"
+    "                            local const volatile half3 * restrict localconstvolatilehalf3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector3_d(half3 half3d,\n"
+    "                           const half3 consthalf3d,\n"
+    "                           private half3 privatehalf3d,\n"
+    "                           private const half3 privateconsthalf3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector4_p(constant half4*constanthalf4p,\n"
+    "                           constant half4 *restrict constanthalf4restrictp,\n"
+    "                           global half4*globalhalf4p,\n"
+    "                           global half4 *restrict globalhalf4restrictp,\n"
+    "                           global const half4* globalconsthalf4p,\n"
+    "                           global const half4 * restrict globalconsthalf4restrictp,\n"
+    "                           global volatile half4*globalvolatilehalf4p,\n"
+    "                           global volatile half4 *restrict globalvolatilehalf4restrictp,\n"
+    "                           global const volatile half4* globalconstvolatilehalf4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector4_p2(global const volatile half4 * restrict globalconstvolatilehalf4restrictp,\n"
+    "                            local half4*localhalf4p,\n"
+    "                            local half4 *restrict localhalf4restrictp,\n"
+    "                            local const half4* localconsthalf4p,\n"
+    "                            local const half4 * restrict localconsthalf4restrictp,\n"
+    "                            local volatile half4*localvolatilehalf4p,\n"
+    "                            local volatile half4 *restrict localvolatilehalf4restrictp,\n"
+    "                            local const volatile half4* localconstvolatilehalf4p,\n"
+    "                            local const volatile half4 * restrict localconstvolatilehalf4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector4_d(half4 half4d,\n"
+    "                           const half4 consthalf4d,\n"
+    "                           private half4 privatehalf4d,\n"
+    "                           private const half4 privateconsthalf4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector8_p(constant half8*constanthalf8p,\n"
+    "                           constant half8 *restrict constanthalf8restrictp,\n"
+    "                           global half8*globalhalf8p,\n"
+    "                           global half8 *restrict globalhalf8restrictp,\n"
+    "                           global const half8* globalconsthalf8p,\n"
+    "                           global const half8 * restrict globalconsthalf8restrictp,\n"
+    "                           global volatile half8*globalvolatilehalf8p,\n"
+    "                           global volatile half8 *restrict globalvolatilehalf8restrictp,\n"
+    "                           global const volatile half8* globalconstvolatilehalf8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector8_p2(global const volatile half8 * restrict globalconstvolatilehalf8restrictp,\n"
+    "                            local half8*localhalf8p,\n"
+    "                            local half8 *restrict localhalf8restrictp,\n"
+    "                            local const half8* localconsthalf8p,\n"
+    "                            local const half8 * restrict localconsthalf8restrictp,\n"
+    "                            local volatile half8*localvolatilehalf8p,\n"
+    "                            local volatile half8 *restrict localvolatilehalf8restrictp,\n"
+    "                            local const volatile half8* localconstvolatilehalf8p,\n"
+    "                            local const volatile half8 * restrict localconstvolatilehalf8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector8_d(half8 half8d,\n"
+    "                           const half8 consthalf8d,\n"
+    "                           private half8 privatehalf8d,\n"
+    "                           private const half8 privateconsthalf8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector16_p(constant half16*constanthalf16p,\n"
+    "                            constant half16 *restrict constanthalf16restrictp,\n"
+    "                            global half16*globalhalf16p,\n"
+    "                            global half16 *restrict globalhalf16restrictp,\n"
+    "                            global const half16* globalconsthalf16p,\n"
+    "                            global const half16 * restrict globalconsthalf16restrictp,\n"
+    "                            global volatile half16*globalvolatilehalf16p,\n"
+    "                            global volatile half16 *restrict globalvolatilehalf16restrictp,\n"
+    "                            global const volatile half16* globalconstvolatilehalf16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector16_p2(global const volatile half16 * restrict globalconstvolatilehalf16restrictp,\n"
+    "                             local half16*localhalf16p,\n"
+    "                             local half16 *restrict localhalf16restrictp,\n"
+    "                             local const half16* localconsthalf16p,\n"
+    "                             local const half16 * restrict localconsthalf16restrictp,\n"
+    "                             local volatile half16*localvolatilehalf16p,\n"
+    "                             local volatile half16 *restrict localvolatilehalf16restrictp,\n"
+    "                             local const volatile half16* localconstvolatilehalf16p,\n"
+    "                             local const volatile half16 * restrict localconstvolatilehalf16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector16_d(half16 half16d,\n"
+    "                            const half16 consthalf16d,\n"
+    "                            private half16 privatehalf16d,\n"
+    "                            private const half16 privateconsthalf16d)\n"
+    "{}\n",
+    "\n" // final element contains only a newline, no kernel
+};
+
+static const char * half_arg_info[][77] = { // Expected argument metadata for the half-type kernels, one row per kernel; looks like the arg_info input of test() below -- confirm at the call site.
+    { // Row layout: [0] kernel name, then 5 entries per argument, then NULL; slots of the 77 that a row does not list are zero-initialized.
+        "half_scalar_p", // kernel name; constant- and global-pointer variants of half
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "constanthalfp", // per argument: address qualifier, access qualifier, type qualifier, type name, argument name
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "constanthalfrestrictp", // qualifier enums are cast to const char * so they fit in a table of strings
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "globalhalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalhalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "globalconsthalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconsthalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalvolatilehalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalvolatilehalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalconstvolatilehalfp",
+        NULL // NULL ends this kernel's argument list
+    }, // NOTE(review): 77 = 1 + 15*5 + 1, but no row here lists more than 9 arguments (47 slots) -- presumably sized for sibling tables; confirm
+    {
+        "half_scalar_p2", // kernel name; the global const volatile restrict pointer, then every local-pointer variant of half
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconstvolatilehalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "localhalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localhalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "localconsthalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconsthalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localvolatilehalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localvolatilehalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localconstvolatilehalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconstvolatilehalfrestrictp",
+        NULL
+    },
+    {
+        "half_scalar_d", // kernel name; by-value half arguments: every entry is private with type qualifier NONE, including the const-named ones
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "halfd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "consthalfd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "privatehalfd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "privateconsthalfd",
+        NULL
+    },
+    {
+        "half_vector2_p", // same pattern as half_scalar_p, for half2
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "constanthalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "constanthalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "globalhalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalhalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "globalconsthalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconsthalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalvolatilehalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalvolatilehalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalconstvolatilehalf2p",
+        NULL
+    },
+    {
+        "half_vector2_p2", // same pattern as half_scalar_p2, for half2
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconstvolatilehalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "localhalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localhalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "localconsthalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconsthalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localvolatilehalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localvolatilehalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localconstvolatilehalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconstvolatilehalf2restrictp",
+        NULL
+    },
+    {
+        "half_vector2_d", // same pattern as half_scalar_d, for half2
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "half2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "consthalf2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "privatehalf2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "privateconsthalf2d",
+        NULL
+    },
+    {
+        "half_vector3_p", // same pattern as half_scalar_p, for half3
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "constanthalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "constanthalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "globalhalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalhalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "globalconsthalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconsthalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalvolatilehalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalvolatilehalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalconstvolatilehalf3p",
+        NULL
+    },
+    {
+        "half_vector3_p2", // same pattern as half_scalar_p2, for half3
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconstvolatilehalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "localhalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localhalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "localconsthalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconsthalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localvolatilehalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localvolatilehalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localconstvolatilehalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconstvolatilehalf3restrictp",
+        NULL
+    },
+    {
+        "half_vector3_d", // same pattern as half_scalar_d, for half3
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "half3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "consthalf3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "privatehalf3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "privateconsthalf3d",
+        NULL
+    },
+    {
+        "half_vector4_p", // same pattern as half_scalar_p, for half4
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "constanthalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "constanthalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "globalhalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalhalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "globalconsthalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconsthalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalvolatilehalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalvolatilehalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalconstvolatilehalf4p",
+        NULL
+    },
+    {
+        "half_vector4_p2", // same pattern as half_scalar_p2, for half4
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconstvolatilehalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "localhalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localhalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "localconsthalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconsthalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localvolatilehalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localvolatilehalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localconstvolatilehalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconstvolatilehalf4restrictp",
+        NULL
+    },
+    {
+        "half_vector4_d", // same pattern as half_scalar_d, for half4
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "half4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "consthalf4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "privatehalf4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "privateconsthalf4d",
+        NULL
+    },
+    {
+        "half_vector8_p", // same pattern as half_scalar_p, for half8
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "constanthalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "constanthalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "globalhalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalhalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "globalconsthalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconsthalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalvolatilehalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalvolatilehalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalconstvolatilehalf8p",
+        NULL
+    },
+    {
+        "half_vector8_p2", // same pattern as half_scalar_p2, for half8
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconstvolatilehalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "localhalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localhalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "localconsthalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconsthalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localvolatilehalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localvolatilehalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localconstvolatilehalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconstvolatilehalf8restrictp",
+        NULL
+    },
+    {
+        "half_vector8_d", // same pattern as half_scalar_d, for half8
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "half8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "consthalf8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "privatehalf8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "privateconsthalf8d",
+        NULL
+    },
+    {
+        "half_vector16_p", // same pattern as half_scalar_p, for half16
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "constanthalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "constanthalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "globalhalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalhalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "globalconsthalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconsthalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalvolatilehalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalvolatilehalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalconstvolatilehalf16p",
+        NULL
+    },
+    {
+        "half_vector16_p2", // same pattern as half_scalar_p2, for half16
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconstvolatilehalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "localhalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localhalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "localconsthalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconsthalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localvolatilehalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localvolatilehalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localconstvolatilehalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconstvolatilehalf16restrictp",
+        NULL
+    },
+    {
+        "half_vector16_d", // same pattern as half_scalar_d, for half16
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "half16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "consthalf16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "privatehalf16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "privateconsthalf16d",
+        NULL
+    },
+};
+
+
+template<typename arg_info_t>
+int test(cl_device_id deviceID, cl_context context, kernel_args_t kernel_args, cl_uint lines_count, arg_info_t arg_info, size_t total_kernels_in_program) {
+
+    const size_t max_name_len = 512;
+    cl_char name[ max_name_len ];
+    cl_uint arg_count, numArgs;
+    size_t i, j, size;
+    int error;
+
+    clProgramWrapper program =
+    clCreateProgramWithSource(context, lines_count, kernel_args, NULL, &error);
+    if ( program == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to create required arguments kernel program" );
+        return -1;
+    }
+
+    // Compile the program
+    log_info( "Building kernels...\n" );
+    clBuildProgram( program, 1, &deviceID, "-cl-kernel-arg-info", NULL, NULL );
+
+    // check for build errors and exit if things didn't work
+    size_t size_ret;
+    cl_build_status build_status;
+    error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof(build_status), &build_status, &size_ret);
+    test_error( error, "Unable to query build status" );
+    if (build_status == CL_BUILD_ERROR) {
+        printf("CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);
+        error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
+        test_error( error, "Unable to get build log size" );
+        char *build_log = (char *)malloc(size_ret);
+        error = clGetProgramBuildInfo(program,deviceID, CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret);
+        test_error( error, "Unable to get build log" );
+        printf("CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
+        printf("CL_BUILD_ERROR. exiting\n");
+        free(build_log);
+        return -1;
+    }
+
+    // Lookup the number of kernels in the program.
+    log_info( "Testing kernels...\n" );
+    size_t total_kernels = 0;
+    error = clGetProgramInfo( program, CL_PROGRAM_NUM_KERNELS, sizeof( size_t ), &total_kernels, NULL );
+    test_error( error, "Unable to get program info num kernels" );
+
+    if ( total_kernels != total_kernels_in_program )
+    {
+        print_error( error, "Program did not build all kernels" );
+        return -1;
+    }
+
+    // Lookup the kernel names.
+    size_t kernel_names_len = 0;
+    error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, 0, NULL, &kernel_names_len );
+    test_error( error, "Unable to get length of kernel names list." );
+
+    size_t expected_kernel_names_len = 0;
+    for ( i = 0; i < total_kernels; ++i )
+    {
+        expected_kernel_names_len += 1 + strlen( arg_info[ i ][ 0 ] );
+    }
+    if ( kernel_names_len != expected_kernel_names_len )
+    {
+        log_error( "Kernel names string is not the right length, expected %d, got %d\n", (int) expected_kernel_names_len, (int) kernel_names_len );
+        return -1;
+    }
+
+    const size_t len = ( kernel_names_len + 1 ) * sizeof( char );
+    char* kernel_names = (char*) malloc( len );
+    error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, len, kernel_names, &kernel_names_len );
+    test_error( error, "Unable to get kernel names list." );
+
+    // Check to see if the kernel name array is null terminated.
+    if ( kernel_names[ kernel_names_len - 1 ] != '\0' )
+    {
+        free( kernel_names );
+        print_error( error, "Kernel name list was not null terminated" );
+        return -1;
+    }
+
+    // Check to see if the correct kernel name string was returned.
+    // Does the string contain each expected kernel name?
+    for ( i = 0; i < total_kernels; ++i )
+        if ( !strstr( kernel_names, arg_info[ i ][ 0 ] ) )
+            break;
+    if ( i != total_kernels )
+    {
+        log_error( "Kernel names string is missing \"%s\"\n", arg_info[ i ][ 0 ] );
+        free( kernel_names );
+        return -1;
+    }
+
+    // Are the kernel names delimited by ';'?
+    if ( !strtok( kernel_names, ";" ) )
+    {
+        error = -1;
+    }
+    else
+    {
+        for ( i = 1; i < total_kernels; ++i )
+        {
+            if ( !strtok( NULL, ";" ) )
+            {
+                error = -1;
+            }
+        }
+    }
+    if ( error )
+    {
+        log_error( "Kernel names string was not properly delimited by ';'\n" );
+        free( kernel_names );
+        return -1;
+    }
+    free( kernel_names );
+
+    // Create kernel objects and query them.
+    int rc = 0;
+    for ( i = 0; i < total_kernels; ++i )
+    {
+        int kernel_rc = 0;
+        const char* kernel_name = arg_info[ i ][ 0 ];
+        clKernelWrapper kernel = clCreateKernel(program, kernel_name, &error);
+        if( kernel == NULL || error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Could not get kernel: %s\n", kernel_name );
+            kernel_rc = -1;
+        }
+
+        if(kernel_rc == 0)
+        {
+            // Determine the expected number of arguments.
+            arg_count = 0;
+            while (arg_info[ i ][ (ARG_INFO_FIELD_COUNT * arg_count) + 1 ] != NULL)
+                ++arg_count;
+
+            // Try to get the number of arguments.
+            error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &size );
+            test_error( error, "Unable to get kernel arg count param size" );
+            if( size != sizeof( numArgs ) )
+            {
+                log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d) for kernel: %s\n", (int)sizeof( numArgs ), (int)size, kernel_name );
+                kernel_rc = -1;
+            }
+        }
+
+
+        if(kernel_rc == 0)
+        {
+            error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
+            test_error( error, "Unable to get kernel arg count" );
+            if( numArgs != arg_count )
+            {
+                log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d) for kernel: %s\n", arg_count, numArgs, kernel_name );
+                kernel_rc = -1;
+            }
+        }
+
+        if(kernel_rc == 0)
+        {
+            for ( j = 0; j < numArgs; ++j )
+            {
+
+                int arg_rc = 0;
+                cl_kernel_arg_address_qualifier expected_address_qualifier = (cl_kernel_arg_address_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ADDR_OFFSET ];
+                cl_kernel_arg_access_qualifier expected_access_qualifier =  (cl_kernel_arg_access_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ACCESS_OFFSET ];
+                cl_kernel_arg_type_qualifier expected_type_qualifier = (cl_kernel_arg_type_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_QUAL_OFFSET ];
+                const char* expected_type_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_NAME_OFFSET ];
+                const char* expected_arg_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ARG_NAME_OFFSET ];
+
+                // Try to get the address qualifier of each argument.
+                cl_kernel_arg_address_qualifier address_qualifier = 0;
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof address_qualifier, &address_qualifier, &size );
+                test_error( error, "Unable to get argument address qualifier" );
+                error = (address_qualifier != expected_address_qualifier);
+                if ( error )
+                {
+                    log_error( "ERROR: Bad address qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_address_qualifier, (unsigned int)address_qualifier );
+                    arg_rc = -1;
+                }
+
+                // Try to get the access qualifier of each argument.
+                cl_kernel_arg_access_qualifier access_qualifier = 0;
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof access_qualifier, &access_qualifier, &size );
+                test_error( error, "Unable to get argument access qualifier" );
+                error = (access_qualifier != expected_access_qualifier);
+                if ( error )
+                {
+                    log_error( "ERROR: Bad access qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_access_qualifier, (unsigned int)access_qualifier );
+                    arg_rc = -1;
+                }
+
+                // Try to get the type qualifier of each argument.
+                cl_kernel_arg_type_qualifier arg_type_qualifier = 0;
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof arg_type_qualifier, &arg_type_qualifier, &size );
+                test_error( error, "Unable to get argument type qualifier" );
+                error = (arg_type_qualifier != expected_type_qualifier);
+                if ( error )
+                {
+                    log_error( "ERROR: Bad type qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_type_qualifier, (unsigned int)arg_type_qualifier );
+                    arg_rc = -1;
+                }
+
+                // Try to get the type of each argument.
+                memset( name, 0, max_name_len );
+                error = clGetKernelArgInfo(kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_NAME, max_name_len, name, &size );
+                test_error( error, "Unable to get argument type name" );
+                error = strcmp( (const char*) name, expected_type_name );
+                if ( error )
+                {
+                    log_error( "ERROR: Bad argument type name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_type_name, name );
+                    arg_rc = -1;
+                }
+
+                // Try to get the name of each argument.
+                memset( name, 0, max_name_len );
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_NAME, max_name_len, name, &size );
+                test_error( error, "Unable to get argument name" );
+                error = strcmp( (const char*) name, expected_arg_name );
+                if ( error )
+                {
+                    log_error( "ERROR: Bad argument name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_arg_name, name );
+                    arg_rc = -1;
+                }
+
+                if(arg_rc != 0) {
+                    kernel_rc = -1;
+                }
+            }
+        }
+
+        //log_info( "%s ... %s\n",arg_info[i][0],kernel_rc == 0 ? "passed" : "failed" );
+        if(kernel_rc != 0) {
+            rc = -1;
+        }
+    }
+  return rc;
+}
+
+
+int test_get_kernel_arg_info_compatibility( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    size_t size;
+    int error;
+
+    cl_bool supports_double = 0; // assume not
+    cl_bool supports_half = 0; // assume not
+  cl_bool supports_images = 0; // assume not
+
+    // Check if this device supports images
+  error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof supports_images, &supports_images, NULL);
+  test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
+
+  if (supports_images) {
+    log_info(" o Device supports images\n");
+    log_info(" o Expecting SUCCESS when testing image kernel arguments.\n");
+  }
+  else {
+    log_info(" o Device lacks image support\n");
+    log_info(" o Not testing image kernel arguments.\n");
+  }
+
+    if (is_extension_available(deviceID, "cl_khr_fp64")) {
+        log_info(" o Device claims extension 'cl_khr_fp64'\n");
+        log_info(" o Expecting SUCCESS when testing double kernel arguments.\n");
+        supports_double = 1;
+    } else {
+        cl_device_fp_config double_fp_config;
+        error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(double_fp_config), &double_fp_config, NULL);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed");
+        if (double_fp_config != 0)
+            supports_double = 1;
+        else {
+            log_info(" o Device lacks extension 'cl_khr_fp64'\n");
+            log_info(" o Not testing double kernel arguments.\n");
+            supports_double = 0;
+        }
+    }
+
+    if (is_extension_available(deviceID, "cl_khr_fp16")) {
+        log_info(" o Device claims extension 'cl_khr_fp16'\n");
+        log_info(" o Expecting SUCCESS when testing halfn* kernel arguments.\n");
+        supports_half = 1;
+    } else {
+        log_info(" o Device lacks extension 'cl_khr_fp16'\n");
+        log_info(" o Not testing halfn* kernel arguments.\n");
+        supports_half = 0;
+    }
+
+
+  int test_failed = 0;
+
+    // Now create a test program using required arguments
+  log_info("Testing required kernel arguments...\n");
+  error = test(deviceID, context, required_kernel_args, sizeof(required_kernel_args)/sizeof(required_kernel_args[0]), required_arg_info, sizeof(required_arg_info)/sizeof(required_arg_info[0]));
+  test_failed = (error) ? -1 : test_failed;
+
+  if ( supports_images ) {
+    log_info("Testing optional image arguments...\n");
+    error = test(deviceID, context, image_kernel_args, sizeof(image_kernel_args)/sizeof(image_kernel_args[0]), image_arg_info, sizeof(image_arg_info)/sizeof(image_arg_info[0]));
+    test_failed = (error) ? -1 : test_failed;
+  }
+
+    if ( supports_double ) {
+    log_info("Testing optional double arguments...\n");
+    error = test(deviceID, context, double_kernel_args, sizeof(double_kernel_args)/sizeof(double_kernel_args[0]), double_arg_info, sizeof(double_arg_info)/sizeof(double_arg_info[0]));
+    test_failed = (error) ? -1 : test_failed;
+  }
+
+    if ( supports_half ) {
+    log_info("Testing optional half arguments...\n");
+    error = test(deviceID, context, half_kernel_args, sizeof(half_kernel_args)/sizeof(half_kernel_args[0]), half_arg_info, sizeof(half_arg_info)/sizeof(half_arg_info[0]));
+    test_failed = (error) ? -1 : test_failed;
+  }
+
+    return test_failed;
+}
+
+

diff --git a/test_conformance/api/test_kernel_arg_multi_setup.cpp b/test_conformance/api/test_kernel_arg_multi_setup.cpp
index de3dc15..92c039e 100644
--- a/test_conformance/api/test_kernel_arg_multi_setup.cpp
+++ b/test_conformance/api/test_kernel_arg_multi_setup.cpp

@@ -66,39 +66,24 @@
 
     // Create input streams
     initData[ 0 ] = create_random_data( vec1Type, d, (unsigned int)threads[ 0 ] * vec1Size );
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       get_explicit_type_size(vec1Type) * threads[0] * vec1Size,
-                       initData[0], &error);
+    streams[ 0 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, initData[ 0 ], &error );
     test_error( error, "Unable to create testing stream" );
 
     initData[ 1 ] = create_random_data( vec2Type, d, (unsigned int)threads[ 0 ] * vec2Size );
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       get_explicit_type_size(vec2Type) * threads[0] * vec2Size,
-                       initData[1], &error);
+    streams[ 1 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, initData[ 1 ], &error );
     test_error( error, "Unable to create testing stream" );
 
     initData[ 2 ] = create_random_data( vec3Type, d, (unsigned int)threads[ 0 ] * vec3Size );
-    streams[2] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       get_explicit_type_size(vec3Type) * threads[0] * vec3Size,
-                       initData[2], &error);
+    streams[ 2 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, initData[ 2 ], &error );
     test_error( error, "Unable to create testing stream" );
 
-    streams[3] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        get_explicit_type_size(vec1Type) * threads[0] * vec1Size, NULL, &error);
+    streams[ 3 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, NULL, &error );
     test_error( error, "Unable to create testing stream" );
 
-    streams[4] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        get_explicit_type_size(vec2Type) * threads[0] * vec2Size, NULL, &error);
+    streams[ 4 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, NULL, &error );
     test_error( error, "Unable to create testing stream" );
 
-    streams[5] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        get_explicit_type_size(vec3Type) * threads[0] * vec3Size, NULL, &error);
+    streams[ 5 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, NULL, &error );
     test_error( error, "Unable to create testing stream" );
 
     // Set the arguments

diff --git a/test_conformance/api/test_kernel_attributes.cpp b/test_conformance/api/test_kernel_attributes.cpp
deleted file mode 100644
index 2e4e0a7..0000000
--- a/test_conformance/api/test_kernel_attributes.cpp
+++ /dev/null

@@ -1,339 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <iostream>
-#include <vector>
-#include <string>
-#include <algorithm>
-#include "procs.h"
-#include "harness/errorHelpers.h"
-#include "harness/typeWrappers.h"
-#include "harness/parseParameters.h"
-
-using KernelAttributes = std::vector<std::string>;
-
-static std::string generate_kernel_source(const KernelAttributes& attributes)
-{
-    std::string kernel;
-    for (auto attribute : attributes)
-    {
-        kernel += "__attribute__((" + attribute + "))\n";
-    }
-    kernel += "__kernel void test_kernel(){}";
-    return kernel;
-}
-
-
-using AttributePermutations = std::vector<KernelAttributes>;
-
-// The following combinations have been chosen as they place each of the
-// attribute types in the different orders that they can occur. While distinct
-// permutations would provide a complete overview of the API the sheer number of
-// combinations increases the runtime of this test by an unreasonable amount
-AttributePermutations vect_tests;
-AttributePermutations work_tests;
-AttributePermutations reqd_tests;
-
-AttributePermutations vect_reqd_tests;
-AttributePermutations work_vect_tests;
-AttributePermutations reqd_work_tests;
-
-AttributePermutations vect_work_reqd_tests;
-AttributePermutations work_reqd_vect_tests;
-AttributePermutations reqd_vect_work_tests;
-
-
-// Generate a vector with vec_type_hint(<data_type>) so that it can be used to
-// generate different kernels
-static KernelAttributes generate_vec_type_hint_data(cl_device_id deviceID)
-{
-    KernelAttributes vec_type_hint_data;
-    // TODO Test for signed vectors (char/short/int/etc)
-    std::vector<std::string> vector_types = { "uchar", "ushort", "uint",
-                                              "float" };
-    if (gHasLong)
-    {
-        vector_types.push_back("ulong");
-    }
-    if (device_supports_half(deviceID))
-    {
-        vector_types.push_back("half");
-    }
-    if (device_supports_double(deviceID))
-    {
-        vector_types.push_back("double");
-    }
-
-    const auto vector_sizes = { "2", "3", "4", "8", "16" };
-    for (auto type : vector_types)
-    {
-        for (auto size : vector_sizes)
-        {
-            vec_type_hint_data.push_back("vec_type_hint(" + type + size + ")");
-        }
-    }
-    return vec_type_hint_data;
-}
-
-
-struct WorkGroupDimensions
-{
-    int x;
-    int y;
-    int z;
-};
-
-// Generate vectors to store reqd_work_group_size(<dimensions>) and
-// work_group_size_hint(<dimensions>) so that they can be used to generate
-// different kernels
-static KernelAttributes generate_reqd_work_group_size_data(
-    const std::vector<WorkGroupDimensions>& work_group_dimensions)
-{
-    KernelAttributes reqd_work_group_size_data;
-    for (auto dimension : work_group_dimensions)
-    {
-        reqd_work_group_size_data.push_back(
-            "reqd_work_group_size(" + std::to_string(dimension.x) + ","
-            + std::to_string(dimension.y) + "," + std::to_string(dimension.z)
-            + ")");
-    }
-    return reqd_work_group_size_data;
-}
-
-static KernelAttributes generate_work_group_size_data(
-    const std::vector<WorkGroupDimensions>& work_group_dimensions)
-{
-    KernelAttributes work_group_size_hint_data;
-    for (auto dimension : work_group_dimensions)
-    {
-        work_group_size_hint_data.push_back(
-            "work_group_size_hint(" + std::to_string(dimension.x) + ","
-            + std::to_string(dimension.y) + "," + std::to_string(dimension.z)
-            + ")");
-    }
-    return work_group_size_hint_data;
-}
-
-// Populate the Global Vectors which store individual Kernel Attributes
-static void populate_single_attribute_tests(
-    // Vectors to store the different data that fill the attributes
-    const KernelAttributes& vec_type_hint_data,
-    const KernelAttributes& work_group_size_hint_data,
-    const KernelAttributes& reqd_work_group_size_data)
-{
-    for (auto vector_test : vec_type_hint_data)
-    {
-        // Initialise vec_type_hint attribute tests
-        vect_tests.push_back({ vector_test });
-    }
-    for (auto work_group_test : work_group_size_hint_data)
-    {
-
-        // Initialise work_group_size_hint attribute test
-        work_tests.push_back({ work_group_test });
-    }
-    for (auto reqd_work_group_test : reqd_work_group_size_data)
-    {
-
-        // Initialise reqd_work_group_size attribute tests
-        reqd_tests.push_back({ reqd_work_group_test });
-    }
-}
-
-// Populate the Global Vectors which store the different permutations of 2
-// Kernel Attributes
-static void populate_double_attribute_tests(
-    const KernelAttributes& vec_type_hint_data,
-    const KernelAttributes& work_group_size_hint_data,
-    const KernelAttributes& reqd_work_group_size_data)
-{
-    for (auto vector_test : vec_type_hint_data)
-    {
-        for (auto work_group_test : work_group_size_hint_data)
-        {
-            // Initialise the tests for the permutation of work_group_size_hint
-            // combined with vec_type_hint
-            work_vect_tests.push_back({ work_group_test, vector_test });
-        }
-        for (auto reqd_work_group_test : reqd_work_group_size_data)
-        {
-            // Initialise the tests for the permutation of vec_type_hint and
-            // reqd_work_group_size
-            vect_reqd_tests.push_back({ vector_test, reqd_work_group_test });
-        }
-    }
-    for (auto work_group_test : work_group_size_hint_data)
-    {
-
-        for (auto reqd_work_group_test : reqd_work_group_size_data)
-        {
-            // Initialse the tests for the permutation of reqd_work_group_size
-            // and  work_group_size_hint
-            reqd_work_tests.push_back(
-                { reqd_work_group_test, work_group_test });
-        }
-    }
-}
-
-// Populate the Global Vectors which store the different permutations of 3
-// Kernel Attributes
-static void populate_triple_attribute_tests(
-    const KernelAttributes& vec_type_hint_data,
-    const KernelAttributes& work_group_size_hint_data,
-    const KernelAttributes& reqd_work_group_size_data)
-{
-    for (auto vector_test : vec_type_hint_data)
-    {
-        for (auto work_group_test : work_group_size_hint_data)
-        {
-            for (auto reqd_work_group_test : reqd_work_group_size_data)
-            {
-                //  Initialise the chosen permutations of 3 attributes
-                vect_work_reqd_tests.push_back(
-                    { vector_test, work_group_test, reqd_work_group_test });
-                work_reqd_vect_tests.push_back(
-                    { work_group_test, reqd_work_group_test, vector_test });
-                reqd_vect_work_tests.push_back(
-                    { reqd_work_group_test, vector_test, work_group_test });
-            }
-        }
-    }
-}
-
-static const std::vector<AttributePermutations*>
-generate_attribute_tests(const KernelAttributes& vec_type_hint_data,
-                         const KernelAttributes& work_group_size_hint_data,
-                         const KernelAttributes& reqd_work_group_size_data)
-{
-    populate_single_attribute_tests(vec_type_hint_data,
-                                    work_group_size_hint_data,
-                                    reqd_work_group_size_data);
-    populate_double_attribute_tests(vec_type_hint_data,
-                                    work_group_size_hint_data,
-                                    reqd_work_group_size_data);
-    populate_triple_attribute_tests(vec_type_hint_data,
-                                    work_group_size_hint_data,
-                                    reqd_work_group_size_data);
-
-    // Store all of the filled vectors in a single structure
-    const std::vector<AttributePermutations*> all_tests = {
-        &vect_tests,           &work_tests,           &reqd_tests,
-
-        &work_vect_tests,      &vect_reqd_tests,      &reqd_work_tests,
-
-        &vect_work_reqd_tests, &work_reqd_vect_tests, &reqd_vect_work_tests
-    };
-    return all_tests;
-}
-
-static const std::vector<AttributePermutations*>
-initialise_attribute_data(cl_device_id deviceID)
-{
-    // This vector stores different work group dimensions that can be used by
-    // the reqd_work_group_size and work_group_size_hint attributes. It
-    // currently only has a single value to minimise time complexity of the
-    // overall test but can be easily changed.
-    static const std::vector<WorkGroupDimensions> work_group_dimensions = {
-        { 1, 1, 1 }
-    };
-    KernelAttributes vec_type_hint_data = generate_vec_type_hint_data(deviceID);
-    KernelAttributes work_group_size_hint_data =
-        generate_work_group_size_data(work_group_dimensions);
-    KernelAttributes reqd_work_group_size_data =
-        generate_reqd_work_group_size_data(work_group_dimensions);
-
-    // Generate all the permutations of attributes to create different test
-    // suites
-    return generate_attribute_tests(vec_type_hint_data,
-                                    work_group_size_hint_data,
-                                    reqd_work_group_size_data);
-}
-
-static bool run_test(cl_context context, cl_device_id deviceID,
-                     const AttributePermutations& permutations)
-{
-    bool success = true;
-    for (auto attribute_permutation : permutations)
-    {
-
-        std::string kernel_source_string =
-            generate_kernel_source(attribute_permutation);
-        const char* kernel_src = kernel_source_string.c_str();
-        clProgramWrapper program;
-        clKernelWrapper kernel;
-        cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                                 &kernel_src, "test_kernel");
-        test_error(err, "create_single_kernel_helper");
-
-        // Get the size of the kernel attribute string returned
-        size_t size = 0;
-        err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, 0, nullptr, &size);
-        test_error(err, "clGetKernelInfo");
-        std::vector<char> attributes(size);
-        err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, attributes.size(),
-                              attributes.data(), nullptr);
-        test_error(err, "clGetKernelInfo");
-        std::string attribute_string(attributes.data());
-        attribute_string.erase(
-            std::remove(attribute_string.begin(), attribute_string.end(), ' '),
-            attribute_string.end());
-        if (gCompilationMode != kOnline)
-        {
-            if (!attribute_string.empty())
-            {
-                success = false;
-                log_error("Error: Expected an empty string\n");
-                log_error("Attribute string reported as: %s\n",
-                          attribute_string.c_str());
-            }
-        }
-        else
-        {
-            bool permutation_success = true;
-            for (auto attribute : attribute_permutation)
-            {
-                if (attribute_string.find(attribute) == std::string::npos)
-                {
-                    success = false;
-                    permutation_success = false;
-                    log_error("ERROR: did not find expected attribute: '%s'\n",
-                              attribute.c_str());
-                }
-            }
-            if (!permutation_success)
-            {
-                log_error("Attribute string reported as: %s\n",
-                          attribute_string.c_str());
-            }
-        }
-    }
-    return success;
-}
-
-int test_kernel_attributes(cl_device_id deviceID, cl_context context,
-                           cl_command_queue queue, int num_elements)
-{
-    bool success = true;
-
-    // Vector to store all of the tests
-    const std::vector<AttributePermutations*> all_tests =
-        initialise_attribute_data(deviceID);
-
-    for (auto permutations : all_tests)
-    {
-        success = success && run_test(context, deviceID, *permutations);
-    }
-    return success ? TEST_PASS : TEST_FAIL;
-}

diff --git a/test_conformance/api/test_kernel_private_memory_size.cpp b/test_conformance/api/test_kernel_private_memory_size.cpp
deleted file mode 100644
index a789b4d..0000000
--- a/test_conformance/api/test_kernel_private_memory_size.cpp
+++ /dev/null

@@ -1,42 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/errorHelpers.h"
-#include "harness/typeWrappers.h"
-#include <iostream>
-#include "procs.h"
-
-int test_kernel_private_memory_size(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements)
-{
-    const char* TEST_KERNEL =
-        R"(__kernel void private_memory( __global uint *buffer ){
-         volatile __private uint x[1];
-         buffer[0] = x[0];
-         })";
-
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                             &TEST_KERNEL, "private_memory");
-    test_error(err, "create_single_kernel_helper");
-    cl_ulong size = CL_ULONG_MAX;
-    err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_PRIVATE_MEM_SIZE,
-                                   sizeof(cl_ulong), &size, nullptr);
-
-    test_error(err, "clGetKernelWorkGroupInfo");
-
-    return TEST_PASS;
-}

diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp
index d25410b..993a72f 100644
--- a/test_conformance/api/test_kernels.cpp
+++ b/test_conformance/api/test_kernels.cpp

@@ -192,10 +192,10 @@
     }
 
     /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_float) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_int) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
 
@@ -323,10 +323,10 @@
     }
 
     /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_float) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_int) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
 
@@ -412,15 +412,15 @@
     }
     free_mtdata(d); d = NULL;
 
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                                 sizeof(cl_int) * num_elements,
                                 randomTestDataA.data(), &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                                 sizeof(cl_int) * num_elements,
                                 randomTestDataB.data(), &error);
     test_error( error, "Creating test array failed" );
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_int) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
 
@@ -496,11 +496,11 @@
     }
     free_mtdata(d); d = NULL;
 
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                                 sizeof(image_pair_t) * num_elements,
                                 (void *)image_pair.data(), &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_int) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
 
@@ -593,11 +593,11 @@
     }
     free_mtdata(d); d = NULL;
 
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                                 sizeof(cl_int) * num_elements,
                                 randomTestDataA.data(), &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_int) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
 

diff --git a/test_conformance/api/test_mem_object_properties_queries.cpp b/test_conformance/api/test_mem_object_properties_queries.cpp
deleted file mode 100644
index 55300a6..0000000
--- a/test_conformance/api/test_mem_object_properties_queries.cpp
+++ /dev/null

@@ -1,362 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#include "harness/propertyHelpers.h"
-#include "harness/typeWrappers.h"
-#include <vector>
-#include <algorithm>
-
-typedef enum
-{
-    image,
-    image_with_properties,
-    buffer,
-    buffer_with_properties,
-    subbuffer,
-} test_type;
-
-struct test_data
-{
-    test_type type;
-    std::vector<cl_mem_properties> properties;
-    std::string description;
-    cl_kernel kernel;
-};
-
-static int create_object_and_check_properties(cl_context context,
-                                              clMemWrapper& test_object,
-                                              test_data test_case,
-                                              cl_mem_flags flags,
-                                              std::vector<cl_uint> local_data,
-                                              cl_uint size_x, cl_uint size_y)
-{
-    cl_int error = CL_SUCCESS;
-
-    switch (test_case.type)
-    {
-        case image: {
-            cl_image_format format = { 0 };
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_UNSIGNED_INT32;
-            test_object = clCreateImage2D(context, flags, &format, size_x,
-                                          size_y, 0, local_data.data(), &error);
-            test_error(error, "clCreateImage2D failed");
-        }
-        break;
-        case image_with_properties: {
-            cl_image_format format = { 0 };
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_UNSIGNED_INT32;
-            cl_image_desc desc = { 0 };
-            desc.image_type = CL_MEM_OBJECT_IMAGE2D;
-            desc.image_width = size_x;
-            desc.image_height = size_y;
-
-            if (test_case.properties.size() == 0)
-            {
-                test_object = clCreateImageWithProperties(
-                    context, NULL, flags, &format, &desc, local_data.data(),
-                    &error);
-            }
-            else
-            {
-                test_object = clCreateImageWithProperties(
-                    context, test_case.properties.data(), flags, &format, &desc,
-                    local_data.data(), &error);
-            }
-            test_error(error, "clCreateImageWithProperties failed");
-        }
-        break;
-        case buffer: {
-            test_object = clCreateBuffer(context, flags,
-                                         local_data.size() * sizeof(cl_uint),
-                                         local_data.data(), &error);
-            test_error(error, "clCreateBuffer failed");
-        }
-        case buffer_with_properties: {
-            if (test_case.properties.size() == 0)
-            {
-                test_object = clCreateBufferWithProperties(
-                    context, NULL, flags, local_data.size() * sizeof(cl_uint),
-                    local_data.data(), &error);
-            }
-            else
-            {
-                test_object = clCreateBufferWithProperties(
-                    context, test_case.properties.data(), flags,
-                    local_data.size() * sizeof(cl_uint), local_data.data(),
-                    &error);
-            }
-            test_error(error, "clCreateBufferWithProperties failed.");
-        }
-        break;
-        case subbuffer: {
-            clMemWrapper parent_object;
-            if (test_case.properties.size() == 0)
-            {
-                parent_object = clCreateBufferWithProperties(
-                    context, NULL, flags, local_data.size() * sizeof(cl_uint),
-                    local_data.data(), &error);
-            }
-            else
-            {
-                parent_object = clCreateBufferWithProperties(
-                    context, test_case.properties.data(), flags,
-                    local_data.size() * sizeof(cl_uint), local_data.data(),
-                    &error);
-            }
-            test_error(error, "clCreateBufferWithProperties failed.");
-
-            cl_mem_flags subbuffer_flags = flags
-                & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY);
-
-            cl_buffer_region region = { 0 };
-            region.origin = 0;
-            region.size = local_data.size() * sizeof(cl_uint);
-            test_object = clCreateSubBuffer(parent_object, subbuffer_flags,
-                                            CL_BUFFER_CREATE_TYPE_REGION,
-                                            &region, &error);
-            test_error(error, "clCreateSubBuffer failed.");
-        }
-        break;
-        default: log_error("Unknown test type!"); return TEST_FAIL;
-    }
-
-    std::vector<cl_mem_properties> check_properties;
-    size_t set_size = 0;
-
-    error =
-        clGetMemObjectInfo(test_object, CL_MEM_PROPERTIES, 0, NULL, &set_size);
-    test_error(error,
-               "clGetMemObjectInfo failed asking for CL_MEM_PROPERTIES size.");
-
-    // Buffers, subbuffers, and images must return no properties.
-    if (test_case.type == buffer || test_case.type == subbuffer
-        || test_case.type == image)
-    {
-        if (set_size == 0)
-        {
-            return TEST_PASS;
-        }
-        else
-        {
-            log_error("Queried properties must have size equal to zero for "
-                      "buffers, subbuffers, and images.");
-            return TEST_FAIL;
-        }
-    }
-
-    if (set_size == 0 && test_case.properties.size() == 0)
-    {
-        return TEST_PASS;
-    }
-    if (set_size != test_case.properties.size() * sizeof(cl_mem_properties))
-    {
-        log_error("ERROR: CL_MEM_PROPERTIES size is %d, expected %d.\n",
-                  set_size,
-                  test_case.properties.size() * sizeof(cl_queue_properties));
-        return TEST_FAIL;
-    }
-
-    cl_uint number_of_props = set_size / sizeof(cl_mem_properties);
-    check_properties.resize(number_of_props);
-    error = clGetMemObjectInfo(test_object, CL_MEM_PROPERTIES, set_size,
-                               check_properties.data(), NULL);
-    test_error(error,
-               "clGetMemObjectInfo failed asking for CL_MEM_PROPERTIES.");
-
-    error = compareProperties(check_properties, test_case.properties);
-    return error;
-}
-
-static int run_test_query_properties(cl_context context, cl_command_queue queue,
-                                     test_data test_case)
-{
-    int error = CL_SUCCESS;
-    log_info("\nTC description: %s\n", test_case.description.c_str());
-
-    clMemWrapper obj_src;
-    clMemWrapper obj_dst;
-    clEventWrapper event;
-    MTdata init_generator = init_genrand(gRandomSeed);
-    cl_mem_flags flags;
-    cl_uint size_x = 4;
-    cl_uint size_y = 4;
-    size_t size = size_x * size_y * 4;
-    size_t global_dim[2] = { size_x, size_y };
-    const size_t origin[3] = { 0, 0, 0 };
-    const size_t region[3] = { size_x, size_y, 1 };
-
-    std::vector<cl_uint> src_data(size);
-    std::vector<cl_uint> dst_data(size);
-
-    generate_random_data(kUInt, size, init_generator, src_data.data());
-    generate_random_data(kUInt, size, init_generator, dst_data.data());
-    free_mtdata(init_generator);
-    init_generator = NULL;
-
-    flags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR;
-    error = create_object_and_check_properties(context, obj_src, test_case,
-                                               flags, src_data, size_x, size_y);
-    test_error(error, "create_object_and_check_properties obj_src failed.");
-
-    flags = CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR;
-    error = create_object_and_check_properties(context, obj_dst, test_case,
-                                               flags, dst_data, size_x, size_y);
-    test_error(error, "create_object_and_check_properties obj_dst failed.");
-
-    error = clSetKernelArg(test_case.kernel, 0, sizeof(obj_src), &obj_src);
-    test_error(error, "clSetKernelArg 0 failed.");
-
-    error = clSetKernelArg(test_case.kernel, 1, sizeof(obj_dst), &obj_dst);
-    test_error(error, "clSetKernelArg 1 failed.");
-    switch (test_case.type)
-    {
-        case image:
-        case image_with_properties: {
-            error = clEnqueueNDRangeKernel(queue, test_case.kernel, 2, NULL,
-                                           global_dim, NULL, 0, NULL, &event);
-            test_error(error, "clEnqueueNDRangeKernel failed.");
-
-            error = clWaitForEvents(1, &event);
-            test_error(error, "clWaitForEvents failed.");
-
-            error = clEnqueueReadImage(queue, obj_dst, CL_TRUE, origin, region,
-                                       0, 0, dst_data.data(), 0, NULL, NULL);
-            test_error(error, "clEnqueueReadImage failed.");
-        }
-        break;
-        case buffer:
-        case buffer_with_properties:
-        case subbuffer: {
-            error = clEnqueueNDRangeKernel(queue, test_case.kernel, 1, NULL,
-                                           &size, NULL, 0, NULL, &event);
-            test_error(error, "clEnqueueNDRangeKernel failed.");
-
-            error = clWaitForEvents(1, &event);
-            test_error(error, "clWaitForEvents failed.");
-
-            error = clEnqueueReadBuffer(queue, obj_dst, CL_TRUE, 0,
-                                        dst_data.size() * sizeof(cl_uint),
-                                        dst_data.data(), 0, NULL, NULL);
-            test_error(error, "clEnqueueReadBuffer failed.");
-        }
-        break;
-        default: log_error("Unknown test type!"); return TEST_FAIL;
-    }
-
-    for (size_t i = 0; i < size; ++i)
-    {
-        if (dst_data[i] != src_data[i])
-        {
-            log_error("ERROR: Output results mismatch.");
-            return TEST_FAIL;
-        }
-    }
-
-    log_info("TC result: passed\n");
-    return TEST_PASS;
-}
-
-int test_image_properties_queries(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements)
-{
-    int error = CL_SUCCESS;
-    cl_bool supports_images = CL_TRUE;
-
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT,
-                            sizeof(supports_images), &supports_images, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
-
-    if (supports_images == CL_FALSE)
-    {
-        log_info("No image support on current device - skipped\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    const char* kernel_src = R"CLC(
-        __kernel void data_copy(read_only image2d_t src, write_only image2d_t dst)
-        {
-            int tid_x = get_global_id(0);
-            int tid_y = get_global_id(1);
-            int2 coords = (int2)(tid_x, tid_y);
-            uint4 val = read_imageui(src, coords);
-            write_imageui(dst, coords, val);
-
-        }
-        )CLC";
-
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        &kernel_src, "data_copy");
-    test_error(error, "create_single_kernel_helper failed");
-
-    std::vector<test_data> test_cases;
-    test_cases.push_back({ image, {}, "regular image", kernel });
-    test_cases.push_back(
-        { image_with_properties, { 0 }, "image, 0 properties", kernel });
-    test_cases.push_back(
-        { image_with_properties, {}, "image, NULL properties", kernel });
-
-    for (auto test_case : test_cases)
-    {
-        error |= run_test_query_properties(context, queue, test_case);
-    }
-
-    return error;
-}
-
-int test_buffer_properties_queries(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements)
-{
-    int error = CL_SUCCESS;
-
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    const char* kernel_src = R"CLC(
-        __kernel void data_copy(__global int *src, __global int *dst)
-        {
-            int  tid = get_global_id(0);
-
-            dst[tid] = src[tid];
-
-        }
-        )CLC";
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        &kernel_src, "data_copy");
-    test_error(error, "create_single_kernel_helper failed");
-
-    std::vector<test_data> test_cases;
-    test_cases.push_back({ buffer, {}, "regular buffer", kernel });
-    test_cases.push_back(
-        { buffer_with_properties, { 0 }, "buffer with 0 properties", kernel });
-    test_cases.push_back(
-        { buffer_with_properties, {}, "buffer with NULL properties", kernel });
-    test_cases.push_back(
-        { subbuffer, { 0 }, "subbuffer with 0 properties", kernel });
-    test_cases.push_back(
-        { subbuffer, {}, "subbuffer with NULL properties", kernel });
-
-    for (auto test_case : test_cases)
-    {
-        error |= run_test_query_properties(context, queue, test_case);
-    }
-
-    return error;
-}

diff --git a/test_conformance/api/test_mem_objects.cpp b/test_conformance/api/test_mem_objects.cpp
index c29613f..b0dc99d 100644
--- a/test_conformance/api/test_mem_objects.cpp
+++ b/test_conformance/api/test_mem_objects.cpp

@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2020 The Khronos Group Inc.
-//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -17,109 +17,92 @@
 
 static volatile cl_int sDestructorIndex;
 
-void CL_CALLBACK mem_destructor_callback(cl_mem memObject, void *userData)
+void CL_CALLBACK mem_destructor_callback( cl_mem memObject, void * userData )
 {
-    int *userPtr = (int *)userData;
+    int * userPtr = (int *)userData;
 
-    // ordering of callbacks is guaranteed, meaning we don't need to do atomic
-    // operation here
+    // ordering of callbacks is guaranteed, meaning we don't need to do atomic operation here
     *userPtr = ++sDestructorIndex;
 }
 
-int test_mem_object_destructor_callback_single(clMemWrapper &memObject)
+#ifndef ABS
+#define ABS( x ) ( ( x < 0 ) ? -x : x )
+#endif
+
+int test_mem_object_destructor_callback_single( clMemWrapper &memObject )
 {
     cl_int error;
+    int i;
 
     // Set up some variables to catch the order in which callbacks are called
-    volatile int callbackOrders[3] = { 0, 0, 0 };
+    volatile int callbackOrders[ 3 ] = { 0, 0, 0 };
     sDestructorIndex = 0;
 
     // Set up the callbacks
-    error = clSetMemObjectDestructorCallback(memObject, mem_destructor_callback,
-                                             (void *)&callbackOrders[0]);
-    test_error(error, "Unable to set destructor callback");
+    error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 0 ] );
+    test_error( error, "Unable to set destructor callback" );
 
-    error = clSetMemObjectDestructorCallback(memObject, mem_destructor_callback,
-                                             (void *)&callbackOrders[1]);
-    test_error(error, "Unable to set destructor callback");
+    error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 1 ] );
+    test_error( error, "Unable to set destructor callback" );
 
-    error = clSetMemObjectDestructorCallback(memObject, mem_destructor_callback,
-                                             (void *)&callbackOrders[2]);
-    test_error(error, "Unable to set destructor callback");
+    error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 2 ] );
+    test_error( error, "Unable to set destructor callback" );
 
     // Now release the buffer, which SHOULD call the callbacks
-    error = clReleaseMemObject(memObject);
-    test_error(error, "Unable to release test buffer");
+    error = clReleaseMemObject( memObject );
+    test_error( error, "Unable to release test buffer" );
 
-    // Note: since we manually released the mem wrapper, we need to set it to
-    // NULL to prevent a double-release
+    // Note: since we manually released the mem wrapper, we need to set it to NULL to prevent a double-release
     memObject = NULL;
 
     // At this point, all three callbacks should have already been called
     int numErrors = 0;
-    for (int i = 0; i < 3; i++)
+    for(  i = 0; i < 3; i++ )
     {
-        // Spin waiting for the release to finish.  If you don't call the
-        // mem_destructor_callback, you will not pass the test.  bugzilla 6316
-        log_info("\tWaiting for callback %d...\n", i);
-        int wait = 0;
-        while (0 == callbackOrders[i])
-        {
-            usleep(100000); // 1/10th second
-            if (++wait >= 10 * 10)
-            {
-                log_error("\tERROR: Callback %d was not called within 10 "
-                          "seconds!  Assuming failure.\n",
-                          i + 1);
-                numErrors++;
-                break;
-            }
-        }
+        // Spin waiting for the release to finish.  If you don't call the mem_destructor_callback, you will not
+        // pass the test.  bugzilla 6316
+        while( 0 == callbackOrders[i] )
+        {}
 
-        if (callbackOrders[i] != 3 - i)
+        if( ABS( callbackOrders[ i ] ) != 3-i )
         {
-            log_error("\tERROR: Callback %d was called in the wrong order! "
-                      "(Was called order %d, should have been order %d)\n",
-                      i + 1, callbackOrders[i], 3 - i);
+            log_error( "\tERROR: Callback %d was called in the wrong order! (Was called order %d, should have been order %d)\n",
+                      i+1, ABS( callbackOrders[ i ] ), i );
             numErrors++;
         }
     }
 
-    return (numErrors > 0) ? TEST_FAIL : TEST_PASS;
+    return ( numErrors > 0 ) ? -1 : 0;
 }
 
-int test_mem_object_destructor_callback(cl_device_id deviceID,
-                                        cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements)
+int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     clMemWrapper testBuffer, testImage;
     cl_int error;
 
 
     // Create a buffer and an image to test callbacks against
-    testBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, 1024, NULL, &error);
-    test_error(error, "Unable to create testing buffer");
+    testBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, 1024, NULL, &error );
+    test_error( error, "Unable to create testing buffer" );
 
-    if (test_mem_object_destructor_callback_single(testBuffer) != TEST_PASS)
+    if( test_mem_object_destructor_callback_single( testBuffer ) != 0 )
     {
-        log_error("ERROR: Destructor callbacks for buffer object FAILED\n");
-        return TEST_FAIL;
+        log_error( "ERROR: Destructor callbacks for buffer object FAILED\n" );
+        return -1;
     }
 
-    if (checkForImageSupport(deviceID) == 0)
+    if( checkForImageSupport( deviceID ) == 0 )
     {
         cl_image_format imageFormat = { CL_RGBA, CL_SIGNED_INT8 };
-        testImage = create_image_2d(context, CL_MEM_READ_ONLY, &imageFormat, 16,
-                                    16, 0, NULL, &error);
-        test_error(error, "Unable to create testing image");
+        testImage = create_image_2d( context, CL_MEM_READ_ONLY, &imageFormat, 16, 16, 0, NULL, &error );
+        test_error( error, "Unable to create testing image" );
 
-        if (test_mem_object_destructor_callback_single(testImage) != TEST_PASS)
+        if( test_mem_object_destructor_callback_single( testImage ) != 0 )
         {
-            log_error("ERROR: Destructor callbacks for image object FAILED\n");
-            return TEST_FAIL;
+            log_error( "ERROR: Destructor callbacks for image object FAILED\n" );
+            return -1;
         }
     }
 
-    return TEST_PASS;
+    return 0;
 }

diff --git a/test_conformance/api/test_min_image_formats.cpp b/test_conformance/api/test_min_image_formats.cpp
deleted file mode 100644
index f6a3546..0000000
--- a/test_conformance/api/test_min_image_formats.cpp
+++ /dev/null

@@ -1,133 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-
-int test_min_image_formats(cl_device_id device, cl_context context,
-                           cl_command_queue queue, int num_elements)
-{
-    int missingFormats = 0;
-
-    cl_int error = CL_SUCCESS;
-
-    Version version = get_device_cl_version(device);
-
-    cl_bool supports_images = CL_FALSE;
-    error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
-                            sizeof(supports_images), &supports_images, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
-
-    if (supports_images == CL_FALSE)
-    {
-        log_info("No image support on current device - skipped\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    const cl_mem_object_type image_types[] = {
-        CL_MEM_OBJECT_IMAGE1D,       CL_MEM_OBJECT_IMAGE1D_BUFFER,
-        CL_MEM_OBJECT_IMAGE2D,       CL_MEM_OBJECT_IMAGE3D,
-        CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY,
-    };
-    const cl_mem_flags mem_flags[] = {
-        CL_MEM_READ_ONLY,
-        CL_MEM_WRITE_ONLY,
-        CL_MEM_KERNEL_READ_AND_WRITE,
-    };
-
-    cl_bool supports_read_write_images = CL_FALSE;
-    if (version >= Version(3, 0))
-    {
-        cl_uint maxReadWriteImageArgs = 0;
-        error = clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
-                                sizeof(maxReadWriteImageArgs),
-                                &maxReadWriteImageArgs, NULL);
-        test_error(error,
-                   "Unable to query "
-                   "CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS");
-
-        // read-write images are supported if MAX_READ_WRITE_IMAGE_ARGS is
-        // nonzero
-        supports_read_write_images =
-            maxReadWriteImageArgs != 0 ? CL_TRUE : CL_FALSE;
-    }
-    else if (version >= Version(2, 0))
-    {
-        // read-write images are required for OpenCL 2.x
-        supports_read_write_images = CL_TRUE;
-    }
-
-    int supports_3D_image_writes =
-        is_extension_available(device, "cl_khr_3d_image_writes");
-
-    for (int t = 0; t < ARRAY_SIZE(image_types); t++)
-    {
-        const cl_mem_object_type type = image_types[t];
-        log_info("    testing %s...\n", convert_image_type_to_string(type));
-        for (int f = 0; f < ARRAY_SIZE(mem_flags); f++)
-        {
-            const cl_mem_flags flags = mem_flags[f];
-            const char* testTypeString = flags == CL_MEM_READ_ONLY
-                ? "read-only"
-                : flags == CL_MEM_WRITE_ONLY
-                    ? "write only"
-                    : flags == CL_MEM_KERNEL_READ_AND_WRITE ? "read and write"
-                                                            : "unknown???";
-
-            if (flags == CL_MEM_KERNEL_READ_AND_WRITE
-                && !supports_read_write_images)
-            {
-                continue;
-            }
-
-            if (type == CL_MEM_OBJECT_IMAGE3D && flags != CL_MEM_READ_ONLY
-                && !supports_3D_image_writes)
-            {
-                continue;
-            }
-
-            cl_uint numImageFormats = 0;
-            error = clGetSupportedImageFormats(context, flags, type, 0, NULL,
-                                               &numImageFormats);
-            test_error(error, "Unable to query number of image formats");
-
-            std::vector<cl_image_format> supportedFormats(numImageFormats);
-            if (numImageFormats != 0)
-            {
-                error = clGetSupportedImageFormats(
-                    context, flags, type, supportedFormats.size(),
-                    supportedFormats.data(), NULL);
-                test_error(error, "Unable to query image formats");
-            }
-
-            std::vector<cl_image_format> requiredFormats;
-            build_required_image_formats(flags, type, device, requiredFormats);
-
-            for (auto& format : requiredFormats)
-            {
-                if (!find_format(supportedFormats.data(),
-                                 supportedFormats.size(), &format))
-                {
-                    log_error(
-                        "Missing required %s format %s + %s.\n", testTypeString,
-                        GetChannelOrderName(format.image_channel_order),
-                        GetChannelTypeName(format.image_channel_data_type));
-                    ++missingFormats;
-                }
-            }
-        }
-    }
-
-    return missingFormats == 0 ? TEST_PASS : TEST_FAIL;
-}

diff --git a/test_conformance/api/test_null_buffer_arg.cpp b/test_conformance/api/test_null_buffer_arg.cpp
index d412d4e..ba43f18 100644
--- a/test_conformance/api/test_null_buffer_arg.cpp
+++ b/test_conformance/api/test_null_buffer_arg.cpp

@@ -157,13 +157,14 @@
 
     // prep kernel:
     if (gIsEmbedded)
-        status = create_single_kernel_helper(context, &program, &kernel, 1,
-                                             &kernel_string, "test_kernel");
+        status = create_single_kernel_helper(context, &program, NULL, 1, &kernel_string, NULL);
     else
-        status = create_single_kernel_helper(
-            context, &program, &kernel, 1, &kernel_string_long, "test_kernel");
+        status = create_single_kernel_helper(context, &program, NULL, 1, &kernel_string_long, NULL);
 
-    test_error(status, "Unable to create kernel");
+    test_error(status, "Unable to build test program");
+
+    kernel = clCreateKernel(program, "test_kernel", &status);
+    test_error(status, "CreateKernel failed.");
 
     cl_mem dev_src = clCreateBuffer(context, CL_MEM_READ_ONLY, NITEMS*sizeof(cl_float),
         NULL, NULL);

diff --git a/test_conformance/api/test_pipe_properties_queries.cpp b/test_conformance/api/test_pipe_properties_queries.cpp
deleted file mode 100644
index db91895..0000000
--- a/test_conformance/api/test_pipe_properties_queries.cpp
+++ /dev/null

@@ -1,100 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#include "harness/typeWrappers.h"
-
-#include <vector>
-
-struct test_query_pipe_properties_data
-{
-    std::vector<cl_pipe_properties> properties;
-    std::string description;
-};
-
-static int create_pipe_and_check_array_properties(
-    cl_context context, const test_query_pipe_properties_data& test_case)
-{
-    log_info("TC description: %s\n", test_case.description.c_str());
-
-    cl_int error = CL_SUCCESS;
-
-    clMemWrapper test_pipe;
-
-    if (test_case.properties.size() > 0)
-    {
-        test_pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, 4, 4,
-                                 test_case.properties.data(), &error);
-        test_error(error, "clCreatePipe failed");
-    }
-    else
-    {
-        test_pipe =
-            clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, 4, 4, NULL, &error);
-        test_error(error, "clCreatePipe failed");
-    }
-
-    std::vector<cl_pipe_properties> check_properties;
-    size_t set_size = 0;
-
-    error = clGetPipeInfo(test_pipe, CL_PIPE_PROPERTIES, 0, NULL, &set_size);
-    test_error(error,
-               "clGetPipeInfo failed asking for "
-               "CL_PIPE_PROPERTIES size.");
-
-    if (set_size == 0 && test_case.properties.size() == 0)
-    {
-        return TEST_PASS;
-    }
-    if (set_size != test_case.properties.size() * sizeof(cl_pipe_properties))
-    {
-        log_error("ERROR: CL_PIPE_PROPERTIES size is %d, expected %d.\n",
-                  set_size,
-                  test_case.properties.size() * sizeof(cl_pipe_properties));
-        return TEST_FAIL;
-    }
-
-    log_error("Unexpected test case size.  This test needs to be updated to "
-              "compare pipe properties.\n");
-    return TEST_FAIL;
-}
-
-int test_pipe_properties_queries(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements)
-{
-    cl_int error = CL_SUCCESS;
-
-    cl_bool pipeSupport = CL_FALSE;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_SUPPORT,
-                            sizeof(pipeSupport), &pipeSupport, NULL);
-    test_error(error, "Unable to query CL_DEVICE_PIPE_SUPPORT");
-
-    if (pipeSupport == CL_FALSE)
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    int result = TEST_PASS;
-
-    std::vector<test_query_pipe_properties_data> test_cases;
-    test_cases.push_back({ {}, "NULL properties" });
-
-    for (auto test_case : test_cases)
-    {
-        result |= create_pipe_and_check_array_properties(context, test_case);
-    }
-
-    return result;
-}

diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp
index 469a193..0acbe40 100644
--- a/test_conformance/api/test_queries.cpp
+++ b/test_conformance/api/test_queries.cpp

@@ -15,10 +15,8 @@
 //
 #include "testBase.h"
 #include "harness/imageHelpers.h"
-#include "harness/propertyHelpers.h"
 #include <stdlib.h>
 #include <ctype.h>
-#include <algorithm>
 
 int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
@@ -131,337 +129,158 @@
     return 0;
 }
 
-template <typename T>
-int sampler_param_test(cl_sampler sampler, cl_sampler_info param_name,
-                       T expected, const char *name)
+// Checks clGetSamplerInfo() against a sampler created with
+// clCreateSamplerWithProperties() using normalized coordinates,
+// CL_ADDRESS_CLAMP and CL_FILTER_LINEAR.  For the context, addressing mode,
+// filter mode and normalized-coords queries both the returned value and the
+// returned size are validated; for the reference count only the size is.
+// Returns 0 when every check passes and -1 on a validation failure.
+// NOTE(review): test_error presumably returns early on a CL error -- confirm
+// against the harness headers.  `queue` and `num_elements` are not used.
+int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
-    size_t size;
-    T val;
-    int error = clGetSamplerInfo(sampler, param_name, sizeof(val), &val, &size);
-    test_error(error, "Unable to get sampler info");
-    if (val != expected)
-    {
-        test_fail("ERROR: Sampler %s did not validate!\n", name);
-    }
-    if (size != sizeof(val))
-    {
-        test_fail("ERROR: Returned size of sampler %s does not validate! "
-                  "(expected %d, got %d)\n",
-                  name, (int)sizeof(val), (int)size);
-    }
-    return 0;
-}
-
-static cl_int normalized_coord_values[] = { CL_TRUE, CL_FALSE };
-static cl_addressing_mode addressing_mode_values[] = {
-    CL_ADDRESS_NONE, CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP,
-    CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT
-};
-static cl_filter_mode filter_mode_values[] = { CL_FILTER_NEAREST,
-                                               CL_FILTER_LINEAR };
-
-int test_sampler_params(cl_device_id deviceID, cl_context context,
-                        bool is_compatibility, int norm_coord_num,
-                        int addr_mod_num, int filt_mod_num)
-{
-    cl_uint refCount;
-    size_t size;
     int error;
+    size_t size;
 
-    clSamplerWrapper sampler;
+    // NOTE(review): presumably leaves the test early when the device has no
+    // image support -- confirm in the harness.
+    PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
+
+    // Zero-terminated list of (property name, value) pairs.
     cl_sampler_properties properties[] = {
-        CL_SAMPLER_NORMALIZED_COORDS,
-        normalized_coord_values[norm_coord_num],
-        CL_SAMPLER_ADDRESSING_MODE,
-        addressing_mode_values[addr_mod_num],
-        CL_SAMPLER_FILTER_MODE,
-        filter_mode_values[filt_mod_num],
-        0
-    };
+        CL_SAMPLER_NORMALIZED_COORDS, CL_TRUE,
+        CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP,
+        CL_SAMPLER_FILTER_MODE, CL_FILTER_LINEAR,
+        0 };
+    clSamplerWrapper sampler = clCreateSamplerWithProperties(context, properties, &error);
+    test_error( error, "Unable to create sampler to test with" );
 
-    if (is_compatibility)
+    // Reference count: only the returned size is checked, not the value.
+    cl_uint refCount;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
+    test_error( error, "Unable to get sampler ref count" );
+    if( size != sizeof( refCount ) )
     {
-        sampler =
-            clCreateSampler(context, normalized_coord_values[norm_coord_num],
-                            addressing_mode_values[addr_mod_num],
-                            filter_mode_values[filt_mod_num], &error);
-        test_error(error, "Unable to create sampler to test with");
-    }
-    else
-    {
-        sampler = clCreateSamplerWithProperties(context, properties, &error);
-        test_error(error, "Unable to create sampler to test with");
+        log_error( "ERROR: Returned size of sampler refcount does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
+        return -1;
     }
 
-    error = clGetSamplerInfo(sampler, CL_SAMPLER_REFERENCE_COUNT,
-                             sizeof(refCount), &refCount, &size);
-    test_error(error, "Unable to get sampler ref count");
-    test_assert_error(size == sizeof(refCount),
-                      "Returned size of sampler refcount does not validate!\n");
-
-    error = sampler_param_test(sampler, CL_SAMPLER_CONTEXT, context, "context");
-    test_error(error, "param checking failed");
-
-    error = sampler_param_test(sampler, CL_SAMPLER_ADDRESSING_MODE,
-                               addressing_mode_values[addr_mod_num],
-                               "addressing mode");
-    test_error(error, "param checking failed");
-
-    error = sampler_param_test(sampler, CL_SAMPLER_FILTER_MODE,
-                               filter_mode_values[filt_mod_num], "filter mode");
-    test_error(error, "param checking failed");
-
-    error = sampler_param_test(sampler, CL_SAMPLER_NORMALIZED_COORDS,
-                               normalized_coord_values[norm_coord_num],
-                               "normalized coords");
-    test_error(error, "param checking failed");
-
-    Version version = get_device_cl_version(deviceID);
-    if (version >= Version(3, 0))
+    // The sampler must report the context it was created in.
+    cl_context otherCtx;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_CONTEXT, sizeof( otherCtx ), &otherCtx, &size );
+    test_error( error, "Unable to get sampler context" );
+    if( otherCtx != context )
     {
-        std::vector<cl_sampler_properties> test_properties(
-            properties, properties + ARRAY_SIZE(properties));
-
-        std::vector<cl_sampler_properties> check_properties;
-        size_t set_size;
-
-        error = clGetSamplerInfo(sampler, CL_SAMPLER_PROPERTIES, 0, NULL,
-                                 &set_size);
-        test_error(
-            error,
-            "clGetSamplerInfo failed asking for CL_SAMPLER_PROPERTIES size.");
-
-        if (is_compatibility)
-        {
-            if (set_size != 0)
-            {
-                log_error(
-                    "ERROR: CL_SAMPLER_PROPERTIES size is %d, expected 0\n",
-                    set_size);
-                return TEST_FAIL;
-            }
-        }
-        else
-        {
-            if (set_size
-                != test_properties.size() * sizeof(cl_sampler_properties))
-            {
-                log_error(
-                    "ERROR: CL_SAMPLER_PROPERTIES size is %d, expected %d.\n",
-                    set_size,
-                    test_properties.size() * sizeof(cl_sampler_properties));
-                return TEST_FAIL;
-            }
-
-            cl_uint number_of_props = set_size / sizeof(cl_sampler_properties);
-            check_properties.resize(number_of_props);
-            error = clGetSamplerInfo(sampler, CL_SAMPLER_PROPERTIES, set_size,
-                                     check_properties.data(), 0);
-            test_error(
-                error,
-                "clGetSamplerInfo failed asking for CL_SAMPLER_PROPERTIES.");
-
-            error = compareProperties(check_properties, test_properties);
-            test_error(error, "checkProperties mismatch.");
-        }
+        log_error( "ERROR: Sampler context does not validate! (expected %p, got %p)\n", context, otherCtx );
+        return -1;
     }
-    return 0;
-}
-
-int get_sampler_info_params(cl_device_id deviceID, cl_context context,
-                            bool is_compatibility)
-{
-    for (int norm_coord_num = 0;
-         norm_coord_num < ARRAY_SIZE(normalized_coord_values); norm_coord_num++)
+    if( size != sizeof( otherCtx ) )
     {
-        for (int addr_mod_num = 0;
-             addr_mod_num < ARRAY_SIZE(addressing_mode_values); addr_mod_num++)
-        {
-            if ((normalized_coord_values[norm_coord_num] == CL_FALSE)
-                && ((addressing_mode_values[addr_mod_num] == CL_ADDRESS_REPEAT)
-                    || (addressing_mode_values[addr_mod_num]
-                        == CL_ADDRESS_MIRRORED_REPEAT)))
-            {
-                continue;
-            }
-            for (int filt_mod_num = 0;
-                 filt_mod_num < ARRAY_SIZE(filter_mode_values); filt_mod_num++)
-            {
-                int err = test_sampler_params(deviceID, context,
-                                              is_compatibility, norm_coord_num,
-                                              addr_mod_num, filt_mod_num);
-                test_error(err, "testing clGetSamplerInfo params failed");
-            }
-        }
+        log_error( "ERROR: Returned size of sampler context does not validate! (expected %d, got %d)\n", (int)sizeof( otherCtx ), (int)size );
+        return -1;
     }
-    return 0;
-}
-int test_get_sampler_info(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements)
-{
-    int error;
-    PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
 
-    error = get_sampler_info_params(deviceID, context, false);
-    test_error(error, "Test Failed");
+    // Addressing mode must match the CL_ADDRESS_CLAMP requested above.
+    cl_addressing_mode mode;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_ADDRESSING_MODE, sizeof( mode ), &mode, &size );
+    test_error( error, "Unable to get sampler addressing mode" );
+    if( mode != CL_ADDRESS_CLAMP )
+    {
+        log_error( "ERROR: Sampler addressing mode does not validate! (expected %d, got %d)\n", (int)CL_ADDRESS_CLAMP, (int)mode );
+        return -1;
+    }
+    if( size != sizeof( mode ) )
+    {
+        log_error( "ERROR: Returned size of sampler addressing mode does not validate! (expected %d, got %d)\n", (int)sizeof( mode ), (int)size );
+        return -1;
+    }
+
+    // Filter mode must match the CL_FILTER_LINEAR requested above.
+    cl_filter_mode fmode;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_FILTER_MODE, sizeof( fmode ), &fmode, &size );
+    test_error( error, "Unable to get sampler filter mode" );
+    if( fmode != CL_FILTER_LINEAR )
+    {
+        log_error( "ERROR: Sampler filter mode does not validate! (expected %d, got %d)\n", (int)CL_FILTER_LINEAR, (int)fmode );
+        return -1;
+    }
+    if( size != sizeof( fmode ) )
+    {
+        log_error( "ERROR: Returned size of sampler filter mode does not validate! (expected %d, got %d)\n", (int)sizeof( fmode ), (int)size );
+        return -1;
+    }
+
+    // Normalized-coords flag must match the CL_TRUE requested above.
+    cl_int norm;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_NORMALIZED_COORDS, sizeof( norm ), &norm, &size );
+    test_error( error, "Unable to get sampler normalized flag" );
+    if( norm != CL_TRUE )
+    {
+        log_error( "ERROR: Sampler normalized flag does not validate! (expected %d, got %d)\n", (int)CL_TRUE, (int)norm );
+        return -1;
+    }
+    if( size != sizeof( norm ) )
+    {
+        log_error( "ERROR: Returned size of sampler normalized flag does not validate! (expected %d, got %d)\n", (int)sizeof( norm ), (int)size );
+        return -1;
+    }
 
     return 0;
 }
 
-int test_get_sampler_info_compatibility(cl_device_id deviceID,
-                                        cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements)
-{
-    int error;
-    PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
-
-    error = get_sampler_info_params(deviceID, context, true);
-    test_error(error, "Test Failed");
-
-    return 0;
+// Queries `paramName` on `queue` into `val`, then fails the enclosing test
+// (returns -1 from the calling function) when the value differs from
+// `expected` or when the returned size is not sizeof( val ).
+//   name - string literal spliced into the diagnostics
+//   type - printf conversion (string literal) used for both values
+//   cast - type both values are cast to before printing
+// Expects locals `error` and `size` to exist in the calling function.  Macro
+// arguments are parenthesized so compound expressions expand safely.
+#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast )    \
+error = clGetCommandQueueInfo( (queue), (paramName), sizeof( val ), &(val), &size );        \
+test_error( error, "Unable to get command queue " name );                            \
+if( (val) != (expected) )                                                            \
+{                                                                                    \
+log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)(val) );    \
+return -1;                                                                        \
+}            \
+if( size != sizeof( val ) )                \
+{                                        \
+log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size );    \
+return -1;    \
 }
 
-template <typename T>
-int command_queue_param_test(cl_command_queue queue,
-                             cl_command_queue_info param_name, T expected,
-                             const char *name)
-{
-    size_t size;
-    T val;
-    int error =
-        clGetCommandQueueInfo(queue, param_name, sizeof(val), &val, &size);
-    test_error(error, "Unable to get command queue info");
-    if (val != expected)
-    {
-        test_fail("ERROR: Command queue %s did not validate!\n", name);
-    }
-    if (size != sizeof(val))
-    {
-        test_fail("ERROR: Returned size of command queue %s does not validate! "
-                  "(expected %d, got %d)\n",
-                  name, (int)sizeof(val), (int)size);
-    }
-    return 0;
-}
-
-#define MIN_NUM_COMMAND_QUEUE_PROPERTIES 2
-#define OOO_NUM_COMMAND_QUEUE_PROPERTIES 4
-static cl_command_queue_properties property_options[] = {
-    0,
-
-    CL_QUEUE_PROFILING_ENABLE,
-
-    CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
-
-    CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
-
-    CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
-
-    CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE
-        | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
-
-    CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT
-        | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
-
-    CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT
-        | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
-};
-
-int check_get_command_queue_info_params(cl_device_id deviceID,
-                                        cl_context context,
-                                        bool is_compatibility)
+// Creates a command queue with clCreateCommandQueueWithProperties() using the
+// standard host-queue properties the device reports, then validates what
+// clGetCommandQueueInfo() returns for the reference count (size only), the
+// context, the device and the properties bitfield.
+// Returns 0 on success and -1 on a validation failure; `ignoreQueue` and
+// `num_elements` are not used.
+int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
 {
     int error;
     size_t size;
 
-    cl_queue_properties host_queue_props, device_queue_props;
-    cl_queue_properties queue_props[] = { CL_QUEUE_PROPERTIES, 0, 0 };
+    cl_queue_properties device_props;
+    cl_queue_properties queue_props[] = {CL_QUEUE_PROPERTIES,0,0};
 
-    clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
-                    sizeof(host_queue_props), &host_queue_props, NULL);
-    log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n",
-             (int)host_queue_props);
-    clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
-                    sizeof(device_queue_props), &device_queue_props, NULL);
-    log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n",
-             (int)device_queue_props);
+    // Check the query result: on failure device_props would otherwise be read
+    // uninitialized when the queue is created below.
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof(device_props), &device_props, NULL);
+    test_error( error, "Unable to query CL_DEVICE_QUEUE_ON_HOST_PROPERTIES" );
+    log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", (int)device_props);
 
-    auto version = get_device_cl_version(deviceID);
+    // Mask off vendor extension properties.  Only test standard OpenCL properties
+    device_props &= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_PROFILING_ENABLE;
 
-    // Are on device queues supported
-    bool on_device_supported =
-        (version >= Version(2, 0) && version < Version(3, 0))
-        || (version >= Version(3, 0) && device_queue_props != 0);
+    queue_props[1] = device_props;
+    clCommandQueueWrapper queue = clCreateCommandQueueWithProperties( context, deviceID, &queue_props[0], &error );
+    test_error( error, "Unable to create command queue to test with" );
 
-    int num_test_options = MIN_NUM_COMMAND_QUEUE_PROPERTIES;
-    if (host_queue_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
+    // Reference count: only the returned size is checked, not the value.
+    cl_uint refCount;
+    error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
+    test_error( error, "Unable to get command queue reference count" );
+    if( size != sizeof( refCount ) )
     {
-        // Test out-of-order queues properties if supported
-        num_test_options = OOO_NUM_COMMAND_QUEUE_PROPERTIES;
-    }
-    if (on_device_supported && !is_compatibility)
-    {
-        // Test queue on device if supported (in this case out-of-order must
-        // also be supported)
-        num_test_options = ARRAY_SIZE(property_options);
+        log_error( "ERROR: Returned size of command queue reference count does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
+        return -1;
     }
 
-    for (int i = 0; i < num_test_options; i++)
-    {
-        queue_props[1] = property_options[i];
-        clCommandQueueWrapper queue;
+    cl_context otherCtx;
+    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context )
 
-        if (is_compatibility)
-        {
-            queue =
-                clCreateCommandQueue(context, deviceID, queue_props[1], &error);
-            test_error(error, "Unable to create command queue to test with");
-        }
-        else
-        {
-            queue = clCreateCommandQueueWithProperties(context, deviceID,
-                                                       &queue_props[0], &error);
-            test_error(error, "Unable to create command queue to test with");
-        }
+    cl_device_id otherDevice;
+    error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size);
+    test_error(error, "clGetCommandQueueInfo failed for CL_QUEUE_DEVICE.");
 
-        cl_uint refCount;
-        error = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT,
-                                      sizeof(refCount), &refCount, &size);
-        test_error(error, "Unable to get command queue reference count");
-        test_assert_error(size == sizeof(refCount),
-                          "Returned size of command queue reference count does "
-                          "not validate!\n");
-
-        error = command_queue_param_test(queue, CL_QUEUE_CONTEXT, context,
-                                         "context");
-        test_error(error, "param checking failed");
-
-        error = command_queue_param_test(queue, CL_QUEUE_DEVICE, deviceID,
-                                         "deviceID");
-        test_error(error, "param checking failed");
-
-        error = command_queue_param_test(queue, CL_QUEUE_PROPERTIES,
-                                         queue_props[1], "properties");
-        test_error(error, "param checking failed");
+    if (size != sizeof(cl_device_id)) {
+        log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! (expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size );
+        return -1;
     }
-    return 0;
-}
-int test_get_command_queue_info(cl_device_id deviceID, cl_context context,
-                                cl_command_queue ignoreQueue, int num_elements)
-{
-    int error = check_get_command_queue_info_params(deviceID, context, false);
-    test_error(error, "Test Failed");
-    return 0;
-}
 
-int test_get_command_queue_info_compatibility(cl_device_id deviceID,
-                                              cl_context context,
-                                              cl_command_queue ignoreQueue,
-                                              int num_elements)
-{
-    int error = check_get_command_queue_info_params(deviceID, context, true);
-    test_error(error, "Test Failed");
+    /* The returned device is validated indirectly by comparing CL_DEVICE_VENDOR_ID values.
+       NOTE(review): this only shows both handles belong to the same vendor, not that they are the same device. */
+    cl_uint otherDevice_vid, deviceID_vid;
+    error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL );
+    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL );
+    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
+
+    if( otherDevice_vid != deviceID_vid )
+    {
+        log_error( "ERROR: Incorrect device returned for queue! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid );
+        return -1;
+    }
+
+    // Compare the full 64-bit bitfield; the previous (unsigned int) cast with
+    // "%d" truncated the expected value and mismatched the format signedness.
+    cl_command_queue_properties props;
+    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, device_props, "properties", "0x%llx", unsigned long long )
+
     return 0;
 }
 
@@ -581,8 +400,7 @@
     // extensions can support double but may not support cl_khr_fp64, which implies math library support.
 
     cl_uint baseAddrAlign;
-    TEST_DEVICE_PARAM(deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, baseAddrAlign,
-                      "base address alignment", "%d bits", int)
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, baseAddrAlign, "base address alignment", "%d bits", int )

 
     cl_uint maxDataAlign;
     TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, maxDataAlign, "min data type alignment", "%d bytes", int )

diff --git a/test_conformance/api/test_queries_compatibility.cpp b/test_conformance/api/test_queries_compatibility.cpp
new file mode 100644
index 0000000..f65c5db
--- /dev/null
+++ b/test_conformance/api/test_queries_compatibility.cpp

@@ -0,0 +1,164 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+#include "harness/imageHelpers.h"
+#include <stdlib.h>
+#include <ctype.h>
+
+// Validates clGetSamplerInfo() for a sampler built with clCreateSampler()
+// (normalized coordinates, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR).  The context,
+// addressing mode, filter mode and normalized-coords queries are checked for
+// both value and returned size; the reference count only for returned size.
+// Returns 0 when everything matches and -1 on the first mismatch.
+int test_get_sampler_info_compatibility(cl_device_id deviceID,
+                                        cl_context context,
+                                        cl_command_queue queue,
+                                        int num_elements)
+{
+    int error;
+    size_t size;
+
+    PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
+
+    clSamplerWrapper sampler = clCreateSampler(
+        context, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR, &error);
+    test_error(error, "Unable to create sampler to test with");
+
+    // Reference count -- size check only.
+    cl_uint referenceCount;
+    error = clGetSamplerInfo(sampler, CL_SAMPLER_REFERENCE_COUNT,
+                             sizeof(referenceCount), &referenceCount, &size);
+    test_error(error, "Unable to get sampler ref count");
+    if (size != sizeof(referenceCount))
+    {
+        log_error("ERROR: Returned size of sampler refcount does not "
+                  "validate! (expected %d, got %d)\n",
+                  (int)sizeof(referenceCount), (int)size);
+        return -1;
+    }
+
+    // Owning context.
+    cl_context queriedContext;
+    error = clGetSamplerInfo(sampler, CL_SAMPLER_CONTEXT,
+                             sizeof(queriedContext), &queriedContext, &size);
+    test_error(error, "Unable to get sampler context");
+    if (queriedContext != context)
+    {
+        log_error("ERROR: Sampler context does not validate! "
+                  "(expected %p, got %p)\n",
+                  context, queriedContext);
+        return -1;
+    }
+    if (size != sizeof(queriedContext))
+    {
+        log_error("ERROR: Returned size of sampler context does not "
+                  "validate! (expected %d, got %d)\n",
+                  (int)sizeof(queriedContext), (int)size);
+        return -1;
+    }
+
+    // Addressing mode.
+    cl_addressing_mode addressingMode;
+    error = clGetSamplerInfo(sampler, CL_SAMPLER_ADDRESSING_MODE,
+                             sizeof(addressingMode), &addressingMode, &size);
+    test_error(error, "Unable to get sampler addressing mode");
+    if (addressingMode != CL_ADDRESS_CLAMP)
+    {
+        log_error("ERROR: Sampler addressing mode does not validate! "
+                  "(expected %d, got %d)\n",
+                  (int)CL_ADDRESS_CLAMP, (int)addressingMode);
+        return -1;
+    }
+    if (size != sizeof(addressingMode))
+    {
+        log_error("ERROR: Returned size of sampler addressing mode does not "
+                  "validate! (expected %d, got %d)\n",
+                  (int)sizeof(addressingMode), (int)size);
+        return -1;
+    }
+
+    // Filter mode.
+    cl_filter_mode filterMode;
+    error = clGetSamplerInfo(sampler, CL_SAMPLER_FILTER_MODE,
+                             sizeof(filterMode), &filterMode, &size);
+    test_error(error, "Unable to get sampler filter mode");
+    if (filterMode != CL_FILTER_LINEAR)
+    {
+        log_error("ERROR: Sampler filter mode does not validate! "
+                  "(expected %d, got %d)\n",
+                  (int)CL_FILTER_LINEAR, (int)filterMode);
+        return -1;
+    }
+    if (size != sizeof(filterMode))
+    {
+        log_error("ERROR: Returned size of sampler filter mode does not "
+                  "validate! (expected %d, got %d)\n",
+                  (int)sizeof(filterMode), (int)size);
+        return -1;
+    }
+
+    // Normalized-coordinates flag.
+    cl_int normalizedCoords;
+    error = clGetSamplerInfo(sampler, CL_SAMPLER_NORMALIZED_COORDS,
+                             sizeof(normalizedCoords), &normalizedCoords,
+                             &size);
+    test_error(error, "Unable to get sampler normalized flag");
+    if (normalizedCoords != CL_TRUE)
+    {
+        log_error("ERROR: Sampler normalized flag does not validate! "
+                  "(expected %d, got %d)\n",
+                  (int)CL_TRUE, (int)normalizedCoords);
+        return -1;
+    }
+    if (size != sizeof(normalizedCoords))
+    {
+        log_error("ERROR: Returned size of sampler normalized flag does not "
+                  "validate! (expected %d, got %d)\n",
+                  (int)sizeof(normalizedCoords), (int)size);
+        return -1;
+    }
+
+    return 0;
+}
+
+// Queries `paramName` on `queue` into `val`, then fails the enclosing test
+// (returns -1 from the calling function) when the value differs from
+// `expected` or when the returned size is not sizeof( val ).
+//   name - string literal spliced into the diagnostics
+//   type - printf conversion (string literal) used for both values
+//   cast - type both values are cast to before printing
+// Expects locals `error` and `size` to exist in the calling function.  Macro
+// arguments are parenthesized so compound expressions expand safely.
+#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast )    \
+error = clGetCommandQueueInfo( (queue), (paramName), sizeof( val ), &(val), &size );        \
+test_error( error, "Unable to get command queue " name );                            \
+if( (val) != (expected) )                                                            \
+{                                                                                    \
+log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)(val) );    \
+return -1;                                                                        \
+}            \
+if( size != sizeof( val ) )                \
+{                                        \
+log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size );    \
+return -1;    \
+}
+
+// Creates a command queue through clCreateCommandQueue() using every property
+// the device reports in CL_DEVICE_QUEUE_PROPERTIES, then validates what
+// clGetCommandQueueInfo() returns for the reference count (size only), the
+// context, the device and the properties bitfield.
+// Returns 0 on success and -1 on a validation failure; `ignoreQueue` and
+// `num_elements` are not used.
+int test_get_command_queue_info_compatibility(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
+{
+    int error;
+    size_t size;
+
+    // Check the query result: on failure device_props would otherwise be read
+    // uninitialized when the queue is created below.
+    cl_command_queue_properties device_props = 0;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL);
+    test_error( error, "Unable to query CL_DEVICE_QUEUE_PROPERTIES" );
+    log_info("CL_DEVICE_QUEUE_PROPERTIES is %d\n", (int)device_props);
+
+    clCommandQueueWrapper queue = clCreateCommandQueue( context, deviceID, device_props, &error );
+    test_error( error, "Unable to create command queue to test with" );
+
+    // Reference count: only the returned size is checked, not the value.
+    cl_uint refCount;
+    error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
+    test_error( error, "Unable to get command queue reference count" );
+    if( size != sizeof( refCount ) )
+    {
+        log_error( "ERROR: Returned size of command queue reference count does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
+        return -1;
+    }
+
+    cl_context otherCtx;
+    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context )
+
+    cl_device_id otherDevice;
+    error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size);
+    test_error(error, "clGetCommandQueueInfo failed for CL_QUEUE_DEVICE.");
+
+    if (size != sizeof(cl_device_id)) {
+        log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! (expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size );
+        return -1;
+    }
+
+    /* The returned device is validated indirectly by comparing CL_DEVICE_VENDOR_ID values.
+       NOTE(review): this only shows both handles belong to the same vendor, not that they are the same device. */
+    cl_uint otherDevice_vid, deviceID_vid;
+    error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL );
+    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL );
+    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
+
+    if( otherDevice_vid != deviceID_vid )
+    {
+        log_error( "ERROR: Incorrect device returned for queue! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid );
+        return -1;
+    }
+
+    // Compare the full 64-bit bitfield; the previous (unsigned int) cast with
+    // "%d" truncated the expected value (device_props is not masked here) and
+    // mismatched the format signedness.
+    cl_command_queue_properties props;
+    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, device_props, "properties", "0x%llx", unsigned long long )
+
+    return 0;
+}
+

diff --git a/test_conformance/api/test_queue.cpp b/test_conformance/api/test_queue.cpp
deleted file mode 100644
index 27ed5f0..0000000
--- a/test_conformance/api/test_queue.cpp
+++ /dev/null

@@ -1,61 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-
-#include "testBase.h"
-#include "harness/typeWrappers.h"
-
-int test_queue_flush_on_release(cl_device_id deviceID, cl_context context,
-                                cl_command_queue defaultQueue, int num_elements)
-{
-    cl_int err;
-
-    // Create a command queue
-    cl_command_queue queue = clCreateCommandQueue(context, deviceID, 0, &err);
-    test_error(err, "Could not create command queue");
-
-    // Create a kernel
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    const char *source = "void kernel test(){}";
-    err = create_single_kernel_helper(context, &program, &kernel, 1, &source,
-                                      "test");
-    test_error(err, "Could not create kernel");
-
-    // Enqueue the kernel
-    size_t gws = 1;
-    clEventWrapper event;
-    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &gws, nullptr, 0,
-                                 nullptr, &event);
-    test_error(err, "Could not enqueue kernel");
-
-    // Release the queue
-    err = clReleaseCommandQueue(queue);
-
-    // Wait for kernel to execute since the queue must flush on release
-    bool success = poll_until(2000, 50, [&event]() {
-        cl_int status;
-        cl_int err = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
-                                    sizeof(cl_int), &status, nullptr);
-        if ((err != CL_SUCCESS) || (status != CL_COMPLETE))
-        {
-            return false;
-        }
-        return true;
-    });
-
-    return success ? TEST_PASS : TEST_FAIL;
-}

diff --git a/test_conformance/api/test_queue_properties_queries.cpp b/test_conformance/api/test_queue_properties_queries.cpp
deleted file mode 100644
index 843fa84..0000000
--- a/test_conformance/api/test_queue_properties_queries.cpp
+++ /dev/null

@@ -1,269 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#include "harness/propertyHelpers.h"
-#include "harness/typeWrappers.h"
-#include <vector>
-#include <algorithm>
-
-struct test_queue_array_properties_data
-{
-    std::vector<cl_queue_properties> properties;
-    std::string description;
-};
-
-int verify_if_properties_supported(
-    cl_device_id deviceID, cl_command_queue_properties requested_bitfield,
-    cl_uint requested_size)
-{
-    int error = CL_SUCCESS;
-    bool on_host_queue = true;
-
-    if (requested_bitfield & CL_QUEUE_ON_DEVICE)
-    {
-        on_host_queue = false;
-
-        if (requested_size > 0)
-        {
-            cl_uint max_queue_size = 0;
-            error =
-                clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
-                                sizeof(max_queue_size), &max_queue_size, NULL);
-            test_error(error,
-                       "clGetDeviceInfo for "
-                       "CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE failed");
-            if (requested_size > max_queue_size)
-            {
-                log_info(
-                    "The value of CL_QUEUE_SIZE = %d cannot be bigger than "
-                    "CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE = %d, skipped\n",
-                    requested_size, max_queue_size);
-                return TEST_SKIPPED_ITSELF;
-            }
-        }
-    }
-
-    cl_command_queue_properties supported_properties = 0;
-    cl_command_queue_properties all_properties = 0;
-
-    std::vector<cl_command_queue_properties> all_properties_vector{
-        CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PROFILING_ENABLE
-    };
-    for (auto each_property : all_properties_vector)
-    {
-        all_properties |= each_property;
-    }
-    cl_command_queue_properties requested_properties =
-        all_properties & requested_bitfield;
-
-    if (on_host_queue)
-    {
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
-                                sizeof(supported_properties),
-                                &supported_properties, NULL);
-        test_error(error,
-                   "clGetDeviceInfo asking for "
-                   "CL_DEVICE_QUEUE_ON_HOST_PROPERTIES failed");
-    }
-    else
-    {
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
-                                sizeof(supported_properties),
-                                &supported_properties, NULL);
-        test_error(error,
-                   "clGetDeviceInfo asking for "
-                   "CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES failed");
-    }
-
-    for (auto each_property : all_properties_vector)
-    {
-        if ((each_property & requested_properties)
-            && !(each_property & supported_properties))
-        {
-            log_info("\t%s not supported, skipped\n",
-                     GetQueuePropertyName(each_property));
-            return TEST_SKIPPED_ITSELF;
-        }
-        else if ((each_property & requested_properties)
-                 && each_property & supported_properties)
-        {
-            log_info("\t%s supported\n", GetQueuePropertyName(each_property));
-        }
-    }
-    return error;
-}
-
-static int create_queue_and_check_array_properties(
-    cl_context context, cl_device_id deviceID,
-    test_queue_array_properties_data test_case)
-{
-    cl_int error = CL_SUCCESS;
-
-    clCommandQueueWrapper test_queue;
-
-    if (test_case.properties.size() > 0)
-    {
-        test_queue = clCreateCommandQueueWithProperties(
-            context, deviceID, test_case.properties.data(), &error);
-        test_error(error, "clCreateCommandQueueWithProperties failed");
-    }
-    else
-    {
-        test_queue =
-            clCreateCommandQueueWithProperties(context, deviceID, NULL, &error);
-        test_error(error, "clCreateCommandQueueWithProperties failed");
-    }
-
-    std::vector<cl_queue_properties> check_properties;
-    size_t set_size = 0;
-
-    error = clGetCommandQueueInfo(test_queue, CL_QUEUE_PROPERTIES_ARRAY, 0,
-                                  NULL, &set_size);
-    test_error(error,
-               "clGetCommandQueueInfo failed asking for "
-               "CL_QUEUE_PROPERTIES_ARRAY size.");
-
-    if (set_size == 0 && test_case.properties.size() == 0)
-    {
-        return TEST_PASS;
-    }
-    if (set_size != test_case.properties.size() * sizeof(cl_queue_properties))
-    {
-        log_error("ERROR: CL_QUEUE_PROPERTIES_ARRAY size is %d, expected %d.\n",
-                  set_size,
-                  test_case.properties.size() * sizeof(cl_queue_properties));
-        return TEST_FAIL;
-    }
-
-    cl_uint number_of_props = set_size / sizeof(cl_queue_properties);
-    check_properties.resize(number_of_props);
-    error = clGetCommandQueueInfo(test_queue, CL_QUEUE_PROPERTIES_ARRAY,
-                                  set_size, check_properties.data(), NULL);
-    test_error(
-        error,
-        "clGetCommandQueueInfo failed asking for CL_QUEUE_PROPERTIES_ARRAY.");
-
-    error = compareProperties(check_properties, test_case.properties);
-    return error;
-}
-
-static int
-run_test_queue_array_properties(cl_context context, cl_device_id deviceID,
-                                test_queue_array_properties_data test_case)
-{
-    int error = TEST_PASS;
-
-    std::vector<cl_queue_properties> requested_properties =
-        test_case.properties;
-    log_info("\nTC description: %s\n", test_case.description.c_str());
-
-    // first verify if user properties are supported
-    if (requested_properties.size() != 0)
-    {
-        requested_properties.pop_back();
-        cl_command_queue_properties requested_bitfield = 0;
-        cl_uint requested_size = 0;
-        for (cl_uint i = 0; i < requested_properties.size(); i = i + 2)
-        {
-            if (requested_properties[i] == CL_QUEUE_PROPERTIES)
-            {
-                requested_bitfield = requested_properties[i + 1];
-            }
-            if (requested_properties[i] == CL_QUEUE_SIZE)
-            {
-                requested_size = requested_properties[i + 1];
-            }
-        }
-
-        error = verify_if_properties_supported(deviceID, requested_bitfield,
-                                               requested_size);
-        if (error == TEST_SKIPPED_ITSELF)
-        {
-            log_info("TC result: skipped\n");
-            return TEST_PASS;
-        }
-        test_error(error,
-                   "Checking which queue properties supported failed.\n");
-    }
-
-    // continue testing if supported user properties
-    error =
-        create_queue_and_check_array_properties(context, deviceID, test_case);
-    test_error(error, "create_queue_and_check_array_properties failed.\n");
-
-    log_info("TC result: passed\n");
-    return TEST_PASS;
-}
-
-int test_queue_properties_queries(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements)
-{
-    int error = TEST_PASS;
-    std::vector<test_queue_array_properties_data> test_cases;
-
-    test_cases.push_back({ {}, "host queue, NULL properties" });
-
-    test_cases.push_back(
-        { { CL_QUEUE_PROPERTIES, 0, 0 }, "host queue, zero properties" });
-
-    test_cases.push_back(
-        { { CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0 },
-          "host queue, CL_QUEUE_PROFILING_ENABLE" });
-
-    test_cases.push_back(
-        { { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 },
-          "host queue, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE" });
-
-    test_cases.push_back(
-        { { CL_QUEUE_PROPERTIES,
-            CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
-            0 },
-          "host queue, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | "
-          "CL_QUEUE_PROFILING_ENABLE" });
-
-    test_cases.push_back(
-        { { CL_QUEUE_PROPERTIES,
-            CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE, 0 },
-          "device queue, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | "
-          "CL_QUEUE_ON_DEVICE" });
-
-    test_cases.push_back(
-        { { CL_QUEUE_PROPERTIES,
-            CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE
-                | CL_QUEUE_ON_DEVICE_DEFAULT | CL_QUEUE_PROFILING_ENABLE,
-            CL_QUEUE_SIZE, 124, 0 },
-          "device queue, all possible properties" });
-
-    test_cases.push_back(
-        { { CL_QUEUE_PROPERTIES,
-            CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE
-                | CL_QUEUE_PROFILING_ENABLE,
-            CL_QUEUE_SIZE, 124, 0 },
-          "device queue, all without CL_QUEUE_ON_DEVICE_DEFAULT" });
-
-    test_cases.push_back(
-        { { CL_QUEUE_PROPERTIES,
-            CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE
-                | CL_QUEUE_ON_DEVICE_DEFAULT | CL_QUEUE_PROFILING_ENABLE,
-            0 },
-          "device queue, all without CL_QUEUE_SIZE" });
-
-    for (auto test_case : test_cases)
-    {
-        error |= run_test_queue_array_properties(context, deviceID, test_case);
-    }
-    return error;
-}

diff --git a/test_conformance/api/test_retain.cpp b/test_conformance/api/test_retain.cpp
index 6e66c7d..cf065bc 100644
--- a/test_conformance/api/test_retain.cpp
+++ b/test_conformance/api/test_retain.cpp

@@ -251,9 +251,11 @@
     err = clSetMemObjectDestructorCallback( buffer, callback, nullptr );
     test_error( err, "Unable to set destructor callback" );
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      testProgram, "sample_test");
-    test_error(err, "Unable to build sample program and sample_test kernel");
+    err = create_single_kernel_helper( context, &program, nullptr, 1, testProgram, nullptr );
+    test_error( err, "Unable to build sample program" );
+
+    kernel = clCreateKernel( program, "sample_test", &err );
+    test_error( err, "Unable to create sample_test kernel" );
 
     err = clSetKernelArg( kernel, 0, sizeof(cl_mem), &buffer );
     test_error( err, "Unable to set kernel argument" );

diff --git a/test_conformance/api/test_retain_program.cpp b/test_conformance/api/test_retain_program.cpp
index b9fc8b7..a85bc70 100644
--- a/test_conformance/api/test_retain_program.cpp
+++ b/test_conformance/api/test_retain_program.cpp

@@ -28,11 +28,14 @@
     int error;
     const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" };
 
-    /* Create a test program and kernel from it */
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        testProgram, "sample_test");
+    /* Create a test program */
+    error = create_single_kernel_helper(context, &program, NULL, 1, testProgram, NULL);
     test_error( error, "Unable to build sample program to test with" );
 
+    /* And create a kernel from it */
+    kernel = clCreateKernel( program, "sample_test", &error );
+    test_error( error, "Unable to create kernel" );
+
     /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine */
     clReleaseProgram( program );
     clReleaseKernel( kernel );
@@ -65,11 +68,9 @@
         return -1;
     }
 
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 10, NULL, &error);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 10, NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 10,
-                                NULL, &error);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
     test_error( error, "Creating test array failed" );
 
     /* Set the arguments */

diff --git a/test_conformance/api/test_sub_group_dispatch.cpp b/test_conformance/api/test_sub_group_dispatch.cpp
index 01d0ffa..c0dc372 100644
--- a/test_conformance/api/test_sub_group_dispatch.cpp
+++ b/test_conformance/api/test_sub_group_dispatch.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -75,29 +75,7 @@
     size_t ret_ndrange2d_flattened;
     size_t ret_ndrange3d_flattened;
 
-    if (get_device_cl_version(deviceID) >= Version(3, 0))
-    {
-        int error;
-        cl_uint max_num_sub_groups;
-
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_NUM_SUB_GROUPS,
-                                sizeof(max_num_sub_groups), &max_num_sub_groups,
-                                NULL);
-        if (error != CL_SUCCESS)
-        {
-            print_error(error, "Unable to get max num subgroups");
-            return error;
-        }
-
-        if (max_num_sub_groups == 0)
-        {
-            return TEST_SKIPPED_ITSELF;
-        }
-    }
-
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        subgroup_dispatch_kernel,
-                                        "subgroup_dispatch_kernel");
+    error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, subgroup_dispatch_kernel, "subgroup_dispatch_kernel", "-cl-std=CL2.0");
     if (error != 0)
         return error;
 

diff --git a/test_conformance/api/test_zero_sized_enqueue.cpp b/test_conformance/api/test_zero_sized_enqueue.cpp
index 7efb32c..dabe75f 100644
--- a/test_conformance/api/test_zero_sized_enqueue.cpp
+++ b/test_conformance/api/test_zero_sized_enqueue.cpp

@@ -17,15 +17,14 @@
 #include "harness/typeWrappers.h"
 #include "harness/conversions.h"
 
-const char* zero_sized_enqueue_test_kernel[] = {
-    "__kernel void foo_kernel(__global int *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = 1;\n"
-    "\n"
-    "}\n"
-};
+const char *zero_sized_enqueue_test_kernel[] = {
+"__kernel void foo_kernel(__global float *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = (int)src[tid];\n"
+"\n"
+"}\n" };
 
 const int bufSize = 128;
 
@@ -63,7 +62,7 @@
     int error;
     clProgramWrapper program;
     clKernelWrapper kernel;
-    clMemWrapper output_stream;
+    clMemWrapper            streams[2];
     size_t    ndrange1 = 0;
     size_t    ndrange20[2] = {0, 0};
     size_t    ndrange21[2] = {1, 0};
@@ -77,15 +76,15 @@
     size_t    ndrange35[3] = {1, 0, 1};
     size_t    ndrange36[3] = {1, 1, 0};
 
-    output_stream =
-        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
-                       bufSize * sizeof(int), NULL, &error);
+    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufSize * sizeof(int), NULL, &error);
+    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufSize * sizeof(int), NULL, &error);
 
-    // Initialise output buffer.
-    int output_buffer_data = 0;
-    error = clEnqueueFillBuffer(queue, output_stream, &output_buffer_data,
-                                sizeof(int), 0, sizeof(int) * bufSize, 0, NULL,
-                                NULL);
+    int* buf = new int[bufSize];
+    memset(buf, 0, sizeof(int) * bufSize);
+
+    // update output buffer
+    error = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(int) * bufSize, buf, 0, NULL, NULL);
+
 
     /* Create a kernel to test with */
     if( create_single_kernel_helper( context, &program, &kernel, 1, zero_sized_enqueue_test_kernel, "foo_kernel" ) != 0 )
@@ -93,53 +92,44 @@
         return -1;
     }
 
-    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &output_stream);
+    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &streams[0]);
+    test_error( error, "clSetKernelArg failed." );
+    error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &streams[1]);
     test_error( error, "clSetKernelArg failed." );
 
     // Simple API return code tests for 1D, 2D and 3D zero sized ND range.
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 1, &ndrange1);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 1, &ndrange1);
     test_error( error, "1D zero sized kernel enqueue failed." );
 
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 2, ndrange20);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 2, ndrange20);
     test_error( error, "2D zero sized kernel enqueue failed." );
 
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 2, ndrange21);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 2, ndrange21);
     test_error( error, "2D zero sized kernel enqueue failed." );
 
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 2, ndrange22);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 2, ndrange22);
     test_error( error, "2D zero sized kernel enqueue failed." );
 
 
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange30);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange30);
     test_error( error, "3D zero sized kernel enqueue failed." );
 
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange31);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange31);
     test_error( error, "3D zero sized kernel enqueue failed." );
 
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange32);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange32);
     test_error( error, "3D zero sized kernel enqueue failed." );
 
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange33);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange33);
     test_error( error, "3D zero sized kernel enqueue failed." );
 
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange34);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange34);
     test_error( error, "3D zero sized kernel enqueue failed." );
 
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange35);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange35);
     test_error( error, "3D zero sized kernel enqueue failed." );
 
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange36);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange36);
     test_error( error, "3D zero sized kernel enqueue failed." );
 
     // Verify zero-sized ND range kernel still satisfy event wait list and correct event object
@@ -159,7 +149,7 @@
     error = clGetEventInfo(ev, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &sta, NULL);
     test_error( error, "Failed to get event status.");
 
-    if (sta != CL_QUEUED && sta != CL_SUBMITTED)
+    if (sta != CL_QUEUED)
     {
         log_error( "ERROR: incorrect zero sized kernel enqueue event status.\n" );
         return -1;
@@ -181,6 +171,8 @@
         return -1;
     }
 
+    delete [] buf;
+
     return 0;
 }
 

diff --git a/test_conformance/atomics/main.cpp b/test_conformance/atomics/main.cpp
index afdea37..6904d7c 100644
--- a/test_conformance/atomics/main.cpp
+++ b/test_conformance/atomics/main.cpp

@@ -45,6 +45,6 @@
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
 }
 

diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp
index 34b34ed..24bf974 100644
--- a/test_conformance/atomics/test_atomics.cpp
+++ b/test_conformance/atomics/test_atomics.cpp

@@ -243,17 +243,13 @@
     for( size_t i = 0; i < numDestItems; i++ )
         memcpy( destItems + i * typeSize, startValue, typeSize );
 
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                typeSize * numDestItems, destItems, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * numDestItems, destItems, NULL);
     if (!streams[0])
     {
         log_error("ERROR: Creating output array failed!\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(
-        context,
-        ((startRefValues != NULL ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)),
-        typeSize * threadSize, startRefValues, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(( startRefValues != NULL ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE )), typeSize * threadSize, startRefValues, NULL);
     if (!streams[1])
     {
         log_error("ERROR: Creating reference array failed!\n");
@@ -1004,7 +1000,8 @@
     // Last item doesn't get and'ed on every bit, so we have to mask away
     size_t numBits = (size_t)size - whichResult * 64;
     cl_long bits = (cl_long)0xffffffffffffffffLL;
-    for (size_t i = 0; i < numBits; i++) bits &= ~(1LL << i);
+    for( size_t i = 0; i < numBits; i++ )
+        bits &= ~( 1 << i );
 
     return bits;
 }
@@ -1085,16 +1082,18 @@
 #pragma mark ---- xor
 
 const char atom_xor_core[] =
-    "    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-    "    int  bitIndex = tid & ( numBits - 1 );\n"
-    "\n"
-    "    oldValues[tid] = atom_xor( &destMemory[0], 1L << bitIndex );\n";
+"    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+"    int  bitIndex = tid & ( numBits - 1 );\n"
+"\n"
+"    oldValues[tid] = atom_xor( &destMemory[0], 1 << bitIndex );\n"
+;
 
 const char atomic_xor_core[] =
-    "    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-    "    int  bitIndex = tid & ( numBits - 1 );\n"
-    "\n"
-    "    oldValues[tid] = atomic_xor( &destMemory[0], 1L << bitIndex );\n";
+"    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+"    int  bitIndex = tid & ( numBits - 1 );\n"
+"\n"
+"    oldValues[tid] = atomic_xor( &destMemory[0], 1 << bitIndex );\n"
+;
 
 cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
 {

diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp
index b85e3d2..9a27d07 100644
--- a/test_conformance/atomics/test_indexed_cases.cpp
+++ b/test_conformance/atomics/test_indexed_cases.cpp

@@ -64,12 +64,12 @@
              (int)numGlobalThreads, (int)numLocalThreads);
 
     // Create the counter that will keep track of where each thread writes.
-    counter = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 1,
-                             NULL, NULL);
+    counter = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+                                   sizeof(cl_int) * 1, NULL, NULL);
     // Create the counters that will hold the results of each thread writing
     // its ID into a (hopefully) unique location.
-    counters = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_int) * numGlobalThreads, NULL, NULL);
+    counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+                                    sizeof(cl_int) * numGlobalThreads, NULL, NULL);
 
     // Reset all those locations to -1 to indciate they have not been used.
     cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads);
@@ -175,15 +175,12 @@
              (int)global_threads[0], (int)local_threads[0]);
 
     // Allocate our storage
-    cl_mem bin_counters =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_int) * number_of_bins, NULL, NULL);
-    cl_mem bins = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        sizeof(cl_int) * number_of_bins * max_counts_per_bin, NULL, NULL);
-    cl_mem bin_assignments =
-        clCreateBuffer(context, CL_MEM_READ_ONLY,
-                       sizeof(cl_int) * number_of_items, NULL, NULL);
+    cl_mem bin_counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+                                        sizeof(cl_int) * number_of_bins, NULL, NULL);
+    cl_mem bins = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+                                sizeof(cl_int) * number_of_bins*max_counts_per_bin, NULL, NULL);
+    cl_mem bin_assignments = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_ONLY),
+                                           sizeof(cl_int) * number_of_items, NULL, NULL);
 
     if (bin_counters == NULL) {
         log_error("add_index_bin_test FAILED to allocate bin_counters.\n");

diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt
index c5c4b5f..d73b84a 100644
--- a/test_conformance/basic/CMakeLists.txt
+++ b/test_conformance/basic/CMakeLists.txt

@@ -37,12 +37,8 @@
     test_work_item_functions.cpp
     test_astype.cpp
     test_async_copy.cpp
-    test_async_copy2D.cpp
-    test_async_copy3D.cpp
-    test_async_copy_fence.cpp	
     test_sizeof.cpp
     test_vector_creation.cpp
-    test_vector_swizzle.cpp
     test_vec_type_hint.cpp
     test_numeric_constants.cpp
     test_constant_source.cpp

diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp
index 86c3cec..11ed2c3 100644
--- a/test_conformance/basic/main.cpp
+++ b/test_conformance/basic/main.cpp

@@ -26,147 +26,136 @@
 #include "procs.h"
 
 test_definition test_list[] = {
-    ADD_TEST(hostptr),
-    ADD_TEST(fpmath_float),
-    ADD_TEST(fpmath_float2),
-    ADD_TEST(fpmath_float4),
-    ADD_TEST(intmath_int),
-    ADD_TEST(intmath_int2),
-    ADD_TEST(intmath_int4),
-    ADD_TEST(intmath_long),
-    ADD_TEST(intmath_long2),
-    ADD_TEST(intmath_long4),
-    ADD_TEST(hiloeo),
-    ADD_TEST(if),
-    ADD_TEST(sizeof),
-    ADD_TEST(loop),
-    ADD_TEST(pointer_cast),
-    ADD_TEST(local_arg_def),
-    ADD_TEST(local_kernel_def),
-    ADD_TEST(local_kernel_scope),
-    ADD_TEST(constant),
-    ADD_TEST(constant_source),
-    ADD_TEST(readimage),
-    ADD_TEST(readimage_int16),
-    ADD_TEST(readimage_fp32),
-    ADD_TEST(writeimage),
-    ADD_TEST(writeimage_int16),
-    ADD_TEST(writeimage_fp32),
-    ADD_TEST(mri_one),
+    ADD_TEST( hostptr ),
+    ADD_TEST( fpmath_float ),
+    ADD_TEST( fpmath_float2 ),
+    ADD_TEST( fpmath_float4 ),
+    ADD_TEST( intmath_int ),
+    ADD_TEST( intmath_int2 ),
+    ADD_TEST( intmath_int4 ),
+    ADD_TEST( intmath_long ),
+    ADD_TEST( intmath_long2 ),
+    ADD_TEST( intmath_long4 ),
+    ADD_TEST( hiloeo ),
+    ADD_TEST( if ),
+    ADD_TEST( sizeof ),
+    ADD_TEST( loop ),
+    ADD_TEST( pointer_cast ),
+    ADD_TEST( local_arg_def ),
+    ADD_TEST( local_kernel_def ),
+    ADD_TEST( local_kernel_scope ),
+    ADD_TEST( constant ),
+    ADD_TEST( constant_source ),
+    ADD_TEST( readimage ),
+    ADD_TEST( readimage_int16 ),
+    ADD_TEST( readimage_fp32 ),
+    ADD_TEST( writeimage ),
+    ADD_TEST( writeimage_int16 ),
+    ADD_TEST( writeimage_fp32 ),
+    ADD_TEST( mri_one ),
 
-    ADD_TEST(mri_multiple),
-    ADD_TEST(image_r8),
-    ADD_TEST(barrier),
-    ADD_TEST_VERSION(wg_barrier, Version(2, 0)),
-    ADD_TEST(int2float),
-    ADD_TEST(float2int),
-    ADD_TEST(imagereadwrite),
-    ADD_TEST(imagereadwrite3d),
-    ADD_TEST(readimage3d),
-    ADD_TEST(readimage3d_int16),
-    ADD_TEST(readimage3d_fp32),
-    ADD_TEST(bufferreadwriterect),
-    ADD_TEST(arrayreadwrite),
-    ADD_TEST(arraycopy),
-    ADD_TEST(imagearraycopy),
-    ADD_TEST(imagearraycopy3d),
-    ADD_TEST(imagecopy),
-    ADD_TEST(imagecopy3d),
-    ADD_TEST(imagerandomcopy),
-    ADD_TEST(arrayimagecopy),
-    ADD_TEST(arrayimagecopy3d),
-    ADD_TEST(imagenpot),
+    ADD_TEST( mri_multiple ),
+    ADD_TEST( image_r8 ),
+    ADD_TEST( barrier ),
+    ADD_TEST_VERSION( wg_barrier, Version(2, 0) ),
+    ADD_TEST( int2float ),
+    ADD_TEST( float2int ),
+    ADD_TEST( imagereadwrite ),
+    ADD_TEST( imagereadwrite3d ),
+    ADD_TEST( readimage3d ),
+    ADD_TEST( readimage3d_int16 ),
+    ADD_TEST( readimage3d_fp32 ),
+    ADD_TEST( bufferreadwriterect ),
+    ADD_TEST( arrayreadwrite ),
+    ADD_TEST( arraycopy ),
+    ADD_TEST( imagearraycopy ),
+    ADD_TEST( imagearraycopy3d ),
+    ADD_TEST( imagecopy ),
+    ADD_TEST( imagecopy3d ),
+    ADD_TEST( imagerandomcopy ),
+    ADD_TEST( arrayimagecopy ),
+    ADD_TEST( arrayimagecopy3d ),
+    ADD_TEST( imagenpot ),
 
-    ADD_TEST(vload_global),
-    ADD_TEST(vload_local),
-    ADD_TEST(vload_constant),
-    ADD_TEST(vload_private),
-    ADD_TEST(vstore_global),
-    ADD_TEST(vstore_local),
-    ADD_TEST(vstore_private),
+    ADD_TEST( vload_global ),
+    ADD_TEST( vload_local ),
+    ADD_TEST( vload_constant ),
+    ADD_TEST( vload_private ),
+    ADD_TEST( vstore_global ),
+    ADD_TEST( vstore_local ),
+    ADD_TEST( vstore_private ),
 
-    ADD_TEST(createkernelsinprogram),
-    ADD_TEST(imagedim_pow2),
-    ADD_TEST(imagedim_non_pow2),
-    ADD_TEST(image_param),
-    ADD_TEST(image_multipass_integer_coord),
-    ADD_TEST(image_multipass_float_coord),
-    ADD_TEST(explicit_s2v_char),
-    ADD_TEST(explicit_s2v_uchar),
-    ADD_TEST(explicit_s2v_short),
-    ADD_TEST(explicit_s2v_ushort),
-    ADD_TEST(explicit_s2v_int),
-    ADD_TEST(explicit_s2v_uint),
-    ADD_TEST(explicit_s2v_long),
-    ADD_TEST(explicit_s2v_ulong),
-    ADD_TEST(explicit_s2v_float),
-    ADD_TEST(explicit_s2v_double),
+    ADD_TEST( createkernelsinprogram ),
+    ADD_TEST( imagedim_pow2 ),
+    ADD_TEST( imagedim_non_pow2 ),
+    ADD_TEST( image_param ),
+    ADD_TEST( image_multipass_integer_coord ),
+    ADD_TEST( image_multipass_float_coord ),
+    ADD_TEST( explicit_s2v_char ),
+    ADD_TEST( explicit_s2v_uchar ),
+    ADD_TEST( explicit_s2v_short ),
+    ADD_TEST( explicit_s2v_ushort ),
+    ADD_TEST( explicit_s2v_int ),
+    ADD_TEST( explicit_s2v_uint ),
+    ADD_TEST( explicit_s2v_long ),
+    ADD_TEST( explicit_s2v_ulong ),
+    ADD_TEST( explicit_s2v_float ),
+    ADD_TEST( explicit_s2v_double ),
 
-    ADD_TEST(enqueue_map_buffer),
-    ADD_TEST(enqueue_map_image),
+    ADD_TEST( enqueue_map_buffer ),
+    ADD_TEST( enqueue_map_image ),
 
-    ADD_TEST(work_item_functions),
+    ADD_TEST( work_item_functions ),
 
-    ADD_TEST(astype),
+    ADD_TEST( astype ),
 
-    ADD_TEST(async_copy_global_to_local),
-    ADD_TEST(async_copy_local_to_global),
-    ADD_TEST(async_strided_copy_global_to_local),
-    ADD_TEST(async_strided_copy_local_to_global),
-    ADD_TEST(async_copy_global_to_local2D),
-    ADD_TEST(async_copy_local_to_global2D),
-    ADD_TEST(async_copy_global_to_local3D),
-    ADD_TEST(async_copy_local_to_global3D),
-    ADD_TEST(async_work_group_copy_fence_import_after_export_aliased_local),
-    ADD_TEST(async_work_group_copy_fence_import_after_export_aliased_global),
-    ADD_TEST(
-        async_work_group_copy_fence_import_after_export_aliased_global_and_local),
-    ADD_TEST(async_work_group_copy_fence_export_after_import_aliased_local),
-    ADD_TEST(async_work_group_copy_fence_export_after_import_aliased_global),
-    ADD_TEST(
-        async_work_group_copy_fence_export_after_import_aliased_global_and_local),
-    ADD_TEST(prefetch),
-    ADD_TEST(kernel_call_kernel_function),
-    ADD_TEST(host_numeric_constants),
-    ADD_TEST(kernel_numeric_constants),
-    ADD_TEST(kernel_limit_constants),
-    ADD_TEST(kernel_preprocessor_macros),
-    ADD_TEST(parameter_types),
-    ADD_TEST(vector_creation),
-    ADD_TEST(vector_swizzle),
-    ADD_TEST(vec_type_hint),
-    ADD_TEST(kernel_memory_alignment_local),
-    ADD_TEST(kernel_memory_alignment_global),
-    ADD_TEST(kernel_memory_alignment_constant),
-    ADD_TEST(kernel_memory_alignment_private),
+    ADD_TEST( async_copy_global_to_local ),
+    ADD_TEST( async_copy_local_to_global ),
+    ADD_TEST( async_strided_copy_global_to_local ),
+    ADD_TEST( async_strided_copy_local_to_global ),
+    ADD_TEST( prefetch ),
 
-    ADD_TEST_VERSION(progvar_prog_scope_misc, Version(2, 0)),
-    ADD_TEST_VERSION(progvar_prog_scope_uninit, Version(2, 0)),
-    ADD_TEST_VERSION(progvar_prog_scope_init, Version(2, 0)),
-    ADD_TEST_VERSION(progvar_func_scope, Version(2, 0)),
+    ADD_TEST( kernel_call_kernel_function ),
+    ADD_TEST( host_numeric_constants ),
+    ADD_TEST( kernel_numeric_constants ),
+    ADD_TEST( kernel_limit_constants ),
+    ADD_TEST( kernel_preprocessor_macros ),
 
-    ADD_TEST(global_work_offsets),
-    ADD_TEST(get_global_offset),
+    ADD_TEST( parameter_types ),
+    ADD_TEST( vector_creation ),
+    ADD_TEST( vec_type_hint ),
+    ADD_TEST( kernel_memory_alignment_local ),
+    ADD_TEST( kernel_memory_alignment_global ),
+    ADD_TEST( kernel_memory_alignment_constant ),
+    ADD_TEST( kernel_memory_alignment_private ),
 
-    ADD_TEST_VERSION(global_linear_id, Version(2, 0)),
-    ADD_TEST_VERSION(local_linear_id, Version(2, 0)),
-    ADD_TEST_VERSION(enqueued_local_size, Version(2, 0)),
+    ADD_TEST_VERSION( progvar_prog_scope_misc, Version(2, 0) ),
+    ADD_TEST_VERSION( progvar_prog_scope_uninit, Version(2, 0) ),
+    ADD_TEST_VERSION( progvar_prog_scope_init, Version(2, 0) ),
+    ADD_TEST_VERSION( progvar_func_scope, Version(2, 0) ),
 
-    ADD_TEST(simple_read_image_pitch),
-    ADD_TEST(simple_write_image_pitch),
+    ADD_TEST( global_work_offsets ),
+    ADD_TEST( get_global_offset ),
+
+    ADD_TEST_VERSION( global_linear_id, Version(2, 0) ),
+    ADD_TEST_VERSION( local_linear_id, Version(2, 0) ),
+    ADD_TEST_VERSION( enqueued_local_size, Version(2, 0) ),
+
+    ADD_TEST( simple_read_image_pitch ),
+    ADD_TEST( simple_write_image_pitch ),
 
 #if defined( __APPLE__ )
-    ADD_TEST(queue_priority),
+    ADD_TEST( queue_priority ),
 #endif
 
-    ADD_TEST_VERSION(get_linear_ids, Version(2, 0)),
-    ADD_TEST_VERSION(rw_image_access_qualifier, Version(2, 0)),
+    ADD_TEST_VERSION( get_linear_ids, Version(2, 0) ),
+    ADD_TEST_VERSION( rw_image_access_qualifier, Version(2, 0) ),
 };
 
 const int test_num = ARRAY_SIZE( test_list );
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
 }
 

diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h
index 4a01a8c..9fe17ef 100644
--- a/test_conformance/basic/procs.h
+++ b/test_conformance/basic/procs.h

@@ -115,42 +115,6 @@
 extern int      test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_async_copy_global_to_local2D(cl_device_id deviceID,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements);
-extern int test_async_copy_local_to_global2D(cl_device_id deviceID,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements);
-extern int test_async_copy_global_to_local3D(cl_device_id deviceID,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements);
-extern int test_async_copy_local_to_global3D(cl_device_id deviceID,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements);
-extern int test_async_work_group_copy_fence_import_after_export_aliased_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_async_work_group_copy_fence_import_after_export_aliased_global(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int
-test_async_work_group_copy_fence_import_after_export_aliased_global_and_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_async_work_group_copy_fence_export_after_import_aliased_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_async_work_group_copy_fence_export_after_import_aliased_global(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int
-test_async_work_group_copy_fence_export_after_import_aliased_global_and_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
 extern int      test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
 extern int      test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
@@ -162,12 +126,8 @@
 extern int      test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
 extern int      test_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_vector_creation(cl_device_id deviceID, cl_context context,
-                                cl_command_queue queue, int num_elements);
-extern int test_vector_swizzle(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_vec_type_hint(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements);
+extern int      test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
 
 extern int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );

diff --git a/test_conformance/basic/test_arraycopy.cpp b/test_conformance/basic/test_arraycopy.cpp
index 5a35286..e0cb565 100644
--- a/test_conformance/basic/test_arraycopy.cpp
+++ b/test_conformance/basic/test_arraycopy.cpp

@@ -51,8 +51,7 @@
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
 
     // results
-    results = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                             sizeof(cl_uint) * num_elements, NULL, &err);
+    results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, &err);
     test_error(err, "clCreateBuffer failed");
 
 /*****************************************************************************************************************************************/
@@ -65,9 +64,7 @@
         input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
 
     // client backing
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
-                       sizeof(cl_uint) * num_elements, input_ptr, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
     test_error(err, "clCreateBuffer failed");
 
     delta_offset = num_elements * sizeof(cl_uint) / num_copies;
@@ -106,8 +103,7 @@
         input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
 
     // no backing
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, &err);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE) , sizeof(cl_uint) * num_elements, NULL, &err);
     test_error(err, "clCreateBuffer failed");
 
     for (i=0; i<num_copies; i++)
@@ -150,20 +146,17 @@
     free_mtdata(d); d= NULL;
 
     // client backing
-    streams[3] =
-        clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
-                       sizeof(cl_uint) * num_elements, input_ptr, &err);
-    test_error(err, "clCreateBuffer failed");
+  streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
+  test_error(err, "clCreateBuffer failed");
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &copy_kernel_code, "test_copy");
-    test_error(err, "create_single_kernel_helper failed");
+  err = create_single_kernel_helper(context, &program, &kernel, 1, &copy_kernel_code, "test_copy" );
+  test_error(err, "create_single_kernel_helper failed");
 
-    err = clSetKernelArg(kernel, 0, sizeof streams[3], &streams[3]);
-    err |= clSetKernelArg(kernel, 1, sizeof results, &results);
-    test_error(err, "clSetKernelArg failed");
+  err = clSetKernelArg(kernel, 0, sizeof streams[3], &streams[3]);
+  err |= clSetKernelArg(kernel, 1, sizeof results, &results);
+  test_error(err, "clSetKernelArg failed");
 
-    size_t threads[3] = { num_elements, 0, 0 };
+  size_t threads[3] = {num_elements, 0, 0};
 
     err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
   test_error(err, "clEnqueueNDRangeKernel failed");

diff --git a/test_conformance/basic/test_arrayimagecopy.cpp b/test_conformance/basic/test_arrayimagecopy.cpp
index 5de5d01..5a0263f 100644
--- a/test_conformance/basic/test_arrayimagecopy.cpp
+++ b/test_conformance/basic/test_arrayimagecopy.cpp

@@ -38,8 +38,7 @@
 
   log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
 
-  image = create_image_2d(context, CL_MEM_READ_WRITE, format, img_width,
-                          img_height, 0, NULL, &err);
+  image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
   test_error(err, "create_image_2d failed");
 
   err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
@@ -47,7 +46,7 @@
 
   buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;
 
-  buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
+  buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_size, NULL, &err);
   test_error(err, "clCreateBuffer failed");
 
   d = init_genrand( gRandomSeed );

diff --git a/test_conformance/basic/test_arrayimagecopy3d.cpp b/test_conformance/basic/test_arrayimagecopy3d.cpp
index 1b08ec9..d1d3652 100644
--- a/test_conformance/basic/test_arrayimagecopy3d.cpp
+++ b/test_conformance/basic/test_arrayimagecopy3d.cpp

@@ -39,8 +39,7 @@
 
   log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
 
-  image = create_image_3d(context, CL_MEM_READ_ONLY, format, img_width,
-                          img_height, img_depth, 0, 0, NULL, &err);
+  image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
   test_error(err, "create_image_3d failed");
 
   err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
@@ -48,7 +47,7 @@
 
   buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
 
-  buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
+  buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_size, NULL, &err);
   test_error(err, "clCreateBuffer failed");
 
   d = init_genrand( gRandomSeed );
@@ -126,15 +125,12 @@
 
   PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
 
-  err = clGetSupportedImageFormats(
-      context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
+  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
   test_error(err, "clGetSupportedImageFormats failed");
 
   formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
 
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_ONLY,
-                                   CL_MEM_OBJECT_IMAGE3D, num_formats, formats,
-                                   NULL);
+  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
   test_error(err, "clGetSupportedImageFormats failed");
 
   for (i = 0; i < num_formats; i++) {

diff --git a/test_conformance/basic/test_arrayreadwrite.cpp b/test_conformance/basic/test_arrayreadwrite.cpp
index 25e8ed9..6866439 100644
--- a/test_conformance/basic/test_arrayreadwrite.cpp
+++ b/test_conformance/basic/test_arrayreadwrite.cpp

@@ -43,8 +43,7 @@
     for (i=0; i<num_elements; i++)
         inptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
 
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, &err);
     test_error(err, "clCreateBuffer failed");
 
     for (i=0; i<num_tries; i++)

diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp
deleted file mode 100644
index 9fbdcb6..0000000
--- a/test_conformance/basic/test_async_copy2D.cpp
+++ /dev/null

@@ -1,449 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "../../test_common/harness/compat.h"
-
-#include <algorithm>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include "../../test_common/harness/conversions.h"
-#include "procs.h"
-
-static const char *async_global_to_local_kernel2D =
-    "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
-    "%s *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int "
-    "lineCopiesPerWorkItem, int srcStride, int dstStride )\n"
-    "{\n"
-    " int i, j;\n"
-    // Zero the local storage first
-    " for(i=0; i<lineCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numElementsPerLine; j++)\n"
-    "     localBuffer[ (get_local_id( 0 "
-    ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + dstStride)+j ] = "
-    "(%s)(%s)0;\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the copy
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t event;\n"
-    "    event = async_work_group_copy_2D2D( (__local %s*)localBuffer, "
-    "(__global const "
-    "%s*)(src+lineCopiesPerWorkgroup*get_group_id(0)*(numElementsPerLine + "
-    "srcStride)), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, "
-    "srcStride, dstStride, 0 );\n"
-    // Wait for the copy to complete, then verify by manually copying to the
-    // dest
-    "     wait_group_events( 1, &event );\n"
-    " for(i=0; i<lineCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numElementsPerLine; j++)\n"
-    "     dst[ (get_global_id( 0 "
-    ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + dstStride)+j ] = "
-    "localBuffer[ (get_local_id( 0 "
-    ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + dstStride)+j ];\n"
-    "}\n";
-
-static const char *async_local_to_global_kernel2D =
-    "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
-    "%s *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int "
-    "lineCopiesPerWorkItem, int srcStride, int dstStride )\n"
-    "{\n"
-    " int i, j;\n"
-    // Zero the local storage first
-    " for(i=0; i<lineCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numElementsPerLine; j++)\n"
-    "     localBuffer[ (get_local_id( 0 "
-    ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + srcStride)+j ] = "
-    "(%s)(%s)0;\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the copy
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    " for(i=0; i<lineCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numElementsPerLine; j++)\n"
-    "     localBuffer[ (get_local_id( 0 "
-    ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + srcStride)+j ] = src[ "
-    "(get_global_id( 0 )*lineCopiesPerWorkItem+i)*(numElementsPerLine + "
-    "srcStride)+j ];\n"
-    // Do this to verify all kernels are done copying to the local buffer before
-    // we try the copy
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t event;\n"
-    "    event = async_work_group_copy_2D2D((__global "
-    "%s*)(dst+lineCopiesPerWorkgroup*get_group_id(0)*(numElementsPerLine + "
-    "dstStride)), (__local const %s*)localBuffer, (size_t)numElementsPerLine, "
-    "(size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0 );\n"
-    "    wait_group_events( 1, &event );\n"
-    "}\n";
-
-int test_copy2D(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, const char *kernelCode,
-                ExplicitType vecType, int vecSize, int srcStride, int dstStride,
-                bool localIsDst)
-{
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[2];
-    size_t threads[1], localThreads[1];
-    void *inBuffer, *outBuffer, *outBufferCopy;
-    MTdata d;
-    char vecNameString[64];
-    vecNameString[0] = 0;
-    if (vecSize == 1)
-        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
-    else
-        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType),
-                vecSize);
-
-    size_t elementSize = get_explicit_type_size(vecType) * vecSize;
-    log_info("Testing %s with srcStride = %d, dstStride = %d\n", vecNameString,
-             srcStride, dstStride);
-
-    cl_long max_local_mem_size;
-    error =
-        clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE,
-                        sizeof(max_local_mem_size), &max_local_mem_size, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");
-
-    cl_long max_global_mem_size;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
-                            sizeof(max_global_mem_size), &max_global_mem_size,
-                            NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed.");
-
-    cl_long max_alloc_size;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
-                            sizeof(max_alloc_size), &max_alloc_size, NULL);
-    test_error(error,
-               "clGetDeviceInfo for CL_DEVICE_MAX_MEM_ALLOC_SIZE failed.");
-
-    if (max_alloc_size > max_global_mem_size / 2)
-        max_alloc_size = max_global_mem_size / 2;
-
-    unsigned int num_of_compute_devices;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS,
-                            sizeof(num_of_compute_devices),
-                            &num_of_compute_devices, NULL);
-    test_error(error,
-               "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
-
-    char programSource[4096];
-    programSource[0] = 0;
-    char *programPtr;
-
-    sprintf(programSource, kernelCode,
-            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
-                               : "",
-            vecNameString, vecNameString, vecNameString, vecNameString,
-            get_explicit_type_name(vecType), vecNameString, vecNameString);
-    // log_info("program: %s\n", programSource);
-    programPtr = programSource;
-
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        (const char **)&programPtr, "test_fn");
-    test_error(error, "Unable to create testing kernel");
-
-    size_t max_workgroup_size;
-    error = clGetKernelWorkGroupInfo(
-        kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size),
-        &max_workgroup_size, NULL);
-    test_error(
-        error,
-        "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");
-
-    size_t max_local_workgroup_size[3];
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
-                            sizeof(max_local_workgroup_size),
-                            max_local_workgroup_size, NULL);
-    test_error(error,
-               "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
-
-    // Pick the minimum of the device and the kernel
-    if (max_workgroup_size > max_local_workgroup_size[0])
-        max_workgroup_size = max_local_workgroup_size[0];
-
-    size_t numElementsPerLine = 10;
-    size_t lineCopiesPerWorkItem = 13;
-    elementSize =
-        get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
-    size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem * elementSize
-        * (numElementsPerLine + (localIsDst ? dstStride : srcStride));
-    size_t maxLocalWorkgroupSize =
-        (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
-
-    // Calculation can return 0 on embedded devices due to 1KB local mem limit
-    if (maxLocalWorkgroupSize == 0)
-    {
-        maxLocalWorkgroupSize = 1;
-    }
-
-    size_t localWorkgroupSize = maxLocalWorkgroupSize;
-    if (maxLocalWorkgroupSize > max_workgroup_size)
-        localWorkgroupSize = max_workgroup_size;
-
-    size_t maxTotalLinesIn = (max_alloc_size / elementSize + srcStride)
-        / (numElementsPerLine + srcStride);
-    size_t maxTotalLinesOut = (max_alloc_size / elementSize + dstStride)
-        / (numElementsPerLine + dstStride);
-    size_t maxTotalLines = (std::min)(maxTotalLinesIn, maxTotalLinesOut);
-    size_t maxLocalWorkgroups =
-        maxTotalLines / (localWorkgroupSize * lineCopiesPerWorkItem);
-
-    size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem
-        - (localIsDst ? dstStride : srcStride);
-    size_t numberOfLocalWorkgroups = (std::min)(1111, (int)maxLocalWorkgroups);
-    size_t totalLines =
-        numberOfLocalWorkgroups * localWorkgroupSize * lineCopiesPerWorkItem;
-    size_t inBufferSize = elementSize
-        * (totalLines * numElementsPerLine + (totalLines - 1) * srcStride);
-    size_t outBufferSize = elementSize
-        * (totalLines * numElementsPerLine + (totalLines - 1) * dstStride);
-    size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize;
-
-    inBuffer = (void *)malloc(inBufferSize);
-    outBuffer = (void *)malloc(outBufferSize);
-    outBufferCopy = (void *)malloc(outBufferSize);
-
-    cl_int lineCopiesPerWorkItemInt, numElementsPerLineInt,
-        lineCopiesPerWorkgroup;
-    lineCopiesPerWorkItemInt = (int)lineCopiesPerWorkItem;
-    numElementsPerLineInt = (int)numElementsPerLine;
-    lineCopiesPerWorkgroup = (int)(lineCopiesPerWorkItem * localWorkgroupSize);
-
-    log_info(
-        "Global: %d, local %d, local buffer %db, global in buffer %db, "
-        "global out buffer %db, each work group will copy %d lines and each "
-        "work item item will copy %d lines.\n",
-        (int)globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize,
-        (int)inBufferSize, (int)outBufferSize, lineCopiesPerWorkgroup,
-        lineCopiesPerWorkItemInt);
-
-    threads[0] = globalWorkgroupSize;
-    localThreads[0] = localWorkgroupSize;
-
-    d = init_genrand(gRandomSeed);
-    generate_random_data(
-        vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer);
-    generate_random_data(
-        vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer);
-    free_mtdata(d);
-    d = NULL;
-    memcpy(outBufferCopy, outBuffer, outBufferSize);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
-                                inBuffer, &error);
-    test_error(error, "Unable to create input buffer");
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, outBufferSize,
-                                outBuffer, &error);
-    test_error(error, "Unable to create output buffer");
-
-    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 2, localBufferSize, NULL);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 3, sizeof(numElementsPerLineInt),
-                           &numElementsPerLineInt);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 4, sizeof(lineCopiesPerWorkgroup),
-                           &lineCopiesPerWorkgroup);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 5, sizeof(lineCopiesPerWorkItemInt),
-                           &lineCopiesPerWorkItemInt);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 6, sizeof(srcStride), &srcStride);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 7, sizeof(dstStride), &dstStride);
-    test_error(error, "Unable to set kernel argument");
-
-    // Enqueue
-    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
-                                   localThreads, 0, NULL, NULL);
-    test_error(error, "Unable to queue kernel");
-
-    // Read
-    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize,
-                                outBuffer, 0, NULL, NULL);
-    test_error(error, "Unable to read results");
-
-    // Verify
-    int failuresPrinted = 0;
-    // Verify
-    size_t typeSize = get_explicit_type_size(vecType) * vecSize;
-    for (int i = 0;
-         i < (int)globalWorkgroupSize * lineCopiesPerWorkItem * elementSize;
-         i += elementSize)
-    {
-        for (int j = 0; j < (int)numElementsPerLine * elementSize;
-             j += elementSize)
-        {
-            int inIdx = i * (numElementsPerLine + srcStride) + j;
-            int outIdx = i * (numElementsPerLine + dstStride) + j;
-            if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx,
-                       typeSize)
-                != 0)
-            {
-                unsigned char *inchar = (unsigned char *)inBuffer + inIdx;
-                unsigned char *outchar = (unsigned char *)outBuffer + outIdx;
-                char values[4096];
-                values[0] = 0;
-
-                if (failuresPrinted == 0)
-                {
-                    // Print first failure message
-                    log_error("ERROR: Results of copy did not validate!\n");
-                }
-                sprintf(values + strlen(values), "%d -> [", inIdx);
-                for (int k = 0; k < (int)elementSize; k++)
-                    sprintf(values + strlen(values), "%2x ", inchar[k]);
-                sprintf(values + strlen(values), "] != [");
-                for (int k = 0; k < (int)elementSize; k++)
-                    sprintf(values + strlen(values), "%2x ", outchar[k]);
-                sprintf(values + strlen(values), "]");
-                log_error("%s\n", values);
-                failuresPrinted++;
-            }
-
-            if (failuresPrinted > 5)
-            {
-                log_error("Not printing further failures...\n");
-                return -1;
-            }
-        }
-        if (i < (int)(globalWorkgroupSize * lineCopiesPerWorkItem - 1)
-                * elementSize)
-        {
-            int outIdx = i * (numElementsPerLine + dstStride)
-                + numElementsPerLine * elementSize;
-            if (memcmp(((char *)outBuffer) + outIdx,
-                       ((char *)outBufferCopy) + outIdx,
-                       dstStride * elementSize)
-                != 0)
-            {
-                if (failuresPrinted == 0)
-                {
-                    // Print first failure message
-                    log_error("ERROR: Results of copy did not validate!\n");
-                }
-                log_error(
-                    "2D copy corrupted data in output buffer in the stride "
-                    "offset of line %d\n",
-                    i);
-                failuresPrinted++;
-            }
-            if (failuresPrinted > 5)
-            {
-                log_error("Not printing further failures...\n");
-                return -1;
-            }
-        }
-    }
-
-    free(inBuffer);
-    free(outBuffer);
-    free(outBufferCopy);
-
-    return failuresPrinted ? -1 : 0;
-}
-
-int test_copy2D_all_types(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, const char *kernelCode,
-                          bool localIsDst)
-{
-    ExplicitType vecType[] = {
-        kChar,  kUChar, kShort,  kUShort,          kInt, kUInt, kLong,
-        kULong, kFloat, kDouble, kNumExplicitTypes
-    };
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int smallTypesStrideSizes[] = { 0, 10, 100 };
-    unsigned int size, typeIndex, srcStride, dstStride;
-
-    int errors = 0;
-
-    if (!is_extension_available(deviceID, "cl_khr_extended_async_copies"))
-    {
-        log_info(
-            "Device does not support extended async copies. Skipping test.\n");
-        return 0;
-    }
-
-    for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
-    {
-        if (vecType[typeIndex] == kDouble
-            && !is_extension_available(deviceID, "cl_khr_fp64"))
-            continue;
-
-        if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong)
-            && !gHasLong)
-            continue;
-
-        for (size = 0; vecSizes[size] != 0; size++)
-        {
-            if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
-                <= 2) // small type
-            {
-                for (srcStride = 0; srcStride < sizeof(smallTypesStrideSizes)
-                         / sizeof(smallTypesStrideSizes[0]);
-                     srcStride++)
-                {
-                    for (dstStride = 0;
-                         dstStride < sizeof(smallTypesStrideSizes)
-                             / sizeof(smallTypesStrideSizes[0]);
-                         dstStride++)
-                    {
-                        if (test_copy2D(deviceID, context, queue, kernelCode,
-                                        vecType[typeIndex], vecSizes[size],
-                                        smallTypesStrideSizes[srcStride],
-                                        smallTypesStrideSizes[dstStride],
-                                        localIsDst))
-                        {
-                            errors++;
-                        }
-                    }
-                }
-            }
-            // not a small type, check only zero stride
-            else if (test_copy2D(deviceID, context, queue, kernelCode,
-                                 vecType[typeIndex], vecSizes[size], 0, 0,
-                                 localIsDst))
-            {
-                errors++;
-            }
-        }
-    }
-    if (errors) return -1;
-    return 0;
-}
-
-int test_async_copy_global_to_local2D(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements)
-{
-    return test_copy2D_all_types(deviceID, context, queue,
-                                 async_global_to_local_kernel2D, true);
-}
-
-int test_async_copy_local_to_global2D(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements)
-{
-    return test_copy2D_all_types(deviceID, context, queue,
-                                 async_local_to_global_kernel2D, false);
-}

diff --git a/test_conformance/basic/test_async_copy3D.cpp b/test_conformance/basic/test_async_copy3D.cpp
deleted file mode 100644
index 252159b..0000000
--- a/test_conformance/basic/test_async_copy3D.cpp
+++ /dev/null

@@ -1,546 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "../../test_common/harness/compat.h"
-
-#include <algorithm>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include "../../test_common/harness/conversions.h"
-#include "procs.h"
-
-static const char *async_global_to_local_kernel3D =
-    "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
-    "%s *localBuffer, int numElementsPerLine, int numLines, int "
-    "planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, "
-    "int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n"
-    "{\n"
-    " int i, j, k;\n"
-    // Zero the local storage first
-    " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numLines; j++)\n"
-    "     for(k=0; k<numElementsPerLine; k++)\n"
-    "       localBuffer[ (get_local_id( 0 "
-    ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
-    "numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
-    "dstLineStride) + k ] = (%s)(%s)0;\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the copy
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t event;\n"
-    "    event = async_work_group_copy_3D3D( (__local %s*)localBuffer, "
-    "(__global const "
-    "%s*)(src+planesCopiesPerWorkgroup*get_group_id(0)*(numLines*"
-    "numElementsPerLine + numLines*srcLineStride + srcPlaneStride)), "
-    "(size_t)numElementsPerLine, (size_t)numLines, srcLineStride, "
-    "dstLineStride, planesCopiesPerWorkgroup, srcPlaneStride, dstPlaneStride, "
-    "0 );\n"
-    // Wait for the copy to complete, then verify by manually copying to the
-    // dest
-    " wait_group_events( 1, &event );\n"
-    " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numLines; j++)\n"
-    "     for(k=0; k<numElementsPerLine; k++)\n"
-    "       dst[ (get_global_id( 0 "
-    ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
-    "numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
-    "dstLineStride) + k ] = localBuffer[ (get_local_id( 0 "
-    ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
-    "numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
-    "dstLineStride) + k ];\n"
-    "}\n";
-
-static const char *async_local_to_global_kernel3D =
-    "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
-    "%s *localBuffer, int numElementsPerLine, int numLines, int "
-    "planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, "
-    "int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n"
-    "{\n"
-    " int i, j, k;\n"
-    // Zero the local storage first
-    " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numLines; j++)\n"
-    "     for(k=0; k<numElementsPerLine; k++)\n"
-    "       localBuffer[ (get_local_id( 0 "
-    ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
-    "numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
-    "srcLineStride) + k ] = (%s)(%s)0;\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the copy
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numLines; j++)\n"
-    "     for(k=0; k<numElementsPerLine; k++)\n"
-    "       localBuffer[ (get_local_id( 0 "
-    ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
-    "numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
-    "srcLineStride) + k ] = src[ (get_global_id( 0 "
-    ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
-    "numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
-    "srcLineStride) + k ];\n"
-    // Do this to verify all kernels are done copying to the local buffer before
-    // we try the copy
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t event;\n"
-    "    event = async_work_group_copy_3D3D((__global "
-    "%s*)(dst+planesCopiesPerWorkgroup*get_group_id(0)*(numLines*"
-    "numElementsPerLine + numLines*dstLineStride + dstPlaneStride)), (__local "
-    "const %s*)localBuffer, (size_t)numElementsPerLine, (size_t)numLines, "
-    "srcLineStride, dstLineStride, planesCopiesPerWorkgroup, srcPlaneStride, "
-    "dstPlaneStride, 0 );\n"
-    "    wait_group_events( 1, &event );\n"
-    "}\n";
-
-int test_copy3D(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, const char *kernelCode,
-                ExplicitType vecType, int vecSize, int srcLineStride,
-                int dstLineStride, int srcPlaneStride, int dstPlaneStride,
-                bool localIsDst)
-{
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[2];
-    size_t threads[1], localThreads[1];
-    void *inBuffer, *outBuffer, *outBufferCopy;
-    MTdata d;
-    char vecNameString[64];
-    vecNameString[0] = 0;
-    if (vecSize == 1)
-        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
-    else
-        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType),
-                vecSize);
-
-    size_t elementSize = get_explicit_type_size(vecType) * vecSize;
-    log_info("Testing %s with srcLineStride = %d, dstLineStride = %d, "
-             "srcPlaneStride = %d, dstPlaneStride = %d\n",
-             vecNameString, srcLineStride, dstLineStride, srcPlaneStride,
-             dstPlaneStride);
-
-    cl_long max_local_mem_size;
-    error =
-        clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE,
-                        sizeof(max_local_mem_size), &max_local_mem_size, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");
-
-    cl_long max_global_mem_size;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
-                            sizeof(max_global_mem_size), &max_global_mem_size,
-                            NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed.");
-
-    cl_long max_alloc_size;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
-                            sizeof(max_alloc_size), &max_alloc_size, NULL);
-    test_error(error,
-               "clGetDeviceInfo for CL_DEVICE_MAX_MEM_ALLOC_SIZE failed.");
-
-    if (max_alloc_size > max_global_mem_size / 2)
-        max_alloc_size = max_global_mem_size / 2;
-
-    unsigned int num_of_compute_devices;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS,
-                            sizeof(num_of_compute_devices),
-                            &num_of_compute_devices, NULL);
-    test_error(error,
-               "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
-
-    char programSource[4096];
-    programSource[0] = 0;
-    char *programPtr;
-
-    sprintf(programSource, kernelCode,
-            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
-                               : "",
-            vecNameString, vecNameString, vecNameString, vecNameString,
-            get_explicit_type_name(vecType), vecNameString, vecNameString);
-    // log_info("program: %s\n", programSource);
-    programPtr = programSource;
-
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        (const char **)&programPtr, "test_fn");
-    test_error(error, "Unable to create testing kernel");
-
-    size_t max_workgroup_size;
-    error = clGetKernelWorkGroupInfo(
-        kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size),
-        &max_workgroup_size, NULL);
-    test_error(
-        error,
-        "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");
-
-    size_t max_local_workgroup_size[3];
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
-                            sizeof(max_local_workgroup_size),
-                            max_local_workgroup_size, NULL);
-    test_error(error,
-               "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
-
-    // Pick the minimum of the device and the kernel
-    if (max_workgroup_size > max_local_workgroup_size[0])
-        max_workgroup_size = max_local_workgroup_size[0];
-
-    size_t numElementsPerLine = 10;
-    size_t numLines = 13;
-    size_t planesCopiesPerWorkItem = 2;
-    elementSize =
-        get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
-    size_t localStorageSpacePerWorkitem = elementSize
-        * (planesCopiesPerWorkItem
-           * (numLines * numElementsPerLine
-              + numLines * (localIsDst ? dstLineStride : srcLineStride)
-              + (localIsDst ? dstPlaneStride : srcPlaneStride)));
-    size_t maxLocalWorkgroupSize =
-        (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
-
-    // Calculation can return 0 on embedded devices due to 1KB local mem limit
-    if (maxLocalWorkgroupSize == 0)
-    {
-        maxLocalWorkgroupSize = 1;
-    }
-
-    size_t localWorkgroupSize = maxLocalWorkgroupSize;
-    if (maxLocalWorkgroupSize > max_workgroup_size)
-        localWorkgroupSize = max_workgroup_size;
-
-    size_t maxTotalPlanesIn = ((max_alloc_size / elementSize) + srcPlaneStride)
-        / ((numLines * numElementsPerLine + numLines * srcLineStride)
-           + srcPlaneStride);
-    size_t maxTotalPlanesOut = ((max_alloc_size / elementSize) + dstPlaneStride)
-        / ((numLines * numElementsPerLine + numLines * dstLineStride)
-           + dstPlaneStride);
-    size_t maxTotalPlanes = (std::min)(maxTotalPlanesIn, maxTotalPlanesOut);
-    size_t maxLocalWorkgroups =
-        maxTotalPlanes / (localWorkgroupSize * planesCopiesPerWorkItem);
-
-    size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem
-        - (localIsDst ? dstPlaneStride : srcPlaneStride);
-    size_t numberOfLocalWorkgroups = (std::min)(1111, (int)maxLocalWorkgroups);
-    size_t totalPlanes =
-        numberOfLocalWorkgroups * localWorkgroupSize * planesCopiesPerWorkItem;
-    size_t inBufferSize = elementSize
-        * (totalPlanes
-               * (numLines * numElementsPerLine + numLines * srcLineStride)
-           + (totalPlanes - 1) * srcPlaneStride);
-    size_t outBufferSize = elementSize
-        * (totalPlanes
-               * (numLines * numElementsPerLine + numLines * dstLineStride)
-           + (totalPlanes - 1) * dstPlaneStride);
-    size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize;
-
-    inBuffer = (void *)malloc(inBufferSize);
-    outBuffer = (void *)malloc(outBufferSize);
-    outBufferCopy = (void *)malloc(outBufferSize);
-
-    cl_int planesCopiesPerWorkItemInt, numElementsPerLineInt, numLinesInt,
-        planesCopiesPerWorkgroup;
-    planesCopiesPerWorkItemInt = (int)planesCopiesPerWorkItem;
-    numElementsPerLineInt = (int)numElementsPerLine;
-    numLinesInt = (int)numLines;
-    planesCopiesPerWorkgroup =
-        (int)(planesCopiesPerWorkItem * localWorkgroupSize);
-
-    log_info("Global: %d, local %d, local buffer %db, global in buffer %db, "
-             "global out buffer %db, each work group will copy %d planes and "
-             "each work item item will copy %d planes.\n",
-             (int)globalWorkgroupSize, (int)localWorkgroupSize,
-             (int)localBufferSize, (int)inBufferSize, (int)outBufferSize,
-             planesCopiesPerWorkgroup, planesCopiesPerWorkItemInt);
-
-    threads[0] = globalWorkgroupSize;
-    localThreads[0] = localWorkgroupSize;
-
-    d = init_genrand(gRandomSeed);
-    generate_random_data(
-        vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer);
-    generate_random_data(
-        vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer);
-    free_mtdata(d);
-    d = NULL;
-    memcpy(outBufferCopy, outBuffer, outBufferSize);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
-                                inBuffer, &error);
-    test_error(error, "Unable to create input buffer");
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, outBufferSize,
-                                outBuffer, &error);
-    test_error(error, "Unable to create output buffer");
-
-    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 2, localBufferSize, NULL);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 3, sizeof(numElementsPerLineInt),
-                           &numElementsPerLineInt);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 4, sizeof(numLinesInt), &numLinesInt);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 5, sizeof(planesCopiesPerWorkgroup),
-                           &planesCopiesPerWorkgroup);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 6, sizeof(planesCopiesPerWorkItemInt),
-                           &planesCopiesPerWorkItemInt);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 7, sizeof(srcLineStride), &srcLineStride);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 8, sizeof(dstLineStride), &dstLineStride);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 9, sizeof(srcPlaneStride), &srcPlaneStride);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 10, sizeof(dstPlaneStride), &dstPlaneStride);
-    test_error(error, "Unable to set kernel argument");
-
-    // Enqueue
-    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
-                                   localThreads, 0, NULL, NULL);
-    test_error(error, "Unable to queue kernel");
-
-    // Read
-    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize,
-                                outBuffer, 0, NULL, NULL);
-    test_error(error, "Unable to read results");
-
-    // Verify
-    int failuresPrinted = 0;
-    // Verify
-    size_t typeSize = get_explicit_type_size(vecType) * vecSize;
-    for (int i = 0;
-         i < (int)globalWorkgroupSize * planesCopiesPerWorkItem * elementSize;
-         i += elementSize)
-    {
-        for (int j = 0; j < (int)numLines * elementSize; j += elementSize)
-        {
-            for (int k = 0; k < (int)numElementsPerLine * elementSize;
-                 k += elementSize)
-            {
-                int inIdx = i
-                        * (numLines * numElementsPerLine
-                           + numLines * srcLineStride + srcPlaneStride)
-                    + j * (numElementsPerLine + srcLineStride) + k;
-                int outIdx = i
-                        * (numLines * numElementsPerLine
-                           + numLines * dstLineStride + dstPlaneStride)
-                    + j * (numElementsPerLine + dstLineStride) + k;
-                if (memcmp(((char *)inBuffer) + inIdx,
-                           ((char *)outBuffer) + outIdx, typeSize)
-                    != 0)
-                {
-                    unsigned char *inchar = (unsigned char *)inBuffer + inIdx;
-                    unsigned char *outchar =
-                        (unsigned char *)outBuffer + outIdx;
-                    char values[4096];
-                    values[0] = 0;
-
-                    if (failuresPrinted == 0)
-                    {
-                        // Print first failure message
-                        log_error("ERROR: Results of copy did not validate!");
-                    }
-                    sprintf(values + strlen(values), "%d -> [", inIdx);
-                    for (int l = 0; l < (int)elementSize; l++)
-                        sprintf(values + strlen(values), "%2x ", inchar[l]);
-                    sprintf(values + strlen(values), "] != [");
-                    for (int l = 0; l < (int)elementSize; l++)
-                        sprintf(values + strlen(values), "%2x ", outchar[l]);
-                    sprintf(values + strlen(values), "]");
-                    log_error("%s\n", values);
-                    failuresPrinted++;
-                }
-
-                if (failuresPrinted > 5)
-                {
-                    log_error("Not printing further failures...\n");
-                    return -1;
-                }
-            }
-            if (j < (int)numLines * elementSize)
-            {
-                int outIdx = i
-                        * (numLines * numElementsPerLine
-                           + numLines * dstLineStride + dstPlaneStride)
-                    + j * (numElementsPerLine + dstLineStride)
-                    + numElementsPerLine * elementSize;
-                if (memcmp(((char *)outBuffer) + outIdx,
-                           ((char *)outBufferCopy) + outIdx,
-                           dstLineStride * elementSize)
-                    != 0)
-                {
-                    if (failuresPrinted == 0)
-                    {
-                        // Print first failure message
-                        log_error("ERROR: Results of copy did not validate!\n");
-                    }
-                    log_error(
-                        "3D copy corrupted data in output buffer in the line "
-                        "stride offset of plane %d line %d\n",
-                        i, j);
-                    failuresPrinted++;
-                }
-                if (failuresPrinted > 5)
-                {
-                    log_error("Not printing further failures...\n");
-                    return -1;
-                }
-            }
-        }
-        if (i < (int)(globalWorkgroupSize * planesCopiesPerWorkItem - 1)
-                * elementSize)
-        {
-            int outIdx = i
-                    * (numLines * numElementsPerLine + numLines * dstLineStride
-                       + dstPlaneStride)
-                + (numLines * elementSize) * (numElementsPerLine)
-                + (numLines * elementSize) * (dstLineStride);
-            if (memcmp(((char *)outBuffer) + outIdx,
-                       ((char *)outBufferCopy) + outIdx,
-                       dstPlaneStride * elementSize)
-                != 0)
-            {
-                if (failuresPrinted == 0)
-                {
-                    // Print first failure message
-                    log_error("ERROR: Results of copy did not validate!\n");
-                }
-                log_error("3D copy corrupted data in output buffer in the "
-                          "plane stride "
-                          "offset of plane %d\n",
-                          i);
-                failuresPrinted++;
-            }
-            if (failuresPrinted > 5)
-            {
-                log_error("Not printing further failures...\n");
-                return -1;
-            }
-        }
-    }
-
-    free(inBuffer);
-    free(outBuffer);
-    free(outBufferCopy);
-
-    return failuresPrinted ? -1 : 0;
-}
-
-int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, const char *kernelCode,
-                          bool localIsDst)
-{
-    ExplicitType vecType[] = {
-        kChar,  kUChar, kShort,  kUShort,          kInt, kUInt, kLong,
-        kULong, kFloat, kDouble, kNumExplicitTypes
-    };
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int smallTypesStrideSizes[] = { 0, 10, 100 };
-    unsigned int size, typeIndex, srcLineStride, dstLineStride, srcPlaneStride,
-        dstPlaneStride;
-
-    int errors = 0;
-
-    if (!is_extension_available(deviceID, "cl_khr_extended_async_copies"))
-    {
-        log_info(
-            "Device does not support extended async copies. Skipping test.\n");
-        return 0;
-    }
-
-    for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
-    {
-        if (vecType[typeIndex] == kDouble
-            && !is_extension_available(deviceID, "cl_khr_fp64"))
-            continue;
-
-        if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong)
-            && !gHasLong)
-            continue;
-
-        for (size = 0; vecSizes[size] != 0; size++)
-        {
-            if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
-                <= 2) // small type
-            {
-                for (srcLineStride = 0;
-                     srcLineStride < sizeof(smallTypesStrideSizes)
-                         / sizeof(smallTypesStrideSizes[0]);
-                     srcLineStride++)
-                {
-                    for (dstLineStride = 0;
-                         dstLineStride < sizeof(smallTypesStrideSizes)
-                             / sizeof(smallTypesStrideSizes[0]);
-                         dstLineStride++)
-                    {
-                        for (srcPlaneStride = 0;
-                             srcPlaneStride < sizeof(smallTypesStrideSizes)
-                                 / sizeof(smallTypesStrideSizes[0]);
-                             srcPlaneStride++)
-                        {
-                            for (dstPlaneStride = 0;
-                                 dstPlaneStride < sizeof(smallTypesStrideSizes)
-                                     / sizeof(smallTypesStrideSizes[0]);
-                                 dstPlaneStride++)
-                            {
-                                if (test_copy3D(
-                                        deviceID, context, queue, kernelCode,
-                                        vecType[typeIndex], vecSizes[size],
-                                        smallTypesStrideSizes[srcLineStride],
-                                        smallTypesStrideSizes[dstLineStride],
-                                        smallTypesStrideSizes[srcPlaneStride],
-                                        smallTypesStrideSizes[dstPlaneStride],
-                                        localIsDst))
-                                {
-                                    errors++;
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-            // not a small type, check only zero stride
-            else if (test_copy3D(deviceID, context, queue, kernelCode,
-                                 vecType[typeIndex], vecSizes[size], 0, 0, 0, 0,
-                                 localIsDst))
-            {
-                errors++;
-            }
-        }
-    }
-    if (errors) return -1;
-    return 0;
-}
-
-int test_async_copy_global_to_local3D(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements)
-{
-    return test_copy3D_all_types(deviceID, context, queue,
-                                 async_global_to_local_kernel3D, true);
-}
-
-int test_async_copy_local_to_global3D(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements)
-{
-    return test_copy3D_all_types(deviceID, context, queue,
-                                 async_local_to_global_kernel3D, false);
-}

diff --git a/test_conformance/basic/test_async_copy_fence.cpp b/test_conformance/basic/test_async_copy_fence.cpp
deleted file mode 100644
index 43245da..0000000
--- a/test_conformance/basic/test_async_copy_fence.cpp
+++ /dev/null

@@ -1,812 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "../../test_common/harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include "../../test_common/harness/conversions.h"
-#include "procs.h"
-
-static const char *import_after_export_aliased_local_kernel =
-    "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *exportSrc, __global %s "
-    "*exportDst,\n"
-    "                       const __global %s *importSrc, __global %s "
-    "*importDst,\n"
-    "                       __local %s *localBuffer, /* there isn't another "
-    "__local %s local buffer since export src and import dst are aliased*/\n"
-    "                       int exportSrcLocalSize, int "
-    "exportCopiesPerWorkItem,\n"
-    "                       int importSrcLocalSize, int "
-    "importCopiesPerWorkItem )\n"
-    "{\n"
-    "    int i;\n"
-    "    int localImportOffset = exportSrcLocalSize - importSrcLocalSize;\n"
-    // Zero the local storage first
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        localBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] = "
-    "(%s)(%s)0;\n"
-    "    }\n"
-    "    // no need to set another local buffer values to (%s)(%s)0 since "
-    "export src and import dst are aliased (use the same buffer)\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the export and import
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        localBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] = "
-    "exportSrc[ get_global_id( 0 )*exportCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    // Do this to verify all kernels are done copying to the local buffer before
-    // we try the export and import
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t events;\n"
-    "    events = async_work_group_copy((__global "
-    "%s*)(exportDst+exportSrcLocalSize*get_group_id(0)), (__local const "
-    "%s*)localBuffer, (size_t)exportSrcLocalSize, 0 );\n"
-    "    async_work_group_copy_fence( CLK_LOCAL_MEM_FENCE );\n"
-    "    events = async_work_group_copy( (__local "
-    "%s*)(localBuffer+localImportOffset), (__global const "
-    "%s*)(importSrc+importSrcLocalSize*get_group_id(0)), "
-    "(size_t)importSrcLocalSize, events );\n"
-    // Wait for the export and import to complete, then verify by manually
-    // copying to the dest
-    "    wait_group_events( 2, &events );\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importDst[ get_global_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "(localBuffer+localImportOffset)[ get_local_id( 0 "
-    ")*importCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    "}\n";
-
-static const char *import_after_export_aliased_global_kernel =
-    "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *exportSrc, __global %s "
-    "*exportDstImportSrc,\n"
-    "                       __global %s *importDst, /* there isn't a dedicated "
-    "__global %s buffer for import src since export dst and import src are "
-    "aliased*/\n"
-    "                       __local %s *exportLocalBuffer, __local %s "
-    "*importLocalBuffer,\n"
-    "                       int exportSrcLocalSize, int "
-    "exportCopiesPerWorkItem,\n"
-    "                       int importSrcLocalSize, int "
-    "importCopiesPerWorkItem )\n"
-    "{\n"
-    "    int i;\n"
-    // Zero the local storage first
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        exportLocalBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] "
-    "= (%s)(%s)0;\n"
-    "    }\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importLocalBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ] "
-    "= (%s)(%s)0;\n"
-    "    }\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the export and import
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        exportLocalBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] "
-    "= exportSrc[ get_global_id( 0 )*exportCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    // Do this to verify all kernels are done copying to the local buffer before
-    // we try the export and import
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t events;\n"
-    "    events = async_work_group_copy((__global "
-    "%s*)(exportDstImportSrc+exportSrcLocalSize*get_group_id(0)), (__local "
-    "const %s*)exportLocalBuffer, (size_t)exportSrcLocalSize, 0 );\n"
-    "    async_work_group_copy_fence( CLK_GLOBAL_MEM_FENCE );\n"
-    "    events = async_work_group_copy( (__local %s*)importLocalBuffer, "
-    "(__global const "
-    "%s*)(exportDstImportSrc+exportSrcLocalSize*get_group_id(0) + "
-    "(exportSrcLocalSize - importSrcLocalSize)), (size_t)importSrcLocalSize, "
-    "events );\n"
-    // Wait for the export and import to complete, then verify by manually
-    // copying to the dest
-    "    wait_group_events( 2, &events );\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importDst[ get_global_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "importLocalBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    "}\n";
-
-static const char *import_after_export_aliased_global_and_local_kernel =
-    "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *exportSrc, __global %s "
-    "*exportDstImportSrc,\n"
-    "                       __global %s *importDst, /* there isn't a dedicated "
-    "__global %s buffer for import src since export dst and import src are "
-    "aliased*/\n"
-    "                       __local %s *localBuffer, /* there isn't another "
-    "__local %s local buffer since export src and import dst are aliased*/\n"
-    "                       int exportSrcLocalSize, int "
-    "exportCopiesPerWorkItem,\n"
-    "                       int importSrcLocalSize, int "
-    "importCopiesPerWorkItem )\n"
-    "{\n"
-    "    int i;\n"
-    "    int localImportOffset = exportSrcLocalSize - importSrcLocalSize;\n"
-    // Zero the local storage first
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        localBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] = "
-    "(%s)(%s)0;\n"
-    "    }\n"
-    "    // no need to set another local buffer values to (%s)(%s)0 since "
-    "export src and import dst are aliased (use the same buffer)\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the export and import
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        localBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] = "
-    "exportSrc[ get_global_id( 0 )*exportCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    // Do this to verify all kernels are done copying to the local buffer before
-    // we try the export and import
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t events;\n"
-    "    events = async_work_group_copy((__global "
-    "%s*)(exportDstImportSrc+exportSrcLocalSize*get_group_id(0)), (__local "
-    "const %s*)localBuffer, (size_t)exportSrcLocalSize, 0 );\n"
-    "    async_work_group_copy_fence( CLK_GLOBAL_MEM_FENCE | "
-    "CLK_LOCAL_MEM_FENCE );\n"
-    "    events = async_work_group_copy( (__local "
-    "%s*)(localBuffer+localImportOffset), (__global const "
-    "%s*)(exportDstImportSrc+exportSrcLocalSize*get_group_id(0) + "
-    "(exportSrcLocalSize - importSrcLocalSize)), (size_t)importSrcLocalSize, "
-    "events );\n"
-    // Wait for the export and import to complete, then verify by manually
-    // copying to the dest
-    "    wait_group_events( 2, &events );\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importDst[ get_global_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "(localBuffer+localImportOffset)[ get_local_id( 0 "
-    ")*importCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    "}\n";
-
-static const char *export_after_import_aliased_local_kernel =
-    "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *importSrc, __global %s "
-    "*importDst,\n"
-    "                       const __global %s *exportDst, /* there isn't a "
-    "dedicated __global %s buffer for export src since the local memory is "
-    "aliased, so the export src is taken from it */\n"
-    "                       __local %s *localBuffer, /* there isn't another "
-    "__local %s local buffer since import dst and export src are aliased*/\n"
-    "                       int importSrcLocalSize, int "
-    "importCopiesPerWorkItem,\n"
-    "                       int exportSrcLocalSize, int "
-    "exportCopiesPerWorkItem )\n"
-    "{\n"
-    "    int i;\n"
-    // Zero the local storage first
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        localBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "(%s)(%s)0;\n"
-    "    }\n"
-    "    // no need to set another local buffer values to (%s)(%s)0 since "
-    "import dst and export src are aliased (use the same buffer)\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the import and export
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t events;\n"
-    "    events = async_work_group_copy( (__local %s*)localBuffer, (__global "
-    "const %s*)(importSrc+importSrcLocalSize*get_group_id(0)), "
-    "(size_t)importSrcLocalSize, events );\n"
-    "    async_work_group_copy_fence( CLK_LOCAL_MEM_FENCE );\n"
-    "    events = async_work_group_copy((__global "
-    "%s*)(exportDst+exportSrcLocalSize*get_group_id(0)), (__local const "
-    "%s*)(localBuffer + (importSrcLocalSize - exportSrcLocalSize)), "
-    "(size_t)exportSrcLocalSize, 0 );\n"
-    // Wait for the import and export to complete, then verify by manually
-    // copying to the dest
-    "    wait_group_events( 2, &events );\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importDst[ get_global_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "localBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    "}\n";
-
-static const char *export_after_import_aliased_global_kernel =
-    "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *importSrcExportDst, __global %s "
-    "*importDst,\n"
-    "                       const __global %s *exportSrc,\n"
-    "                       /* there isn't a dedicated __global %s buffer for "
-    "export dst since import src and export dst are aliased */\n"
-    "                       __local %s *importLocalBuffer, __local %s "
-    "*exportLocalBuffer,\n"
-    "                       int importSrcLocalSize, int "
-    "importCopiesPerWorkItem,\n"
-    "                       int exportSrcLocalSize, int "
-    "exportCopiesPerWorkItem )\n"
-    "{\n"
-    "    int i;\n"
-    // Zero the local storage first
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importLocalBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ] "
-    "= (%s)(%s)0;\n"
-    "    }\n"
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        exportLocalBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] "
-    "= (%s)(%s)0;\n"
-    "    }\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the import and export
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        exportLocalBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] "
-    "= exportSrc[ get_global_id( 0 )*exportCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    // Do this to verify all kernels are done copying to the local buffer before
-    // we try the import and export
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t events;\n"
-    "    events = async_work_group_copy( (__local %s*)importLocalBuffer, "
-    "(__global const "
-    "%s*)(importSrcExportDst+importSrcLocalSize*get_group_id(0)), "
-    "(size_t)importSrcLocalSize, 0 );\n"
-    "    async_work_group_copy_fence( CLK_GLOBAL_MEM_FENCE );\n"
-    "    events = async_work_group_copy((__global "
-    "%s*)(importSrcExportDst+importSrcLocalSize*get_group_id(0) + "
-    "(importSrcLocalSize - exportSrcLocalSize)), (__local const "
-    "%s*)exportLocalBuffer, (size_t)exportSrcLocalSize, events );\n"
-    // Wait for the import and export to complete, then verify by manually
-    // copying to the dest
-    "    wait_group_events( 2, &events );\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importDst[ get_global_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "importLocalBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    "}\n";
-
-static const char *export_after_import_aliased_global_and_local_kernel =
-    "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *importSrcExportDst, __global %s "
-    "*importDst,\n"
-    "                       /* there isn't a dedicated __global %s buffer for "
-    "export src since the local memory is aliased, so the export src is taken "
-    "from it */\n"
-    "                       /* there isn't a dedicated __global %s buffer for "
-    "export dst since import src and export dst are aliased */\n"
-    "                       __local %s *localBuffer, /* there isn't another "
-    "__local %s local buffer since import dst and export src are aliased*/\n"
-    "                       int importSrcLocalSize, int "
-    "importCopiesPerWorkItem,\n"
-    "                       int exportSrcLocalSize, int "
-    "exportCopiesPerWorkItem )\n"
-    "{\n"
-    "    int i;\n"
-    // Zero the local storage first
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        localBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "(%s)(%s)0;\n"
-    "    }\n"
-    "    // no need to set another local buffer values to (%s)(%s)0 since "
-    "import dst and export src are aliased (use the same buffer)\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the import and export
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t events;\n"
-    "    events = async_work_group_copy( (__local %s*)localBuffer, (__global "
-    "const %s*)(importSrcExportDst+importSrcLocalSize*get_group_id(0)), "
-    "(size_t)importSrcLocalSize, 0 );\n"
-    "    async_work_group_copy_fence( CLK_GLOBAL_MEM_FENCE | "
-    "CLK_LOCAL_MEM_FENCE );\n"
-    "    events = async_work_group_copy((__global "
-    "%s*)(importSrcExportDst+importSrcLocalSize*get_group_id(0) + "
-    "(importSrcLocalSize - exportSrcLocalSize)), (__local const "
-    "%s*)(localBuffer + (importSrcLocalSize - exportSrcLocalSize)), "
-    "(size_t)exportSrcLocalSize, events );\n"
-    // Wait for the import and export to complete, then verify by manually
-    // copying to the dest
-    "    wait_group_events( 2, &events );\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importDst[ get_global_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "localBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    "}\n";
-
-int test_copy_fence(cl_device_id deviceID, cl_context context,
-                    cl_command_queue queue, const char *kernelCode,
-                    ExplicitType vecType, int vecSize, bool export_after_import,
-                    bool aliased_local_mem, bool aliased_global_mem)
-{
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[4];
-    size_t threads[1], localThreads[1];
-    void *transaction1InBuffer, *transaction1OutBuffer, *transaction2InBuffer,
-        *transaction2OutBuffer;
-    MTdata d;
-    bool transaction1DstIsTransaction2Src =
-        (aliased_global_mem && !export_after_import)
-        || (aliased_local_mem && export_after_import);
-    bool transaction1SrcIsTransaction2Dst =
-        aliased_global_mem && export_after_import;
-    char vecNameString[64];
-    vecNameString[0] = 0;
-    if (vecSize == 1)
-        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
-    else
-        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType),
-                vecSize);
-
-    size_t elementSize = get_explicit_type_size(vecType) * vecSize;
-    log_info("Testing %s\n", vecNameString);
-
-    cl_long max_local_mem_size;
-    error =
-        clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE,
-                        sizeof(max_local_mem_size), &max_local_mem_size, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");
-
-    unsigned int num_of_compute_devices;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS,
-                            sizeof(num_of_compute_devices),
-                            &num_of_compute_devices, NULL);
-    test_error(error,
-               "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
-
-    char programSource[4096];
-    programSource[0] = 0;
-    char *programPtr;
-
-    sprintf(programSource, kernelCode,
-            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
-                               : "",
-            vecNameString, vecNameString, vecNameString, vecNameString,
-            vecNameString, vecNameString, vecNameString,
-            get_explicit_type_name(vecType), vecNameString,
-            get_explicit_type_name(vecType), vecNameString, vecNameString,
-            vecNameString, vecNameString);
-    // log_info("program: %s\n", programSource);
-    programPtr = programSource;
-
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        (const char **)&programPtr, "test_fn");
-    test_error(error, "Unable to create testing kernel");
-
-    size_t max_workgroup_size;
-    error = clGetKernelWorkGroupInfo(
-        kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size),
-        &max_workgroup_size, NULL);
-    test_error(
-        error,
-        "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");
-
-    size_t max_local_workgroup_size[3];
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
-                            sizeof(max_local_workgroup_size),
-                            max_local_workgroup_size, NULL);
-    test_error(error,
-               "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
-
-    // Pick the minimum of the device and the kernel
-    if (max_workgroup_size > max_local_workgroup_size[0])
-        max_workgroup_size = max_local_workgroup_size[0];
-
-    size_t transaction1NumberOfCopiesPerWorkitem = 13;
-    size_t transaction2NumberOfCopiesPerWorkitem = 2;
-    elementSize =
-        get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
-    size_t localStorageSpacePerWorkitem =
-        transaction1NumberOfCopiesPerWorkitem * elementSize
-        + (aliased_local_mem
-               ? 0
-               : transaction2NumberOfCopiesPerWorkitem * elementSize);
-    size_t maxLocalWorkgroupSize =
-        (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
-
-    // Calculation can return 0 on embedded devices due to 1KB local mem limit
-    if (maxLocalWorkgroupSize == 0)
-    {
-        maxLocalWorkgroupSize = 1;
-    }
-
-    size_t localWorkgroupSize = maxLocalWorkgroupSize;
-    if (maxLocalWorkgroupSize > max_workgroup_size)
-        localWorkgroupSize = max_workgroup_size;
-
-    size_t transaction1LocalBufferSize = localWorkgroupSize * elementSize
-        * transaction1NumberOfCopiesPerWorkitem;
-    size_t transaction2LocalBufferSize = localWorkgroupSize * elementSize
-        * transaction2NumberOfCopiesPerWorkitem; // irrelevant if
-                                                 // aliased_local_mem
-    size_t numberOfLocalWorkgroups = 1111;
-    size_t transaction1GlobalBufferSize =
-        numberOfLocalWorkgroups * transaction1LocalBufferSize;
-    size_t transaction2GlobalBufferSize =
-        numberOfLocalWorkgroups * transaction2LocalBufferSize;
-    size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize;
-
-    transaction1InBuffer = (void *)malloc(transaction1GlobalBufferSize);
-    transaction1OutBuffer = (void *)malloc(transaction1GlobalBufferSize);
-    transaction2InBuffer = (void *)malloc(transaction2GlobalBufferSize);
-    transaction2OutBuffer = (void *)malloc(transaction2GlobalBufferSize);
-    memset(transaction1OutBuffer, 0, transaction1GlobalBufferSize);
-    memset(transaction2OutBuffer, 0, transaction2GlobalBufferSize);
-
-    cl_int transaction1CopiesPerWorkitemInt, transaction1CopiesPerWorkgroup,
-        transaction2CopiesPerWorkitemInt, transaction2CopiesPerWorkgroup;
-    transaction1CopiesPerWorkitemInt =
-        (int)transaction1NumberOfCopiesPerWorkitem;
-    transaction1CopiesPerWorkgroup =
-        (int)(transaction1NumberOfCopiesPerWorkitem * localWorkgroupSize);
-    transaction2CopiesPerWorkitemInt =
-        (int)transaction2NumberOfCopiesPerWorkitem;
-    transaction2CopiesPerWorkgroup =
-        (int)(transaction2NumberOfCopiesPerWorkitem * localWorkgroupSize);
-
-    log_info(
-        "Global: %d, local %d. 1st Transaction: local buffer %db, global "
-        "buffer %db, each work group will copy %d elements and each work "
-        "item item will copy %d elements. 2nd Transaction: local buffer "
-        "%db, global buffer %db, each work group will copy %d elements and "
-        "each work item will copy %d elements\n",
-        (int)globalWorkgroupSize, (int)localWorkgroupSize,
-        (int)transaction1LocalBufferSize, (int)transaction1GlobalBufferSize,
-        transaction1CopiesPerWorkgroup, transaction1CopiesPerWorkitemInt,
-        (int)transaction2LocalBufferSize, (int)transaction2GlobalBufferSize,
-        transaction2CopiesPerWorkgroup, transaction2CopiesPerWorkitemInt);
-
-    threads[0] = globalWorkgroupSize;
-    localThreads[0] = localWorkgroupSize;
-
-    d = init_genrand(gRandomSeed);
-    generate_random_data(
-        vecType, transaction1GlobalBufferSize / get_explicit_type_size(vecType),
-        d, transaction1InBuffer);
-    if (!transaction1DstIsTransaction2Src)
-    {
-        generate_random_data(vecType,
-                             transaction2GlobalBufferSize
-                                 / get_explicit_type_size(vecType),
-                             d, transaction2InBuffer);
-    }
-    free_mtdata(d);
-    d = NULL;
-
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                transaction1GlobalBufferSize,
-                                transaction1InBuffer, &error);
-    test_error(error, "Unable to create input buffer");
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                transaction1GlobalBufferSize,
-                                transaction1OutBuffer, &error);
-    test_error(error, "Unable to create output buffer");
-    if (!transaction1DstIsTransaction2Src)
-    {
-        streams[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                    transaction2GlobalBufferSize,
-                                    transaction2InBuffer, &error);
-        test_error(error, "Unable to create input buffer");
-    }
-    if (!transaction1SrcIsTransaction2Dst)
-    {
-        streams[3] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                    transaction2GlobalBufferSize,
-                                    transaction2OutBuffer, &error);
-        test_error(error, "Unable to create output buffer");
-    }
-
-    cl_uint argIndex = 0;
-    error = clSetKernelArg(kernel, argIndex, sizeof(streams[0]), &streams[0]);
-    test_error(error, "Unable to set kernel argument");
-    ++argIndex;
-    error = clSetKernelArg(kernel, argIndex, sizeof(streams[1]), &streams[1]);
-    test_error(error, "Unable to set kernel argument");
-    ++argIndex;
-    if (!transaction1DstIsTransaction2Src)
-    {
-        error =
-            clSetKernelArg(kernel, argIndex, sizeof(streams[2]), &streams[2]);
-        test_error(error, "Unable to set kernel argument");
-        ++argIndex;
-    }
-    if (!transaction1SrcIsTransaction2Dst)
-    {
-        error =
-            clSetKernelArg(kernel, argIndex, sizeof(streams[3]), &streams[3]);
-        test_error(error, "Unable to set kernel argument");
-        ++argIndex;
-    }
-    error = clSetKernelArg(kernel, argIndex, transaction1LocalBufferSize, NULL);
-    test_error(error, "Unable to set kernel argument");
-    ++argIndex;
-    if (!aliased_local_mem)
-    {
-        error =
-            clSetKernelArg(kernel, argIndex, transaction2LocalBufferSize, NULL);
-        test_error(error, "Unable to set kernel argument");
-        ++argIndex;
-    }
-    error =
-        clSetKernelArg(kernel, argIndex, sizeof(transaction1CopiesPerWorkgroup),
-                       &transaction1CopiesPerWorkgroup);
-    test_error(error, "Unable to set kernel argument");
-    ++argIndex;
-    error = clSetKernelArg(kernel, argIndex,
-                           sizeof(transaction1CopiesPerWorkitemInt),
-                           &transaction1CopiesPerWorkitemInt);
-    test_error(error, "Unable to set kernel argument");
-    ++argIndex;
-    error =
-        clSetKernelArg(kernel, argIndex, sizeof(transaction2CopiesPerWorkgroup),
-                       &transaction2CopiesPerWorkgroup);
-    test_error(error, "Unable to set kernel argument");
-    ++argIndex;
-    error = clSetKernelArg(kernel, argIndex,
-                           sizeof(transaction2CopiesPerWorkitemInt),
-                           &transaction2CopiesPerWorkitemInt);
-    test_error(error, "Unable to set kernel argument");
-
-    // Enqueue
-    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
-                                   localThreads, 0, NULL, NULL);
-    test_error(error, "Unable to queue kernel");
-
-    // Read
-    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
-                                transaction1GlobalBufferSize,
-                                transaction1OutBuffer, 0, NULL, NULL);
-    test_error(error, "Unable to read results");
-    if (transaction1DstIsTransaction2Src)
-    {
-        for (size_t idx = 0; idx < numberOfLocalWorkgroups; idx++)
-        {
-            memcpy(
-                (void *)((unsigned char *)transaction2InBuffer
-                         + idx * transaction2CopiesPerWorkgroup * elementSize),
-                (const void *)((unsigned char *)transaction1OutBuffer
-                               + (idx * transaction1CopiesPerWorkgroup
-                                  + (transaction1CopiesPerWorkgroup
-                                     - transaction2CopiesPerWorkgroup))
-                                   * elementSize),
-                (size_t)transaction2CopiesPerWorkgroup * elementSize);
-        }
-    }
-    if (transaction1SrcIsTransaction2Dst)
-    {
-        void *transaction1SrcBuffer =
-            (void *)malloc(transaction1GlobalBufferSize);
-        error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0,
-                                    transaction1GlobalBufferSize,
-                                    transaction1SrcBuffer, 0, NULL, NULL);
-        test_error(error, "Unable to read results");
-        for (size_t idx = 0; idx < numberOfLocalWorkgroups; idx++)
-        {
-            memcpy(
-                (void *)((unsigned char *)transaction2OutBuffer
-                         + idx * transaction2CopiesPerWorkgroup * elementSize),
-                (const void *)((unsigned char *)transaction1SrcBuffer
-                               + (idx * transaction1CopiesPerWorkgroup
-                                  + (transaction1CopiesPerWorkgroup
-                                     - transaction2CopiesPerWorkgroup))
-                                   * elementSize),
-                (size_t)transaction2CopiesPerWorkgroup * elementSize);
-        }
-        free(transaction1SrcBuffer);
-    }
-    else
-    {
-        error = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0,
-                                    transaction2GlobalBufferSize,
-                                    transaction2OutBuffer, 0, NULL, NULL);
-        test_error(error, "Unable to read results");
-    }
-
-    // Verify
-    int failuresPrinted = 0;
-    if (memcmp(transaction1InBuffer, transaction1OutBuffer,
-               transaction1GlobalBufferSize)
-        != 0)
-    {
-        size_t typeSize = get_explicit_type_size(vecType) * vecSize;
-        unsigned char *inchar = (unsigned char *)transaction1InBuffer;
-        unsigned char *outchar = (unsigned char *)transaction1OutBuffer;
-        for (int i = 0; i < (int)transaction1GlobalBufferSize;
-             i += (int)elementSize)
-        {
-            if (memcmp(((char *)inchar) + i, ((char *)outchar) + i, typeSize)
-                != 0)
-            {
-                char values[4096];
-                values[0] = 0;
-                if (failuresPrinted == 0)
-                {
-                    // Print first failure message
-                    log_error("ERROR: Results of 1st transaction did not "
-                              "validate!\n");
-                }
-                sprintf(values + strlen(values), "%d -> [", i);
-                for (int j = 0; j < (int)elementSize; j++)
-                    sprintf(values + strlen(values), "%2x ", inchar[i + j]);
-                sprintf(values + strlen(values), "] != [");
-                for (int j = 0; j < (int)elementSize; j++)
-                    sprintf(values + strlen(values), "%2x ", outchar[i + j]);
-                sprintf(values + strlen(values), "]");
-                log_error("%s\n", values);
-                failuresPrinted++;
-            }
-
-            if (failuresPrinted > 5)
-            {
-                log_error("Not printing further failures...\n");
-                break;
-            }
-        }
-    }
-    if (memcmp(transaction2InBuffer, transaction2OutBuffer,
-               transaction2GlobalBufferSize)
-        != 0)
-    {
-        size_t typeSize = get_explicit_type_size(vecType) * vecSize;
-        unsigned char *inchar = (unsigned char *)transaction2InBuffer;
-        unsigned char *outchar = (unsigned char *)transaction2OutBuffer;
-        for (int i = 0; i < (int)transaction2GlobalBufferSize;
-             i += (int)elementSize)
-        {
-            if (memcmp(((char *)inchar) + i, ((char *)outchar) + i, typeSize)
-                != 0)
-            {
-                char values[4096];
-                values[0] = 0;
-                if (failuresPrinted == 0)
-                {
-                    // Print first failure message
-                    log_error("ERROR: Results of 2nd transaction did not "
-                              "validate!\n");
-                }
-                sprintf(values + strlen(values), "%d -> [", i);
-                for (int j = 0; j < (int)elementSize; j++)
-                    sprintf(values + strlen(values), "%2x ", inchar[i + j]);
-                sprintf(values + strlen(values), "] != [");
-                for (int j = 0; j < (int)elementSize; j++)
-                    sprintf(values + strlen(values), "%2x ", outchar[i + j]);
-                sprintf(values + strlen(values), "]");
-                log_error("%s\n", values);
-                failuresPrinted++;
-            }
-
-            if (failuresPrinted > 5)
-            {
-                log_error("Not printing further failures...\n");
-                break;
-            }
-        }
-    }
-
-    free(transaction1InBuffer);
-    free(transaction1OutBuffer);
-    free(transaction2InBuffer);
-    free(transaction2OutBuffer);
-
-    return failuresPrinted ? -1 : 0;
-}
-
-int test_copy_fence_all_types(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, const char *kernelCode,
-                              bool export_after_import, bool aliased_local_mem,
-                              bool aliased_global_mem)
-{
-    ExplicitType vecType[] = {
-        kChar,  kUChar, kShort,  kUShort,          kInt, kUInt, kLong,
-        kULong, kFloat, kDouble, kNumExplicitTypes
-    };
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int size, typeIndex;
-
-    int errors = 0;
-
-    if (!is_extension_available(deviceID, "cl_khr_async_work_group_copy_fence"))
-    {
-        log_info(
-            "Device does not support extended async copies fence. Skipping "
-            "test.\n");
-        return 0;
-    }
-
-    for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
-    {
-        if (vecType[typeIndex] == kDouble
-            && !is_extension_available(deviceID, "cl_khr_fp64"))
-            continue;
-
-        if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong)
-            && !gHasLong)
-            continue;
-
-        for (size = 0; vecSizes[size] != 0; size++)
-        {
-            if (test_copy_fence(deviceID, context, queue, kernelCode,
-                                vecType[typeIndex], vecSizes[size],
-                                export_after_import, aliased_local_mem,
-                                aliased_global_mem))
-            {
-                errors++;
-            }
-        }
-    }
-    if (errors) return -1;
-    return 0;
-}
-
-int test_async_work_group_copy_fence_import_after_export_aliased_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements)
-{
-    return test_copy_fence_all_types(deviceID, context, queue,
-                                     import_after_export_aliased_local_kernel,
-                                     false, true, false);
-}
-
-int test_async_work_group_copy_fence_import_after_export_aliased_global(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements)
-{
-    return test_copy_fence_all_types(deviceID, context, queue,
-                                     import_after_export_aliased_global_kernel,
-                                     false, false, true);
-}
-
-int test_async_work_group_copy_fence_import_after_export_aliased_global_and_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements)
-{
-    return test_copy_fence_all_types(
-        deviceID, context, queue,
-        import_after_export_aliased_global_and_local_kernel, false, true, true);
-}
-
-int test_async_work_group_copy_fence_export_after_import_aliased_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements)
-{
-    return test_copy_fence_all_types(deviceID, context, queue,
-                                     export_after_import_aliased_local_kernel,
-                                     true, true, false);
-}
-
-int test_async_work_group_copy_fence_export_after_import_aliased_global(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements)
-{
-    return test_copy_fence_all_types(deviceID, context, queue,
-                                     export_after_import_aliased_global_kernel,
-                                     true, false, true);
-}
-
-int test_async_work_group_copy_fence_export_after_import_aliased_global_and_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements)
-{
-    return test_copy_fence_all_types(
-        deviceID, context, queue,
-        export_after_import_aliased_global_and_local_kernel, true, true, true);
-}

diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp
index c456f38..fe76c84 100644
--- a/test_conformance/basic/test_async_strided_copy.cpp
+++ b/test_conformance/basic/test_async_strided_copy.cpp

@@ -215,8 +215,6 @@
                 sprintf(values + strlen( values), "%2x ", outchar[j]);
             sprintf(values + strlen(values), "]");
             log_error("%s\n", values);
-            free(inBuffer);
-            free(outBuffer);
             return -1;
         }
     }

diff --git a/test_conformance/basic/test_barrier.cpp b/test_conformance/basic/test_barrier.cpp
index d20af14..822b8eb 100644
--- a/test_conformance/basic/test_barrier.cpp
+++ b/test_conformance/basic/test_barrier.cpp

@@ -108,15 +108,11 @@
     input_ptr = (int*)malloc(sizeof(int) * num_elements);
     output_ptr = (int*)malloc(sizeof(int));
 
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err);
     test_error(err, "clCreateBuffer failed.");
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int), NULL, &err);
     test_error(err, "clCreateBuffer failed.");
-    streams[2] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_int) * max_threadgroup_size, NULL, &err);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * max_threadgroup_size, NULL, &err);
     test_error(err, "clCreateBuffer failed.");
 
     d = init_genrand( gRandomSeed );

diff --git a/test_conformance/basic/test_basic_parameter_types.cpp b/test_conformance/basic/test_basic_parameter_types.cpp
index 6e99d46..886da6a 100644
--- a/test_conformance/basic/test_basic_parameter_types.cpp
+++ b/test_conformance/basic/test_basic_parameter_types.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -23,381 +23,279 @@
 
 #include "procs.h"
 
-const char *kernel_code = R"(
-__kernel void test_kernel(
-char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,
-__global float%s *result)
+const char *kernel_code =
+"__kernel void test_kernel(\n"
+"char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n"
+"__global float%s *result)\n"
+"{\n"
+"  result[0] = %s(c);\n"
+"  result[1] = %s(uc);\n"
+"  result[2] = %s(s);\n"
+"  result[3] = %s(us);\n"
+"  result[4] = %s(i);\n"
+"  result[5] = %s(ui);\n"
+"  result[6] = f;\n"
+"}\n";
+
+const char *kernel_code_long =
+"__kernel void test_kernel_long(\n"
+"long%s l, ulong%s ul,\n"
+"__global float%s *result)\n"
+"{\n"
+"  result[0] = %s(l);\n"
+"  result[1] = %s(ul);\n"
+"}\n";
+
+int test_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
 {
-    result[0] = %s(c);
-    result[1] = %s(uc);
-    result[2] = %s(s);
-    result[3] = %s(us);
-    result[4] = %s(i);
-    result[5] = %s(ui);
-    result[6] = f;
-})";
+  clMemWrapper results;
+  int error;
+  size_t global[3] = {1, 1, 1};
+  float results_back[2*16];
+  int count, index;
+  const char* types[] = { "long", "ulong" };
+  char kernel_string[8192];
+  int sizes[] = {1, 2, 4, 8, 16};
+  const char* size_strings[] = {"", "2", "4", "8", "16"};
+  float expected;
+  int total_errors = 0;
+  int size_to_test;
+  char *ptr;
+  char convert_string[1024];
+  size_t max_parameter_size;
 
-const char *kernel_code_long = R"(
-__kernel void test_kernel_long(
-long%s l, ulong%s ul,
-__global float%s *result)
-{
-    result[0] = %s(l);
-    result[1] = %s(ul);
-})";
+  // We don't really care about the contents since we're just testing that the types work.
+  cl_long l[16]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+  cl_ulong ul[16]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
 
-int test_parameter_types_long(cl_device_id device, cl_context context,
-                              cl_command_queue queue, int num_elements)
-{
-    clMemWrapper results;
-    int error;
-    size_t global[3] = { 1, 1, 1 };
-    float results_back[2 * 16];
-    int count, index;
-    const char *types[] = { "long", "ulong" };
-    char kernel_string[8192];
-    int sizes[] = { 1, 2, 4, 8, 16 };
-    const char *size_strings[] = { "", "2", "4", "8", "16" };
-    float expected;
-    int total_errors = 0;
-    int size_to_test;
-    char *ptr;
-    char convert_string[1024];
-    size_t max_parameter_size;
+  // Calculate how large our paramter size is to the kernel
+  size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong);
 
-    // We don't really care about the contents since we're just testing that the
-    // types work.
-    cl_long l[16] = { -21, -1, 2,  -3,  4,  -5,  6,  -7,
-                      8,   -9, 10, -11, 12, -13, 14, -15 };
-    cl_ulong ul[16] = { 22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+  // Init our strings.
+  kernel_string[0] = '\0';
+  convert_string[0] = '\0';
 
-    // Calculate how large our paramter size is to the kernel
-    size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong);
+  // Get the maximum parameter size allowed
+  error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
+    test_error( error, "Unable to get max parameter size from device" );
 
-    // Init our strings.
-    kernel_string[0] = '\0';
-    convert_string[0] = '\0';
+  // Create the results buffer
+  results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &error);
+  test_error(error, "clCreateBuffer failed");
 
-    // Get the maximum parameter size allowed
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_MAX_PARAMETER_SIZE,
-                        sizeof(max_parameter_size), &max_parameter_size, NULL);
-    test_error(error, "Unable to get max parameter size from device");
+  // Go over all the vector sizes
+  for (size_to_test = 0; size_to_test < 5; size_to_test++) {
+    clProgramWrapper program;
+    clKernelWrapper kernel;
 
-    // Create the results buffer
-    results = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                             sizeof(cl_float) * 2 * 16, NULL, &error);
-    test_error(error, "clCreateBuffer failed");
-
-    // Go over all the vector sizes
-    for (size_to_test = 0; size_to_test < 5; size_to_test++)
-    {
-        clProgramWrapper program;
-        clKernelWrapper kernel;
-
-        size_t total_parameter_size =
-            parameter_size * sizes[size_to_test] + sizeof(cl_mem);
-        if (total_parameter_size > max_parameter_size)
-        {
-            log_info(
-                "Can not test with vector size %d because it would exceed the "
-                "maximum allowed parameter size to the kernel. (%d > %d)\n",
-                (int)sizes[size_to_test], (int)total_parameter_size,
-                (int)max_parameter_size);
-            continue;
-        }
-
-        log_info("Testing vector size %d\n", sizes[size_to_test]);
-
-        // If size is > 1, then we need a explicit convert call.
-        if (sizes[size_to_test] > 1)
-        {
-            sprintf(convert_string, "convert_float%s",
-                    size_strings[size_to_test]);
-        }
-        else
-        {
-            sprintf(convert_string, " ");
-        }
-
-        // Build the kernel
-        sprintf(kernel_string, kernel_code_long, size_strings[size_to_test],
-                size_strings[size_to_test], size_strings[size_to_test],
-                convert_string, convert_string);
-
-        ptr = kernel_string;
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            (const char **)&ptr,
-                                            "test_kernel_long");
-        test_error(error, "create single kernel failed");
-
-        // Set the arguments
-        for (count = 0; count < 2; count++)
-        {
-            switch (count)
-            {
-                case 0:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_long) * sizes[size_to_test],
-                        &l);
-                    break;
-                case 1:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_ulong) * sizes[size_to_test],
-                        &ul);
-                    break;
-                default: log_error("Test error"); break;
-            }
-            if (error)
-                log_error("Setting kernel arg %d %s%s: ", count, types[count],
-                          size_strings[size_to_test]);
-            test_error(error, "clSetKernelArgs failed");
-        }
-        error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results);
-        test_error(error, "clSetKernelArgs failed");
-
-        // Execute
-        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0,
-                                       NULL, NULL);
-        test_error(error, "clEnqueueNDRangeKernel failed");
-
-        // Read back the results
-        error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0,
-                                    sizeof(cl_float) * 2 * 16, results_back, 0,
-                                    NULL, NULL);
-        test_error(error, "clEnqueueReadBuffer failed");
-
-        // Verify the results
-        for (count = 0; count < 2; count++)
-        {
-            for (index = 0; index < sizes[size_to_test]; index++)
-            {
-                switch (count)
-                {
-                    case 0: expected = (float)l[index]; break;
-                    case 1: expected = (float)ul[index]; break;
-                    default: log_error("Test error"); break;
-                }
-
-                if (results_back[count * sizes[size_to_test] + index]
-                    != expected)
-                {
-                    total_errors++;
-                    log_error("Conversion from %s%s failed: index %d got %g, "
-                              "expected %g.\n",
-                              types[count], size_strings[size_to_test], index,
-                              results_back[count * sizes[size_to_test] + index],
-                              expected);
-                }
-            }
-        }
+    size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem);
+    if (total_parameter_size > max_parameter_size) {
+      log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
+               (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size);
+      continue;
     }
 
-    return total_errors;
+    log_info("Testing vector size %d\n", sizes[size_to_test]);
+
+    // If size is > 1, then we need a explicit convert call.
+    if (sizes[size_to_test] > 1) {
+      sprintf(convert_string, "convert_float%s",  size_strings[size_to_test]);
+    } else {
+      sprintf(convert_string, " ");
+    }
+
+    // Build the kernel
+    sprintf(kernel_string, kernel_code_long,
+            size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
+            convert_string, convert_string
+    );
+
+    ptr = kernel_string;
+    error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel_long");
+    test_error(error, "create single kernel failed");
+
+    // Set the arguments
+    for (count = 0; count < 2; count++) {
+      switch (count) {
+        case 0: error = clSetKernelArg(kernel, count, sizeof(cl_long)*sizes[size_to_test], &l); break;
+        case 1: error = clSetKernelArg(kernel, count, sizeof(cl_ulong)*sizes[size_to_test], &ul); break;
+        default: log_error("Test error"); break;
+      }
+      if (error)
+        log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]);
+      test_error(error, "clSetKernelArgs failed");
+    }
+    error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results);
+    test_error(error, "clSetKernelArgs failed");
+
+    // Execute
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
+    test_error(error, "clEnqueueNDRangeKernel failed");
+
+    // Read back the results
+    error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*2*16, results_back, 0, NULL, NULL);
+    test_error(error, "clEnqueueReadBuffer failed");
+
+    // Verify the results
+    for (count = 0; count < 2; count++) {
+      for (index=0; index < sizes[size_to_test]; index++) {
+        switch (count) {
+          case 0: expected = (float)l[index]; break;
+          case 1: expected = (float)ul[index]; break;
+          default: log_error("Test error"); break;
+        }
+
+        if (results_back[count*sizes[size_to_test]+index] != expected) {
+          total_errors++;
+          log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test],
+                    index, results_back[count*sizes[size_to_test]+index], expected);
+        }
+      }
+    }
+  }
+
+  return total_errors;
 }
 
-int test_parameter_types(cl_device_id device, cl_context context,
-                         cl_command_queue queue, int num_elements)
+int test_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
 {
-    clMemWrapper results;
-    int error;
-    size_t global[3] = { 1, 1, 1 };
-    float results_back[7 * 16];
-    int count, index;
-    const char *types[] = { "char", "uchar", "short", "ushort",
-                            "int",  "uint",  "float" };
-    char kernel_string[8192];
-    int sizes[] = { 1, 2, 4, 8, 16 };
-    const char *size_strings[] = { "", "2", "4", "8", "16" };
-    float expected;
-    int total_errors = 0;
-    int size_to_test;
-    char *ptr;
-    char convert_string[1024];
-    size_t max_parameter_size;
+  clMemWrapper results;
+  int error;
+  size_t global[3] = {1, 1, 1};
+  float results_back[7*16];
+  int count, index;
+  const char* types[] = {"char", "uchar", "short", "ushort", "int", "uint", "float"};
+  char kernel_string[8192];
+  int sizes[] = {1, 2, 4, 8, 16};
+  const char* size_strings[] = {"", "2", "4", "8", "16"};
+  float expected;
+  int total_errors = 0;
+  int size_to_test;
+  char *ptr;
+  char convert_string[1024];
+  size_t max_parameter_size;
 
-    // We don't really care about the contents since we're just testing that the
-    // types work.
-    cl_char c[16] = { 0, -1, 2,  -3,  4,  -5,  6,  -7,
-                      8, -9, 10, -11, 12, -13, 14, -15 };
-    cl_uchar uc[16] = { 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
-    cl_short s[16] = { -17, -1, 2,  -3,  4,  -5,  6,  -7,
-                       8,   -9, 10, -11, 12, -13, 14, -15 };
-    cl_ushort us[16] = {
-        18, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
-    };
-    cl_int i[16] = { -19, -1, 2,  -3,  4,  -5,  6,  -7,
-                     8,   -9, 10, -11, 12, -13, 14, -15 };
-    cl_uint ui[16] = { 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
-    cl_float f[16] = { -23, -1, 2,  -3,  4,  -5,  6,  -7,
-                       8,   -9, 10, -11, 12, -13, 14, -15 };
+  // We don't really care about the contents since we're just testing that the types work.
+  cl_char c[16]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+  cl_uchar uc[16]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  cl_short s[16]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+  cl_ushort us[16]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  cl_int i[16]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+  cl_uint ui[16]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  cl_float f[16]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
 
-    // Calculate how large our paramter size is to the kernel
-    size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar)
-        + sizeof(cl_short) + sizeof(cl_ushort) + sizeof(cl_int)
-        + sizeof(cl_uint) + sizeof(cl_float);
+  // Calculate how large our paramter size is to the kernel
+  size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) +
+  sizeof(cl_short) +sizeof(cl_ushort) +
+  sizeof(cl_int) +sizeof(cl_uint) +
+  sizeof(cl_float);
 
-    // Init our strings.
-    kernel_string[0] = '\0';
-    convert_string[0] = '\0';
+  // Init our strings.
+  kernel_string[0] = '\0';
+  convert_string[0] = '\0';
 
-    // Get the maximum parameter size allowed
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_MAX_PARAMETER_SIZE,
-                        sizeof(max_parameter_size), &max_parameter_size, NULL);
-    test_error(error, "Unable to get max parameter size from device");
+  // Get the maximum parameter size allowed
+  error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
+    test_error( error, "Unable to get max parameter size from device" );
 
-    // Create the results buffer
-    results = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                             sizeof(cl_float) * 7 * 16, NULL, &error);
-    test_error(error, "clCreateBuffer failed");
+  // Create the results buffer
+  results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error);
+  test_error(error, "clCreateBuffer failed");
 
-    // Go over all the vector sizes
-    for (size_to_test = 0; size_to_test < 5; size_to_test++)
-    {
-        clProgramWrapper program;
-        clKernelWrapper kernel;
+  // Go over all the vector sizes
+  for (size_to_test = 0; size_to_test < 5; size_to_test++) {
+    clProgramWrapper program;
+    clKernelWrapper kernel;
 
-        size_t total_parameter_size =
-            parameter_size * sizes[size_to_test] + sizeof(cl_mem);
-        if (total_parameter_size > max_parameter_size)
-        {
-            log_info(
-                "Can not test with vector size %d because it would exceed the "
-                "maximum allowed parameter size to the kernel. (%d > %d)\n",
-                (int)sizes[size_to_test], (int)total_parameter_size,
-                (int)max_parameter_size);
-            continue;
-        }
-
-        log_info("Testing vector size %d\n", sizes[size_to_test]);
-
-        // If size is > 1, then we need a explicit convert call.
-        if (sizes[size_to_test] > 1)
-        {
-            sprintf(convert_string, "convert_float%s",
-                    size_strings[size_to_test]);
-        }
-        else
-        {
-            sprintf(convert_string, " ");
-        }
-
-        // Build the kernel
-        sprintf(kernel_string, kernel_code, size_strings[size_to_test],
-                size_strings[size_to_test], size_strings[size_to_test],
-                size_strings[size_to_test], size_strings[size_to_test],
-                size_strings[size_to_test], size_strings[size_to_test],
-                size_strings[size_to_test], convert_string, convert_string,
-                convert_string, convert_string, convert_string, convert_string);
-
-        ptr = kernel_string;
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            (const char **)&ptr, "test_kernel");
-        test_error(error, "create single kernel failed");
-
-        // Set the arguments
-        for (count = 0; count < 7; count++)
-        {
-            switch (count)
-            {
-                case 0:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_char) * sizes[size_to_test],
-                        &c);
-                    break;
-                case 1:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_uchar) * sizes[size_to_test],
-                        &uc);
-                    break;
-                case 2:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_short) * sizes[size_to_test],
-                        &s);
-                    break;
-                case 3:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_ushort) * sizes[size_to_test],
-                        &us);
-                    break;
-                case 4:
-                    error = clSetKernelArg(kernel, count,
-                                           sizeof(cl_int) * sizes[size_to_test],
-                                           &i);
-                    break;
-                case 5:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_uint) * sizes[size_to_test],
-                        &ui);
-                    break;
-                case 6:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_float) * sizes[size_to_test],
-                        &f);
-                    break;
-                default: log_error("Test error"); break;
-            }
-            if (error)
-                log_error("Setting kernel arg %d %s%s: ", count, types[count],
-                          size_strings[size_to_test]);
-            test_error(error, "clSetKernelArgs failed");
-        }
-        error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results);
-        test_error(error, "clSetKernelArgs failed");
-
-        // Execute
-        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0,
-                                       NULL, NULL);
-        test_error(error, "clEnqueueNDRangeKernel failed");
-
-        // Read back the results
-        error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0,
-                                    sizeof(cl_float) * 7 * 16, results_back, 0,
-                                    NULL, NULL);
-        test_error(error, "clEnqueueReadBuffer failed");
-
-        // Verify the results
-        for (count = 0; count < 7; count++)
-        {
-            for (index = 0; index < sizes[size_to_test]; index++)
-            {
-                switch (count)
-                {
-                    case 0: expected = (float)c[index]; break;
-                    case 1: expected = (float)uc[index]; break;
-                    case 2: expected = (float)s[index]; break;
-                    case 3: expected = (float)us[index]; break;
-                    case 4: expected = (float)i[index]; break;
-                    case 5: expected = (float)ui[index]; break;
-                    case 6: expected = (float)f[index]; break;
-                    default: log_error("Test error"); break;
-                }
-
-                if (results_back[count * sizes[size_to_test] + index]
-                    != expected)
-                {
-                    total_errors++;
-                    log_error("Conversion from %s%s failed: index %d got %g, "
-                              "expected %g.\n",
-                              types[count], size_strings[size_to_test], index,
-                              results_back[count * sizes[size_to_test] + index],
-                              expected);
-                }
-            }
-        }
+    size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem);
+    if (total_parameter_size > max_parameter_size) {
+      log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
+               (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size);
+      continue;
     }
 
-    if (gHasLong)
-    {
-        log_info("Testing long types...\n");
-        total_errors +=
-            test_parameter_types_long(device, context, queue, num_elements);
-    }
-    else
-    {
-        log_info("Longs unsupported, skipping.");
+    log_info("Testing vector size %d\n", sizes[size_to_test]);
+
+    // If size is > 1, then we need a explicit convert call.
+    if (sizes[size_to_test] > 1) {
+      sprintf(convert_string, "convert_float%s",  size_strings[size_to_test]);
+    } else {
+      sprintf(convert_string, " ");
     }
 
-    return total_errors;
+    // Build the kernel
+    sprintf(kernel_string, kernel_code,
+            size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
+            size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
+            size_strings[size_to_test], size_strings[size_to_test],
+            convert_string, convert_string, convert_string,
+            convert_string, convert_string, convert_string
+    );
+
+    ptr = kernel_string;
+    error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel");
+    test_error(error, "create single kernel failed");
+
+    // Set the arguments
+    for (count = 0; count < 7; count++) {
+      switch (count) {
+        case 0: error = clSetKernelArg(kernel, count, sizeof(cl_char)*sizes[size_to_test], &c); break;
+        case 1: error = clSetKernelArg(kernel, count, sizeof(cl_uchar)*sizes[size_to_test], &uc); break;
+        case 2: error = clSetKernelArg(kernel, count, sizeof(cl_short)*sizes[size_to_test], &s); break;
+        case 3: error = clSetKernelArg(kernel, count, sizeof(cl_ushort)*sizes[size_to_test], &us); break;
+        case 4: error = clSetKernelArg(kernel, count, sizeof(cl_int)*sizes[size_to_test], &i); break;
+        case 5: error = clSetKernelArg(kernel, count, sizeof(cl_uint)*sizes[size_to_test], &ui); break;
+        case 6: error = clSetKernelArg(kernel, count, sizeof(cl_float)*sizes[size_to_test], &f); break;
+        default: log_error("Test error"); break;
+      }
+      if (error)
+        log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]);
+      test_error(error, "clSetKernelArgs failed");
+    }
+    error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results);
+    test_error(error, "clSetKernelArgs failed");
+
+    // Execute
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
+    test_error(error, "clEnqueueNDRangeKernel failed");
+
+    // Read back the results
+    error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*7*16, results_back, 0, NULL, NULL);
+    test_error(error, "clEnqueueReadBuffer failed");
+
+    // Verify the results
+    for (count = 0; count < 7; count++) {
+      for (index=0; index < sizes[size_to_test]; index++) {
+        switch (count) {
+          case 0: expected = (float)c[index]; break;
+          case 1: expected = (float)uc[index]; break;
+          case 2: expected = (float)s[index]; break;
+          case 3: expected = (float)us[index]; break;
+          case 4: expected = (float)i[index]; break;
+          case 5: expected = (float)ui[index]; break;
+          case 6: expected = (float)f[index]; break;
+          default: log_error("Test error"); break;
+        }
+
+        if (results_back[count*sizes[size_to_test]+index] != expected) {
+          total_errors++;
+          log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test],
+                    index, results_back[count*sizes[size_to_test]+index], expected);
+        }
+      }
+    }
+  }
+
+  if (gHasLong) {
+    log_info("Testing long types...\n");
+    total_errors += test_parameter_types_long( device, context, queue, num_elements );
+  }
+  else {
+    log_info("Longs unsupported, skipping.");
+  }
+
+  return total_errors;
 }
+
+
+

diff --git a/test_conformance/basic/test_constant.cpp b/test_conformance/basic/test_constant.cpp
index ed25c6e..2bd661a 100644
--- a/test_conformance/basic/test_constant.cpp
+++ b/test_conformance/basic/test_constant.cpp

@@ -140,22 +140,19 @@
     tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
     tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
     out  = (cl_float*)malloc(sizeof(cl_float) * constant_values);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * constant_values, NULL, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * constant_values, NULL, NULL);
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * constant_values, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * constant_values, NULL, NULL);
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * constant_values, NULL, NULL);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * constant_values, NULL, NULL);
     if (!streams[2])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp
index f52162a..3afd22b 100644
--- a/test_conformance/basic/test_enqueued_local_size.cpp
+++ b/test_conformance/basic/test_enqueued_local_size.cpp

@@ -74,42 +74,14 @@
     size_t localsize[2];
     int err;
 
-    // For an OpenCL-3.0 device that does not support non-uniform work-groups
-    // we cannot enqueue local sizes which do not divide the global dimensions
-    // but we can still run the test checking that get_enqueued_local_size ==
-    // get_local_size.
-    bool use_uniform_work_groups{ false };
-    if (get_device_cl_version(device) >= Version(3, 0))
-    {
-        cl_bool areNonUniformWorkGroupsSupported = false;
-        err = clGetDeviceInfo(device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
-                              sizeof(areNonUniformWorkGroupsSupported),
-                              &areNonUniformWorkGroupsSupported, nullptr);
-        test_error_ret(err, "clGetDeviceInfo failed.", TEST_FAIL);
-
-        if (CL_FALSE == areNonUniformWorkGroupsSupported)
-        {
-            log_info("Non-uniform work group sizes are not supported, "
-                     "enqueuing with uniform workgroups\n");
-            use_uniform_work_groups = true;
-        }
-    }
-
     output_ptr   = (int*)malloc(2 * sizeof(int));
 
-    streams =
-        clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * sizeof(int), NULL, &err);
+    streams = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), 2*sizeof(int), NULL, &err);
     test_error( err, "clCreateBuffer failed.");
 
-    std::string cl_std = "-cl-std=CL";
-    cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0";
-    err = create_single_kernel_helper_with_build_options(
-        context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code,
-        "test_enqueued_local_size_1d", cl_std.c_str());
+    err = create_single_kernel_helper_with_build_options(context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code, "test_enqueued_local_size_1d", "-cl-std=CL2.0");
     test_error( err, "create_single_kernel_helper failed");
-    err = create_single_kernel_helper_with_build_options(
-        context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code,
-        "test_enqueued_local_size_2d", cl_std.c_str());
+    err = create_single_kernel_helper_with_build_options(context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code, "test_enqueued_local_size_2d", "-cl-std=CL2.0");
     test_error( err, "create_single_kernel_helper failed");
 
     err  = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
@@ -126,20 +98,6 @@
 
     localsize[0] = MIN(16, max_wgs);
     localsize[1] = MIN(11, max_wgs / localsize[0]);
-    // If we need to use uniform workgroups because non-uniform workgroups are
-    // not supported, round up to the next global size that is divisible by the
-    // local size.
-    if (use_uniform_work_groups)
-    {
-        if (globalsize[0] % localsize[0])
-        {
-            globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
-        }
-        if (globalsize[1] % localsize[1])
-        {
-            globalsize[1] += (localsize[1] - (globalsize[1] % localsize[1]));
-        }
-    }
 
     err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, globalsize, localsize, 0, NULL, NULL);
     test_error( err, "clEnqueueNDRangeKernel failed.");
@@ -151,10 +109,6 @@
 
     globalsize[0] = (size_t)num_elements;
     localsize[0] = 9;
-    if (use_uniform_work_groups && (globalsize[0] % localsize[0]))
-    {
-        globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
-    }
     err = clEnqueueNDRangeKernel(queue, kernel[1], 1, NULL, globalsize, localsize, 0, NULL, NULL);
     test_error( err, "clEnqueueNDRangeKernel failed.");
 

diff --git a/test_conformance/basic/test_explicit_s2v.cpp b/test_conformance/basic/test_explicit_s2v.cpp
index bf38636..34e7da9 100644
--- a/test_conformance/basic/test_explicit_s2v.cpp
+++ b/test_conformance/basic/test_explicit_s2v.cpp

@@ -146,11 +146,9 @@
         return -1;
     }
 
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                paramSize * count, inputData, &error);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error);
     test_error( error, "clCreateBuffer failed");
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, destStride * count,
-                                NULL, &error);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  destStride * count, NULL, &error);
     test_error( error, "clCreateBuffer failed");
 
     /* Set the arguments */

diff --git a/test_conformance/basic/test_float2int.cpp b/test_conformance/basic/test_float2int.cpp
index 4063a95..b6af322 100644
--- a/test_conformance/basic/test_float2int.cpp
+++ b/test_conformance/basic/test_float2int.cpp

@@ -69,15 +69,13 @@
 
     input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL);
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL);
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/basic/test_fpmath_float.cpp b/test_conformance/basic/test_fpmath_float.cpp
index 6e5deb4..4ed81cc 100644
--- a/test_conformance/basic/test_fpmath_float.cpp
+++ b/test_conformance/basic/test_fpmath_float.cpp

@@ -161,13 +161,13 @@
     input_ptr[2] = (cl_float*)malloc(length);
     output_ptr   = (cl_float*)malloc(length);
 
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
 
     p = input_ptr[0];

diff --git a/test_conformance/basic/test_fpmath_float2.cpp b/test_conformance/basic/test_fpmath_float2.cpp
index 1881b4b..a964f6a 100644
--- a/test_conformance/basic/test_fpmath_float2.cpp
+++ b/test_conformance/basic/test_fpmath_float2.cpp

@@ -160,13 +160,13 @@
     input_ptr[2] = (cl_float*)malloc(length);
     output_ptr   = (cl_float*)malloc(length);
 
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
 
     p = input_ptr[0];

diff --git a/test_conformance/basic/test_fpmath_float4.cpp b/test_conformance/basic/test_fpmath_float4.cpp
index 999c8ec..275b4f3 100644
--- a/test_conformance/basic/test_fpmath_float4.cpp
+++ b/test_conformance/basic/test_fpmath_float4.cpp

@@ -160,13 +160,13 @@
     input_ptr[2] = (cl_float*)malloc(length);
     output_ptr   = (cl_float*)malloc(length);
 
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
 
     p = input_ptr[0];

diff --git a/test_conformance/basic/test_get_linear_ids.cpp b/test_conformance/basic/test_get_linear_ids.cpp
index 3496fd0..fba200a 100644
--- a/test_conformance/basic/test_get_linear_ids.cpp
+++ b/test_conformance/basic/test_get_linear_ids.cpp

@@ -59,8 +59,7 @@
 
 
     // Create the kernel
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        linear_ids_source, "test_linear_ids");
+    error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, linear_ids_source, "test_linear_ids", "-cl-std=CL2.0");
     if (error)
         return error;
 

diff --git a/test_conformance/basic/test_global_linear_id.cpp b/test_conformance/basic/test_global_linear_id.cpp
index 046d12a..7bed5b8 100644
--- a/test_conformance/basic/test_global_linear_id.cpp
+++ b/test_conformance/basic/test_global_linear_id.cpp

@@ -70,24 +70,19 @@
     cl_kernel kernel[2];
 
     int *output_ptr;
-    size_t threads[2];
-    int err;
-    num_elements = (int)sqrt((float)num_elements);
-    int length = num_elements * num_elements;
+      size_t threads[2];
+      int err;
+      num_elements = (int)sqrt((float)num_elements);
+      int length = num_elements * num_elements;
 
-    output_ptr = (int *)malloc(sizeof(int) * length);
+      output_ptr   = (int*)malloc(sizeof(int) * length);
 
-    streams = clCreateBuffer(context, CL_MEM_READ_WRITE, length * sizeof(int),
-                             NULL, &err);
+    streams = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length*sizeof(int), NULL, &err);
     test_error( err, "clCreateBuffer failed.");
 
-    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1,
-                                      &global_linear_id_1d_code,
-                                      "test_global_linear_id_1d");
+    err = create_single_kernel_helper_with_build_options(context, &program[0], &kernel[0], 1, &global_linear_id_1d_code, "test_global_linear_id_1d", "-cl-std=CL2.0");
     test_error( err, "create_single_kernel_helper failed");
-    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1,
-                                      &global_linear_id_2d_code,
-                                      "test_global_linear_id_2d");
+    err = create_single_kernel_helper_with_build_options(context, &program[1], &kernel[1], 1, &global_linear_id_2d_code, "test_global_linear_id_2d", "-cl-std=CL2.0");
     test_error( err, "create_single_kernel_helper failed");
 
     err  = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);

diff --git a/test_conformance/basic/test_global_work_offsets.cpp b/test_conformance/basic/test_global_work_offsets.cpp
index 39b54b4..edaca38 100644
--- a/test_conformance/basic/test_global_work_offsets.cpp
+++ b/test_conformance/basic/test_global_work_offsets.cpp

@@ -138,9 +138,7 @@
     memset( outputA, 0xff, sizeof( outputA ) );
     for( int i = 0; i < 3; i++ )
     {
-        streams[i] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                           sizeof(outputA), outputA, &error);
+        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof(outputA), outputA, &error );
         test_error( error, "Unable to create output array" );
     }
 
@@ -230,9 +228,7 @@
 
     // Create some output streams, and storage for a single control ID
     memset( outOffsets, 0xff, sizeof( outOffsets ) );
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                       sizeof(outOffsets), outOffsets, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof( outOffsets ), outOffsets, &error );
     test_error( error, "Unable to create control ID buffer" );
 
     // Run a few different times

diff --git a/test_conformance/basic/test_hostptr.cpp b/test_conformance/basic/test_hostptr.cpp
index 65af5c3..f40cb69 100644
--- a/test_conformance/basic/test_hostptr.cpp
+++ b/test_conformance/basic/test_hostptr.cpp

@@ -131,53 +131,39 @@
         make_random_data(num_elements, input_ptr[1], d);
 
         // Create host-side input
-        streams[0] =
-            clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
-                           sizeof(cl_float) * num_elements, input_ptr[0], &err);
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[0], &err);
         test_error(err, "clCreateBuffer 0 failed");
 
         // Create a copied input
-        streams[1] =
-            clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                           sizeof(cl_float) * num_elements, input_ptr[1], &err);
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[1], &err);
         test_error(err, "clCreateBuffer 1 failed");
 
         // Create a host-side output
-        streams[2] =
-            clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
-                           sizeof(cl_float) * num_elements, output_ptr, &err);
+        streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, output_ptr, &err);
         test_error(err, "clCreateBuffer 2 failed");
 
         // Create a host-side input
         img_format.image_channel_order = CL_RGBA;
         img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[3] =
-            create_image_2d(context, CL_MEM_USE_HOST_PTR, &img_format,
-                            img_width, img_height, 0, rgba8_inptr, &err);
+        streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
         test_error(err, "create_image_2d 3 failed");
 
         // Create a copied input
         img_format.image_channel_order = CL_RGBA;
         img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[4] =
-            create_image_2d(context, CL_MEM_COPY_HOST_PTR, &img_format,
-                            img_width, img_height, 0, rgba8_inptr, &err);
+        streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
         test_error(err, "create_image_2d 4 failed");
 
         // Create a host-side output
         img_format.image_channel_order = CL_RGBA;
         img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[5] =
-            create_image_2d(context, CL_MEM_USE_HOST_PTR, &img_format,
-                            img_width, img_height, 0, rgba8_outptr, &err);
+        streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
         test_error(err, "create_image_2d 5 failed");
 
         // Create a copied output
         img_format.image_channel_data_type = CL_RGBA;
         img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[6] =
-            create_image_2d(context, CL_MEM_COPY_HOST_PTR, &img_format,
-                            img_width, img_height, 0, rgba8_outptr, &err);
+        streams[6] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
         test_error(err, "create_image_2d 6 failed");
 
         err = create_single_kernel_helper(context, &program, &kernel,1, &hostptr_kernel_code, "test_hostptr" );

diff --git a/test_conformance/basic/test_image_multipass.cpp b/test_conformance/basic/test_image_multipass.cpp
index 7f51665..cd91a13 100644
--- a/test_conformance/basic/test_image_multipass.cpp
+++ b/test_conformance/basic/test_image_multipass.cpp

@@ -172,7 +172,7 @@
         cl_mem_flags        flags;
 
         initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0);
-        flags = CL_MEM_READ_WRITE;
+        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
 
         accum_streams[0] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
         if (!accum_streams[0])
@@ -224,7 +224,7 @@
         MTdata              d;
 
         input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams);
-        flags = CL_MEM_READ_WRITE;
+        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
 
         int i;
         d = init_genrand( gRandomSeed );
@@ -425,7 +425,7 @@
         cl_mem_flags        flags;
 
         initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0);
-        flags = CL_MEM_READ_WRITE;
+        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
 
         accum_streams[0] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
         if (!accum_streams[0])
@@ -469,7 +469,7 @@
         MTdata              d;
 
         input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams);
-        flags = CL_MEM_READ_WRITE;
+        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
 
         int i;
         d = init_genrand( gRandomSeed );

diff --git a/test_conformance/basic/test_image_r8.cpp b/test_conformance/basic/test_image_r8.cpp
index b633d6a..7805c1b 100644
--- a/test_conformance/basic/test_image_r8.cpp
+++ b/test_conformance/basic/test_image_r8.cpp

@@ -88,9 +88,7 @@
     img_format.image_channel_data_type = CL_UNSIGNED_INT8;
 
     // early out if this image type is not supported
-    if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
-                                   CL_MEM_OBJECT_IMAGE2D, &img_format))
-    {
+    if( ! is_image_format_supported( context, (cl_mem_flags)(CL_MEM_READ_ONLY), CL_MEM_OBJECT_IMAGE2D, &img_format ) ) {
         log_info("WARNING: Image type not supported; skipping test.\n");
         return 0;
     }
@@ -100,17 +98,14 @@
     free_mtdata(d); d = NULL;
 
     output_ptr = (cl_uchar*)malloc(sizeof(cl_uchar) * img_width * img_height);
-    streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &img_format,
-                                 img_width, img_height, 0, NULL, NULL);
+    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_ONLY), &img_format, img_width, img_height, 0, NULL, NULL);
     if (!streams[0])
     {
         log_error("create_image_2d failed\n");
         return -1;
     }
 
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_uchar) * img_width * img_height, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uchar) * img_width*img_height, NULL, NULL);
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/basic/test_imagearraycopy.cpp b/test_conformance/basic/test_imagearraycopy.cpp
index 0246d80..4240466 100644
--- a/test_conformance/basic/test_imagearraycopy.cpp
+++ b/test_conformance/basic/test_imagearraycopy.cpp

@@ -38,8 +38,7 @@
 
   log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
 
-  image = create_image_2d(context, CL_MEM_READ_WRITE, format, img_width,
-                          img_height, 0, NULL, &err);
+  image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
   test_error(err, "create_image_2d failed");
 
   err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
@@ -47,7 +46,7 @@
 
   buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;
 
-  buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
+  buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_size, NULL, &err);
   test_error(err, "clCreateBuffer failed");
 
   d = init_genrand( gRandomSeed );

diff --git a/test_conformance/basic/test_imagearraycopy3d.cpp b/test_conformance/basic/test_imagearraycopy3d.cpp
index 19dfdbc..e34aa7d 100644
--- a/test_conformance/basic/test_imagearraycopy3d.cpp
+++ b/test_conformance/basic/test_imagearraycopy3d.cpp

@@ -38,8 +38,7 @@
 
   log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
 
-  image = create_image_3d(context, CL_MEM_READ_ONLY, format, img_width,
-                          img_height, img_depth, 0, 0, NULL, &err);
+  image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
   test_error(err, "create_image_3d failed");
 
   err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
@@ -47,7 +46,7 @@
 
   buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
 
-  buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
+  buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_size, NULL, &err);
   test_error(err, "clCreateBuffer failed");
 
   d = init_genrand( gRandomSeed );
@@ -122,15 +121,12 @@
 
   PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
 
-  err = clGetSupportedImageFormats(
-      context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
+  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
   test_error(err, "clGetSupportedImageFormats failed");
 
   formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
 
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_ONLY,
-                                   CL_MEM_OBJECT_IMAGE3D, num_formats, formats,
-                                   NULL);
+  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
   test_error(err, "clGetSupportedImageFormats failed");
 
   for (i = 0; i < num_formats; i++) {

diff --git a/test_conformance/basic/test_imagecopy.cpp b/test_conformance/basic/test_imagecopy.cpp
index bcb9fef..e74827d 100644
--- a/test_conformance/basic/test_imagecopy.cpp
+++ b/test_conformance/basic/test_imagecopy.cpp

@@ -132,29 +132,23 @@
 
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
-    streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
 
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[2] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
-    streams[3] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
 
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_FLOAT;
-    streams[4] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
-    streams[5] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
 
     for (i=0; i<3; i++)

diff --git a/test_conformance/basic/test_imagedim.cpp b/test_conformance/basic/test_imagedim.cpp
index 008c88b..6d8cdb3 100644
--- a/test_conformance/basic/test_imagedim.cpp
+++ b/test_conformance/basic/test_imagedim.cpp

@@ -38,25 +38,24 @@
 "}\n";
 
 
-static unsigned char *generate_8888_image(size_t w, size_t h, MTdata d)
+static unsigned char *
+generate_8888_image(int w, int h, MTdata d)
 {
-    unsigned char *ptr = new unsigned char[4 * w * h];
-    size_t i;
+    unsigned char   *ptr = (unsigned char*)malloc(w * h * 4);
+    int             i;
 
-    for (i = 0; i < w * h * 4; i++)
-    {
+    for (i=0; i<w*h*4; i++)
         ptr[i] = (unsigned char)genrand_int32(d);
-    }
 
     return ptr;
 }
 
-static int verify_8888_image(unsigned char *image, unsigned char *outptr,
-                             size_t w, size_t h)
+static int
+verify_8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
 {
-    size_t i;
+    int     i;
 
-    for (i = 0; i < w * h; i++)
+    for (i=0; i<w*h; i++)
     {
         if (outptr[i] != image[i])
             return -1;
@@ -69,18 +68,18 @@
 int
 test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
 {
-    cl_mem streams[2];
-    cl_image_format img_format;
-    unsigned char *input_ptr, *output_ptr;
-    cl_program program;
-    cl_kernel kernel;
-    size_t threads[2];
-    cl_ulong max_mem_size;
-    size_t img_width, max_img_width;
-    size_t img_height, max_img_height;
-    size_t max_img_dim;
-    int i, j, i2, j2, err = 0;
-    size_t max_image2d_width, max_image2d_height;
+    cl_mem            streams[2];
+    cl_image_format    img_format;
+    unsigned char    *input_ptr, *output_ptr;
+    cl_program        program;
+    cl_kernel        kernel;
+    size_t    threads[2];
+     cl_ulong    max_mem_size;
+    int                img_width, max_img_width;
+    int                img_height, max_img_height;
+    int                max_img_dim;
+    int                i, j, i2, j2, err=0;
+    size_t            max_image2d_width, max_image2d_height;
     int total_errors = 0;
     MTdata  d;
 
@@ -121,15 +120,15 @@
     cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
     test_error(err, "clCreateSampler failed");
 
-    max_img_width = max_image2d_width;
-    max_img_height = max_image2d_height;
+    max_img_width = (int)max_image2d_width;
+    max_img_height = (int)max_image2d_height;
 
     // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
   //  and we want to consume 1/4 of global memory (this is the minimum required to be
   //  supported by the spec)
     max_mem_size /= 4; // use 1/4
     max_mem_size /= 4; // 4 bytes per pixel
-    max_img_dim = (size_t)sqrt((double)max_mem_size);
+    max_img_dim = (int)sqrt((double)max_mem_size);
     // convert to a power of 2
     {
         unsigned int    n = (unsigned int)max_img_dim;
@@ -139,7 +138,7 @@
         while (m > n)
             m >>= 1;
 
-        max_img_dim = m;
+        max_img_dim = (int)m;
     }
 
     if (max_img_width > max_img_dim)
@@ -152,41 +151,36 @@
 
     d = init_genrand( gRandomSeed );
     input_ptr = generate_8888_image(max_img_width, max_img_height, d);
-
-    output_ptr = new unsigned char[4 * max_img_width * max_img_height];
+    output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height);
 
     // test power of 2 width, height starting at 1 to 4K
-    for (i = 1, i2 = 0; i <= max_img_height; i <<= 1, i2++)
+    for (i=1,i2=0; i<=max_img_height; i<<=1,i2++)
     {
         img_height = (1 << i2);
-        for (j = 1, j2 = 0; j <= max_img_width; j <<= 1, j2++)
+        for (j=1,j2=0; j<=max_img_width; j<<=1,j2++)
         {
             img_width = (1 << j2);
 
             img_format.image_channel_order = CL_RGBA;
             img_format.image_channel_data_type = CL_UNORM_INT8;
-            streams[0] =
-                create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                img_width, img_height, 0, NULL, NULL);
+            streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, NULL);
             if (!streams[0])
             {
                 log_error("create_image_2d failed.  width = %d, height = %d\n", img_width, img_height);
-                delete[] input_ptr;
-                delete[] output_ptr;
+                free(input_ptr);
+                free(output_ptr);
                 free_mtdata(d);
                 return -1;
             }
             img_format.image_channel_order = CL_RGBA;
             img_format.image_channel_data_type = CL_UNORM_INT8;
-            streams[1] =
-                create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                img_width, img_height, 0, NULL, NULL);
+            streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, NULL);
             if (!streams[1])
             {
                 log_error("create_image_2d failed.  width = %d, height = %d\n", img_width, img_height);
                 clReleaseMemObject(streams[0]);
-                delete[] input_ptr;
-                delete[] output_ptr;
+                free(input_ptr);
+                free(output_ptr);
                 free_mtdata(d);
                 return -1;
             }
@@ -199,8 +193,8 @@
                 log_error("clWriteImage failed\n");
                 clReleaseMemObject(streams[0]);
                 clReleaseMemObject(streams[1]);
-                delete[] input_ptr;
-                delete[] output_ptr;
+                free(input_ptr);
+                free(output_ptr);
                 free_mtdata(d);
                 return -1;
             }
@@ -213,8 +207,8 @@
                 log_error("clSetKernelArgs failed\n");
                 clReleaseMemObject(streams[0]);
                 clReleaseMemObject(streams[1]);
-                delete[] input_ptr;
-                delete[] output_ptr;
+                free(input_ptr);
+                free(output_ptr);
                 free_mtdata(d);
                 return -1;
             }
@@ -230,8 +224,8 @@
                             img_width, img_height);
                 clReleaseMemObject(streams[0]);
                 clReleaseMemObject(streams[1]);
-                delete[] input_ptr;
-                delete[] output_ptr;
+                free(input_ptr);
+                free(output_ptr);
                 free_mtdata(d);
                 return -1;
             }
@@ -243,8 +237,8 @@
                             img_width, img_height);
                 clReleaseMemObject(streams[0]);
                 clReleaseMemObject(streams[1]);
-                delete[] input_ptr;
-                delete[] output_ptr;
+                free(input_ptr);
+                free(output_ptr);
                 free_mtdata(d);
                 return -1;
             }
@@ -261,8 +255,8 @@
     }
 
     // cleanup
-    delete[] input_ptr;
-    delete[] output_ptr;
+    free(input_ptr);
+    free(output_ptr);
     free_mtdata(d);
     clReleaseSampler(sampler);
     clReleaseKernel(kernel);
@@ -276,18 +270,18 @@
 int
 test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
 {
-    cl_mem streams[2];
-    cl_image_format img_format;
-    unsigned char *input_ptr, *output_ptr;
-    cl_program program;
-    cl_kernel kernel;
-    size_t threads[2], local_threads[2];
-    cl_ulong max_mem_size;
-    size_t img_width, max_img_width;
-    size_t img_height, max_img_height;
-    size_t max_img_dim;
-    int i, j, i2, j2, err = 0;
-    size_t max_image2d_width, max_image2d_height;
+    cl_mem            streams[2];
+    cl_image_format    img_format;
+    unsigned char    *input_ptr, *output_ptr;
+    cl_program        program;
+    cl_kernel        kernel;
+    size_t    threads[2], local_threads[2];
+    cl_ulong    max_mem_size;
+    int                img_width, max_img_width;
+    int                img_height, max_img_height;
+    int                max_img_dim;
+    int                i, j, i2, j2, err=0;
+    size_t            max_image2d_width, max_image2d_height;
     int total_errors = 0;
     size_t max_local_workgroup_size[3];
     MTdata d;
@@ -367,10 +361,10 @@
 
     d = init_genrand( gRandomSeed );
     input_ptr = generate_8888_image(max_img_width, max_img_height, d);
-    output_ptr = new unsigned char[4 * max_img_width * max_img_height];
+    output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height);
 
     int plus_minus;
-    for (plus_minus = 0; plus_minus < 3; plus_minus++)
+    for (plus_minus=0; plus_minus < 3; plus_minus++)
     {
 
     // test power of 2 width, height starting at 1 to 4K
@@ -381,8 +375,8 @@
             {
                 img_width = (1 << j2);
 
-                size_t effective_img_height = img_height;
-                size_t effective_img_width = img_width;
+                int effective_img_height = img_height;
+                int effective_img_width = img_width;
 
                 local_threads[0] = 1;
                 local_threads[1] = 1;
@@ -410,28 +404,24 @@
 
                 img_format.image_channel_order = CL_RGBA;
                 img_format.image_channel_data_type = CL_UNORM_INT8;
-                streams[0] = create_image_2d(
-                    context, CL_MEM_READ_WRITE, &img_format,
-                    effective_img_width, effective_img_height, 0, NULL, NULL);
+                streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, effective_img_width, effective_img_height, 0, NULL, NULL);
                 if (!streams[0])
                 {
                     log_error("create_image_2d failed.  width = %d, height = %d\n", effective_img_width, effective_img_height);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
+                    free(input_ptr);
+                    free(output_ptr);
                     free_mtdata(d);
                     return -1;
                 }
                 img_format.image_channel_order = CL_RGBA;
                 img_format.image_channel_data_type = CL_UNORM_INT8;
-                streams[1] = create_image_2d(
-                    context, CL_MEM_READ_WRITE, &img_format,
-                    effective_img_width, effective_img_height, 0, NULL, NULL);
+                streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, effective_img_width, effective_img_height, 0, NULL, NULL);
                 if (!streams[1])
                 {
                     log_error("create_image_2d failed.  width = %d, height = %d\n", effective_img_width, effective_img_height);
                     clReleaseMemObject(streams[0]);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
+                    free(input_ptr);
+                    free(output_ptr);
                     free_mtdata(d);
                     return -1;
                 }
@@ -444,8 +434,8 @@
                     log_error("clWriteImage failed\n");
                     clReleaseMemObject(streams[0]);
                     clReleaseMemObject(streams[1]);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
+                    free(input_ptr);
+                    free(output_ptr);
                     free_mtdata(d);
                     return -1;
                 }
@@ -458,8 +448,8 @@
                     log_error("clSetKernelArgs failed\n");
                     clReleaseMemObject(streams[0]);
                     clReleaseMemObject(streams[1]);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
+                    free(input_ptr);
+                    free(output_ptr);
                     free_mtdata(d);
                     return -1;
                 }
@@ -476,8 +466,8 @@
                                 effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
                     clReleaseMemObject(streams[0]);
                     clReleaseMemObject(streams[1]);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
+                    free(input_ptr);
+                    free(output_ptr);
                     free_mtdata(d);
                     return -1;
                 }
@@ -489,8 +479,8 @@
                                 effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
                     clReleaseMemObject(streams[0]);
                     clReleaseMemObject(streams[1]);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
+                    free(input_ptr);
+                    free(output_ptr);
                     free_mtdata(d);
                     return -1;
                 }
@@ -508,15 +498,15 @@
 
   }
 
-  // cleanup
-  delete[] input_ptr;
-  delete[] output_ptr;
-  free_mtdata(d);
-  clReleaseSampler(sampler);
-  clReleaseKernel(kernel);
-  clReleaseProgram(program);
+    // cleanup
+    free(input_ptr);
+    free(output_ptr);
+    free_mtdata(d);
+    clReleaseSampler(sampler);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
 
-  return total_errors;
+    return total_errors;
 }
 
 

diff --git a/test_conformance/basic/test_imagenpot.cpp b/test_conformance/basic/test_imagenpot.cpp
index baa5b2e..4713c30 100644
--- a/test_conformance/basic/test_imagenpot.cpp
+++ b/test_conformance/basic/test_imagenpot.cpp

@@ -110,8 +110,8 @@
 
         img_format.image_channel_order = CL_RGBA;
         img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                     img_width, img_height, 0, NULL, NULL);
+        streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format,
+                                 img_width, img_height, 0, NULL, NULL);
         if (!streams[0])
         {
             log_error("create_image_2d failed\n");
@@ -120,8 +120,8 @@
         }
         img_format.image_channel_order = CL_RGBA;
         img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                     img_width, img_height, 0, NULL, NULL);
+    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format,
+                                 img_width, img_height, 0, NULL, NULL);
         if (!streams[1])
         {
             log_error("create_image_2d failed\n");

diff --git a/test_conformance/basic/test_imagerandomcopy.cpp b/test_conformance/basic/test_imagerandomcopy.cpp
index c3355de..494d6c2 100644
--- a/test_conformance/basic/test_imagerandomcopy.cpp
+++ b/test_conformance/basic/test_imagerandomcopy.cpp

@@ -146,29 +146,23 @@
 
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
-    streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
 
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[2] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
-    streams[3] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
 
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_FLOAT;
-    streams[4] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
-    streams[5] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
 
     for (i=0; i<3; i++)

diff --git a/test_conformance/basic/test_imagereadwrite.cpp b/test_conformance/basic/test_imagereadwrite.cpp
index c074238..dd1923e 100644
--- a/test_conformance/basic/test_imagereadwrite.cpp
+++ b/test_conformance/basic/test_imagereadwrite.cpp

@@ -215,20 +215,17 @@
 
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
 
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
 
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_FLOAT;
-    streams[2] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
 
     for (i=0; i<3; i++)

diff --git a/test_conformance/basic/test_int2float.cpp b/test_conformance/basic/test_int2float.cpp
index 483698a..d298dc7 100644
--- a/test_conformance/basic/test_int2float.cpp
+++ b/test_conformance/basic/test_int2float.cpp

@@ -68,15 +68,13 @@
 
     input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL);
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL);
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/basic/test_local_linear_id.cpp b/test_conformance/basic/test_local_linear_id.cpp
index 279bd71..cdf5ee6 100644
--- a/test_conformance/basic/test_local_linear_id.cpp
+++ b/test_conformance/basic/test_local_linear_id.cpp

@@ -66,29 +66,24 @@
 int
 test_local_linear_id(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
 {
-    cl_mem streams;
-    cl_program program[2];
-    cl_kernel kernel[2];
+      cl_mem streams;
+      cl_program program[2];
+      cl_kernel kernel[2];
 
     int *output_ptr;
-    size_t threads[2];
-    int err;
-    num_elements = (int)sqrt((float)num_elements);
-    int length = num_elements * num_elements;
+      size_t threads[2];
+      int err;
+      num_elements = (int)sqrt((float)num_elements);
+      int length = num_elements * num_elements;
 
-    output_ptr = (cl_int *)malloc(sizeof(int) * length);
+      output_ptr   = (cl_int*)malloc(sizeof(int) * length);
 
-    streams = clCreateBuffer(context, CL_MEM_READ_WRITE, length * sizeof(int),
-                             NULL, &err);
-    test_error(err, "clCreateBuffer failed.");
+    streams = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length*sizeof(int), NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
 
-    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1,
-                                      &local_linear_id_1d_code,
-                                      "test_local_linear_id_1d");
+    err = create_single_kernel_helper_with_build_options(context, &program[0], &kernel[0], 1, &local_linear_id_1d_code, "test_local_linear_id_1d", "-cl-std=CL2.0");
     test_error( err, "create_single_kernel_helper failed");
-    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1,
-                                      &local_linear_id_2d_code,
-                                      "test_local_linear_id_2d");
+    err = create_single_kernel_helper_with_build_options(context, &program[1], &kernel[1], 1, &local_linear_id_2d_code, "test_local_linear_id_2d", "-cl-std=CL2.0");
     test_error( err, "create_single_kernel_helper failed");
 
     err  = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);

diff --git a/test_conformance/basic/test_multireadimagemultifmt.cpp b/test_conformance/basic/test_multireadimagemultifmt.cpp
index 7fe58d3..5c93d2f 100644
--- a/test_conformance/basic/test_multireadimagemultifmt.cpp
+++ b/test_conformance/basic/test_multireadimagemultifmt.cpp

@@ -136,8 +136,7 @@
 
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, NULL);
+    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, NULL);
     if (!streams[0])
     {
         log_error("create_image_2d failed\n");
@@ -145,8 +144,7 @@
     }
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, NULL);
+    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, NULL);
     if (!streams[1])
     {
         log_error("create_image_2d failed\n");
@@ -154,17 +152,14 @@
     }
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_FLOAT;
-    streams[2] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, NULL);
+    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, NULL);
     if (!streams[2])
     {
         log_error("create_image_2d failed\n");
         return -1;
     }
 
-    streams[3] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(float) * 4 * img_width * img_height, NULL, NULL);
+    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(float)*4 * img_width*img_height, NULL, NULL);
     if (!streams[3])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/basic/test_numeric_constants.cpp b/test_conformance/basic/test_numeric_constants.cpp
index 83687ee..5aeca0e 100644
--- a/test_conformance/basic/test_numeric_constants.cpp
+++ b/test_conformance/basic/test_numeric_constants.cpp

@@ -242,14 +242,11 @@
     }
 
     /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float_out),
-                                NULL, &error);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(float_out), NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int_out),
-                                NULL, &error);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(int_out), NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(uint_out),
-                                NULL, &error);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(uint_out), NULL, &error);
     test_error( error, "Creating test array failed" );
 
     error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
@@ -351,11 +348,9 @@
             return -1;
         }
 
-        streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    sizeof(long_out), NULL, &error);
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(long_out), NULL, &error);
         test_error( error, "Creating test array failed" );
-        streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    sizeof(ulong_out), NULL, &error);
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(ulong_out), NULL, &error);
         test_error( error, "Creating test array failed" );
 
         error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
@@ -394,11 +389,9 @@
             return -1;
         }
 
-        streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    sizeof(double_out), NULL, &error);
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(double_out), NULL, &error);
         test_error( error, "Creating test array failed" );
-        streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    sizeof(long_out), NULL, &error);
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(long_out), NULL, &error);
         test_error( error, "Creating test array failed" );
 
         error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
@@ -540,11 +533,9 @@
 
 
     /* Create some I/O streams */
-    intStream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(intOut), NULL,
-                               &error);
+    intStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(intOut), NULL, &error );
     test_error( error, "Creating test array failed" );
-    floatStream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(floatOut),
-                                 NULL, &error);
+    floatStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(floatOut), NULL, &error );
     test_error( error, "Creating test array failed" );
 
     // Stage 1: basic limits on MAXFLOAT
@@ -686,8 +677,7 @@
                 return -1;
             }
 
-            doubleStream = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                          sizeof(doubleOut), NULL, &error);
+            doubleStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(doubleOut), NULL, &error );
             test_error( error, "Creating test array failed" );
 
             error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream );

diff --git a/test_conformance/basic/test_preprocessors.cpp b/test_conformance/basic/test_preprocessors.cpp
index 2038d15..332f99d 100644
--- a/test_conformance/basic/test_preprocessors.cpp
+++ b/test_conformance/basic/test_preprocessors.cpp

@@ -125,14 +125,11 @@
     }
 
     /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(results),
-                                NULL, &error);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(results), NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(fileString),
-                                NULL, &error);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(fileString), NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(roundingString), NULL, &error);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(roundingString), NULL, &error);
     test_error( error, "Creating test array failed" );
 
     // Set up and run
@@ -216,15 +213,33 @@
 
     // The OpenCL version reported by the macro reports the feature level supported by the compiler. Since
     // this doesn't directly match any property we can query, we just check to see if it's a sane value
-    auto device_cl_version = get_device_cl_version(deviceID);
-    int device_cl_version_int = device_cl_version.to_int() * 10;
-    if ((results[2] < 100) || (results[2] > device_cl_version_int))
+    char versionBuffer[ 128 ];
+    error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( versionBuffer ), versionBuffer, NULL );
+    test_error( error, "Unable to get device's version to validate against" );
+
+    // We need to parse to get the version number to compare against
+    char *p1, *p2, *p3;
+    for( p1 = versionBuffer; ( *p1 != 0 ) && !isdigit( *p1 ); p1++ )
+        ;
+    for( p2 = p1; ( *p2 != 0 ) && ( *p2 != '.' ); p2++ )
+        ;
+    for( p3 = p2; ( *p3 != 0 ) && ( *p3 != ' ' ); p3++ )
+        ;
+
+    if( p2 == p3 )
     {
-        log_error("ERROR: Kernel preprocessor __OPENCL_VERSION__ does not make "
-                  "sense w.r.t. device's version string! "
-                  "(preprocessor states %d, CL_DEVICE_VERSION is %d (%s))\n",
-                  results[2], device_cl_version_int,
-                  device_cl_version.to_string().c_str());
+        log_error( "ERROR: Unable to verify OpenCL version string (platform string is incorrect format)\n" );
+        return -1;
+    }
+    *p2 = 0;
+    *p3 = 0;
+    int major = atoi( p1 );
+    int minor = atoi( p2 + 1 );
+    int realVersion = ( major * 100 ) + ( minor * 10 );
+    if( ( results[ 2 ] < 100 ) || ( results[ 2 ] > realVersion ) )
+    {
+        log_error( "ERROR: Kernel preprocessor __OPENCL_VERSION__ does not make sense w.r.t. device's version string! "
+                  "(preprocessor states %d, real version is %d (%d.%d))\n", results[ 2 ], realVersion, major, minor );
         return -1;
     }
 
@@ -235,29 +250,33 @@
         return -1;
     }
 
-    // The OpenCL C version reported by the macro reports the OpenCL C version
-    // specified to the compiler. We need to see whether it is supported.
-    int cl_c_major_version = results[3] / 100;
-    int cl_c_minor_version = (results[3] / 10) % 10;
-    if ((results[3] < 100)
-        || (!device_supports_cl_c_version(
-            deviceID, Version{ cl_c_major_version, cl_c_minor_version })))
+    // The OpenCL C version reported by the macro reports the OpenCL C supported by the compiler for this OpenCL device.
+    char cVersionBuffer[ 128 ];
+    error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( cVersionBuffer ), cVersionBuffer, NULL );
+    test_error( error, "Unable to get device's OpenCL C version to validate against" );
+
+    // We need to parse to get the version number to compare against
+    for( p1 = cVersionBuffer; ( *p1 != 0 ) && !isdigit( *p1 ); p1++ )
+        ;
+    for( p2 = p1; ( *p2 != 0 ) && ( *p2 != '.' ); p2++ )
+        ;
+    for( p3 = p2; ( *p3 != 0 ) && ( *p3 != ' ' ); p3++ )
+        ;
+
+    if( p2 == p3 )
     {
-        auto device_version = get_device_cl_c_version(deviceID);
-        log_error(
-            "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ does not make "
-            "sense w.r.t. device's version string! "
-            "(preprocessor states %d, CL_DEVICE_OPENCL_C_VERSION is %d (%s))\n",
-            results[3], device_version.to_int() * 10,
-            device_version.to_string().c_str());
-        log_error("This means that CL_DEVICE_OPENCL_C_VERSION < "
-                  "__OPENCL_C_VERSION__");
-        if (device_cl_version >= Version{ 3, 0 })
-        {
-            log_error(", and __OPENCL_C_VERSION__ does not appear in "
-                      "CL_DEVICE_OPENCL_C_ALL_VERSIONS");
-        }
-        log_error("\n");
+        log_error( "ERROR: Unable to verify OpenCL C version string (platform string is incorrect format)\n" );
+        return -1;
+    }
+    *p2 = 0;
+    *p3 = 0;
+    major = atoi( p1 );
+    minor = atoi( p2 + 1 );
+    realVersion = ( major * 100 ) + ( minor * 10 );
+    if( ( results[ 3 ] < 100 ) || ( results[ 3 ] > realVersion ) ) // results[ 3 ] is the kernel-reported __OPENCL_C_VERSION__
+    {
+        log_error( "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ does not make sense w.r.t. device's version string! "
+                  "(preprocessor states %d, real version is %d (%d.%d))\n", results[ 3 ], realVersion, major, minor );
         return -1;
     }
 

diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp
index 62c0a6b..9062a54 100644
--- a/test_conformance/basic/test_progvar.cpp
+++ b/test_conformance/basic/test_progvar.cpp

@@ -1510,29 +1510,6 @@
     return err;
 }
 
-// Determines whether its valid to skip this test based on the driver version
-// and the features it optionally supports.
-// Whether the test should be skipped is writen into the out paramter skip.
-// The check returns an error code for the clDeviceInfo query.
-static cl_int should_skip(cl_device_id device, cl_bool& skip)
-{
-    // Assume we can't skip to begin with.
-    skip = CL_FALSE;
-
-    // Progvar tests are already skipped for OpenCL < 2.0, so here we only need
-    // to test for 3.0 since that is when program scope global variables become
-    // optional.
-    if (get_device_cl_version(device) >= Version(3, 0))
-    {
-        size_t max_global_variable_size{};
-        test_error(clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
-                                   sizeof(max_global_variable_size),
-                                   &max_global_variable_size, nullptr),
-                   "clGetDeviceInfo failed");
-        skip = (max_global_variable_size != 0) ? CL_FALSE : CL_TRUE;
-    }
-    return CL_SUCCESS;
-}
 
 ////////////////////
 // Global functions
@@ -1541,18 +1518,6 @@
 // Test support for variables at program scope. Miscellaneous
 int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
 {
-    cl_bool skip{ CL_FALSE };
-    auto error = should_skip(device, skip);
-    if (CL_SUCCESS != error)
-    {
-        return TEST_FAIL;
-    }
-    if (skip)
-    {
-        log_info("Skipping progvar_prog_scope_misc since it is optionally not "
-                 "supported on this device\n");
-        return TEST_SKIPPED_ITSELF;
-    }
     size_t max_size = 0;
     size_t pref_size = 0;
 
@@ -1572,19 +1537,6 @@
 // Test support for variables at program scope. Unitialized data
 int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
 {
-    cl_bool skip{ CL_FALSE };
-    auto error = should_skip(device, skip);
-    if (CL_SUCCESS != error)
-    {
-        return TEST_FAIL;
-    }
-    if (skip)
-    {
-        log_info(
-            "Skipping progvar_prog_scope_uninit since it is optionally not "
-            "supported on this device\n");
-        return TEST_SKIPPED_ITSELF;
-    }
     size_t max_size = 0;
     size_t pref_size = 0;
 
@@ -1601,18 +1553,6 @@
 // Test support for variables at program scope. Initialized data.
 int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
 {
-    cl_bool skip{ CL_FALSE };
-    auto error = should_skip(device, skip);
-    if (CL_SUCCESS != error)
-    {
-        return TEST_FAIL;
-    }
-    if (skip)
-    {
-        log_info("Skipping progvar_prog_scope_init since it is optionally not "
-                 "supported on this device\n");
-        return TEST_SKIPPED_ITSELF;
-    }
     size_t max_size = 0;
     size_t pref_size = 0;
 
@@ -1630,18 +1570,6 @@
 // A simple test for support of static variables inside a kernel.
 int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
 {
-    cl_bool skip{ CL_FALSE };
-    auto error = should_skip(device, skip);
-    if (CL_SUCCESS != error)
-    {
-        return TEST_FAIL;
-    }
-    if (skip)
-    {
-        log_info("Skipping progvar_func_scope since it is optionally not "
-                 "supported on this device\n");
-        return TEST_SKIPPED_ITSELF;
-    }
     size_t max_size = 0;
     size_t pref_size = 0;
 

diff --git a/test_conformance/basic/test_queue_priority.cpp b/test_conformance/basic/test_queue_priority.cpp
index 57ce504..831defe 100644
--- a/test_conformance/basic/test_queue_priority.cpp
+++ b/test_conformance/basic/test_queue_priority.cpp

@@ -235,18 +235,18 @@
     oldMode = get_round();
   }
 
-  input_ptr[0] = (cl_float *)malloc(length);
-  input_ptr[1] = (cl_float *)malloc(length);
-  input_ptr[2] = (cl_float *)malloc(length);
-  output_ptr = (cl_float *)malloc(length);
+    input_ptr[0] = (cl_float*)malloc(length);
+    input_ptr[1] = (cl_float*)malloc(length);
+    input_ptr[2] = (cl_float*)malloc(length);
+    output_ptr   = (cl_float*)malloc(length);
 
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+  streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
   test_error( err, "clCreateBuffer failed.");
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+  streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
   test_error( err, "clCreateBuffer failed.");
-  streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+  streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
   test_error( err, "clCreateBuffer failed.");
-  streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+  streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
   test_error( err, "clCreateBuffer failed.");
 
   p = input_ptr[0];

diff --git a/test_conformance/basic/test_rw_image_access_qualifier.cpp b/test_conformance/basic/test_rw_image_access_qualifier.cpp
index 87e3f60..832ec86 100644
--- a/test_conformance/basic/test_rw_image_access_qualifier.cpp
+++ b/test_conformance/basic/test_rw_image_access_qualifier.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -43,29 +43,6 @@
 
 int test_rw_image_access_qualifier(cl_device_id device_id, cl_context context, cl_command_queue commands, int num_elements)
 {
-    // This test should be skipped if images are not supported.
-    if (checkForImageSupport(device_id))
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    // Support for read-write image arguments is required for an
-    // or 2.X device if the device supports images. In OpenCL-3.0
-    // read-write images are optional. This test is already being skipped
-    // for 1.X devices.
-    if (get_device_cl_version(device_id) >= Version(3, 0))
-    {
-        cl_uint are_rw_images_supported{};
-        test_error(
-            clGetDeviceInfo(device_id, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
-                            sizeof(are_rw_images_supported),
-                            &are_rw_images_supported, nullptr),
-            "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_IMAGE_ARGS\n");
-        if (0 == are_rw_images_supported)
-        {
-            return TEST_SKIPPED_ITSELF;
-        }
-    }
 
     unsigned int i;
 
@@ -109,8 +86,7 @@
     }
 
     /* Build the program executable */
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &rw_kernel_code, "test_rw_images");
+  err = create_single_kernel_helper_with_build_options(context,&program,&kernel,1,&rw_kernel_code,"test_rw_images", "-cl-std=CL2.0");
     if (err != CL_SUCCESS || !program) {
         log_error("Error: clCreateProgramWithSource failed\n");
     return err;
@@ -121,7 +97,8 @@
     format.image_channel_data_type = CL_UNSIGNED_INT32;
 
     /* Create input image */
-    flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR;
+    flags = (cl_mem_flags) (CL_MEM_READ_WRITE
+                            | CL_MEM_COPY_HOST_PTR);
     src_image = create_image_2d(context, flags, &format,
                                 size_x, size_y, 0,
                                 (void *)input, &err);

diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp
index 66a6c56..fc3fd0a 100644
--- a/test_conformance/basic/test_sizeof.cpp
+++ b/test_conformance/basic/test_sizeof.cpp

@@ -49,8 +49,13 @@
     {
         sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
     }
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", nullptr);
+    bool deviceLt20 = false;
+    Version version = get_device_cl_version(device);
+    if (version < Version(2,0)) {
+        deviceLt20 = true;
+    }
+
+    cl_int err = create_single_kernel_helper_with_build_options(context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", deviceLt20 ? "" : "-cl-std=CL2.0");
     if( err )
         return err;
 

diff --git a/test_conformance/basic/test_vector_swizzle.cpp b/test_conformance/basic/test_vector_swizzle.cpp
deleted file mode 100644
index 5ab3ea4..0000000
--- a/test_conformance/basic/test_vector_swizzle.cpp
+++ /dev/null

@@ -1,699 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include <algorithm>
-#include <numeric>
-#include <string>
-#include <vector>
-
-#include "procs.h"
-#include "harness/testHarness.h"
-
-template <int N> struct TestInfo
-{
-};
-
-template <> struct TestInfo<2>
-{
-    static const size_t vector_size = 2;
-
-    static constexpr const char* kernel_source_xyzw = R"CLC(
-__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // lvalue swizzles
-    dst[index++].x = value.x;
-    dst[index++].y = value.x;
-    dst[index++].xy = value;
-    dst[index++].yx = value;
-
-    // rvalue swizzles
-    dst[index++] = value.x;
-    dst[index++] = value.y;
-    dst[index++] = value.xy;
-    dst[index++] = value.yx;
-}
-)CLC";
-
-    static constexpr const char* kernel_source_rgba = R"CLC(
-__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // lvalue swizzles
-    dst[index++].r = value.r;
-    dst[index++].g = value.r;
-    dst[index++].rg = value;
-    dst[index++].gr = value;
-
-    // rvalue swizzles
-    dst[index++] = value.r;
-    dst[index++] = value.g;
-    dst[index++] = value.rg;
-    dst[index++] = value.gr;
-}
-)CLC";
-
-    static constexpr const char* kernel_source_sN = R"CLC(
-__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // lvalue swizzles
-    dst[index++].s0 = value.s0;
-    dst[index++].s1 = value.s0;
-    dst[index++].s01 = value;
-    dst[index++].s10 = value;
-
-    // rvalue swizzles
-    dst[index++] = value.s0;
-    dst[index++] = value.s1;
-    dst[index++] = value.s01;
-    dst[index++] = value.s10;
-}
-)CLC";
-};
-
-template <> struct TestInfo<3>
-{
-    static const size_t vector_size = 4; // sizeof(vec3) is four elements
-
-    static constexpr const char* kernel_source_xyzw = R"CLC(
-__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // lvalue swizzles
-    TYPE t;
-    t = dst[index]; t.x = value.x;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.y = value.x;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.z = value.x;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.xyz = value;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.zyx = value;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-
-    // rvalue swizzles
-    vstore3(value.x, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.y, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.z, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.xyz, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.zyx, 0, (__global BASETYPE*)(dst + index++));
-}
-)CLC";
-
-    static constexpr const char* kernel_source_rgba = R"CLC(
-__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // lvalue swizzles
-    TYPE t;
-    t = dst[index]; t.r = value.r;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.g = value.r;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.b = value.r;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.rgb = value;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.bgr = value;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-
-    // rvalue swizzles
-    vstore3(value.r, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.g, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.b, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.rgb, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.bgr, 0, (__global BASETYPE*)(dst + index++));
-}
-)CLC";
-
-    static constexpr const char* kernel_source_sN = R"CLC(
-__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // lvalue swizzles
-    TYPE t;
-    t = dst[index]; t.s0 = value.s0;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.s1 = value.s0;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.s2 = value.s0;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.s012 = value;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.s210 = value;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-
-    // rvalue swizzles
-    vstore3(value.s0, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.s1, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.s2, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.s012, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.s210, 0, (__global BASETYPE*)(dst + index++));
-}
-)CLC";
-};
-
-template <> struct TestInfo<4>
-{
-    static const size_t vector_size = 4;
-
-    static constexpr const char* kernel_source_xyzw = R"CLC(
-__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // lvalue swizzles
-    dst[index++].x = value.x;
-    dst[index++].y = value.x;
-    dst[index++].z = value.x;
-    dst[index++].w = value.x;
-    dst[index++].xyzw = value;
-    dst[index++].wzyx = value;
-
-    // rvalue swizzles
-    dst[index++] = value.x;
-    dst[index++] = value.y;
-    dst[index++] = value.z;
-    dst[index++] = value.w;
-    dst[index++] = value.xyzw;
-    dst[index++] = value.wzyx;
-}
-)CLC";
-
-    static constexpr const char* kernel_source_rgba = R"CLC(
-__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // lvalue swizzles
-    dst[index++].r = value.r;
-    dst[index++].g = value.r;
-    dst[index++].b = value.r;
-    dst[index++].a = value.r;
-    dst[index++].rgba = value;
-    dst[index++].abgr = value;
-
-    // rvalue swizzles
-    dst[index++] = value.r;
-    dst[index++] = value.g;
-    dst[index++] = value.b;
-    dst[index++] = value.a;
-    dst[index++] = value.rgba;
-    dst[index++] = value.abgr;
-}
-)CLC";
-
-    static constexpr const char* kernel_source_sN = R"CLC(
-__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // lvalue swizzles
-    dst[index++].s0 = value.s0;
-    dst[index++].s1 = value.s0;
-    dst[index++].s2 = value.s0;
-    dst[index++].s3 = value.s0;
-    dst[index++].s0123 = value;
-    dst[index++].s3210 = value;
-
-    // rvalue swizzles
-    dst[index++] = value.s0;
-    dst[index++] = value.s1;
-    dst[index++] = value.s2;
-    dst[index++] = value.s3;
-    dst[index++] = value.s0123;
-    dst[index++] = value.s3210;
-}
-)CLC";
-};
-
-template <> struct TestInfo<8>
-{
-    static const size_t vector_size = 8;
-
-    static constexpr const char* kernel_source_xyzw = R"CLC(
-__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // xwzw only for first four components!
-
-    // lvalue swizzles
-    dst[index++].x = value.x;
-    dst[index++].y = value.x;
-    dst[index++].z = value.x;
-    dst[index++].w = value.x;
-    dst[index++].s4 = value.s0;
-    dst[index++].s5 = value.s0;
-    dst[index++].s6 = value.s0;
-    dst[index++].s7 = value.s0;
-    dst[index].xyzw = value.s0123;
-    dst[index++].s4567 = value.s4567;
-    dst[index].s7654 = value.s0123;
-    dst[index++].wzyx = value.s4567;
-
-    // rvalue swizzles
-    dst[index++] = value.x;
-    dst[index++] = value.y;
-    dst[index++] = value.z;
-    dst[index++] = value.w;
-    dst[index++] = value.s4;
-    dst[index++] = value.s5;
-    dst[index++] = value.s6;
-    dst[index++] = value.s7;
-    dst[index++] = (TYPE)(value.xyzw, value.s4567);
-    dst[index++] = (TYPE)(value.s7654, value.wzyx);
-}
-)CLC";
-    static constexpr const char* kernel_source_rgba = R"CLC(
-__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // rgba only for first four components!
-
-    // lvalue swizzles
-    dst[index++].r = value.r;
-    dst[index++].g = value.r;
-    dst[index++].b = value.r;
-    dst[index++].a = value.r;
-    dst[index++].s4 = value.s0;
-    dst[index++].s5 = value.s0;
-    dst[index++].s6 = value.s0;
-    dst[index++].s7 = value.s0;
-    dst[index].rgba = value.s0123;
-    dst[index++].s4567 = value.s4567;
-    dst[index].s7654 = value.s0123;
-    dst[index++].abgr = value.s4567;
-
-    // rvalue swizzles
-    dst[index++] = value.r;
-    dst[index++] = value.g;
-    dst[index++] = value.b;
-    dst[index++] = value.a;
-    dst[index++] = value.s4;
-    dst[index++] = value.s5;
-    dst[index++] = value.s6;
-    dst[index++] = value.s7;
-    dst[index++] = (TYPE)(value.rgba, value.s4567);
-    dst[index++] = (TYPE)(value.s7654, value.abgr);
-}
-)CLC";
-    static constexpr const char* kernel_source_sN = R"CLC(
-__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // lvalue swizzles
-    dst[index++].s0 = value.s0;
-    dst[index++].s1 = value.s0;
-    dst[index++].s2 = value.s0;
-    dst[index++].s3 = value.s0;
-    dst[index++].s4 = value.s0;
-    dst[index++].s5 = value.s0;
-    dst[index++].s6 = value.s0;
-    dst[index++].s7 = value.s0;
-    dst[index++].s01234567 = value;
-    dst[index++].s76543210 = value;
-
-    // rvalue swizzles
-    dst[index++] = value.s0;
-    dst[index++] = value.s1;
-    dst[index++] = value.s2;
-    dst[index++] = value.s3;
-    dst[index++] = value.s4;
-    dst[index++] = value.s5;
-    dst[index++] = value.s6;
-    dst[index++] = value.s7;
-    dst[index++] = value.s01234567;
-    dst[index++] = value.s76543210;
-}
-)CLC";
-};
-
-template <> struct TestInfo<16>
-{
-    static const size_t vector_size = 16;
-
-    static constexpr const char* kernel_source_xyzw = R"CLC(
-__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // xwzw only for first four components!
-
-    // lvalue swizzles
-    dst[index++].x = value.x;
-    dst[index++].y = value.x;
-    dst[index++].z = value.x;
-    dst[index++].w = value.x;
-    dst[index++].s4 = value.s0;
-    dst[index++].s5 = value.s0;
-    dst[index++].s6 = value.s0;
-    dst[index++].s7 = value.s0;
-    dst[index++].s8 = value.s0;
-    dst[index++].s9 = value.s0;
-    dst[index++].sa = value.s0;
-    dst[index++].sb = value.s0;
-    dst[index++].sc = value.s0;
-    dst[index++].sd = value.s0;
-    dst[index++].se = value.s0;
-    dst[index++].sf = value.s0;
-    dst[index].xyzw = value.s0123;
-    dst[index].s4567 = value.s4567;
-    dst[index].s89ab = value.s89ab;
-    dst[index++].scdef = value.scdef;
-    dst[index].sfedc = value.s0123;
-    dst[index].sba98 = value.s4567;
-    dst[index].s7654 = value.s89ab;
-    dst[index++].wzyx = value.scdef;
-
-    // rvalue swizzles
-    dst[index++] = value.x;
-    dst[index++] = value.y;
-    dst[index++] = value.z;
-    dst[index++] = value.w;
-    dst[index++] = value.s4;
-    dst[index++] = value.s5;
-    dst[index++] = value.s6;
-    dst[index++] = value.s7;
-    dst[index++] = value.s8;
-    dst[index++] = value.s9;
-    dst[index++] = value.sa;
-    dst[index++] = value.sb;
-    dst[index++] = value.sc;
-    dst[index++] = value.sd;
-    dst[index++] = value.se;
-    dst[index++] = value.sf;
-    dst[index++] = (TYPE)(value.xyzw, value.s4567, value.s89abcdef);
-    dst[index++] = (TYPE)(value.sfedcba98, value.s7654, value.wzyx);
-}
-)CLC";
-    static constexpr const char* kernel_source_rgba = R"CLC(
-__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // rgba only for first four components!
-
-    // lvalue swizzles
-    dst[index++].r = value.r;
-    dst[index++].g = value.r;
-    dst[index++].b = value.r;
-    dst[index++].a = value.r;
-    dst[index++].s4 = value.s0;
-    dst[index++].s5 = value.s0;
-    dst[index++].s6 = value.s0;
-    dst[index++].s7 = value.s0;
-    dst[index++].s8 = value.s0;
-    dst[index++].s9 = value.s0;
-    dst[index++].sa = value.s0;
-    dst[index++].sb = value.s0;
-    dst[index++].sc = value.s0;
-    dst[index++].sd = value.s0;
-    dst[index++].se = value.s0;
-    dst[index++].sf = value.s0;
-    dst[index].rgba = value.s0123;
-    dst[index].s4567 = value.s4567;
-    dst[index].s89ab = value.s89ab;
-    dst[index++].scdef = value.scdef;
-    dst[index].sfedc = value.s0123;
-    dst[index].sba98 = value.s4567;
-    dst[index].s7654 = value.s89ab;
-    dst[index++].abgr = value.scdef;
-
-    // rvalue swizzles
-    dst[index++] = value.r;
-    dst[index++] = value.g;
-    dst[index++] = value.b;
-    dst[index++] = value.a;
-    dst[index++] = value.s4;
-    dst[index++] = value.s5;
-    dst[index++] = value.s6;
-    dst[index++] = value.s7;
-    dst[index++] = value.s8;
-    dst[index++] = value.s9;
-    dst[index++] = value.sa;
-    dst[index++] = value.sb;
-    dst[index++] = value.sc;
-    dst[index++] = value.sd;
-    dst[index++] = value.se;
-    dst[index++] = value.sf;
-    dst[index++] = (TYPE)(value.rgba, value.s4567, value.s89abcdef);
-    dst[index++] = (TYPE)(value.sfedcba98, value.s7654, value.abgr);
-}
-)CLC";
-    static constexpr const char* kernel_source_sN = R"CLC(
-__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) {
-    int index = 0;
-
-    // lvalue swizzles
-    dst[index++].s0 = value.s0;
-    dst[index++].s1 = value.s0;
-    dst[index++].s2 = value.s0;
-    dst[index++].s3 = value.s0;
-    dst[index++].s4 = value.s0;
-    dst[index++].s5 = value.s0;
-    dst[index++].s6 = value.s0;
-    dst[index++].s7 = value.s0;
-    dst[index++].s8 = value.s0;
-    dst[index++].s9 = value.s0;
-    dst[index++].sa = value.s0;
-    dst[index++].sb = value.s0;
-    dst[index++].sc = value.s0;
-    dst[index++].sd = value.s0;
-    dst[index++].se = value.s0;
-    dst[index++].sf = value.s0;
-    dst[index++].s0123456789abcdef = value; // lower-case
-    dst[index++].sFEDCBA9876543210 = value; // upper-case
-
-    // rvalue swizzles
-    dst[index++] = value.s0;
-    dst[index++] = value.s1;
-    dst[index++] = value.s2;
-    dst[index++] = value.s3;
-    dst[index++] = value.s4;
-    dst[index++] = value.s5;
-    dst[index++] = value.s6;
-    dst[index++] = value.s7;
-    dst[index++] = value.s8;
-    dst[index++] = value.s9;
-    dst[index++] = value.sa;
-    dst[index++] = value.sb;
-    dst[index++] = value.sc;
-    dst[index++] = value.sd;
-    dst[index++] = value.se;
-    dst[index++] = value.sf;
-    dst[index++] = value.s0123456789abcdef; // lower-case
-    dst[index++] = value.sFEDCBA9876543210; // upper-case
-}
-)CLC";
-};
-
-template <typename T, size_t N, size_t S>
-static void makeReference(std::vector<T>& ref)
-{
-    // N single channel lvalue tests
-    // 2 multi-value lvalue tests
-    // N single channel rvalue tests
-    // 2 multi-value rvalue tests
-    const size_t refSize = (N + 2 + N + 2) * S;
-
-    ref.resize(refSize);
-    std::fill(ref.begin(), ref.end(), 99);
-
-    size_t dstIndex = 0;
-
-    // single channel lvalue
-    for (size_t i = 0; i < N; i++)
-    {
-        ref[dstIndex * S + i] = 0;
-        ++dstIndex;
-    }
-
-    // normal lvalue
-    for (size_t c = 0; c < N; c++)
-    {
-        ref[dstIndex * S + c] = c;
-    }
-    ++dstIndex;
-
-    // reverse lvalue
-    for (size_t c = 0; c < N; c++)
-    {
-        ref[dstIndex * S + c] = N - c - 1;
-    }
-    ++dstIndex;
-
-    // single channel rvalue
-    for (size_t i = 0; i < N; i++)
-    {
-        for (size_t c = 0; c < N; c++)
-        {
-            ref[dstIndex * S + c] = i;
-        }
-        ++dstIndex;
-    }
-
-    // normal rvalue
-    for (size_t c = 0; c < N; c++)
-    {
-        ref[dstIndex * S + c] = c;
-    }
-    ++dstIndex;
-
-    // reverse rvalue
-    for (size_t c = 0; c < N; c++)
-    {
-        ref[dstIndex * S + c] = N - c - 1;
-    }
-    ++dstIndex;
-
-    assert(dstIndex * S == refSize);
-}
-
-template <typename T>
-static int
-test_vectype_case(const std::vector<T>& value, const std::vector<T>& reference,
-                  cl_context context, cl_kernel kernel, cl_command_queue queue)
-{
-    cl_int error = CL_SUCCESS;
-
-    clMemWrapper mem;
-
-    std::vector<T> buffer(reference.size(), 99);
-    mem = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                         buffer.size() * sizeof(T), buffer.data(), &error);
-    test_error(error, "Unable to create test buffer");
-
-    error = clSetKernelArg(kernel, 0, value.size() * sizeof(T), value.data());
-    test_error(error, "Unable to set value kernel arg");
-
-    error = clSetKernelArg(kernel, 1, sizeof(mem), &mem);
-    test_error(error, "Unable to set destination buffer kernel arg");
-
-    size_t global_work_size[] = { 1 };
-    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
-                                   NULL, 0, NULL, NULL);
-    test_error(error, "Unable to enqueue test kernel");
-
-    error = clFinish(queue);
-    test_error(error, "clFinish failed after test kernel");
-
-    error =
-        clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, buffer.size() * sizeof(T),
-                            buffer.data(), 0, NULL, NULL);
-    test_error(error, "Unable to read data after test kernel");
-
-    if (buffer != reference)
-    {
-        log_error("Result buffer did not match reference buffer!\n");
-        return TEST_FAIL;
-    }
-
-    return TEST_PASS;
-}
-
-template <typename T, size_t N>
-static int test_vectype(const char* type_name, cl_device_id device,
-                        cl_context context, cl_command_queue queue)
-{
-    log_info("    testing type %s%d\n", type_name, N);
-
-    cl_int error = CL_SUCCESS;
-    int result = TEST_PASS;
-
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    std::string buildOptions{ "-DTYPE=" };
-    buildOptions += type_name;
-    buildOptions += std::to_string(N);
-    buildOptions += " -DBASETYPE=";
-    buildOptions += type_name;
-
-    constexpr size_t S = TestInfo<N>::vector_size;
-
-    std::vector<T> value(S);
-    std::iota(value.begin(), value.end(), 0);
-
-    std::vector<T> reference;
-    makeReference<T, N, S>(reference);
-
-    // XYZW swizzles:
-
-    const char* xyzw_source = TestInfo<N>::kernel_source_xyzw;
-    error = create_single_kernel_helper(
-        context, &program, &kernel, 1, &xyzw_source, "test_vector_swizzle_xyzw",
-        buildOptions.c_str());
-    test_error(error, "Unable to create xyzw test kernel");
-
-    result |= test_vectype_case(value, reference, context, kernel, queue);
-
-    // sN swizzles:
-    const char* sN_source = TestInfo<N>::kernel_source_sN;
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        &sN_source, "test_vector_swizzle_sN",
-                                        buildOptions.c_str());
-    test_error(error, "Unable to create sN test kernel");
-
-    result |= test_vectype_case(value, reference, context, kernel, queue);
-
-    // RGBA swizzles for OpenCL 3.0 and newer:
-    const Version device_version = get_device_cl_version(device);
-    if (device_version >= Version(3, 0))
-    {
-        const char* rgba_source = TestInfo<N>::kernel_source_rgba;
-        error = create_single_kernel_helper(
-            context, &program, &kernel, 1, &rgba_source,
-            "test_vector_swizzle_rgba", buildOptions.c_str());
-        test_error(error, "Unable to create rgba test kernel");
-
-        result |= test_vectype_case(value, reference, context, kernel, queue);
-    }
-
-    return result;
-}
-
-template <typename T>
-static int test_type(const char* type_name, cl_device_id device,
-                     cl_context context, cl_command_queue queue)
-{
-    return test_vectype<T, 2>(type_name, device, context, queue)
-        | test_vectype<T, 3>(type_name, device, context, queue)
-        | test_vectype<T, 4>(type_name, device, context, queue)
-        | test_vectype<T, 8>(type_name, device, context, queue)
-        | test_vectype<T, 16>(type_name, device, context, queue);
-}
-
-int test_vector_swizzle(cl_device_id device, cl_context context,
-                        cl_command_queue queue, int num_elements)
-{
-    int hasDouble = is_extension_available(device, "cl_khr_fp64");
-
-    int result = TEST_PASS;
-    result |= test_type<cl_char>("char", device, context, queue);
-    result |= test_type<cl_uchar>("uchar", device, context, queue);
-    result |= test_type<cl_short>("short", device, context, queue);
-    result |= test_type<cl_ushort>("ushort", device, context, queue);
-    result |= test_type<cl_int>("int", device, context, queue);
-    result |= test_type<cl_uint>("uint", device, context, queue);
-    if (gHasLong)
-    {
-        result |= test_type<cl_long>("long", device, context, queue);
-        result |= test_type<cl_ulong>("ulong", device, context, queue);
-    }
-    result |= test_type<cl_float>("float", device, context, queue);
-    if (hasDouble)
-    {
-        result |= test_type<cl_double>("double", device, context, queue);
-    }
-    return result;
-}

diff --git a/test_conformance/basic/test_wg_barrier.cpp b/test_conformance/basic/test_wg_barrier.cpp
index a237d80..7867512 100644
--- a/test_conformance/basic/test_wg_barrier.cpp
+++ b/test_conformance/basic/test_wg_barrier.cpp

@@ -87,9 +87,7 @@
     size_t max_threadgroup_size = 0;
     MTdata d;
 
-    err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &wg_barrier_kernel_code, "compute_sum",
-        nullptr);
+    err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &wg_barrier_kernel_code, "compute_sum", "-cl-std=CL2.0" );
     test_error(err, "Failed to build kernel/program.");
 
     err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
@@ -110,15 +108,11 @@
     input_ptr = (int*)malloc(sizeof(int) * num_elements);
     output_ptr = (int*)malloc(sizeof(int));
 
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err);
     test_error(err, "clCreateBuffer failed.");
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int), NULL, &err);
     test_error(err, "clCreateBuffer failed.");
-    streams[2] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_int) * max_threadgroup_size, NULL, &err);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * max_threadgroup_size, NULL, &err);
     test_error(err, "clCreateBuffer failed.");
 
     d = init_genrand( gRandomSeed );

diff --git a/test_conformance/buffers/array_info.cpp b/test_conformance/buffers/array_info.cpp
index f143cf3..be33c00 100644
--- a/test_conformance/buffers/array_info.cpp
+++ b/test_conformance/buffers/array_info.cpp

@@ -33,8 +33,7 @@
     size_t          retSize;
     size_t          elementSize = sizeof( cl_int );
 
-    memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, elementSize * w * h * d,
-                            NULL, &err);
+    memobj = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  elementSize * w*h*d, NULL, &err);
     test_error(err, "clCreateBuffer failed.");
 
     err = clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof( size_t ), (void *)&retSize, NULL);

diff --git a/test_conformance/buffers/main.cpp b/test_conformance/buffers/main.cpp
index 7c5502a..1a5c864 100644
--- a/test_conformance/buffers/main.cpp
+++ b/test_conformance/buffers/main.cpp

@@ -22,104 +22,104 @@
 #include "harness/testHarness.h"
 
 test_definition test_list[] = {
-    ADD_TEST(buffer_read_async_int),
-    ADD_TEST(buffer_read_async_uint),
-    ADD_TEST(buffer_read_async_long),
-    ADD_TEST(buffer_read_async_ulong),
-    ADD_TEST(buffer_read_async_short),
-    ADD_TEST(buffer_read_async_ushort),
-    ADD_TEST(buffer_read_async_char),
-    ADD_TEST(buffer_read_async_uchar),
-    ADD_TEST(buffer_read_async_float),
-    ADD_TEST(buffer_read_array_barrier_int),
-    ADD_TEST(buffer_read_array_barrier_uint),
-    ADD_TEST(buffer_read_array_barrier_long),
-    ADD_TEST(buffer_read_array_barrier_ulong),
-    ADD_TEST(buffer_read_array_barrier_short),
-    ADD_TEST(buffer_read_array_barrier_ushort),
-    ADD_TEST(buffer_read_array_barrier_char),
-    ADD_TEST(buffer_read_array_barrier_uchar),
-    ADD_TEST(buffer_read_array_barrier_float),
-    ADD_TEST(buffer_read_int),
-    ADD_TEST(buffer_read_uint),
-    ADD_TEST(buffer_read_long),
-    ADD_TEST(buffer_read_ulong),
-    ADD_TEST(buffer_read_short),
-    ADD_TEST(buffer_read_ushort),
-    ADD_TEST(buffer_read_float),
-    ADD_TEST(buffer_read_half),
-    ADD_TEST(buffer_read_char),
-    ADD_TEST(buffer_read_uchar),
-    ADD_TEST(buffer_read_struct),
-    ADD_TEST(buffer_read_random_size),
-    ADD_TEST(buffer_map_read_int),
-    ADD_TEST(buffer_map_read_uint),
-    ADD_TEST(buffer_map_read_long),
-    ADD_TEST(buffer_map_read_ulong),
-    ADD_TEST(buffer_map_read_short),
-    ADD_TEST(buffer_map_read_ushort),
-    ADD_TEST(buffer_map_read_char),
-    ADD_TEST(buffer_map_read_uchar),
-    ADD_TEST(buffer_map_read_float),
-    ADD_TEST(buffer_map_read_struct),
+    ADD_TEST( buffer_read_async_int ),
+    ADD_TEST( buffer_read_async_uint ),
+    ADD_TEST( buffer_read_async_long ),
+    ADD_TEST( buffer_read_async_ulong ),
+    ADD_TEST( buffer_read_async_short ),
+    ADD_TEST( buffer_read_async_ushort ),
+    ADD_TEST( buffer_read_async_char ),
+    ADD_TEST( buffer_read_async_uchar ),
+    ADD_TEST( buffer_read_async_float ),
+    ADD_TEST( buffer_read_array_barrier_int ),
+    ADD_TEST( buffer_read_array_barrier_uint ),
+    ADD_TEST( buffer_read_array_barrier_long ),
+    ADD_TEST( buffer_read_array_barrier_ulong ),
+    ADD_TEST( buffer_read_array_barrier_short ),
+    ADD_TEST( buffer_read_array_barrier_ushort ),
+    ADD_TEST( buffer_read_array_barrier_char ),
+    ADD_TEST( buffer_read_array_barrier_uchar ),
+    ADD_TEST( buffer_read_array_barrier_float ),
+    ADD_TEST( buffer_read_int ),
+    ADD_TEST( buffer_read_uint ),
+    ADD_TEST( buffer_read_long ),
+    ADD_TEST( buffer_read_ulong ),
+    ADD_TEST( buffer_read_short ),
+    ADD_TEST( buffer_read_ushort ),
+    ADD_TEST( buffer_read_float ),
+    NOT_IMPLEMENTED_TEST( buffer_read_half ),
+    ADD_TEST( buffer_read_char ),
+    ADD_TEST( buffer_read_uchar ),
+    ADD_TEST( buffer_read_struct ),
+    ADD_TEST( buffer_read_random_size ),
+    ADD_TEST( buffer_map_read_int ),
+    ADD_TEST( buffer_map_read_uint ),
+    ADD_TEST( buffer_map_read_long ),
+    ADD_TEST( buffer_map_read_ulong ),
+    ADD_TEST( buffer_map_read_short ),
+    ADD_TEST( buffer_map_read_ushort ),
+    ADD_TEST( buffer_map_read_char ),
+    ADD_TEST( buffer_map_read_uchar ),
+    ADD_TEST( buffer_map_read_float ),
+    ADD_TEST( buffer_map_read_struct ),
 
-    ADD_TEST(buffer_map_write_int),
-    ADD_TEST(buffer_map_write_uint),
-    ADD_TEST(buffer_map_write_long),
-    ADD_TEST(buffer_map_write_ulong),
-    ADD_TEST(buffer_map_write_short),
-    ADD_TEST(buffer_map_write_ushort),
-    ADD_TEST(buffer_map_write_char),
-    ADD_TEST(buffer_map_write_uchar),
-    ADD_TEST(buffer_map_write_float),
-    ADD_TEST(buffer_map_write_struct),
+    ADD_TEST( buffer_map_write_int ),
+    ADD_TEST( buffer_map_write_uint ),
+    ADD_TEST( buffer_map_write_long ),
+    ADD_TEST( buffer_map_write_ulong ),
+    ADD_TEST( buffer_map_write_short ),
+    ADD_TEST( buffer_map_write_ushort ),
+    ADD_TEST( buffer_map_write_char ),
+    ADD_TEST( buffer_map_write_uchar ),
+    ADD_TEST( buffer_map_write_float ),
+    ADD_TEST( buffer_map_write_struct ),
 
-    ADD_TEST(buffer_write_int),
-    ADD_TEST(buffer_write_uint),
-    ADD_TEST(buffer_write_short),
-    ADD_TEST(buffer_write_ushort),
-    ADD_TEST(buffer_write_char),
-    ADD_TEST(buffer_write_uchar),
-    ADD_TEST(buffer_write_float),
-    ADD_TEST(buffer_write_half),
-    ADD_TEST(buffer_write_long),
-    ADD_TEST(buffer_write_ulong),
-    ADD_TEST(buffer_write_struct),
-    ADD_TEST(buffer_write_async_int),
-    ADD_TEST(buffer_write_async_uint),
-    ADD_TEST(buffer_write_async_short),
-    ADD_TEST(buffer_write_async_ushort),
-    ADD_TEST(buffer_write_async_char),
-    ADD_TEST(buffer_write_async_uchar),
-    ADD_TEST(buffer_write_async_float),
-    ADD_TEST(buffer_write_async_long),
-    ADD_TEST(buffer_write_async_ulong),
-    ADD_TEST(buffer_copy),
-    ADD_TEST(buffer_partial_copy),
-    ADD_TEST(mem_read_write_flags),
-    ADD_TEST(mem_write_only_flags),
-    ADD_TEST(mem_read_only_flags),
-    ADD_TEST(mem_copy_host_flags),
-    ADD_TEST(mem_alloc_ref_flags),
-    ADD_TEST(array_info_size),
+    ADD_TEST( buffer_write_int ),
+    ADD_TEST( buffer_write_uint ),
+    ADD_TEST( buffer_write_short ),
+    ADD_TEST( buffer_write_ushort ),
+    ADD_TEST( buffer_write_char ),
+    ADD_TEST( buffer_write_uchar ),
+    ADD_TEST( buffer_write_float ),
+    NOT_IMPLEMENTED_TEST( buffer_write_half ),
+    ADD_TEST( buffer_write_long ),
+    ADD_TEST( buffer_write_ulong ),
+    ADD_TEST( buffer_write_struct ),
+    ADD_TEST( buffer_write_async_int ),
+    ADD_TEST( buffer_write_async_uint ),
+    ADD_TEST( buffer_write_async_short ),
+    ADD_TEST( buffer_write_async_ushort ),
+    ADD_TEST( buffer_write_async_char ),
+    ADD_TEST( buffer_write_async_uchar ),
+    ADD_TEST( buffer_write_async_float ),
+    ADD_TEST( buffer_write_async_long ),
+    ADD_TEST( buffer_write_async_ulong ),
+    ADD_TEST( buffer_copy ),
+    ADD_TEST( buffer_partial_copy ),
+    ADD_TEST( mem_read_write_flags ),
+    ADD_TEST( mem_write_only_flags ),
+    ADD_TEST( mem_read_only_flags ),
+    ADD_TEST( mem_copy_host_flags ),
+    NOT_IMPLEMENTED_TEST( mem_alloc_ref_flags ),
+    ADD_TEST( array_info_size ),
 
-    ADD_TEST(sub_buffers_read_write),
-    ADD_TEST(sub_buffers_read_write_dual_devices),
-    ADD_TEST(sub_buffers_overlapping),
+    ADD_TEST( sub_buffers_read_write ),
+    ADD_TEST( sub_buffers_read_write_dual_devices ),
+    ADD_TEST( sub_buffers_overlapping ),
 
-    ADD_TEST(buffer_fill_int),
-    ADD_TEST(buffer_fill_uint),
-    ADD_TEST(buffer_fill_short),
-    ADD_TEST(buffer_fill_ushort),
-    ADD_TEST(buffer_fill_char),
-    ADD_TEST(buffer_fill_uchar),
-    ADD_TEST(buffer_fill_long),
-    ADD_TEST(buffer_fill_ulong),
-    ADD_TEST(buffer_fill_float),
-    ADD_TEST(buffer_fill_struct),
+    ADD_TEST( buffer_fill_int ),
+    ADD_TEST( buffer_fill_uint ),
+    ADD_TEST( buffer_fill_short ),
+    ADD_TEST( buffer_fill_ushort ),
+    ADD_TEST( buffer_fill_char ),
+    ADD_TEST( buffer_fill_uchar ),
+    ADD_TEST( buffer_fill_long ),
+    ADD_TEST( buffer_fill_ulong ),
+    ADD_TEST( buffer_fill_float ),
+    ADD_TEST( buffer_fill_struct ),
 
-    ADD_TEST(buffer_migrate),
-    ADD_TEST(image_migrate),
+    ADD_TEST( buffer_migrate ),
+    ADD_TEST( image_migrate ),
 };
 
 const int test_num = ARRAY_SIZE( test_list );
@@ -141,5 +141,5 @@
 
 int main( int argc, const char *argv[] )
 {
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
 }

diff --git a/test_conformance/buffers/test_buffer_fill.cpp b/test_conformance/buffers/test_buffer_fill.cpp
index 9c9c7d1..142b7da 100644
--- a/test_conformance/buffers/test_buffer_fill.cpp
+++ b/test_conformance/buffers/test_buffer_fill.cpp

@@ -24,6 +24,8 @@
 #include "procs.h"
 #include "harness/errorHelpers.h"
 
+#define USE_LOCAL_WORK_GROUP    1
+
 #define TEST_PRIME_CHAR        0x77
 #define TEST_PRIME_INT        ((1<<16)+1)
 #define TEST_PRIME_UINT        ((1U<<16)+1U)
@@ -562,13 +564,18 @@
                      int loops, void *inptr[5], void *hostptr[5], void *pattern[5], size_t offset_elements, size_t fill_elements,
                      const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int) )
 {
+    cl_mem      buffers[10];
     void        *outptr[5];
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
+    cl_event    event[2];
     size_t      ptrSizes[5];
     size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
     int         err;
-    int i;
+    int         i, ii;
     int         src_flag_id;
     int         total_errors = 0;
 
@@ -582,39 +589,23 @@
     ptrSizes[3] = ptrSizes[2] << 1;
     ptrSizes[4] = ptrSizes[3] << 1;
 
-    loops = (loops < 5 ? loops : 5);
-    for (i = 0; i < loops; i++)
-    {
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &kernelCode[i], kernelName[i]);
-        if (err)
-        {
-            log_error(" Error creating program for %s\n", type);
-            return -1;
-        }
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]);
 
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            clEventWrapper event[2];
-            clMemWrapper buffers[2];
+        loops = ( loops < 5 ? loops : 5 );
+        for ( i = 0; i < loops; i++ ){
+            ii = i << 1;
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],
-                                            ptrSizes[i] * num_elements,
-                                            hostptr[i], &err);
+                buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, hostptr[i], &err);
             else
-                buffers[0] =
-                    clCreateBuffer(context, flag_set[src_flag_id],
-                                   ptrSizes[i] * num_elements, NULL, &err);
-            if (!buffers[0] || err)
-            {
+                buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
+            if ( !buffers[ii] || err){
                 print_error(err, "clCreateBuffer failed\n" );
                 return -1;
             }
             // Initialize source buffer with 0, since the validation code expects 0(s) outside of the fill region.
             if (!((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))) {
-                err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0,
-                                           ptrSizes[i] * num_elements,
-                                           hostptr[i], 0, NULL, NULL);
+                err = clEnqueueWriteBuffer(queue, buffers[ii], CL_FALSE, 0, ptrSizes[i]*num_elements, hostptr[i], 0, NULL, NULL);
                 if ( err != CL_SUCCESS ){
                     print_error(err, "clEnqueueWriteBuffer failed\n" );
                     return -1;
@@ -623,33 +614,50 @@
 
             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
             memset(outptr[i], 0, ptrSizes[i] * num_elements);
-            buffers[1] =
-                clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                               ptrSizes[i] * num_elements, outptr[i], &err);
-            if (!buffers[1] || err)
-            {
+            buffers[ii+1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,  ptrSizes[i] * num_elements, outptr[i], &err);
+            if ( !buffers[ii+1] || err){
                 print_error(err, "clCreateBuffer failed\n" );
+                clReleaseMemObject( buffers[ii] );
                 align_free( outptr[i] );
                 return -1;
             }
 
-            err = clEnqueueFillBuffer(
-                queue, buffers[0], pattern[i], ptrSizes[i],
-                ptrSizes[i] * offset_elements, ptrSizes[i] * fill_elements, 0,
-                NULL, &(event[0]));
-
+            err = clEnqueueFillBuffer(queue, buffers[ii], pattern[i], ptrSizes[i],
+                                      ptrSizes[i] * offset_elements, ptrSizes[i] * fill_elements,
+                                      0, NULL, &(event[0]));
+            /* uncomment for test debugging
+             err = clEnqueueWriteBuffer(queue, buffers[ii], CL_FALSE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &(event[0]));
+             */
             if ( err != CL_SUCCESS ){
                 print_error( err, " clEnqueueFillBuffer failed" );
+                clReleaseMemObject( buffers[ii] );
+                clReleaseMemObject( buffers[ii+1] );
                 align_free( outptr[i] );
                 return -1;
             }
 
-            err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem),
-                                 (void *)&buffers[0]);
-            err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem),
-                                  (void *)&buffers[1]);
+            err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
+            if ( err ){
+                log_error( " Error creating program for %s\n", type );
+                clReleaseMemObject( buffers[ii] );
+                clReleaseMemObject( buffers[ii+1] );
+                align_free( outptr[i] );
+                return -1;
+            }
+
+#ifdef USE_LOCAL_WORK_GROUP
+            err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+            test_error( err, "Unable to get work group size to use" );
+#endif
+
+            err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
+            err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clSetKernelArg failed" );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                clReleaseMemObject( buffers[ii] );
+                clReleaseMemObject( buffers[ii+1] );
                 align_free( outptr[i] );
                 return -1;
             }
@@ -657,19 +665,26 @@
             err = clWaitForEvents(  1, &(event[0]) );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clWaitForEvents() failed" );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                clReleaseMemObject( buffers[ii] );
+                clReleaseMemObject( buffers[ii+1] );
                 align_free( outptr[i] );
                 return -1;
             }
+            clReleaseEvent(event[0]);
 
+#ifdef USE_LOCAL_WORK_GROUP
+            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
             if (err != CL_SUCCESS){
                 print_error( err, "clEnqueueNDRangeKernel failed" );
                 return -1;
             }
 
-            err = clEnqueueReadBuffer(queue, buffers[1], false, 0,
-                                      ptrSizes[i] * num_elements, outptr[i], 0,
-                                      NULL, &(event[1]));
+            err = clEnqueueReadBuffer( queue, buffers[ii+1], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &(event[1]) );
             if (err != CL_SUCCESS){
                 print_error( err, "clEnqueueReadBuffer failed" );
                 return -1;
@@ -679,18 +694,21 @@
             if ( err != CL_SUCCESS ){
                 print_error( err, "clWaitForEvents() failed" );
             }
+            clReleaseEvent(event[1]);
 
             if ( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
-                log_error(" %s%d test failed. (cl_mem_flags: %s)\n", type,
-                          1 << i, flag_set_names[src_flag_id]);
+                log_error( " %s%d test failed\n", type, 1<<i );
                 total_errors++;
             }
             else{
-                log_info(" %s%d test passed (cl_mem_flags: %s)\n", type, 1 << i,
-                         flag_set_names[src_flag_id]);
+                log_info( " %s%d test passed\n", type, 1<<i );
             }
 
             // cleanup
+            clReleaseMemObject( buffers[ii] );
+            clReleaseMemObject( buffers[ii+1] );
+            clReleaseKernel( kernel[i] );
+            clReleaseProgram( program[i] );
             align_free( outptr[i] );
         }
     } // src cl_mem_flag
@@ -702,11 +720,19 @@
 
 int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
 {
-    TestStruct pattern;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
+    cl_mem      buffers[2];
+    void        *outptr;
+    TestStruct  *inptr;
+    TestStruct  *hostptr;
+    TestStruct  *pattern;
+    cl_program  program;
+    cl_kernel   kernel;
+    cl_event    event[2];
     size_t      ptrSize = sizeof( TestStruct );
     size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
     int         n, err;
     size_t      j, offset_elements, fill_elements;
     int         src_flag_id;
@@ -717,63 +743,35 @@
 
     global_work_size[0] = (size_t)num_elements;
 
+    // Test with random offsets and fill sizes
+    for ( n = 0; n < 8; n++ ){
+        offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
+        fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
+        log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
 
-    for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-    {
-        log_info("Testing with cl_mem_flags: %s\n",
-                 flag_set_names[src_flag_id]);
+        pattern = (TestStruct *)malloc(ptrSize);
+        pattern->a = (cl_int)genrand_int32(d);
+        pattern->b = (cl_float)get_random_float( -FLT_MAX, FLT_MAX, d );
 
-        err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                          &struct_kernel_code,
-                                          "read_fill_struct");
-        if (err)
-        {
-            log_error(" Error creating program for struct\n");
-            free_mtdata(d);
-            return -1;
+        inptr = (TestStruct *)align_malloc(ptrSize * num_elements, min_alignment);
+        for ( j = 0; j < offset_elements; j++ ) {
+            inptr[j].a = 0;
+            inptr[j].b =0;
+        }
+        for ( j = offset_elements; j < offset_elements + fill_elements; j++ ) {
+            inptr[j].a = pattern->a;
+            inptr[j].b = pattern->b;
+        }
+        for ( j = offset_elements + fill_elements; j < (size_t)num_elements; j++ ) {
+            inptr[j].a = 0;
+            inptr[j].b = 0;
         }
 
-        // Test with random offsets and fill sizes
-        for (n = 0; n < 8; n++)
-        {
-            clEventWrapper event[2];
-            clMemWrapper buffers[2];
-            void *outptr;
-            TestStruct *inptr;
-            TestStruct *hostptr;
+        hostptr = (TestStruct *)align_malloc(ptrSize * num_elements, min_alignment);
+        memset(hostptr, 0, ptrSize * num_elements);
 
-            offset_elements =
-                (size_t)get_random_float(0.f, (float)(num_elements - 8), d);
-            fill_elements = (size_t)get_random_float(
-                8.f, (float)(num_elements - offset_elements), d);
-            log_info("Testing random fill from offset %d for %d elements: \n",
-                     (int)offset_elements, (int)fill_elements);
-
-            pattern.a = (cl_int)genrand_int32(d);
-            pattern.b = (cl_float)get_random_float(-FLT_MAX, FLT_MAX, d);
-
-            inptr = (TestStruct *)align_malloc(ptrSize * num_elements,
-                                               min_alignment);
-            for (j = 0; j < offset_elements; j++)
-            {
-                inptr[j].a = 0;
-                inptr[j].b = 0;
-            }
-            for (j = offset_elements; j < offset_elements + fill_elements; j++)
-            {
-                inptr[j].a = pattern.a;
-                inptr[j].b = pattern.b;
-            }
-            for (j = offset_elements + fill_elements; j < (size_t)num_elements;
-                 j++)
-            {
-                inptr[j].a = 0;
-                inptr[j].b = 0;
-            }
-
-            hostptr = (TestStruct *)align_malloc(ptrSize * num_elements,
-                                                 min_alignment);
-            memset(hostptr, 0, ptrSize * num_elements);
+        for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+            log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]);
 
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
                 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSize * num_elements, hostptr, &err);
@@ -781,6 +779,9 @@
                 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSize * num_elements, NULL, &err);
             if ( err ){
                 print_error(err, " clCreateBuffer failed\n" );
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
                 free_mtdata(d);
@@ -790,6 +791,9 @@
                 err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, hostptr, 0, NULL, NULL);
                 if ( err != CL_SUCCESS ){
                     print_error(err, " clEnqueueWriteBuffer failed\n" );
+                    clReleaseEvent( event[0] );
+                    clReleaseEvent( event[1] );
+                    free( (void *)pattern );
                     align_free( (void *)inptr );
                     align_free( (void *)hostptr );
                     free_mtdata(d);
@@ -802,32 +806,68 @@
             if ( ! buffers[1] || err){
                 print_error(err, " clCreateBuffer failed\n" );
                 align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
                 free_mtdata(d);
                 return -1;
             }
 
-            err = clEnqueueFillBuffer(
-                queue, buffers[0], &pattern, ptrSize, ptrSize * offset_elements,
-                ptrSize * fill_elements, 0, NULL, &(event[0]));
+            err = clEnqueueFillBuffer(queue, buffers[0], pattern, ptrSize,
+                                      ptrSize * offset_elements, ptrSize * fill_elements,
+                                      0, NULL, &(event[0]));
             /* uncomment for test debugging
              err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, inptr, 0, NULL, &(event[0]));
              */
             if ( err != CL_SUCCESS ){
                 print_error( err, " clEnqueueFillBuffer failed" );
                 align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseMemObject(buffers[1]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
                 free_mtdata(d);
                 return -1;
             }
 
+            err = create_single_kernel_helper( context, &program, &kernel, 1, &struct_kernel_code, "read_fill_struct" );
+            if ( err ){
+                log_error( " Error creating program for struct\n" );
+                align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseMemObject(buffers[1]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
+                align_free( (void *)inptr );
+                align_free( (void *)hostptr );
+                free_mtdata(d);
+                return -1;
+            }
+
+#ifdef USE_LOCAL_WORK_GROUP
+            err = get_max_common_work_group_size( context, kernel, global_work_size[0], &local_work_size[0] );
+            test_error( err, "Unable to get work group size to use" );
+#endif
+
             err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *)&buffers[0] );
             err |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *)&buffers[1] );
             if ( err != CL_SUCCESS ){
                 print_error( err, " clSetKernelArg failed" );
+                clReleaseKernel( kernel );
+                clReleaseProgram( program );
                 align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseMemObject(buffers[1]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
                 free_mtdata(d);
@@ -837,17 +877,36 @@
             err = clWaitForEvents(  1, &(event[0]) );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clWaitForEvents() failed" );
+                clReleaseKernel( kernel );
+                clReleaseProgram( program );
                 align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseMemObject(buffers[1]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
                 free_mtdata(d);
                 return -1;
             }
+            clReleaseEvent( event[0] );
 
+#ifdef USE_LOCAL_WORK_GROUP
+            err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
             err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
             if ( err != CL_SUCCESS ){
                 print_error( err, " clEnqueueNDRangeKernel failed" );
+                clReleaseKernel( kernel );
+                clReleaseProgram( program );
                 align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseMemObject(buffers[1]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
                 free_mtdata(d);
@@ -857,7 +916,14 @@
             err = clEnqueueReadBuffer( queue, buffers[1], CL_FALSE, 0, ptrSize * num_elements, outptr, 0, NULL, &(event[1]) );
             if ( err != CL_SUCCESS ){
                 print_error( err, " clEnqueueReadBuffer failed" );
+                clReleaseKernel( kernel );
+                clReleaseProgram( program );
                 align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseMemObject(buffers[1]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
                 free_mtdata(d);
@@ -868,6 +934,7 @@
             if ( err != CL_SUCCESS ){
                 print_error( err, "clWaitForEvents() failed" );
             }
+            clReleaseEvent( event[1] );
 
             if ( verify_fill_struct( inptr, outptr, num_elements) ) {
                 log_error( " buffer_FILL async struct test failed\n" );
@@ -877,10 +944,15 @@
                 log_info( " buffer_FILL async struct test passed\n" );
             }
             // cleanup
+            clReleaseKernel( kernel );
+            clReleaseProgram( program );
             align_free( outptr );
-            align_free((void *)inptr);
-            align_free((void *)hostptr);
+            clReleaseMemObject( buffers[0] );
+            clReleaseMemObject( buffers[1] );
         } // src cl_mem_flag
+        free( (void *)pattern );
+        align_free( (void *)inptr );
+        align_free( (void *)hostptr );
     }
 
     free_mtdata(d);

diff --git a/test_conformance/buffers/test_buffer_map.cpp b/test_conformance/buffers/test_buffer_map.cpp
index 382c7a3..eebd30c 100644
--- a/test_conformance/buffers/test_buffer_map.cpp
+++ b/test_conformance/buffers/test_buffer_map.cpp

@@ -554,9 +554,10 @@
 static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
                                  const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
 {
+    cl_mem      buffers[5];
     void        *outptr[5];
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
     size_t      threads[3], localThreads[3];
     cl_int      err;
     int         i;
@@ -579,20 +580,10 @@
     if (! gHasLong && strstr(type,"long"))
         return 0;
 
-    for (i = 0; i < loops; i++)
-    {
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
 
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &kernelCode[i], kernelName[i]);
-        if (err)
-        {
-            log_error(" Error creating program for %s\n", type);
-            return -1;
-        }
-
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            clMemWrapper buffer;
+        for ( i = 0; i < loops; i++ ){
             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
             if ( ! outptr[i] ){
                 log_error( " unable to allocate %d bytes of memory\n", (int)ptrSizes[i] * num_elements );
@@ -600,24 +591,30 @@
             }
 
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                buffer =
-                    clCreateBuffer(context, flag_set[src_flag_id],
-                                   ptrSizes[i] * num_elements, outptr[i], &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, outptr[i], &err);
             else
-                buffer = clCreateBuffer(context, flag_set[src_flag_id],
-                                        ptrSizes[i] * num_elements, NULL, &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
 
-            if (!buffer || err)
-            {
+            if ( ! buffers[i] | err){
                 print_error(err, "clCreateBuffer failed\n" );
                 align_free( outptr[i] );
                 return -1;
             }
 
-            err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
+            err = create_single_kernel_helper(context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
+            if ( err ){
+                log_error( " Error creating program for %s\n", type );
+                clReleaseMemObject( buffers[i] );
+                align_free( outptr[i] );
+                return -1;
+            }
 
+            err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clSetKernelArg failed\n" );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                clReleaseMemObject( buffers[i] );
                 align_free( outptr[i] );
                 return -1;
             }
@@ -630,34 +627,39 @@
             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueNDRangeKernel failed\n" );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                clReleaseMemObject( buffers[i] );
                 align_free( outptr[i] );
                 return -1;
             }
 
-            mappedPtr = clEnqueueMapBuffer(queue, buffer, CL_TRUE, CL_MAP_READ,
-                                           0, ptrSizes[i] * num_elements, 0,
-                                           NULL, NULL, &err);
-            if (err != CL_SUCCESS)
-            {
+            mappedPtr = clEnqueueMapBuffer(queue, buffers[i], CL_TRUE, CL_MAP_READ, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err);
+            if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueMapBuffer failed" );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                clReleaseMemObject( buffers[i] );
                 align_free( outptr[i] );
                 return -1;
             }
 
             if (fn(mappedPtr, num_elements*(1<<i))){
-                log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
-                          1 << i, flag_set_names[src_flag_id]);
+                log_error(" %s%d test failed\n", type, 1<<i);
                 total_errors++;
             }
             else{
-                log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
-                         1 << i, flag_set_names[src_flag_id]);
+                log_info(" %s%d test passed\n", type, 1<<i);
             }
 
-            err = clEnqueueUnmapMemObject(queue, buffer, mappedPtr, 0, NULL,
-                                          NULL);
+            err = clEnqueueUnmapMemObject(queue, buffers[i], mappedPtr, 0, NULL, NULL);
             test_error(err, "clEnqueueUnmapMemObject failed");
 
+            // cleanup
+            clReleaseKernel( kernel[i] );
+            clReleaseProgram( program[i] );
+            clReleaseMemObject( buffers[i] );
+
             // If we are using the outptr[i] as backing via USE_HOST_PTR we need to make sure we are done before freeing.
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)) {
                 err = clFinish(queue);

diff --git a/test_conformance/buffers/test_buffer_mem.cpp b/test_conformance/buffers/test_buffer_mem.cpp
index 52eb723..a471c0f 100644
--- a/test_conformance/buffers/test_buffer_mem.cpp
+++ b/test_conformance/buffers/test_buffer_mem.cpp

@@ -27,6 +27,8 @@
 typedef unsigned char uchar;
 #endif
 
+#define USE_LOCAL_WORK_GROUP 1
+
 
 const char *mem_read_write_kernel_code =
 "__kernel void test_mem_read_write(__global int *dst)\n"
@@ -37,12 +39,12 @@
 "}\n";
 
 const char *mem_read_kernel_code =
-    "__kernel void test_mem_read(__global int *dst, __global int *src)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = src[tid]+1;\n"
-    "}\n";
+"__kernel void test_mem_read(__global int *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = src[tid]+1;\n"
+"}\n";
 
 const char *mem_write_kernel_code =
 "__kernel void test_mem_write(__global int *dst)\n"
@@ -66,179 +68,457 @@
 }
 
 
-int test_mem_flags(cl_context context, cl_command_queue queue, int num_elements,
-                   cl_mem_flags flags, const char **kernel_program,
-                   const char *kernel_name)
+
+int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
 {
-    clMemWrapper buffers[2];
+    cl_mem      buffers[1];
     cl_int      *inptr, *outptr;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
+    cl_program  program[1];
+    cl_kernel   kernel[1];
     size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
     cl_int      err;
     int         i;
 
     size_t      min_alignment = get_min_alignment(context);
-    bool test_read_only = (flags & CL_MEM_READ_ONLY) != 0;
-    bool test_write_only = (flags & CL_MEM_WRITE_ONLY) != 0;
-    bool copy_host_ptr = (flags & CL_MEM_COPY_HOST_PTR) != 0;
 
     global_work_size[0] = (cl_uint)num_elements;
 
     inptr = (cl_int*)align_malloc(sizeof(cl_int)  * num_elements, min_alignment);
-    if (!inptr)
-    {
-        log_error(" unable to allocate %d bytes of memory\n",
-                  (int)sizeof(cl_int) * num_elements);
-        return -1;
-    }
     outptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
-    if (!outptr)
-    {
-        log_error(" unable to allocate %d bytes of memory\n",
-                  (int)sizeof(cl_int) * num_elements);
-        align_free((void *)inptr);
-        return -1;
-    }
-
-    for (i = 0; i < num_elements; i++) inptr[i] = i;
-
-    buffers[0] = clCreateBuffer(context, flags, sizeof(cl_int) * num_elements,
-                                copy_host_ptr ? inptr : NULL, &err);
-    if (err != CL_SUCCESS)
-    {
-        print_error(err, "clCreateBuffer failed");
-        align_free((void *)outptr);
-        align_free((void *)inptr);
-        return -1;
-    }
-    if (!copy_host_ptr)
-    {
-        err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
-                                   sizeof(cl_int) * num_elements, (void *)inptr,
-                                   0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            print_error(err, "clEnqueueWriteBuffer failed");
-            align_free((void *)outptr);
-            align_free((void *)inptr);
-            return -1;
-        }
-    }
-
-    if (test_read_only)
-    {
-        /* The read only buffer for mem_read_only_flags should be created above
-        with the correct flags as in other tests. However to make later test
-        code simpler, the additional read_write buffer required is stored as
-        the first buffer */
-        buffers[1] = buffers[0];
-        buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    sizeof(cl_int) * num_elements, NULL, &err);
-        if (err != CL_SUCCESS)
-        {
-            print_error(err, " clCreateBuffer failed \n");
-            align_free((void *)inptr);
-            align_free((void *)outptr);
-            return -1;
-        }
-    }
-
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      kernel_program, kernel_name);
-    if (err){
-        print_error(err, "creating kernel failed");
+    buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, NULL, &err);
+    if (err != CL_SUCCESS) {
+        print_error( err, "clCreateBuffer failed");
         align_free( (void *)outptr );
         align_free( (void *)inptr );
         return -1;
     }
 
-    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&buffers[0]);
-    if (test_read_only && (err == CL_SUCCESS))
-    {
-        err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&buffers[1]);
+    for (i=0; i<num_elements; i++)
+        inptr[i] = i;
+
+    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
+    if (err != CL_SUCCESS) {
+        print_error( err, "clEnqueueWriteBuffer failed");
+        clReleaseMemObject( buffers[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
     }
+
+    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_write_kernel_code, "test_mem_read_write" );
+    if (err){
+        clReleaseMemObject( buffers[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+
+#ifdef USE_LOCAL_WORK_GROUP
+    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
+    test_error( err, "Unable to get work group size to use" );
+#endif
+
+    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
     if ( err != CL_SUCCESS ){
         print_error( err, "clSetKernelArg failed" );
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
         align_free( (void *)outptr );
         align_free( (void *)inptr );
         return -1;
     }
 
-    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, NULL,
-                                 0, NULL, NULL);
+#ifdef USE_LOCAL_WORK_GROUP
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
     if (err != CL_SUCCESS){
         log_error("clEnqueueNDRangeKernel failed\n");
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
         align_free( (void *)outptr );
         align_free( (void *)inptr );
         return -1;
     }
 
-    err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
-                              sizeof(cl_int) * num_elements, (void *)outptr, 0,
-                              NULL, NULL);
+    err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
     if ( err != CL_SUCCESS ){
         print_error( err, "clEnqueueReadBuffer failed" );
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
         align_free( (void *)outptr );
         align_free( (void *)inptr );
         return -1;
     }
 
-    if (!test_write_only)
-    {
-        if (verify_mem(outptr, num_elements))
-        {
-            log_error("test failed\n");
-            err = -1;
-        }
-        else
-        {
-            log_info("test passed\n");
-            err = 0;
-        }
+    if (verify_mem(outptr, num_elements)){
+        log_error("buffer_MEM_READ_WRITE test failed\n");
+        err = -1;
+    }
+    else{
+        log_info("buffer_MEM_READ_WRITE test passed\n");
+        err = 0;
     }
 
     // cleanup
+    clReleaseMemObject( buffers[0] );
+    clReleaseKernel( kernel[0] );
+    clReleaseProgram( program[0] );
     align_free( (void *)outptr );
     align_free( (void *)inptr );
 
     return err;
-} // end test_mem_flags()
+}   // end test_mem_read_write()
 
-int test_mem_read_write_flags(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements)
+
+int test_mem_write_only_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
 {
-    return test_mem_flags(context, queue, num_elements, CL_MEM_READ_WRITE,
-                          &mem_read_write_kernel_code, "test_mem_read_write");
-}
+    cl_mem      buffers[1];
+    int         *inptr, *outptr;
+    cl_program  program[1];
+    cl_kernel   kernel[1];
+    size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
+    cl_int      err;
+    int         i;
 
+    size_t      min_alignment = get_min_alignment(context);
 
-int test_mem_write_only_flags(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements)
-{
-    return test_mem_flags(context, queue, num_elements, CL_MEM_WRITE_ONLY,
-                          &mem_write_kernel_code, "test_mem_write");
-}
+    global_work_size[0] = (cl_uint)num_elements;
+
+    inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
+    if ( ! inptr ){
+        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
+        return -1;
+    }
+    outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
+    if ( ! outptr ){
+        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
+        align_free( (void *)inptr );
+        return -1;
+    }
+    buffers[0] = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
+    if (err != CL_SUCCESS)
+    {
+        print_error(err, "clCreateBuffer failed\n");
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+
+    for (i=0; i<num_elements; i++)
+        inptr[i] = i;
+
+    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
+    if (err != CL_SUCCESS){
+        print_error( err, "clEnqueueWriteBuffer failed" );
+        clReleaseMemObject( buffers[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+
+    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_write_kernel_code, "test_mem_write" );
+    if (err){
+        clReleaseMemObject( buffers[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+
+#ifdef USE_LOCAL_WORK_GROUP
+    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
+    test_error( err, "Unable to get work group size to use" );
+#endif
+
+    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
+    if ( err != CL_SUCCESS ){
+        print_error( err, "clSetKernelArg failed");
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+
+#ifdef USE_LOCAL_WORK_GROUP
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
+    if ( err != CL_SUCCESS ){
+        print_error( err, "clEnqueueNDRangeKernel failed" );
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+
+    err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
+    if ( err != CL_SUCCESS ){
+        print_error( err, "Error reading array" );
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+
+    // cleanup
+    clReleaseMemObject( buffers[0] );
+    clReleaseKernel( kernel[0] );
+    clReleaseProgram( program[0] );
+    align_free( (void *)outptr );
+    align_free( (void *)inptr );
+
+    return err;
+}   // end test_mem_write()
 
 
 int test_mem_read_only_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
 {
-    return test_mem_flags(context, queue, num_elements, CL_MEM_READ_ONLY,
-                          &mem_read_kernel_code, "test_mem_read");
-}
+    cl_mem      buffers[2];
+    int         *inptr, *outptr;
+    cl_program  program[1];
+    cl_kernel   kernel[1];
+    size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
+    cl_int      err;
+    int         i;
+
+    size_t      min_alignment = get_min_alignment(context);
+
+    global_work_size[0] = (cl_uint)num_elements;
+
+    inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
+    if ( ! inptr ){
+        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
+        return -1;
+    }
+    outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
+    if ( ! outptr ){
+        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
+        align_free( (void *)inptr );
+        return -1;
+    }
+
+    buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
+    if ( err != CL_SUCCESS ){
+        print_error(err, " clCreateBuffer failed to create READ_ONLY array\n" );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+
+    for (i=0; i<num_elements; i++)
+        inptr[i] = i;
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err);
+    if ( err != CL_SUCCESS ){
+        print_error(err, " clCreateBuffer failed to create MEM_ALLOC_GLOBAL_POOL array\n" );
+        clReleaseMemObject( buffers[0]) ;
+        align_free( (void *)inptr );
+        align_free( (void *)outptr );
+        return -1;
+    }
+
+    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
+    if ( err != CL_SUCCESS ){
+        print_error( err, "clEnqueueWriteBuffer() failed");
+        clReleaseMemObject( buffers[1]) ;
+        clReleaseMemObject( buffers[0]) ;
+        align_free( (void *)inptr );
+        align_free( (void *)outptr );
+        return -1;
+    }
+
+    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_kernel_code, "test_mem_read" );
+    if ( err ){
+        clReleaseMemObject( buffers[1]) ;
+        clReleaseMemObject( buffers[0]) ;
+        align_free( (void *)inptr );
+        align_free( (void *)outptr );
+        return -1;
+    }
+
+#ifdef USE_LOCAL_WORK_GROUP
+    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
+    test_error( err, "Unable to get work group size to use" );
+#endif
+
+    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
+    err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&buffers[1] );
+    if ( err != CL_SUCCESS ){
+        print_error( err, "clSetKernelArgs failed" );
+        clReleaseMemObject( buffers[1]) ;
+        clReleaseMemObject( buffers[0]) ;
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)inptr );
+        align_free( (void *)outptr );
+        return -1;
+    }
+
+#ifdef USE_LOCAL_WORK_GROUP
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
+    if (err != CL_SUCCESS){
+        print_error( err, "clEnqueueNDRangeKernel failed" );
+        clReleaseMemObject( buffers[1]) ;
+        clReleaseMemObject( buffers[0]) ;
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)inptr );
+        align_free( (void *)outptr );
+        return -1;
+    }
+
+    err = clEnqueueReadBuffer( queue, buffers[1], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
+    if ( err != CL_SUCCESS ){
+        print_error( err, "clEnqueueReadBuffer failed" );
+        clReleaseMemObject( buffers[1]) ;
+        clReleaseMemObject( buffers[0]) ;
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)inptr );
+        align_free( (void *)outptr );
+        return -1;
+    }
+
+    if (verify_mem(outptr, num_elements)){
+        log_error( " CL_MEM_READ_ONLY test failed\n" );
+        err = -1;
+    }
+    else{
+        log_info( " CL_MEM_READ_ONLY test passed\n" );
+        err = 0;
+    }
+
+    // cleanup
+    clReleaseMemObject( buffers[1]) ;
+    clReleaseMemObject( buffers[0]) ;
+    clReleaseKernel( kernel[0] );
+    clReleaseProgram( program[0] );
+    align_free( (void *)inptr );
+    align_free( (void *)outptr );
+
+    return err;
+
+}   // end test_mem_read()
 
 
 int test_mem_copy_host_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
 {
-    return test_mem_flags(context, queue, num_elements,
-                          CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
-                          &mem_read_write_kernel_code, "test_mem_read_write");
-}
+    cl_mem      buffers[1];
+    int         *ptr;
+    cl_program  program[1];
+    cl_kernel   kernel[1];
+    size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
+    cl_int      err;
+    int         i;
 
-int test_mem_alloc_ref_flags(cl_device_id deviceID, cl_context context,
-                             cl_command_queue queue, int num_elements)
-{
-    return test_mem_flags(context, queue, num_elements,
-                          CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
-                          &mem_read_write_kernel_code, "test_mem_read_write");
-}
+    size_t min_alignment = get_min_alignment(context);
+
+    global_work_size[0] = (cl_uint)num_elements;
+
+    ptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
+    if ( ! ptr ){
+        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
+        return -1;
+    }
+
+    for (i=0; i<num_elements; i++)
+        ptr[i] = i;
+
+    buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, (void *)ptr, &err);
+    if (err != CL_SUCCESS){
+        print_error(err, "clCreateBuffer failed for CL_MEM_COPY_HOST_PTR\n");
+        align_free( (void *)ptr );
+        return -1;
+    }
+
+    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_write_kernel_code, "test_mem_read_write" );
+    if (err){
+        clReleaseMemObject( buffers[0] );
+        align_free( (void *)ptr );
+        return -1;
+    }
+
+#ifdef USE_LOCAL_WORK_GROUP
+    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
+    test_error( err, "Unable to get work group size to use" );
+#endif
+
+    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
+    if (err != CL_SUCCESS){
+        log_error("clSetKernelArgs failed\n");
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)ptr );
+        return -1;
+    }
+
+#ifdef USE_LOCAL_WORK_GROUP
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
+    if (err != CL_SUCCESS){
+        log_error("clEnqueueNDRangeKernel failed\n");
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)ptr );
+        return -1;
+    }
+
+    err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)ptr, 0, NULL, NULL );
+    if (err != CL_SUCCESS){
+        log_error("CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_CONSTANT_POOL failed.\n");
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)ptr );
+        return -1;
+    }
+
+    if ( verify_mem( ptr, num_elements ) ){
+        log_error("CL_MEM_COPY_HOST_PTR test failed\n");
+        err = -1;
+    }
+    else{
+        log_info("CL_MEM_COPY_HOST_PTR test passed\n");
+        err = 0;
+    }
+
+    // cleanup
+    clReleaseMemObject( buffers[0] );
+    clReleaseKernel( kernel[0] );
+    clReleaseProgram( program[0] );
+    align_free( (void *)ptr );
+
+    return err;
+
+}   // end test_mem_copy_host_flags()
+

diff --git a/test_conformance/buffers/test_buffer_migrate.cpp b/test_conformance/buffers/test_buffer_migrate.cpp
index f309836..a5b6f26 100644
--- a/test_conformance/buffers/test_buffer_migrate.cpp
+++ b/test_conformance/buffers/test_buffer_migrate.cpp

@@ -295,9 +295,9 @@
             }
 
             if ((err = clEnqueueNDRangeKernel(queues[i], kernel, 1, NULL, wgs, NULL, 0, NULL, NULL)) != CL_SUCCESS) {
-                print_error(err, "Failed enqueuing the NDRange kernel.");
-                failed = 1;
-                goto cleanup;
+              print_error(err, "Failed enqueueing the NDRange kernel.");
+              failed = 1;
+              goto cleanup;
             }
           }
           // Verify the results as long as neither input is an undefined migration

diff --git a/test_conformance/buffers/test_buffer_read.cpp b/test_conformance/buffers/test_buffer_read.cpp
index 39cf329..de5bdf9 100644
--- a/test_conformance/buffers/test_buffer_read.cpp
+++ b/test_conformance/buffers/test_buffer_read.cpp

@@ -21,7 +21,6 @@
 #include <time.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <CL/cl_half.h>
 
 #include "procs.h"
 
@@ -326,7 +325,6 @@
 
 
 static const char *buffer_read_half_kernel_code[] = {
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
     "__kernel void test_buffer_read_half(__global half *dst)\n"
     "{\n"
     "    int  tid = get_global_id(0);\n"
@@ -334,7 +332,6 @@
     "    dst[tid] = (half)119;\n"
     "}\n",
 
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
     "__kernel void test_buffer_read_half2(__global half2 *dst)\n"
     "{\n"
     "    int  tid = get_global_id(0);\n"
@@ -342,7 +339,6 @@
     "    dst[tid] = (half)119;\n"
     "}\n",
 
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
     "__kernel void test_buffer_read_half4(__global half4 *dst)\n"
     "{\n"
     "    int  tid = get_global_id(0);\n"
@@ -350,7 +346,6 @@
     "    dst[tid] = (half)119;\n"
     "}\n",
 
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
     "__kernel void test_buffer_read_half8(__global half8 *dst)\n"
     "{\n"
     "    int  tid = get_global_id(0);\n"
@@ -358,14 +353,12 @@
     "    dst[tid] = (half)119;\n"
     "}\n",
 
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
     "__kernel void test_buffer_read_half16(__global half16 *dst)\n"
     "{\n"
     "    int  tid = get_global_id(0);\n"
     "\n"
     "    dst[tid] = (half)119;\n"
-    "}\n"
-};
+    "}\n" };
 
 static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" };
 
@@ -564,11 +557,11 @@
 static int verify_read_half( void *ptr, int n )
 {
     int     i;
-    cl_half *outptr = (cl_half *)ptr;
+    float   *outptr = (float *)ptr; // FIXME: should this be cl_half_float?
 
-    for (i = 0; i < n; i++)
-    {
-        if (cl_half_to_float(outptr[i]) != TEST_PRIME_HALF) return -1;
+    for ( i = 0; i < n / 2; i++ ){
+        if ( outptr[i] != TEST_PRIME_HALF )
+            return -1;
     }
 
     return 0;
@@ -621,11 +614,15 @@
 int test_buffer_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
                       const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
 {
+    cl_mem      buffers[5];
     void        *outptr[5];
     void        *inptr[5];
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
     size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
     cl_int      err;
     int         i;
     size_t      ptrSizes[5];
@@ -649,21 +646,10 @@
         return CL_SUCCESS;
     }
 
-    for (i = 0; i < loops; i++)
-    {
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
 
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &kernelCode[i], kernelName[i]);
-        if (err)
-        {
-            log_error("Creating program for %s\n", type);
-            print_error(err, " Error creating program ");
-            return -1;
-        }
-
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            clMemWrapper buffer;
+        for ( i = 0; i < loops; i++ ){
             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
             if ( ! outptr[i] ){
                 log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
@@ -677,63 +663,80 @@
 
 
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                buffer =
-                    clCreateBuffer(context, flag_set[src_flag_id],
-                                   ptrSizes[i] * num_elements, inptr[i], &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
             else
-                buffer = clCreateBuffer(context, flag_set[src_flag_id],
-                                        ptrSizes[i] * num_elements, NULL, &err);
-            if (err != CL_SUCCESS)
-            {
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
+            if ( err != CL_SUCCESS ){
                 print_error(err, " clCreateBuffer failed\n" );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             }
 
-            err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
+            err = create_single_kernel_helper(  context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
+            if ( err ){
+                log_error("Creating program for %s\n", type);
+                print_error(err,  " Error creating program " );
+                clReleaseMemObject(buffers[i]);
+                align_free( outptr[i] );
+                align_free( inptr[i] );
+                return -1;
+            }
+
+            err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clSetKernelArg failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             }
 
-            err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL,
-                                         global_work_size, NULL, 0, NULL, NULL);
+#ifdef USE_LOCAL_WORK_GROUP
+            err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+            test_error( err, "Unable to get work group size to use" );
+
+            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
+            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueNDRangeKernel failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             }
 
-            err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
-                                      ptrSizes[i] * num_elements, outptr[i], 0,
-                                      NULL, NULL);
+            err = clEnqueueReadBuffer( queue, buffers[i], CL_TRUE, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueReadBuffer failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             }
 
             if (fn(outptr[i], num_elements*(1<<i))){
-                log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
-                          1 << i, flag_set_names[src_flag_id]);
+                log_error( " %s%d test failed\n", type, 1<<i );
                 total_errors++;
             }
             else{
-                log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
-                         1 << i, flag_set_names[src_flag_id]);
+                log_info( " %s%d test passed\n", type, 1<<i );
             }
 
-            err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
-                                      ptrSizes[i] * num_elements, inptr[i], 0,
-                                      NULL, NULL);
-            if (err != CL_SUCCESS)
-            {
+            err = clEnqueueReadBuffer( queue, buffers[i], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL );
+            if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueReadBuffer failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
@@ -749,6 +752,9 @@
 
 
             // cleanup
+            clReleaseMemObject( buffers[i] );
+            clReleaseKernel( kernel[i] );
+            clReleaseProgram( program[i] );
             align_free( outptr[i] );
             align_free( inptr[i] );
         }
@@ -761,12 +767,16 @@
 int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
                             const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
 {
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
-    clEventWrapper event;
+    cl_mem      buffers[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
+    cl_event    event;
     void        *outptr[5];
     void        *inptr[5];
     size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
     cl_int      err;
     int         i;
     size_t      lastIndex;
@@ -791,20 +801,10 @@
         return CL_SUCCESS;
     }
 
-    for (i = 0; i < loops; i++)
-    {
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
 
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &kernelCode[i], kernelName[i]);
-        if (err)
-        {
-            log_error(" Error creating program for %s\n", type);
-            return -1;
-        }
-
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            clMemWrapper buffer;
+        for ( i = 0; i < loops; i++ ){
             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
             if ( ! outptr[i] ){
                 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
@@ -820,12 +820,9 @@
 
 
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                buffer =
-                    clCreateBuffer(context, flag_set[src_flag_id],
-                                   ptrSizes[i] * num_elements, inptr[i], &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
             else
-                buffer = clCreateBuffer(context, flag_set[src_flag_id],
-                                        ptrSizes[i] * num_elements, NULL, &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
             if ( err != CL_SUCCESS ){
                 print_error(err, " clCreateBuffer failed\n" );
                 align_free( outptr[i] );
@@ -833,26 +830,46 @@
                 return -1;
             }
 
-            err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
-            if ( err != CL_SUCCESS ){
-                print_error( err, "clSetKernelArg failed" );
+            err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i]);
+            if ( err ){
+                log_error( " Error creating program for %s\n", type );
+                clReleaseMemObject( buffers[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             }
 
+            err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
+            if ( err != CL_SUCCESS ){
+                print_error( err, "clSetKernelArg failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                align_free( outptr[i] );
+                align_free( inptr[i] );
+                return -1;
+            }
+
+#ifdef USE_LOCAL_WORK_GROUP
+            err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+            test_error( err, "Unable to get work group size to use" );
+
+            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueNDRangeKernel failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             }
 
             lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
-            err = clEnqueueReadBuffer(queue, buffer, false, 0,
-                                      ptrSizes[i] * num_elements, outptr[i], 0,
-                                      NULL, &event);
+            err = clEnqueueReadBuffer( queue, buffers[i], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &event );
 #ifdef CHECK_FOR_NON_WAIT
             if ( ((uchar *)outptr[i])[lastIndex] ){
                 log_error( "    clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
@@ -861,6 +878,9 @@
 #endif
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueReadBuffer failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
@@ -868,22 +888,27 @@
             err = clWaitForEvents(1, &event );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clWaitForEvents() failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             }
 
             if ( fn(outptr[i], num_elements*(1<<i)) ){
-                log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
-                          1 << i, flag_set_names[src_flag_id]);
+                log_error( " %s%d test failed\n", type, 1<<i );
                 total_errors++;
             }
             else{
-                log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
-                         1 << i, flag_set_names[src_flag_id]);
+                log_info( " %s%d test passed\n", type, 1<<i );
             }
 
             // cleanup
+            clReleaseEvent( event );
+            clReleaseMemObject( buffers[i] );
+            clReleaseKernel( kernel[i] );
+            clReleaseProgram( program[i] );
             align_free( outptr[i] );
             align_free( inptr[i] );
         }
@@ -898,11 +923,15 @@
 int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
                                     const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
 {
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
-    clEventWrapper event;
+    cl_mem      buffers[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
+    cl_event    event;
     void        *outptr[5], *inptr[5];
     size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
     cl_int      err;
     int         i;
     size_t      lastIndex;
@@ -927,20 +956,10 @@
         return CL_SUCCESS;
     }
 
-    for (i = 0; i < loops; i++)
-    {
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
 
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &kernelCode[i], kernelName[i]);
-        if (err)
-        {
-            log_error(" Error creating program for %s\n", type);
-            return -1;
-        }
-
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            clMemWrapper buffer;
+        for ( i = 0; i < loops; i++ ){
             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
             if ( ! outptr[i] ){
                 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
@@ -955,12 +974,9 @@
             memset( inptr[i], 0, ptrSizes[i] * num_elements );  // initialize to zero to tell difference
 
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                buffer =
-                    clCreateBuffer(context, flag_set[src_flag_id],
-                                   ptrSizes[i] * num_elements, inptr[i], &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
             else
-                buffer = clCreateBuffer(context, flag_set[src_flag_id],
-                                        ptrSizes[i] * num_elements, NULL, &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
             if ( err != CL_SUCCESS ){
                 print_error(err, " clCreateBuffer failed\n" );
                 align_free( outptr[i] );
@@ -968,26 +984,46 @@
                 return -1;
             }
 
-            err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
-            if ( err != CL_SUCCESS ){
-                print_error( err, "clSetKernelArgs failed" );
+            err = create_single_kernel_helper(  context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
+            if ( err ){
+                log_error( " Error creating program for %s\n", type );
+                clReleaseMemObject( buffers[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             }
 
+            err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
+            if ( err != CL_SUCCESS ){
+                print_error( err, "clSetKernelArgs failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                align_free( outptr[i] );
+                align_free( inptr[i] );
+                return -1;
+            }
+
+#ifdef USE_LOCAL_WORK_GROUP
+            err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+            test_error( err, "Unable to get work group size to use" );
+
+            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueNDRangeKernel failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             }
 
             lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
-            err = clEnqueueReadBuffer(queue, buffer, false, 0,
-                                      ptrSizes[i] * num_elements,
-                                      (void *)(outptr[i]), 0, NULL, &event);
+            err = clEnqueueReadBuffer( queue, buffers[i], false, 0, ptrSizes[i]*num_elements, (void *)(outptr[i]), 0, NULL, &event );
 #ifdef CHECK_FOR_NON_WAIT
             if ( ((uchar *)outptr[i])[lastIndex] ){
                 log_error( "    clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
@@ -996,6 +1032,9 @@
 #endif
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueReadBuffer failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
@@ -1003,6 +1042,9 @@
             err = clEnqueueBarrierWithWaitList(queue, 0, NULL, NULL);
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueBarrierWithWaitList() failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 return -1;
             }
@@ -1010,22 +1052,27 @@
             err = clWaitForEvents(1, &event);
             if ( err != CL_SUCCESS ){
                 print_error( err, "clWaitForEvents() failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             }
 
             if ( fn(outptr[i], num_elements*(1<<i)) ){
-                log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
-                          1 << i, flag_set_names[src_flag_id]);
+                log_error(" %s%d test failed\n", type, 1<<i);
                 total_errors++;
             }
             else{
-                log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
-                         1 << i, flag_set_names[src_flag_id]);
+                log_info(" %s%d test passed\n", type, 1<<i);
             }
 
             // cleanup
+            clReleaseEvent( event );
+            clReleaseMemObject( buffers[i] );
+            clReleaseKernel( kernel[i] );
+            clReleaseProgram( program[i] );
             align_free( outptr[i] );
             align_free( inptr[i] );
         }
@@ -1052,10 +1099,8 @@
 DECLARE_READ_TEST(char, cl_char)
 DECLARE_READ_TEST(uchar, cl_uchar)
 
-int test_buffer_read_half(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements)
+int test_buffer_half_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
 {
-    PASSIVE_REQUIRE_FP16_SUPPORT(deviceID)
     return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5,
                              buffer_read_half_kernel_code, half_kernel_name, verify_read_half );
 }
@@ -1096,6 +1141,76 @@
 DECLARE_BARRIER_TEST(uchar, cl_uchar)
 DECLARE_BARRIER_TEST(float, cl_float)
 
+/*
+ int test_buffer_half_read(cl_device_group device, cl_device id, cl_context context, int num_elements)
+ {
+ cl_mem        buffers[1];
+ float        *outptr;
+ cl_program program[1];
+ cl_kernel    kernel[1];
+ void        *values[1];
+ size_t        sizes[1] = { sizeof(cl_buffer) };
+ uint        threads[1];
+ int        err;
+ int        i;
+ size_t        ptrSize;    // sizeof(half)
+
+ ptrSize = sizeof(cl_float)/2;
+ outptr = (float *)malloc(ptrSize * num_elements);
+ buffers[0] = clCreateBuffer(device, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSize * num_elements, NULL);
+ if( !buffers[0] ){
+ log_error("clCreateBuffer failed\n");
+ return -1;
+ }
+
+ err = create_program_and_kernel(device, buffer_read_half_kernel_code, "test_buffer_read_half", &program[0], &kernel[0]);
+ if( err ){
+ log_error( " Error creating program for half\n" );
+ clReleaseMemObject(buffers[0]);
+ free( (void *)outptr );
+ return -1;
+ }
+
+ values[0] = buffers[0];
+ err = clSetKernelArgs(context, kernel[0], 1, NULL, &(values[i]), sizes);
+ if( err != CL_SUCCESS ){
+ log_error("clSetKernelArgs failed\n");
+ return -1;
+ }
+
+ global_work_size[0] = (cl_uint)num_elements;
+ err = clEnqueueNDRangeKernel(queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
+ if( err != CL_SUCCESS ){
+ log_error("clEnqueueNDRangeKernel failed\n");
+ return -1;
+ }
+
+ err = clEnqueueReadBuffer( queue, buffers[0], true, 0, ptrSize*num_elements, (void *)outptr, 0, NULL, NULL );
+ if( err != CL_SUCCESS ){
+ log_error("clEnqueueReadBuffer failed: %d\n", err);
+ return -1;
+ }
+
+ if( verify_read_half( outptr, num_elements >> 1 ) ){
+ log_error( "buffer_READ half test failed\n" );
+ err = -1;
+ }
+ else{
+ log_info( "buffer_READ half test passed\n" );
+ err = 0;
+ }
+
+ // cleanup
+ clReleaseMemObject( buffers[0] );
+ clReleaseKernel( kernel[0] );
+ clReleaseProgram( program[0] );
+ free( (void *)outptr );
+
+ return err;
+
+ }    // end test_buffer_half_read()
+ */
+
 int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     cl_mem      buffers[1];
@@ -1103,6 +1218,9 @@
     cl_program  program[1];
     cl_kernel   kernel[1];
     size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
     cl_int      err;
     size_t      objSize = sizeof(TestStruct);
 
@@ -1115,8 +1233,7 @@
         log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
         return -1;
     }
-    buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                objSize * num_elements, NULL, &err);
+    buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  objSize * num_elements, NULL , &err);
     if ( err != CL_SUCCESS ){
         print_error( err, " clCreateBuffer failed\n" );
         align_free( output_ptr );
@@ -1140,7 +1257,14 @@
         return -1;
     }
 
+#ifdef USE_LOCAL_WORK_GROUP
+    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
+    test_error( err, "Unable to get work group size to use" );
+
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
     err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
     if ( err != CL_SUCCESS ){
         print_error( err, "clEnqueueNDRangeKernel failed" );
         clReleaseMemObject( buffers[0] );
@@ -1186,6 +1310,9 @@
     cl_program  program[3];
     cl_kernel   kernel[3];
     size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
     cl_int      err;
     int         i, j;
     size_t      ptrSizes[3];    // sizeof(int), sizeof(int2), sizeof(int4)
@@ -1207,8 +1334,7 @@
             }
             return -1;
         }
-        buffers[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    ptrSizes[i] * num_elements, NULL, &err);
+        buffers[i] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL, &err);
         if ( err != CL_SUCCESS ){
             print_error(err, " clCreateBuffer failed\n" );
             for ( j = 0; j < i; j++ ){
@@ -1267,7 +1393,14 @@
             return -1;
         }
 
+#ifdef USE_LOCAL_WORK_GROUP
+        err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+        test_error( err, "Unable to get work group size to use" );
+
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
         if ( err != CL_SUCCESS ){
             print_error( err, "clEnqueueNDRangeKernel failed" );
             clReleaseMemObject( buffers[i] );

diff --git a/test_conformance/buffers/test_buffer_write.cpp b/test_conformance/buffers/test_buffer_write.cpp
index e57e1c1..0f67280 100644
--- a/test_conformance/buffers/test_buffer_write.cpp
+++ b/test_conformance/buffers/test_buffer_write.cpp

@@ -24,6 +24,7 @@
 #include "procs.h"
 #include "harness/errorHelpers.h"
 
+#define USE_LOCAL_WORK_GROUP    1
 
 #ifndef uchar
 typedef unsigned char uchar;
@@ -314,51 +315,40 @@
 
 
 const char *buffer_write_half_kernel_code[] = {
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
-    "__kernel void test_buffer_write_half(__global half *src, __global half "
-    "*dst)\n"
+    "__kernel void test_buffer_write_half(__global half *src, __global float *dst)\n"
     "{\n"
     "    int  tid = get_global_id(0);\n"
     "\n"
-    "    dst[tid] = src[tid];\n"
+    "    dst[tid] = vload_half( tid * 2, src );\n"
     "}\n",
 
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
-    "__kernel void test_buffer_write_half2(__global half2 *src, __global half2 "
-    "*dst)\n"
+    "__kernel void test_buffer_write_half2(__global half2 *src, __global float2 *dst)\n"
     "{\n"
     "    int  tid = get_global_id(0);\n"
     "\n"
-    "    dst[tid] = src[tid];\n"
+    "    dst[tid] = vload_half2( tid * 2, src );\n"
     "}\n",
 
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
-    "__kernel void test_buffer_write_half4(__global half4 *src, __global half4 "
-    "*dst)\n"
+    "__kernel void test_buffer_write_half4(__global half4 *src, __global float4 *dst)\n"
     "{\n"
     "    int  tid = get_global_id(0);\n"
     "\n"
-    "    dst[tid] = src[tid];\n"
+    "    dst[tid] = vload_half4( tid * 2, src );\n"
     "}\n",
 
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
-    "__kernel void test_buffer_write_half8(__global half8 *src, __global half8 "
-    "*dst)\n"
+    "__kernel void test_buffer_write_half8(__global half8 *src, __global float8 *dst)\n"
     "{\n"
     "    int  tid = get_global_id(0);\n"
     "\n"
-    "    dst[tid] = src[tid];\n"
+    "    dst[tid] = vload_half8( tid * 2, src );\n"
     "}\n",
 
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
-    "__kernel void test_buffer_write_half16(__global half16 *src, __global "
-    "half16 *dst)\n"
+    "__kernel void test_buffer_write_half16(__global half16 *src, __global float16 *dst)\n"
     "{\n"
     "    int  tid = get_global_id(0);\n"
     "\n"
-    "    dst[tid] = src[tid];\n"
-    "}\n"
-};
+    "    dst[tid] = vload_half16( tid * 2, src );\n"
+    "}\n" };
 
 static const char *half_kernel_name[] = { "test_buffer_write_half", "test_buffer_write_half2", "test_buffer_write_half4", "test_buffer_write_half8", "test_buffer_write_half16" };
 
@@ -564,8 +554,8 @@
 static int verify_write_half( void *ptr1, void *ptr2, int n )
 {
     int     i;
-    cl_half *inptr = (cl_half *)ptr1;
-    cl_half *outptr = (cl_half *)ptr2;
+    cl_ushort   *inptr = (cl_ushort *)ptr1;
+    cl_ushort   *outptr = (cl_ushort *)ptr2;
 
     for ( i = 0; i < n; i++ ){
         if ( outptr[i] != inptr[i] )
@@ -624,13 +614,17 @@
 int test_buffer_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
                        void *inptr[5], const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int), MTdata d )
 {
+    cl_mem      buffers[10];
     void        *outptr[5];
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
     size_t      ptrSizes[5];
     size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
     cl_int      err;
-    int i;
+    int         i, ii;
     int         src_flag_id, dst_flag_id;
     int         total_errors = 0;
 
@@ -644,34 +638,19 @@
     ptrSizes[3] = ptrSizes[2] << 1;
     ptrSizes[4] = ptrSizes[3] << 1;
 
-    loops = (loops < 5 ? loops : 5);
-    for (i = 0; i < loops; i++)
-    {
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &kernelCode[i], kernelName[i]);
-        if (err)
-        {
-            log_error(" Error creating program for %s\n", type);
-            return -1;
-        }
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
+            log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
 
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
-            {
-                clMemWrapper buffers[2];
-
+            loops = ( loops < 5 ? loops : 5 );
+            for ( i = 0; i < loops; i++ ){
+                ii = i << 1;
                 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                    buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],
-                                                ptrSizes[i] * num_elements,
-                                                inptr[i], &err);
+                    buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
                 else
-                    buffers[0] =
-                        clCreateBuffer(context, flag_set[src_flag_id],
-                                       ptrSizes[i] * num_elements, NULL, &err);
+                    buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
 
-                if (!buffers[0] || err)
-                {
+                if ( ! buffers[ii] || err){
                     align_free( outptr[i] );
                     print_error(err, " clCreateBuffer failed\n" );
                     return -1;
@@ -679,26 +658,19 @@
                 if ( ! strcmp( type, "half" ) ){
                     outptr[i] = align_malloc( ptrSizes[i] * (num_elements * 2 ), min_alignment);
                     if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
-                        buffers[1] = clCreateBuffer(
-                            context, flag_set[dst_flag_id],
-                            ptrSizes[i] * 2 * num_elements, outptr[i], &err);
+                        buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * 2 * num_elements, outptr[i], &err);
                     else
-                        buffers[1] = clCreateBuffer(
-                            context, flag_set[dst_flag_id],
-                            ptrSizes[i] * 2 * num_elements, NULL, &err);
+                        buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * 2 * num_elements, NULL, &err);
                 }
                 else{
                     outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
                     if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
-                        buffers[1] = clCreateBuffer(
-                            context, flag_set[dst_flag_id],
-                            ptrSizes[i] * num_elements, outptr[i], &err);
+                        buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, outptr[i], &err);
                     else
-                        buffers[1] = clCreateBuffer(
-                            context, flag_set[dst_flag_id],
-                            ptrSizes[i] * num_elements, NULL, &err);
+                        buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
                 }
                 if ( err ){
+                    clReleaseMemObject(buffers[ii]);
                     align_free( outptr[i] );
                     print_error(err, " clCreateBuffer failed\n" );
                     return -1;
@@ -706,77 +678,106 @@
 
                 if (gTestMap) {
                     void *dataPtr;
-                    dataPtr = clEnqueueMapBuffer(
-                        queue, buffers[0], CL_TRUE, CL_MAP_WRITE, 0,
-                        ptrSizes[i] * num_elements, 0, NULL, NULL, &err);
+                    dataPtr = clEnqueueMapBuffer(queue, buffers[ii], CL_TRUE, CL_MAP_WRITE, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err);
                     if (err) {
                         print_error(err, "clEnqueueMapBuffer failed");
+                        clReleaseMemObject(buffers[ii]);
+                        clReleaseMemObject(buffers[ii+1]);
                         align_free( outptr[i] );
                         return -1;
                     }
 
                     memcpy(dataPtr, inptr[i], ptrSizes[i]*num_elements);
 
-                    err = clEnqueueUnmapMemObject(queue, buffers[0], dataPtr, 0,
-                                                  NULL, NULL);
+                    err = clEnqueueUnmapMemObject(queue, buffers[ii], dataPtr, 0, NULL, NULL);
                     if (err) {
                         print_error(err, "clEnqueueUnmapMemObject failed");
+                        clReleaseMemObject(buffers[ii]);
+                        clReleaseMemObject(buffers[ii+1]);
                         align_free( outptr[i] );
                         return -1;
                     }
                 }
                 else if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
-                    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
-                                               ptrSizes[i] * num_elements,
-                                               inptr[i], 0, NULL, NULL);
+                    err = clEnqueueWriteBuffer(queue, buffers[ii], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL);
                     if ( err != CL_SUCCESS ){
+                        clReleaseMemObject(buffers[ii]);
+                        clReleaseMemObject(buffers[ii+1]);
                         align_free( outptr[i] );
                         print_error( err, " clWriteBuffer failed" );
                         return -1;
                     }
                 }
 
-                err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem),
-                                     (void *)&buffers[0]);
-                err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem),
-                                      (void *)&buffers[1]);
+                err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
+                if ( err ){
+                    clReleaseMemObject(buffers[ii]);
+                    clReleaseMemObject(buffers[ii+1]);
+                    align_free( outptr[i] );
+                    log_error( " Error creating program for %s\n", type );
+                    return -1;
+                }
+
+#ifdef USE_LOCAL_WORK_GROUP
+                err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+                test_error( err, "Unable to get work group size to use" );
+#endif
+
+                err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
+                err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
                 if ( err != CL_SUCCESS ){
+                    clReleaseMemObject( buffers[ii] );
+                    clReleaseMemObject( buffers[ii+1] );
+                    clReleaseKernel( kernel[i] );
+                    clReleaseProgram( program[i] );
                     align_free( outptr[i] );
                     print_error( err, " clSetKernelArg failed" );
                     return -1;
                 }
 
+#ifdef USE_LOCAL_WORK_GROUP
+                err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
                 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
                 if ( err != CL_SUCCESS ){
                     print_error( err, " clEnqueueNDRangeKernel failed" );
+                    clReleaseMemObject( buffers[ii] );
+                    clReleaseMemObject( buffers[ii+1] );
+                    clReleaseKernel( kernel[i] );
+                    clReleaseProgram( program[i] );
                     align_free( outptr[i] );
                     return -1;
                 }
 
-                err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
-                                          ptrSizes[i] * num_elements, outptr[i],
-                                          0, NULL, NULL);
-
+                if ( ! strcmp( type, "half" ) ){
+                    err = clEnqueueReadBuffer( queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
+                }
+                else{
+                    err = clEnqueueReadBuffer( queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
+                }
                 if ( err != CL_SUCCESS ){
+                    clReleaseMemObject( buffers[ii] );
+                    clReleaseMemObject( buffers[ii+1] );
+                    clReleaseKernel( kernel[i] );
+                    clReleaseProgram( program[i] );
                     align_free( outptr[i] );
                     print_error( err, " clEnqueueReadBuffer failed" );
                     return -1;
                 }
 
                 if ( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
-                    log_error(
-                        " %s%d test failed. cl_mem_flags src: %s dst: %s\n",
-                        type, 1 << i, flag_set_names[src_flag_id],
-                        flag_set_names[dst_flag_id]);
+                    log_error( " %s%d test failed\n", type, 1<<i );
                     total_errors++;
                 }
                 else{
-                    log_info(
-                        " %s%d test passed. cl_mem_flags src: %s dst: %s\n",
-                        type, 1 << i, flag_set_names[src_flag_id],
-                        flag_set_names[dst_flag_id]);
+                    log_info( " %s%d test passed\n", type, 1<<i );
                 }
                 // cleanup
+                clReleaseMemObject( buffers[ii] );
+                clReleaseMemObject( buffers[ii+1] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
             }
         } // dst cl_mem_flag
@@ -791,16 +792,19 @@
 
 int test_buffer_write_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
 {
-
+    cl_mem      buffers[10];
     void        *outptr[5];
     TestStruct  *inptr[5];
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
     size_t      ptrSizes[5];
     size_t      size = sizeof( TestStruct );
     size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
     cl_int      err;
-    int i;
+    int         i, ii;
     cl_uint     j;
     int         loops = 1;      // no vector for structs
     int         src_flag_id, dst_flag_id;
@@ -817,25 +821,12 @@
     ptrSizes[3] = ptrSizes[2] << 1;
     ptrSizes[4] = ptrSizes[3] << 1;
 
-    loops = (loops < 5 ? loops : 5);
-    for (i = 0; i < loops; i++)
-    {
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
+            log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
 
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &struct_kernel_code,
-                                          "read_write_struct");
-        if (err)
-        {
-            log_error(" Error creating program for struct\n");
-            free_mtdata(d);
-            return -1;
-        }
-
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
-            {
-                clMemWrapper buffers[2];
+            loops = ( loops < 5 ? loops : 5 );
+            for ( i = 0; i < loops; i++ ){
 
                 inptr[i] = (TestStruct *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
 
@@ -844,14 +835,11 @@
                     inptr[i][j].b = get_random_float( -FLT_MAX, FLT_MAX, d );
                 }
 
+                ii = i << 1;
                 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                    buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],
-                                                ptrSizes[i] * num_elements,
-                                                inptr[i], &err);
+                    buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
                 else
-                    buffers[0] =
-                        clCreateBuffer(context, flag_set[src_flag_id],
-                                       ptrSizes[i] * num_elements, NULL, &err);
+                    buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
                 if ( err ){
                     align_free( outptr[i] );
                     print_error(err, " clCreateBuffer failed\n" );
@@ -860,15 +848,11 @@
                 }
                 outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
                 if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
-                    buffers[1] = clCreateBuffer(context, flag_set[dst_flag_id],
-                                                ptrSizes[i] * num_elements,
-                                                outptr[i], &err);
+                    buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, outptr[i], &err);
                 else
-                    buffers[1] =
-                        clCreateBuffer(context, flag_set[dst_flag_id],
-                                       ptrSizes[i] * num_elements, NULL, &err);
-                if (!buffers[1] || err)
-                {
+                    buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
+                if ( ! buffers[ii+1] || err){
+                    clReleaseMemObject(buffers[ii]);
                     align_free( outptr[i] );
                     print_error(err, " clCreateBuffer failed\n" );
                     free_mtdata(d);
@@ -877,11 +861,11 @@
 
                 if (gTestMap) {
                     void *dataPtr;
-                    dataPtr = clEnqueueMapBuffer(
-                        queue, buffers[0], CL_TRUE, CL_MAP_WRITE, 0,
-                        ptrSizes[i] * num_elements, 0, NULL, NULL, &err);
+                    dataPtr = clEnqueueMapBuffer(queue, buffers[ii], CL_TRUE, CL_MAP_WRITE, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err);
                     if (err) {
                         print_error(err, "clEnqueueMapBuffer failed");
+                        clReleaseMemObject(buffers[ii]);
+                        clReleaseMemObject(buffers[ii+1]);
                         align_free( outptr[i] );
                         free_mtdata(d);
                         return -1;
@@ -889,20 +873,21 @@
 
                     memcpy(dataPtr, inptr[i], ptrSizes[i]*num_elements);
 
-                    err = clEnqueueUnmapMemObject(queue, buffers[0], dataPtr, 0,
-                                                  NULL, NULL);
+                    err = clEnqueueUnmapMemObject(queue, buffers[ii], dataPtr, 0, NULL, NULL);
                     if (err) {
                         print_error(err, "clEnqueueUnmapMemObject failed");
+                        clReleaseMemObject(buffers[ii]);
+                        clReleaseMemObject(buffers[ii+1]);
                         align_free( outptr[i] );
                         free_mtdata(d);
                         return -1;
                     }
                 }
                 else if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
-                    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
-                                               ptrSizes[i] * num_elements,
-                                               inptr[i], 0, NULL, NULL);
+                    err = clEnqueueWriteBuffer(queue, buffers[ii], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL);
                     if ( err != CL_SUCCESS ){
+                        clReleaseMemObject(buffers[ii]);
+                        clReleaseMemObject(buffers[ii+1]);
                         align_free( outptr[i] );
                         print_error( err, " clWriteBuffer failed" );
                         free_mtdata(d);
@@ -910,29 +895,56 @@
                     }
                 }
 
-                err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem),
-                                     (void *)&buffers[0]);
-                err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem),
-                                      (void *)&buffers[1]);
+                err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &struct_kernel_code, "read_write_struct" );
+                if ( err ){
+                    clReleaseMemObject(buffers[ii]);
+                    clReleaseMemObject(buffers[ii+1]);
+                    align_free( outptr[i] );
+                    log_error( " Error creating program for struct\n" );
+                    free_mtdata(d);
+                    return -1;
+                }
+
+#ifdef USE_LOCAL_WORK_GROUP
+                err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+                test_error( err, "Unable to get work group size to use" );
+#endif
+
+                err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
+                err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
                 if ( err != CL_SUCCESS ){
+                    clReleaseMemObject( buffers[ii] );
+                    clReleaseMemObject( buffers[ii+1] );
+                    clReleaseKernel( kernel[i] );
+                    clReleaseProgram( program[i] );
                     align_free( outptr[i] );
                     print_error( err, " clSetKernelArg failed" );
                     free_mtdata(d);
                     return -1;
                 }
 
+#ifdef USE_LOCAL_WORK_GROUP
+                err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
                 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+#endif
                 if ( err != CL_SUCCESS ){
                     print_error( err, " clEnqueueNDRangeKernel failed" );
+                    clReleaseMemObject( buffers[ii] );
+                    clReleaseMemObject( buffers[ii+1] );
+                    clReleaseKernel( kernel[i] );
+                    clReleaseProgram( program[i] );
                     align_free( outptr[i] );
                     free_mtdata(d);
                     return -1;
                 }
 
-                err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
-                                          ptrSizes[i] * num_elements, outptr[i],
-                                          0, NULL, NULL);
+                err = clEnqueueReadBuffer( queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
                 if ( err != CL_SUCCESS ){
+                    clReleaseMemObject( buffers[ii] );
+                    clReleaseMemObject( buffers[ii+1] );
+                    clReleaseKernel( kernel[i] );
+                    clReleaseProgram( program[i] );
                     align_free( outptr[i] );
                     print_error( err, " clEnqueueReadBuffer failed" );
                     free_mtdata(d);
@@ -940,19 +952,17 @@
                 }
 
                 if ( verify_write_struct( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
-                    log_error(" buffer_WRITE struct%d test failed. "
-                              "cl_mem_flags src: %s dst: %s\n",
-                              1 << i, flag_set_names[src_flag_id],
-                              flag_set_names[dst_flag_id]);
+                    log_error( " buffer_WRITE struct%d test failed\n", 1<<i );
                     total_errors++;
                 }
                 else{
-                    log_info(" buffer_WRITE struct%d test passed. cl_mem_flags "
-                             "src: %s dst: %s\n",
-                             1 << i, flag_set_names[src_flag_id],
-                             flag_set_names[dst_flag_id]);
+                    log_info( " buffer_WRITE struct%d test passed\n", 1<<i );
                 }
                 // cleanup
+                clReleaseMemObject( buffers[ii] );
+                clReleaseMemObject( buffers[ii+1] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( (void *)inptr[i] );
             }
@@ -976,6 +986,9 @@
     cl_event    event[2];
     size_t      ptrSizes[5];
     size_t      global_work_size[3];
+#ifdef USE_LOCAL_WORK_GROUP
+    size_t      local_work_size[3];
+#endif
     cl_int      err;
     int         i, ii;
     int         src_flag_id, dst_flag_id;
@@ -1032,6 +1045,11 @@
                     return -1;
                 }
 
+#ifdef USE_LOCAL_WORK_GROUP
+                err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+                test_error( err, "Unable to get work group size to use" );
+#endif
+
                 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
                 err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
                 if ( err != CL_SUCCESS ){
@@ -1055,8 +1073,11 @@
                     return -1;
                 }
 
+#ifdef USE_LOCAL_WORK_GROUP
+                err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+#else
                 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
-
+#endif
                 if (err != CL_SUCCESS){
                     print_error( err, "clEnqueueNDRangeKernel failed" );
                     return -1;
@@ -1377,7 +1398,6 @@
 
 int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
 {
-    PASSIVE_REQUIRE_FP16_SUPPORT(deviceID)
     float   *inptr[5];
     size_t  ptrSizes[5];
     int     i, err;
@@ -1402,10 +1422,8 @@
             inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
     }
 
-    err = test_buffer_write(deviceID, context, queue, num_elements,
-                            sizeof(cl_half), (char *)"half", 5, (void **)inptr,
-                            buffer_write_half_kernel_code, half_kernel_name,
-                            foo, d);
+    err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5, (void**)inptr,
+                             buffer_write_half_kernel_code, half_kernel_name, foo, d );
 
     for ( i = 0; i < 5; i++ ){
         align_free( (void *)inptr[i] );

diff --git a/test_conformance/buffers/test_image_migrate.cpp b/test_conformance/buffers/test_image_migrate.cpp
index dbdca9c..31bb0a2 100644
--- a/test_conformance/buffers/test_image_migrate.cpp
+++ b/test_conformance/buffers/test_image_migrate.cpp

@@ -345,9 +345,9 @@
             }
 
             if ((err = clEnqueueNDRangeKernel(queues[i], kernel, 2, NULL, wgs, wls, 0, NULL, NULL)) != CL_SUCCESS) {
-                print_error(err, "Failed enqueuing the NDRange kernel.");
-                failed = 1;
-                goto cleanup;
+              print_error(err, "Failed enqueueing the NDRange kernel.");
+              failed = 1;
+              goto cleanup;
             }
           }
           // Verify the results as long as neither input is an undefined migration

diff --git a/test_conformance/c11_atomics/common.cpp b/test_conformance/c11_atomics/common.cpp
index 668d7b5..bebad89 100644
--- a/test_conformance/c11_atomics/common.cpp
+++ b/test_conformance/c11_atomics/common.cpp

@@ -44,12 +44,16 @@
 {
   switch (scopeType)
   {
-      case MEMORY_SCOPE_EMPTY: return "";
-      case MEMORY_SCOPE_WORK_GROUP: return "memory_scope_work_group";
-      case MEMORY_SCOPE_DEVICE: return "memory_scope_device";
-      case MEMORY_SCOPE_ALL_DEVICES: return "memory_scope_all_devices";
-      case MEMORY_SCOPE_ALL_SVM_DEVICES: return "memory_scope_all_svm_devices";
-      default: return 0;
+  case MEMORY_SCOPE_EMPTY:
+    return "";
+  case MEMORY_SCOPE_WORK_GROUP:
+    return "memory_scope_work_group";
+  case MEMORY_SCOPE_DEVICE:
+    return "memory_scope_device";
+  case MEMORY_SCOPE_ALL_SVM_DEVICES:
+    return "memory_scope_all_svm_devices";
+  default:
+    return 0;
   }
 }
 
@@ -202,80 +206,3 @@
 template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
 template<> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue() {return CL_FLT_MAX;}
 template<> cl_double AtomicTypeExtendedInfo<cl_double>::MaxValue() {return CL_DBL_MAX;}
-
-cl_int getSupportedMemoryOrdersAndScopes(
-    cl_device_id device, std::vector<TExplicitMemoryOrderType> &memoryOrders,
-    std::vector<TExplicitMemoryScopeType> &memoryScopes)
-{
-    // The CL_DEVICE_ATOMIC_MEMORY_CAPABILITES is missing before 3.0, but since
-    // all orderings and scopes are required for 2.X devices and this test is
-    // skipped before 2.0 we can safely return all orderings and scopes if the
-    // device is 2.X. Query device for the supported orders.
-    if (get_device_cl_version(device) < Version{ 3, 0 })
-    {
-        memoryOrders.push_back(MEMORY_ORDER_EMPTY);
-        memoryOrders.push_back(MEMORY_ORDER_RELAXED);
-        memoryOrders.push_back(MEMORY_ORDER_ACQUIRE);
-        memoryOrders.push_back(MEMORY_ORDER_RELEASE);
-        memoryOrders.push_back(MEMORY_ORDER_ACQ_REL);
-        memoryOrders.push_back(MEMORY_ORDER_SEQ_CST);
-        memoryScopes.push_back(MEMORY_SCOPE_EMPTY);
-        memoryScopes.push_back(MEMORY_SCOPE_WORK_GROUP);
-        memoryScopes.push_back(MEMORY_SCOPE_DEVICE);
-        memoryScopes.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES);
-        return CL_SUCCESS;
-    }
-
-    // For a 3.0 device we can query the supported orderings and scopes
-    // directly.
-    cl_device_atomic_capabilities atomic_capabilities{};
-    test_error(
-        clGetDeviceInfo(device, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-                        sizeof(atomic_capabilities), &atomic_capabilities,
-                        nullptr),
-        "clGetDeviceInfo failed for CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES\n");
-
-    // Provided we succeeded, we can start filling the vectors.
-    if (atomic_capabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED)
-    {
-        memoryOrders.push_back(MEMORY_ORDER_RELAXED);
-    }
-
-    if (atomic_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL)
-    {
-        memoryOrders.push_back(MEMORY_ORDER_ACQUIRE);
-        memoryOrders.push_back(MEMORY_ORDER_RELEASE);
-        memoryOrders.push_back(MEMORY_ORDER_ACQ_REL);
-    }
-
-    if (atomic_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST)
-    {
-        // The functions not ending in explicit have the same semantics as the
-        // corresponding explicit function with memory_order_seq_cst for the
-        // memory_order argument.
-        memoryOrders.push_back(MEMORY_ORDER_EMPTY);
-        memoryOrders.push_back(MEMORY_ORDER_SEQ_CST);
-    }
-
-    if (atomic_capabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP)
-    {
-        memoryScopes.push_back(MEMORY_SCOPE_WORK_GROUP);
-    }
-
-    if (atomic_capabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE)
-    {
-        // The functions that do not have memory_scope argument have the same
-        // semantics as the corresponding functions with the memory_scope
-        // argument set to memory_scope_device.
-        memoryScopes.push_back(MEMORY_SCOPE_EMPTY);
-        memoryScopes.push_back(MEMORY_SCOPE_DEVICE);
-    }
-    if (atomic_capabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES)
-    {
-        // OpenCL 3.0 added memory_scope_all_devices as an alias for
-        // memory_scope_all_svm_devices, so test both.
-        memoryScopes.push_back(MEMORY_SCOPE_ALL_DEVICES);
-        memoryScopes.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES);
-    }
-    return CL_SUCCESS;
-}

diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index bbcc68c..c45e1aa 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -35,26 +35,25 @@
 
 enum TExplicitAtomicType
 {
-    TYPE_ATOMIC_INT,
-    TYPE_ATOMIC_UINT,
-    TYPE_ATOMIC_LONG,
-    TYPE_ATOMIC_ULONG,
-    TYPE_ATOMIC_FLOAT,
-    TYPE_ATOMIC_DOUBLE,
-    TYPE_ATOMIC_INTPTR_T,
-    TYPE_ATOMIC_UINTPTR_T,
-    TYPE_ATOMIC_SIZE_T,
-    TYPE_ATOMIC_PTRDIFF_T,
-    TYPE_ATOMIC_FLAG
+  TYPE_ATOMIC_INT,
+  TYPE_ATOMIC_UINT,
+  TYPE_ATOMIC_LONG,
+  TYPE_ATOMIC_ULONG,
+  TYPE_ATOMIC_FLOAT,
+  TYPE_ATOMIC_DOUBLE,
+  TYPE_ATOMIC_INTPTR_T,
+  TYPE_ATOMIC_UINTPTR_T,
+  TYPE_ATOMIC_SIZE_T,
+  TYPE_ATOMIC_PTRDIFF_T,
+  TYPE_ATOMIC_FLAG
 };
 
 enum TExplicitMemoryScopeType
 {
-    MEMORY_SCOPE_EMPTY,
-    MEMORY_SCOPE_WORK_GROUP,
-    MEMORY_SCOPE_DEVICE,
-    MEMORY_SCOPE_ALL_DEVICES, // Alias for MEMORY_SCOPE_ALL_SVM_DEVICES
-    MEMORY_SCOPE_ALL_SVM_DEVICES
+  MEMORY_SCOPE_EMPTY,
+  MEMORY_SCOPE_WORK_GROUP,
+  MEMORY_SCOPE_DEVICE,
+  MEMORY_SCOPE_ALL_SVM_DEVICES
 };
 
 extern bool gHost; // temporary flag for testing native host threads (test verification)
@@ -66,16 +65,10 @@
 extern bool gDebug; // print OpenCL kernel code
 extern int gInternalIterations; // internal test iterations for atomic operation, sufficient to verify atomicity
 extern int gMaxDeviceThreads; // maximum number of threads executed on OCL device
-extern cl_device_atomic_capabilities gAtomicMemCap,
-    gAtomicFenceCap; // atomic memory and fence capabilities for this device
 
 extern const char *get_memory_order_type_name(TExplicitMemoryOrderType orderType);
 extern const char *get_memory_scope_type_name(TExplicitMemoryScopeType scopeType);
 
-extern cl_int getSupportedMemoryOrdersAndScopes(
-    cl_device_id device, std::vector<TExplicitMemoryOrderType> &memoryOrders,
-    std::vector<TExplicitMemoryScopeType> &memoryScopes);
-
 class AtomicTypeInfo
 {
 public:
@@ -288,89 +281,6 @@
     else
       return 0;
   }
-
-  int CheckCapabilities(TExplicitMemoryScopeType memoryScope,
-                        TExplicitMemoryOrderType memoryOrder)
-  {
-      /*
-          Differentiation between atomic fence and other atomic operations
-          does not need to occur here.
-
-          The initialisation of this test checks that the minimum required
-          capabilities are supported by this device.
-
-          The following switches allow the test to skip if optional capabilites
-          are not supported by the device.
-        */
-      switch (memoryScope)
-      {
-          case MEMORY_SCOPE_EMPTY: {
-              break;
-          }
-          case MEMORY_SCOPE_WORK_GROUP: {
-              if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) == 0)
-              {
-                  return TEST_SKIPPED_ITSELF;
-              }
-              break;
-          }
-          case MEMORY_SCOPE_DEVICE: {
-              if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0)
-              {
-                  return TEST_SKIPPED_ITSELF;
-              }
-              break;
-          }
-          case MEMORY_SCOPE_ALL_DEVICES: // fallthough
-          case MEMORY_SCOPE_ALL_SVM_DEVICES: {
-              if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) == 0)
-              {
-                  return TEST_SKIPPED_ITSELF;
-              }
-              break;
-          }
-          default: {
-              log_info("Invalid memory scope\n");
-              break;
-          }
-      }
-
-      switch (memoryOrder)
-      {
-          case MEMORY_ORDER_EMPTY: {
-              break;
-          }
-          case MEMORY_ORDER_RELAXED: {
-              if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_RELAXED) == 0)
-              {
-                  return TEST_SKIPPED_ITSELF;
-              }
-              break;
-          }
-          case MEMORY_ORDER_ACQUIRE:
-          case MEMORY_ORDER_RELEASE:
-          case MEMORY_ORDER_ACQ_REL: {
-              if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0)
-              {
-                  return TEST_SKIPPED_ITSELF;
-              }
-              break;
-          }
-          case MEMORY_ORDER_SEQ_CST: {
-              if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) == 0)
-              {
-                  return TEST_SKIPPED_ITSELF;
-              }
-              break;
-          }
-          default: {
-              log_info("Invalid memory order\n");
-              break;
-          }
-      }
-
-      return 0;
-  }
   virtual bool SVMDataBufferAllSVMConsistent() {return false;}
   bool UseSVM() {return _useSVM;}
   void StartValue(HostDataType startValue) {_startValue = startValue;}
@@ -429,7 +339,6 @@
 public:
   using CBasicTest<HostAtomicType, HostDataType>::LocalMemory;
   using CBasicTest<HostAtomicType, HostDataType>::MaxGroupSize;
-  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
   CBasicTestMemOrderScope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
   {
   }
@@ -480,10 +389,6 @@
       MaxGroupSize(16); // increase number of groups by forcing smaller group size
     else
       MaxGroupSize(0); // group size limited by device capabilities
-
-    if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
-        return 0; // skip test - not applicable
-
     return CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
   }
   virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue)
@@ -493,11 +398,16 @@
     std::vector<TExplicitMemoryScopeType> memoryScope;
     int error = 0;
 
-    // For OpenCL-3.0 and later some orderings and scopes are optional, so here
-    // we query for the supported ones.
-    test_error_ret(
-        getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope),
-        "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL);
+    memoryOrder.push_back(MEMORY_ORDER_EMPTY);
+    memoryOrder.push_back(MEMORY_ORDER_RELAXED);
+    memoryOrder.push_back(MEMORY_ORDER_ACQUIRE);
+    memoryOrder.push_back(MEMORY_ORDER_RELEASE);
+    memoryOrder.push_back(MEMORY_ORDER_ACQ_REL);
+    memoryOrder.push_back(MEMORY_ORDER_SEQ_CST);
+    memoryScope.push_back(MEMORY_SCOPE_EMPTY);
+    memoryScope.push_back(MEMORY_SCOPE_WORK_GROUP);
+    memoryScope.push_back(MEMORY_SCOPE_DEVICE);
+    memoryScope.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES);
 
     for(unsigned oi = 0; oi < memoryOrder.size(); oi++)
     {
@@ -540,17 +450,11 @@
   }
   virtual cl_uint MaxHostThreads()
   {
-      // block host threads execution for memory scope different than
-      // memory_scope_all_svm_devices
-      if (MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
-          || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost)
-      {
-          return CBasicTest<HostAtomicType, HostDataType>::MaxHostThreads();
-      }
-      else
-      {
-          return 0;
-      }
+    // block host threads execution for memory scope different than memory_scope_all_svm_devices
+    if(MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost)
+      return CBasicTest<HostAtomicType, HostDataType>::MaxHostThreads();
+    else
+      return 0;
   }
 private:
   TExplicitMemoryOrderType _memoryOrder;
@@ -566,8 +470,6 @@
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderStr;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
-  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
-
   CBasicTestMemOrder2Scope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
   {
   }
@@ -589,11 +491,16 @@
     std::vector<TExplicitMemoryScopeType> memoryScope;
     int error = 0;
 
-    // For OpenCL-3.0 and later some orderings and scopes are optional, so here
-    // we query for the supported ones.
-    test_error_ret(
-        getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope),
-        "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL);
+    memoryOrder.push_back(MEMORY_ORDER_EMPTY);
+    memoryOrder.push_back(MEMORY_ORDER_RELAXED);
+    memoryOrder.push_back(MEMORY_ORDER_ACQUIRE);
+    memoryOrder.push_back(MEMORY_ORDER_RELEASE);
+    memoryOrder.push_back(MEMORY_ORDER_ACQ_REL);
+    memoryOrder.push_back(MEMORY_ORDER_SEQ_CST);
+    memoryScope.push_back(MEMORY_SCOPE_EMPTY);
+    memoryScope.push_back(MEMORY_SCOPE_WORK_GROUP);
+    memoryScope.push_back(MEMORY_SCOPE_DEVICE);
+    memoryScope.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES);
 
     for(unsigned oi = 0; oi < memoryOrder.size(); oi++)
     {
@@ -610,15 +517,6 @@
           MemoryOrder(memoryOrder[oi]);
           MemoryOrder2(memoryOrder[o2i]);
           MemoryScope(memoryScope[si]);
-
-          if (CheckCapabilities(MemoryScope(), MemoryOrder())
-              == TEST_SKIPPED_ITSELF)
-              continue; // skip test - not applicable
-
-          if (CheckCapabilities(MemoryScope(), MemoryOrder2())
-              == TEST_SKIPPED_ITSELF)
-              continue; // skip test - not applicable
-
           EXECUTE_TEST(error, (CBasicTest<HostAtomicType, HostDataType>::ExecuteForEachParameterSet(deviceID, context, queue)));
         }
       }
@@ -802,35 +700,23 @@
     "\n";
   if(LocalMemory())
   {
-      // memory_order_relaxed is sufficient for these initialization operations
-      // as the barrier below will act as a fence, providing an order to the
-      // operations. memory_scope_work_group is sufficient as local memory is
-      // only visible within the work-group.
-      code += R"(
-              // initialize atomics not reachable from host (first thread
-              // is doing this, other threads are waiting on barrier)
-              if(get_local_id(0) == 0)
-                for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)
-                {)";
-      if (aTypeName == "atomic_flag")
-      {
-          code += R"(
-                  if(finalDest[dstItemIdx])
-                    atomic_flag_test_and_set_explicit(destMemory+dstItemIdx,
-                                                      memory_order_relaxed,
-                                                      memory_scope_work_group);
-                  else
-                    atomic_flag_clear_explicit(destMemory+dstItemIdx,
-                                               memory_order_relaxed,
-                                               memory_scope_work_group);)";
-      }
+    code +=
+      "  // initialize atomics not reachable from host (first thread is doing this, other threads are waiting on barrier)\n"
+      "  if(get_local_id(0) == 0)\n"
+      "    for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)\n"
+      "    {\n";
+    if(aTypeName == "atomic_flag")
+    {
+      code +=
+        "      if(finalDest[dstItemIdx])\n"
+        "        atomic_flag_test_and_set(destMemory+dstItemIdx);\n"
+        "      else\n"
+        "        atomic_flag_clear(destMemory+dstItemIdx);\n";
+    }
     else
     {
-        code += R"(
-                atomic_store_explicit(destMemory+dstItemIdx,
-                                      finalDest[dstItemIdx],
-                                      memory_order_relaxed,
-                                      memory_scope_work_group);)";
+      code +=
+        "      atomic_store(destMemory+dstItemIdx, finalDest[dstItemIdx]);\n";
     }
     code +=
       "    }\n"
@@ -887,29 +773,20 @@
         "  if(get_local_id(0) == 0) // first thread in workgroup\n";
     else
       // global atomics declared in program scope
-      code += R"(
-                if(atomic_fetch_add_explicit(&finishedThreads, 1u,
-                                           memory_order_relaxed,
-                                           memory_scope_work_group)
-                   == get_global_size(0)-1) // last finished thread
-                   )";
+      code +=
+      "  if(atomic_fetch_add(&finishedThreads, 1) == get_global_size(0)-1)\n"
+      "    // last finished thread\n";
     code +=
         "    for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)\n";
     if(aTypeName == "atomic_flag")
     {
-        code += R"(
-                finalDest[dstItemIdx] =
-                    atomic_flag_test_and_set_explicit(destMemory+dstItemIdx,
-                                                      memory_order_relaxed,
-                                                      memory_scope_work_group);)";
+      code +=
+        "      finalDest[dstItemIdx] = atomic_flag_test_and_set(destMemory+dstItemIdx);\n";
     }
     else
     {
-        code += R"(
-                finalDest[dstItemIdx] =
-                    atomic_load_explicit(destMemory+dstItemIdx,
-                                         memory_order_relaxed,
-                                         memory_scope_work_group);)";
+      code +=
+        "      finalDest[dstItemIdx] = atomic_load(destMemory+dstItemIdx);\n";
     }
   }
   code += "}\n"
@@ -971,76 +848,50 @@
 
   if(deviceThreadCount > 0)
   {
-      // This loop iteratively reduces the workgroup size by 2 and then
-      // re-generates the kernel with the reduced
-      // workgroup size until we find a size which is admissible for the kernel
-      // being run or reduce the wg size
-      // to the trivial case of 1 (which was separately verified to be accurate
-      // for the kernel being run)
+    cl_ulong usedLocalMemory;
+    cl_ulong totalLocalMemory;
+    cl_uint maxWorkGroupSize;
 
-      while ((CurrentGroupSize() > 1))
-      {
-          // Re-generate the kernel code with the current group size
-          if (kernel) clReleaseKernel(kernel);
-          if (program) clReleaseProgram(program);
-          programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems)
-              + FunctionCode() + KernelCode(numDestItems);
-          programLine = programSource.c_str();
-          if (create_single_kernel_helper_with_build_options(
-                  context, &program, &kernel, 1, &programLine,
-                  "test_atomic_kernel", gOldAPI ? "" : nullptr))
-          {
-              return -1;
-          }
-          // Get work group size for the new kernel
-          error = clGetKernelWorkGroupInfo(kernel, deviceID,
-                                           CL_KERNEL_WORK_GROUP_SIZE,
-                                           sizeof(groupSize), &groupSize, NULL);
-          test_error(error,
-                     "Unable to obtain max work group size for device and "
-                     "kernel combo");
+    // Set up the kernel code
+    programSource = PragmaHeader(deviceID)+ProgramHeader(numDestItems)+FunctionCode()+KernelCode(numDestItems);
+    programLine = programSource.c_str();
+    if(create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &programLine, "test_atomic_kernel",
+      gOldAPI ? "" : "-cl-std=CL2.0"))
+    {
+      return -1;
+    }
+    if(gDebug)
+    {
+      log_info("Program source:\n");
+      log_info("%s\n", programLine);
+    }
+    // tune up work sizes based on kernel info
+    error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(groupSize), &groupSize, NULL);
+    test_error(error, "Unable to obtain max work group size for device and kernel combo");
 
-          if (LocalMemory())
-          {
-              cl_ulong usedLocalMemory;
-              cl_ulong totalLocalMemory;
-              cl_uint maxWorkGroupSize;
+    if(LocalMemory())
+    {
+      error = clGetKernelWorkGroupInfo (kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(usedLocalMemory), &usedLocalMemory, NULL);
+      test_error(error, "clGetKernelWorkGroupInfo failed");
 
-              error = clGetKernelWorkGroupInfo(
-                  kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE,
-                  sizeof(usedLocalMemory), &usedLocalMemory, NULL);
-              test_error(error, "clGetKernelWorkGroupInfo failed");
+      error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(totalLocalMemory), &totalLocalMemory, NULL);
+      test_error(error, "clGetDeviceInfo failed");
 
-              error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE,
-                                      sizeof(totalLocalMemory),
-                                      &totalLocalMemory, NULL);
-              test_error(error, "clGetDeviceInfo failed");
+      // We know that each work-group is going to use typeSize * deviceThreadCount bytes of local memory
+      // so pick the maximum value for deviceThreadCount that uses all the local memory.
+      maxWorkGroupSize = ((totalLocalMemory - usedLocalMemory) / typeSize);
 
-              // We know that each work-group is going to use typeSize *
-              // deviceThreadCount bytes of local memory
-              // so pick the maximum value for deviceThreadCount that uses all
-              // the local memory.
-              maxWorkGroupSize =
-                  ((totalLocalMemory - usedLocalMemory) / typeSize);
+      if(maxWorkGroupSize < groupSize)
+        groupSize = maxWorkGroupSize;
+    }
 
-              if (maxWorkGroupSize < groupSize) groupSize = maxWorkGroupSize;
-          }
-          if (CurrentGroupSize() <= groupSize)
-              break;
-          else
-              CurrentGroupSize(CurrentGroupSize() / 2);
-      }
+    CurrentGroupSize((cl_uint)groupSize);
     if(CurrentGroupSize() > deviceThreadCount)
       CurrentGroupSize(deviceThreadCount);
     if(CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI)
       deviceThreadCount = CurrentGroupSize()*CurrentGroupNum(deviceThreadCount);
     threadCount = deviceThreadCount+hostThreadCount;
   }
-  if (gDebug)
-  {
-      log_info("Program source:\n");
-      log_info("%s\n", programLine);
-  }
   if(deviceThreadCount > 0)
     log_info("\t\t(thread count %u, group size %u)\n", deviceThreadCount, CurrentGroupSize());
   if(hostThreadCount > 0)
@@ -1048,7 +899,7 @@
 
   refValues.resize(threadCount*NumNonAtomicVariablesPerThread());
 
-  // Generate ref data if we have a ref generator provided
+  // Generate ref data if we have a ref generator provided		
   d = init_genrand(gRandomSeed);
   startRefValues.resize(threadCount*NumNonAtomicVariablesPerThread());
   if(GenerateRefs(threadCount, &startRefValues[0], d))
@@ -1085,13 +936,11 @@
       return -1;
     }
     memcpy(svmAtomicBuffer, &destItems[0], typeSize * numDestItems);
-    streams[0] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
-                                typeSize * numDestItems, svmAtomicBuffer, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), typeSize * numDestItems, svmAtomicBuffer, NULL);
   }
   else
   {
-      streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                  typeSize * numDestItems, &destItems[0], NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * numDestItems, &destItems[0], NULL);
   }
   if (!streams[0])
   {
@@ -1112,18 +961,12 @@
     }
     if(startRefValues.size())
       memcpy(svmDataBuffer, &startRefValues[0], typeSize*threadCount*NumNonAtomicVariablesPerThread());
-    streams[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
-                                typeSize * threadCount
-                                    * NumNonAtomicVariablesPerThread(),
-                                svmDataBuffer, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), typeSize*threadCount*NumNonAtomicVariablesPerThread(), svmDataBuffer, NULL);
   }
   else
   {
-      streams[1] = clCreateBuffer(
-          context,
-          ((startRefValues.size() ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)),
-          typeSize * threadCount * NumNonAtomicVariablesPerThread(),
-          startRefValues.size() ? &startRefValues[0] : 0, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)((startRefValues.size() ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)),
+      typeSize * threadCount*NumNonAtomicVariablesPerThread(), startRefValues.size() ? &startRefValues[0] : 0, NULL);
   }
   if (!streams[1])
   {

diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp
index 3132c40..687861e 100644
--- a/test_conformance/c11_atomics/main.cpp
+++ b/test_conformance/c11_atomics/main.cpp

@@ -26,8 +26,6 @@
 bool gDebug = false; // always print OpenCL kernel code
 int gInternalIterations = 10000; // internal test iterations for atomic operation, sufficient to verify atomicity
 int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device
-cl_device_atomic_capabilities gAtomicMemCap,
-    gAtomicFenceCap; // atomic memory and fence capabilities for this device
 
 extern int test_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
@@ -110,97 +108,11 @@
 test_status InitCL(cl_device_id device) {
     auto version = get_device_cl_version(device);
     auto expected_min_version = Version(2, 0);
-
     if (version < expected_min_version)
     {
-        version_expected_info("Test", "OpenCL",
-                              expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
+        version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
         return TEST_SKIP;
     }
-
-    if (version >= Version(3, 0))
-    {
-        cl_int error;
-
-        error = clGetDeviceInfo(device, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-                                sizeof(gAtomicMemCap), &gAtomicMemCap, NULL);
-        if (error != CL_SUCCESS)
-        {
-            print_error(error, "Unable to get atomic memory capabilities\n");
-            return TEST_FAIL;
-        }
-
-        error =
-            clGetDeviceInfo(device, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
-                            sizeof(gAtomicFenceCap), &gAtomicFenceCap, NULL);
-        if (error != CL_SUCCESS)
-        {
-            print_error(error, "Unable to get atomic fence capabilities\n");
-            return TEST_FAIL;
-        }
-
-        if ((gAtomicFenceCap
-             & (CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_ORDER_ACQ_REL
-                | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP))
-            == 0)
-        {
-            log_info(
-                "Minimum atomic fence capabilities unsupported by device\n");
-            return TEST_FAIL;
-        }
-
-        if ((gAtomicMemCap
-             & (CL_DEVICE_ATOMIC_ORDER_RELAXED
-                | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP))
-            == 0)
-        {
-            log_info(
-                "Minimum atomic memory capabilities unsupported by device\n");
-            return TEST_FAIL;
-        }
-
-        // Disable program scope global variable testing in the case that it is
-        // not supported on an OpenCL-3.0 driver.
-        size_t max_global_variable_size{};
-        test_error_ret(clGetDeviceInfo(device,
-                                       CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
-                                       sizeof(max_global_variable_size),
-                                       &max_global_variable_size, nullptr),
-                       "Unable to get max global variable size\n", TEST_FAIL);
-        if (0 == max_global_variable_size)
-        {
-            gNoGlobalVariables = true;
-        }
-
-        // Disable generic address space testing in the case that it is not
-        // supported on an OpenCL-3.0 driver.
-        cl_bool generic_address_space_support{};
-        test_error_ret(
-            clGetDeviceInfo(device, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT,
-                            sizeof(generic_address_space_support),
-                            &generic_address_space_support, nullptr),
-            "Unable to get generic address space support\n", TEST_FAIL);
-        if (CL_FALSE == generic_address_space_support)
-        {
-            gNoGenericAddressSpace = true;
-        }
-    }
-    else
-    {
-        // OpenCL 2.x device, default to all capabilities
-        gAtomicMemCap = CL_DEVICE_ATOMIC_ORDER_RELAXED
-            | CL_DEVICE_ATOMIC_ORDER_ACQ_REL | CL_DEVICE_ATOMIC_ORDER_SEQ_CST
-            | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP | CL_DEVICE_ATOMIC_SCOPE_DEVICE
-            | CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES;
-
-        gAtomicFenceCap = CL_DEVICE_ATOMIC_ORDER_RELAXED
-            | CL_DEVICE_ATOMIC_ORDER_ACQ_REL | CL_DEVICE_ATOMIC_ORDER_SEQ_CST
-            | CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM
-            | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP | CL_DEVICE_ATOMIC_SCOPE_DEVICE
-            | CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES;
-    }
-
     return TEST_PASS;
 }
 

diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index c3a190b..f12f955 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp

@@ -29,9 +29,7 @@
 public:
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
-  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
-  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
   CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
   {
     OldValueCheck(false);
@@ -45,10 +43,6 @@
     if(MemoryOrder() == MEMORY_ORDER_ACQUIRE ||
       MemoryOrder() == MEMORY_ORDER_ACQ_REL)
       return 0; //skip test - not applicable
-
-    if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
-        return 0; // skip test - not applicable
-
     return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
   }
   virtual std::string ProgramCore()
@@ -204,10 +198,7 @@
 public:
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
-  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
-  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
-  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
   CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
   {
     OldValueCheck(false);
@@ -221,27 +212,15 @@
     if(MemoryOrder() == MEMORY_ORDER_RELEASE ||
       MemoryOrder() == MEMORY_ORDER_ACQ_REL)
       return 0; //skip test - not applicable
-
-    if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
-        return 0; // skip test - not applicable
-
     return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
   }
   virtual std::string ProgramCore()
   {
-      // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store
-      // should be MEMORY_ORDER_RELEASE
-      std::string memoryOrderScopeLoad = MemoryOrderScopeStr();
-      std::string memoryOrderScopeStore =
-          (MemoryOrder() == MEMORY_ORDER_ACQUIRE)
-          ? (", memory_order_release" + MemoryScopeStr())
-          : memoryOrderScopeLoad;
-      std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit");
-      return "  atomic_store" + postfix + "(&destMemory[tid], tid"
-          + memoryOrderScopeStore
-          + ");\n"
-            "  oldValues[tid] = atomic_load"
-          + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n";
+    std::string memoryOrderScope = MemoryOrderScopeStr();
+    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+    return
+      "  atomic_store(&destMemory[tid], tid);\n"
+      "  oldValues[tid] = atomic_load"+postfix+"(&destMemory[tid]"+memoryOrderScope+");\n";
   }
   virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
   {
@@ -456,11 +435,9 @@
   using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder;
   using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder2;
   using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrderScope;
-  using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryScope;
   using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::DataType;
   using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::Iterations;
   using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::IterationsStr;
-  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
   CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>(dataType, useSVM)
   {
     StartValue(123456);
@@ -474,13 +451,6 @@
     if((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) ||
       (MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST))
       return 0; // failure argument shall be no stronger than the success
-
-    if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
-        return 0; // skip test - not applicable
-
-    if (CheckCapabilities(MemoryScope(), MemoryOrder2()) == TEST_SKIPPED_ITSELF)
-        return 0; // skip test - not applicable
-
     return CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
   }
   virtual std::string ProgramCore()
@@ -1624,30 +1594,6 @@
       orderStr = std::string(", ") + get_memory_order_type_name(MemoryOrderForClear());
     return orderStr + MemoryScopeStr();
   }
-
-  virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
-                                cl_command_queue queue)
-  {
-      // This test assumes support for the memory_scope_device scope in the case
-      // that LocalMemory() == false. Therefore we should skip this test in that
-      // configuration on a 3.0 driver since supporting the memory_scope_device
-      // scope is optionaly.
-      if (get_device_cl_version(deviceID) >= Version{ 3, 0 })
-      {
-          if (!LocalMemory()
-              && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE))
-          {
-              log_info(
-                  "Skipping atomic_flag test due to use of atomic_scope_device "
-                  "which is optionally not supported on this device\n");
-              return 0; // skip test - not applicable
-          }
-      }
-      return CBasicTestMemOrderScope<HostAtomicType,
-                                     HostDataType>::ExecuteSingleTest(deviceID,
-                                                                      context,
-                                                                      queue);
-  }
   virtual std::string ProgramCore()
   {
     std::string memoryOrderScope = MemoryOrderScopeStr();
@@ -1843,11 +1789,7 @@
   }
   virtual bool SVMDataBufferAllSVMConsistent()
   {
-      // Although memory_scope_all_devices doesn't mention SVM it is just an
-      // alias for memory_scope_all_svm_devices.  So both scopes interact with
-      // SVM allocations, on devices that support those, just the same.
-      return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
-          || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
+    return MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
   }
   virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue)
   {

diff --git a/test_conformance/clcpp/CMakeLists.txt b/test_conformance/clcpp/CMakeLists.txt
new file mode 100644
index 0000000..04484e7
--- /dev/null
+++ b/test_conformance/clcpp/CMakeLists.txt

@@ -0,0 +1,21 @@
+# Add every OpenCL C++ (clcpp) conformance test group to the build, one subdirectory per group.
+add_subdirectory(address_spaces)
+add_subdirectory(api)
+add_subdirectory(atomics)
+add_subdirectory(attributes)
+add_subdirectory(common_funcs)
+add_subdirectory(convert)
+add_subdirectory(device_queue)
+add_subdirectory(geometric_funcs)
+add_subdirectory(images)
+add_subdirectory(integer_funcs)
+add_subdirectory(math_funcs)
+add_subdirectory(pipes)
+add_subdirectory(program_scope_ctors_dtors)
+add_subdirectory(reinterpret)
+add_subdirectory(relational_funcs)
+add_subdirectory(spec_constants)
+add_subdirectory(subgroups)
+add_subdirectory(synchronization)
+add_subdirectory(vload_vstore)
+add_subdirectory(workgroups)
+add_subdirectory(workitems)

diff --git a/test_conformance/clcpp/address_spaces/CMakeLists.txt b/test_conformance/clcpp/address_spaces/CMakeLists.txt
new file mode 100644
index 0000000..2b6369f
--- /dev/null
+++ b/test_conformance/clcpp/address_spaces/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Name of this test module; it also forms the prefix of the source-list variable below.
+set(MODULE_NAME CPP_ADDRESS_SPACES)
+
+# Source files of the module, stored in CPP_ADDRESS_SPACES_SOURCES.
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+)
+
+# Shared build logic; presumably consumes MODULE_NAME and ${MODULE_NAME}_SOURCES - confirm in CMakeCommon.txt.
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/address_spaces/common.hpp b/test_conformance/clcpp/address_spaces/common.hpp
new file mode 100644
index 0000000..47b78ea
--- /dev/null
+++ b/test_conformance/clcpp/address_spaces/common.hpp

@@ -0,0 +1,203 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_COMMON_HPP
+#define TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_COMMON_HPP
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+// Runs one address-space test object through run_address_spaces_test() and
+// ORs its status into `error`. The expansion site must have `device`,
+// `context`, `queue`, `n_elems`, `last_error` and `error` in scope.
+#define RUN_ADDRESS_SPACES_TEST_MACRO(TEST_CLASS) \
+    last_error = \
+        run_address_spaces_test(device, context, queue, n_elems, TEST_CLASS); \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+
+// Base class for the address spaces tests. A concrete test provides the
+// program source (generate_program), a name (str) and the value expected at
+// each output index (operator()); the other virtuals default to the common
+// case of kernels that take the output buffer as their only argument.
+template <class T>
+struct address_spaces_test : public detail::base_func_type<T>
+{
+    // output buffer type
+    typedef T type;
+
+    virtual ~address_spaces_test() {}
+    // Returns test name
+    virtual std::string str() = 0;
+    // Returns OpenCL program source
+    virtual std::string generate_program() = 0;
+    // Returns kernel names IN ORDER
+    virtual std::vector<std::string> get_kernel_names()
+    {
+        // Typical case, that is, only one kernel
+        return { this->get_kernel_name() };
+    }
+
+    // Return value that is expected to be in output_buffer[i]
+    virtual T operator()(size_t i, size_t work_group_size) = 0;
+
+    // If local size has to be set in clEnqueueNDRangeKernel()
+    // this should return true; otherwise - false;
+    virtual bool set_local_size()
+    {
+        return false;
+    }
+
+    // Calculates maximal work-group size (one dim): the smallest of
+    // work_group_size and the CL_KERNEL_WORK_GROUP_SIZE of every kernel.
+    // `error` receives the status of the last query, or CL_SUCCESS when
+    // `kernels` is empty.
+    virtual size_t get_max_local_size(const std::vector<cl_kernel>& kernels,
+                                      cl_device_id device,
+                                      size_t work_group_size, // default work-group size
+                                      cl_int& error)
+    {
+        // Give the out-parameter a defined value even if the loop never runs.
+        error = CL_SUCCESS;
+        size_t wg_size = work_group_size;
+        for(auto&k : kernels)
+        {
+            size_t max_wg_size;
+            error = clGetKernelWorkGroupInfo(k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL);
+            RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+            wg_size = (std::min)(max_wg_size, wg_size);
+        }
+        return wg_size;
+    }
+
+    // This covers typical case: each kernel is executed once, every kernel
+    // has only one argument which is output buffer.
+    // The local size is passed to clEnqueueNDRangeKernel() only when
+    // set_local_size() returns true.
+    virtual cl_int execute(const std::vector<cl_kernel>& kernels,
+                           cl_mem& output_buffer,
+                           cl_command_queue& queue,
+                           size_t work_size,
+                           size_t work_group_size)
+    {
+        // Initialized so that an empty kernel list yields CL_SUCCESS rather
+        // than an indeterminate value.
+        cl_int err = CL_SUCCESS;
+        for(auto& k : kernels)
+        {
+            err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer);
+            RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+            err = clEnqueueNDRangeKernel(
+                queue, k, 1,
+                NULL, &work_size, this->set_local_size() ? &work_group_size : NULL,
+                0, NULL, NULL
+            );
+            RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+        }
+        return err;
+    }
+};
+
+// Builds the program generated by `op`, runs its kernel(s) over `count`
+// work-items, reads the output buffer back and compares every element with
+// the value returned by op(i, wg_size).
+//
+// Returns CL_SUCCESS when all elements match, and also when TYPE is not
+// supported by `device` (the test is then not run). Failures are reported
+// through the RETURN_ON_* macros, which are assumed to return from this
+// function when given a non-success status.
+template <class address_spaces_test>
+int run_address_spaces_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, address_spaces_test op)
+{
+    cl_mem buffers[1];
+    cl_program program;
+    std::vector<cl_kernel> kernels;
+    size_t wg_size;
+    size_t work_size[1];
+    cl_int err;
+
+    typedef typename address_spaces_test::type TYPE;
+
+    // Cleanup helpers for the failure paths and the normal exit. They may
+    // only be called once the objects they release have all been created.
+    auto release_kernels_and_program = [&]() {
+        for(auto& k : kernels)
+            clReleaseKernel(k);
+        clReleaseProgram(program);
+    };
+    auto release_all = [&]() {
+        clReleaseMemObject(buffers[0]);
+        release_kernels_and_program();
+    };
+
+    // Don't run test for unsupported types
+    if(!(type_supported<TYPE>(device)))
+    {
+        return CL_SUCCESS;
+    }
+
+    std::string code_str = op.generate_program();
+    std::vector<std::string> kernel_names = op.get_kernel_names();
+    if(kernel_names.empty())
+    {
+        RETURN_ON_ERROR_MSG(-1, "No kernel to run");
+    }
+    kernels.resize(kernel_names.size());
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+#else
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]);
+    RETURN_ON_ERROR(err)
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+#endif
+
+    // Find the max possible wg size among all the kernels
+    wg_size = op.get_max_local_size(kernels, device, 1024, err);
+    if(err != CL_SUCCESS)
+        release_kernels_and_program();
+    RETURN_ON_ERROR(err);
+
+    work_size[0] = count;
+    if(op.set_local_size())
+    {
+        // Round count up to a whole number of work-groups. Integer
+        // arithmetic is exact for every size_t value, unlike a round trip
+        // through double.
+        size_t wg_number = count / wg_size + ((count % wg_size) != 0 ? 1 : 0);
+        work_size[0] = wg_number * wg_size;
+    }
+
+    // output on host
+    std::vector<TYPE> output = generate_output<TYPE>(work_size[0], 9999);
+
+    // output buffer
+    buffers[0] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(TYPE) * output.size(), NULL, &err
+    );
+    if(err != CL_SUCCESS)
+        release_kernels_and_program(); // the buffer was not created
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    // Execute test
+    err = op.execute(kernels, buffers[0], queue, work_size[0], wg_size);
+    if(err != CL_SUCCESS)
+        release_all();
+    RETURN_ON_ERROR(err)
+
+    // Blocking read, so `output` is complete when the call returns
+    err = clEnqueueReadBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    if(err != CL_SUCCESS)
+        release_all();
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    for(size_t i = 0; i < output.size(); i++)
+    {
+        TYPE v = op(i, wg_size);
+        if(!(are_equal(v, output[i], detail::make_value<TYPE>(0), op)))
+        {
+            // Release before reporting: the macro below leaves the function,
+            // and none of its arguments refer to the released objects.
+            release_all();
+            RETURN_ON_ERROR_MSG(-1,
+                "test_%s(%s) failed. Expected: %s, got: %s", op.str().c_str(), type_name<TYPE>().c_str(),
+                format_value(v).c_str(), format_value(output[i]).c_str()
+            );
+        }
+    }
+    log_info("test_%s(%s) passed\n", op.str().c_str(), type_name<TYPE>().c_str());
+
+    release_all();
+    return err;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_COMMON_HPP

diff --git a/test_conformance/clcpp/address_spaces/main.cpp b/test_conformance/clcpp/address_spaces/main.cpp
new file mode 100644
index 0000000..3bda012
--- /dev/null
+++ b/test_conformance/clcpp/address_spaces/main.cpp

@@ -0,0 +1,25 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_pointer_types.hpp"
+#include "test_storage_types.hpp"
+
+// Entry point: passes every test definition held by the global autotest
+// suite to runTestHarness() and returns its result.
+int main(int argc, const char *argv[])
+{
+    auto& registered = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(
+        argc, argv,
+        registered.size(), registered.data(),
+        false, false, 0
+    );
+}

diff --git a/test_conformance/clcpp/address_spaces/test_pointer_types.hpp b/test_conformance/clcpp/address_spaces/test_pointer_types.hpp
new file mode 100644
index 0000000..edc50b6
--- /dev/null
+++ b/test_conformance/clcpp/address_spaces/test_pointer_types.hpp

@@ -0,0 +1,411 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_POINTER_TYPES_HPP
+#define TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_POINTER_TYPES_HPP
+
+#include <type_traits>
+
+#include "common.hpp"
+
+// ----------------------------
+// ---------- PRIVATE
+// ----------------------------
+
+// Test functor for private pointers: the generated kernel passes each
+// work-item's global id through private_ptr objects before storing it in the
+// output buffer.
+template <class T>
+struct private_pointer_test : public address_spaces_test<T>
+{
+    // Short identifier of this test.
+    std::string str()
+    {
+        return "private_pointer";
+    }
+
+    // Expected value for work-item i: i converted to the scalar type of T and
+    // handed to detail::make_value<T>. The work-group size is not used.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        (void) work_group_size;
+        using scalar_t = typename scalar_type<T>::type;
+        return detail::make_value<T>(static_cast<scalar_t>(i));
+    }
+
+    // Builds the kernel source. Each work-item writes its global id to
+    // output[work-item-global-id].
+    std::string generate_program()
+    {
+        const auto kernel_name = this->get_kernel_name();
+        const auto elem_type = type_name<T>();
+
+        // Development-only OpenCL C variant of the kernel (per the original
+        // note: only for OpenCL 2.2 conformance test development).
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + kernel_name + "(global " + elem_type + " *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + elem_type + ")(gid);\n"
+                "}\n";
+
+        #else
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "__kernel void " + kernel_name + "(global_ptr<" + elem_type + "[]> output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    typedef " + elem_type + " TYPE;\n"
+                "    TYPE v = TYPE(gid);\n"
+                "    private_ptr<TYPE> v_ptr1(dynamic_asptr_cast<private_ptr<TYPE>>(&v));\n"
+                "    private_ptr<TYPE> v_ptr2(v_ptr1);\n"
+                "    TYPE a[] = { TYPE(0), TYPE(1) };\n"
+                "    private_ptr<TYPE> a_ptr = dynamic_asptr_cast<private_ptr<TYPE>>(a);\n"
+                "    a_ptr++;\n"
+                "    TYPE * a_ptr2 = a_ptr.get();\n"
+                "    *a_ptr2 = *v_ptr2;\n"
+                "    output[gid] = a[1];\n"
+                "}\n";
+        #endif
+    }
+};
+
+AUTO_TEST_CASE(test_private_pointer)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // RUN_ADDRESS_SPACES_TEST_MACRO is defined outside this file; it presumably
+    // refers to `error` and `last_error` by name, so keep these names as they are.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Run the private pointer test for one scalar and several vector types.
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_uint16>());
+
+    // Any accumulated failure is reported as -1.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+// ----------------------------
+// ---------- LOCAL
+// ----------------------------
+
+// Test functor for local pointers: the generated kernel moves the work-group
+// id through local memory (a local variable and a local buffer argument) and
+// every work-item stores it in the output buffer.
+template <class T>
+struct local_pointer_test : public address_spaces_test<T>
+{
+    // Short identifier of this test.
+    std::string str()
+    {
+        return "local_pointer";
+    }
+
+    // Expected value for work-item i: the index of its work-group
+    // (i / work_group_size), converted to the scalar type of T.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        using scalar_t = typename scalar_type<T>::type;
+        return detail::make_value<T>(static_cast<scalar_t>(i / work_group_size));
+    }
+
+    // This test asks for an explicit local work size.
+    bool set_local_size()
+    {
+        return true;
+    }
+
+    // Returns the smaller of the kernel's CL_KERNEL_WORK_GROUP_SIZE and the
+    // default work-group size passed in; the OpenCL status is stored in `error`.
+    size_t get_max_local_size(const std::vector<cl_kernel>& kernels,
+                              cl_device_id device,
+                              size_t work_group_size, // default work-group size
+                              cl_int& error)
+    {
+        // The local memory size has to be set before the query so that the
+        // maximum possible work-group size is calculated correctly.
+        // This also sets the 2nd argument of the test kernel, which is why the
+        // execute() method does not have to be modified.
+        error = clSetKernelArg(kernels[0], 1, sizeof(cl_uint), NULL);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg");
+
+        size_t kernel_limit;
+        error = clGetKernelWorkGroupInfo(
+            kernels[0], device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_limit), &kernel_limit, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+        if(work_group_size < kernel_limit)
+        {
+            return work_group_size;
+        }
+        return kernel_limit;
+    }
+
+    // Builds the kernel source. Every work-item writes the id of its
+    // work-group to output[work-item-global-id].
+    std::string generate_program()
+    {
+        const auto kernel_name = this->get_kernel_name();
+        const auto elem_type = type_name<T>();
+
+        // Development-only OpenCL C variant of the kernel (per the original
+        // note: only for OpenCL 2.2 conformance test development).
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + kernel_name + "(global " + elem_type + " *output, "
+                                                  "local uint * local_mem_ptr)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + elem_type + ")(get_group_id(0));\n"
+                "}\n";
+
+        #else
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_synchronization>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "__kernel void " + kernel_name + "(global_ptr<" + elem_type + "[]> output, "
+                                                  "local_ptr<uint[]> local_mem_ptr)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    size_t lid = get_local_id(0);\n"
+                "    typedef " + elem_type + " TYPE;\n"
+                // Step 1: the first work-item of the group stores get_group_id() in var
+                "    local<uint> var;\n"
+                "    local_ptr<uint> var_ptr = var.ptr();\n"
+                "    if(lid == 0) { *var_ptr = get_group_id(0); }\n"
+                "    work_group_barrier(mem_fence::local);\n"
+                // Step 2: the last work-item of the group copies var to the first element of local_mem
+                "    local_ptr<uint[]> local_mem_ptr2(local_mem_ptr);\n"
+                "    auto local_mem_ptr3 = local_mem_ptr2.release();\n"
+                "    if(lid == (get_local_size(0) - 1)) { *(local_mem_ptr3) = var; }\n"
+                "    work_group_barrier(mem_fence::local);\n"
+                // Step 3: every work-item copies local_mem_ptr[0] to output[work-item-global-id]
+                "    output[gid] = local_mem_ptr[0];\n"
+                "}\n";
+        #endif
+    }
+};
+
+AUTO_TEST_CASE(test_local_pointer)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // RUN_ADDRESS_SPACES_TEST_MACRO is defined outside this file; it presumably
+    // refers to `error` and `last_error` by name, so keep these names as they are.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Run the local pointer test for one scalar and several vector types.
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_uint16>());
+
+    // Any accumulated failure is reported as -1.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+// ----------------------------
+// ---------- GLOBAL
+// ----------------------------
+
+// Test functor for global pointers: the generated kernel advances a
+// global_ptr to the work-item's slot and writes the global id through a
+// helper function taking that pointer.
+template <class T>
+struct global_pointer_test : public address_spaces_test<T>
+{
+    // Short identifier of this test.
+    std::string str()
+    {
+        return "global_pointer";
+    }
+
+    // Expected value for work-item i: i converted to the scalar type of T and
+    // handed to detail::make_value<T>. The work-group size is not used.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        (void) work_group_size;
+        using scalar_t = typename scalar_type<T>::type;
+        return detail::make_value<T>(static_cast<scalar_t>(i));
+    }
+
+    // Builds the kernel source. Each work-item writes its global id to
+    // output[work-item-global-id].
+    std::string generate_program()
+    {
+        const auto kernel_name = this->get_kernel_name();
+        const auto elem_type = type_name<T>();
+
+        // Development-only OpenCL C variant of the kernel (per the original
+        // note: only for OpenCL 2.2 conformance test development).
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + kernel_name + "(global " + elem_type + " *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + elem_type + ")(gid);\n"
+                "}\n";
+
+        #else
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "typedef " + elem_type + " TYPE;\n"
+                "void set_to_gid(global_ptr<TYPE> ptr)\n"
+                "{\n"
+                "    *ptr = TYPE(get_global_id(0));"
+                "}\n"
+                "__kernel void " + kernel_name + "(global_ptr<TYPE[]> output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    auto ptr = output.get();\n"
+                "    global_ptr<TYPE> ptr2(ptr);\n"
+                "    ptr2 += ptrdiff_t(gid);\n"
+                "    set_to_gid(ptr2);\n"
+                "}\n";
+        #endif
+    }
+};
+
+AUTO_TEST_CASE(test_global_pointer)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // RUN_ADDRESS_SPACES_TEST_MACRO is defined outside this file; it presumably
+    // refers to `error` and `last_error` by name, so keep these names as they are.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Run the global pointer test for one scalar and several vector types.
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_uint16>());
+
+    // Any accumulated failure is reported as -1.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+// ----------------------------
+// ---------- CONSTANT
+// ----------------------------
+
+// Test functor for constant pointers: the generated kernel reads one uint
+// through a constant_ptr argument and every work-item stores it in the output
+// buffer. The host side supplies that uint via an extra read-only buffer.
+template <class T>
+struct constant_pointer_test : public address_spaces_test<T>
+{
+    // m_test_value is just a random value we use in this test.
+    constant_pointer_test() : m_test_value(0xdeaddeadU)
+    {
+
+    }
+
+    // Short identifier of this test.
+    std::string str()
+    {
+        return "constant_pointer";
+    }
+
+    // Expected value for every work-item: m_test_value converted to the scalar
+    // type of T and handed to detail::make_value<T>. Neither argument is used.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        typedef typename scalar_type<T>::type SCALAR;
+        (void) i;
+        (void) work_group_size;
+        return detail::make_value<T>(static_cast<SCALAR>(m_test_value));
+    }
+
+    // Builds the kernel source. Each work-item writes the value read from the
+    // constant buffer (m_test_value) to output[work-item-global-id].
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output, "
+                                                              "constant uint * const_ptr)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + type_name<T>() + ")(const_ptr[0]);\n"
+                "}\n";
+
+        #else
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "typedef " + type_name<T>() + " TYPE;\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<TYPE[]> output, "
+                                                              "constant_ptr<uint[]> const_ptr)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    constant_ptr<uint[]> const_ptr2 = const_ptr;\n"
+                "    auto const_ptr3 = const_ptr2.get();\n"
+                "    output[gid] = *const_ptr3;\n"
+                "}\n";
+        #endif
+    }
+
+    // execute() method needs to be modified, to create additional buffer
+    // and set it in 2nd arg (constant_ptr<uint[]> const_ptr).
+    //
+    // Runs kernels[0] once over work_size work-items and waits for completion.
+    // Returns the first failing OpenCL status, or the status of the final
+    // buffer release on success.
+    cl_int execute(const std::vector<cl_kernel>& kernels,
+                   cl_mem& output_buffer,
+                   cl_command_queue& queue,
+                   size_t work_size,
+                   size_t work_group_size)
+    {
+        // Releases the wrapped buffer on scope exit unless it has been disarmed
+        // (mem set to NULL). RETURN_ON_CL_ERROR is defined elsewhere and is
+        // expected to return early on failure; without this guard each of
+        // those early exits would leak the constant buffer.
+        struct buffer_guard
+        {
+            cl_mem mem;
+            ~buffer_guard() { if(mem != NULL) clReleaseMemObject(mem); }
+        };
+
+        cl_int err;
+
+        // Get context from queue
+        cl_context context;
+        err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
+        RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo");
+
+        // Create constant buffer
+        auto const_buff = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_ONLY), sizeof(cl_uint), NULL, &err);
+        RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+        buffer_guard const_buff_guard = { const_buff };
+
+        // Write m_test_value to const_buff
+        err = clEnqueueWriteBuffer(
+            queue, const_buff, CL_TRUE, 0, sizeof(cl_uint),
+            static_cast<void *>(&m_test_value), 0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+        // Check each status on its own: OR-ing two OpenCL error codes together
+        // can produce a value that matches neither of them.
+        err = clSetKernelArg(kernels[0], 0, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+        err = clSetKernelArg(kernels[0], 1, sizeof(const_buff), &const_buff);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+        err = clEnqueueNDRangeKernel(
+            queue, kernels[0], 1, NULL, &work_size, this->set_local_size() ? &work_group_size : NULL, 0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+        err = clFinish(queue);
+        RETURN_ON_CL_ERROR(err, "clFinish");
+
+        // Success path: disarm the guard and release explicitly so that a
+        // failing release is still reported to the caller.
+        const_buff_guard.mem = NULL;
+        err = clReleaseMemObject(const_buff);
+        RETURN_ON_CL_ERROR(err, "clReleaseMemObject");
+        return err;
+    }
+
+private:
+    // Value written to the constant buffer and expected in every output element.
+    cl_uint m_test_value;
+};
+
+AUTO_TEST_CASE(test_constant_pointer)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // RUN_ADDRESS_SPACES_TEST_MACRO is defined outside this file; it presumably
+    // refers to `error` and `last_error` by name, so keep these names as they are.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Run the constant pointer test for one scalar and several vector types.
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_uint16>());
+
+    // Any accumulated failure is reported as -1.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_POINTER_TYPES_HPP

diff --git a/test_conformance/clcpp/address_spaces/test_storage_types.hpp b/test_conformance/clcpp/address_spaces/test_storage_types.hpp
new file mode 100644
index 0000000..e47f952
--- /dev/null
+++ b/test_conformance/clcpp/address_spaces/test_storage_types.hpp

@@ -0,0 +1,418 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_STORAGE_TYPES_HPP
+#define TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_STORAGE_TYPES_HPP
+
+#include <type_traits>
+
+#include "common.hpp"
+
+// ----------------------------
+// ---------- PRIVATE
+// ----------------------------
+
+// Test functor for private storage: the generated kernel keeps the
+// work-item's global id in priv<> objects before storing it in the output
+// buffer.
+template <class T>
+struct private_storage_test : public address_spaces_test<T>
+{
+    // Short identifier of this test.
+    std::string str()
+    {
+        return "private_storage";
+    }
+
+    // Expected value for work-item i: i converted to the scalar type of T and
+    // handed to detail::make_value<T>. The work-group size is not used.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        (void) work_group_size;
+        using scalar_t = typename scalar_type<T>::type;
+        return detail::make_value<T>(static_cast<scalar_t>(i));
+    }
+
+    // Builds the kernel source. Each work-item writes its global id to
+    // output[work-item-global-id].
+    std::string generate_program()
+    {
+        const auto kernel_name = this->get_kernel_name();
+        const auto elem_type = type_name<T>();
+
+        // Development-only OpenCL C variant of the kernel (per the original
+        // note: only for OpenCL 2.2 conformance test development).
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + kernel_name + "(global " + elem_type + " *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + elem_type + ")(gid);\n"
+                "}\n";
+
+        #else
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "__kernel void " + kernel_name + "(global_ptr<" + elem_type + "[]> output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    typedef " + elem_type + " TYPE;\n"
+                "    priv<TYPE> v = { TYPE(gid) };\n"
+                "    const TYPE *v_ptr1 = &v;\n"
+                "    private_ptr<TYPE> v_ptr2 = v.ptr();\n"
+                "    TYPE v2 = *v_ptr2;\n"
+                "    priv<array<TYPE, 1>> a;\n"
+                "    *(a.begin()) = v2;\n"
+                "    output[gid] = a[0];\n"
+                "}\n";
+        #endif
+    }
+};
+
+AUTO_TEST_CASE(test_private_storage)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // RUN_ADDRESS_SPACES_TEST_MACRO is defined outside this file; it presumably
+    // refers to `error` and `last_error` by name, so keep these names as they are.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Run the private storage test for one scalar and several vector types.
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_uint16>());
+
+    // Any accumulated failure is reported as -1.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+// ----------------------------
+// ---------- LOCAL
+// ----------------------------
+
+// Test functor for local storage: the generated kernel moves the work-group
+// id through function-scope and program-scope local<> variables and every
+// work-item stores it in the output buffer.
+template <class T>
+struct local_storage_test : public address_spaces_test<T>
+{
+    // Short identifier of this test.
+    std::string str()
+    {
+        return "local_storage";
+    }
+
+    // Expected value for work-item i: the index of its work-group
+    // (i / work_group_size), converted to the scalar type of T.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        using scalar_t = typename scalar_type<T>::type;
+        return detail::make_value<T>(static_cast<scalar_t>(i / work_group_size));
+    }
+
+    // This test asks for an explicit local work size.
+    bool set_local_size()
+    {
+        return true;
+    }
+
+    // Builds the kernel source. Every work-item writes the id of its
+    // work-group to output[work-item-global-id].
+    std::string generate_program()
+    {
+        const auto kernel_name = this->get_kernel_name();
+        const auto elem_type = type_name<T>();
+
+        // Development-only OpenCL C variant of the kernel (per the original
+        // note: only for OpenCL 2.2 conformance test development).
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + kernel_name + "(global " + elem_type + " *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + elem_type + ")(get_group_id(0));\n"
+                "}\n";
+
+        #else
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_synchronization>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                // Local variable declared at program scope
+                "local<" + elem_type + "> program_scope_var;"
+                "__kernel void " + kernel_name + "(global_ptr<" + elem_type + "[]> output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    size_t lid = get_local_id(0);\n"
+                "    typedef " + elem_type + " TYPE;\n"
+                // Step 1: the first work-item of the group stores get_group_id() in var
+                "    local<TYPE> var;\n"
+                "    if(lid == 0) { var = TYPE(get_group_id(0)); }\n"
+                "    work_group_barrier(mem_fence::local);\n"
+                // Step 2: the last work-item of the group copies var to the first element of a
+                "    local_ptr<TYPE> var_ptr = var.ptr();\n"
+                "    TYPE var2 = *var_ptr;\n"
+                "    local<array<TYPE, 1>> a;\n"
+                "    if(lid == (get_local_size(0) - 1)) { *(a.begin()) = var2; }\n"
+                "    work_group_barrier(mem_fence::local);\n"
+                // Step 3: the first work-item of the group copies a[0] to program_scope_var
+                "    if(lid == 0) { program_scope_var = a[0]; }\n"
+                "    work_group_barrier(mem_fence::local);\n"
+                "    const TYPE *program_scope_var_ptr = &program_scope_var;\n"
+                "    output[gid] = *program_scope_var_ptr;\n"
+                "}\n";
+        #endif
+    }
+};
+
+AUTO_TEST_CASE(test_local_storage)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // RUN_ADDRESS_SPACES_TEST_MACRO is defined outside this file; it presumably
+    // refers to `error` and `last_error` by name, so keep these names as they are.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Run the local storage test for one scalar and several vector types.
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_int16>());
+
+    // Any accumulated failure is reported as -1.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+// ----------------------------
+// ---------- GLOBAL
+// ----------------------------
+
+// Test functor for global storage: one kernel stores a test value in a
+// program scope global array, a second kernel (run twice) uses a function
+// scope global variable as a flag and then copies the stored value to the
+// output buffer.
+template <class T>
+struct global_storage_test : public address_spaces_test<T>
+{
+    // m_test_value is just a random value we use in this test.
+    // m_test_value should not be zero.
+    global_storage_test() : m_test_value(0xdeaddeadU)
+    {
+
+    }
+
+    // Short identifier of this test.
+    std::string str()
+    {
+        return "global_storage";
+    }
+
+    // Expected value for every work-item: m_test_value converted to the scalar
+    // type of T and handed to detail::make_value<T>. Neither argument is used.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        typedef typename scalar_type<T>::type SCALAR;
+        (void) i;
+        (void) work_group_size;
+        return detail::make_value<T>(static_cast<SCALAR>(m_test_value));
+    }
+
+    // Names of the two kernels in the generated program: the base kernel name
+    // with "1" and "2" appended.
+    std::vector<std::string> get_kernel_names()
+    {
+        return
+        {
+            this->get_kernel_name() + "1",
+            this->get_kernel_name() + "2"
+        };
+    }
+
+    // Builds the program source. After execute() has run, every work-item has
+    // written m_test_value to output[work-item-global-id].
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + this->get_kernel_names()[0] + "(global " + type_name<T>() + " *output, "
+                                                                  "uint test_value)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + type_name<T>() + ")(test_value);\n"
+                "}\n"
+                "__kernel void " + this->get_kernel_names()[1] + "(global " + type_name<T>() + " *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = output[gid];\n"
+                "}\n";
+        #else
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "typedef " + type_name<T>() + " TYPE;\n"
+                // Using program scope global variable
+                "global<array<TYPE, 1>> program_scope_global_array;"
+                "__kernel void " + this->get_kernel_names()[0] + "(global_ptr<" + type_name<T>() + "[]> output, "
+                                                                  "uint test_value)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                // 1st work-item writes test_value to program_scope_global_array[0]
+                "    if(gid == 0) { program_scope_global_array[0] = test_value; }\n"
+                "}\n"
+                "__kernel void " + this->get_kernel_names()[1] + "(global_ptr<" + type_name<T>() + "[]> output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    static global<uint> func_scope_global_var { 0 };\n"
+                // if (func_scope_global_var == 1) is true then
+                // each work-item saves program_scope_global_array[0] to output[work-item-global-id]
+                "    if(func_scope_global_var == uint(1))\n"
+                "    {\n"
+                "        output[gid] = program_scope_global_array[0];\n"
+                "        return;\n"
+                "    }\n"
+                // 1st work-item writes 1 to func_scope_global_var
+                "    if(gid == 0) { func_scope_global_var = uint(1); }\n"
+                "}\n";
+        #endif
+    }
+
+    // In this test execution is quite complicated. We have two kernels.
+    // 1st kernel tests program scope global variable, and 2nd kernel tests
+    // function scope global variable (that's why it is run twice).
+    //
+    // Returns the first failing OpenCL status, or the status of the last
+    // clFinish on success.
+    cl_int execute(const std::vector<cl_kernel>& kernels,
+                   cl_mem& output_buffer,
+                   cl_command_queue& queue,
+                   size_t work_size,
+                   size_t wg_size)
+    {
+        cl_int err;
+        // Check each status on its own: OR-ing two OpenCL error codes together
+        // can produce a value that matches neither of them.
+        err = clSetKernelArg(kernels[0], 0, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+        err = clSetKernelArg(kernels[0], 1, sizeof(cl_uint), &m_test_value);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+        // Run first kernel, once.
+        // Its first work-item saves m_test_value to the program scope global
+        // array called program_scope_global_array.
+        err = clEnqueueNDRangeKernel(
+            queue, kernels[0], 1, NULL, &work_size, this->set_local_size() ? &wg_size : NULL, 0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+        err = clFinish(queue);
+        RETURN_ON_CL_ERROR(err, "clFinish")
+
+        // This status used to be overwritten by the loop below without ever
+        // being checked.
+        err = clSetKernelArg(kernels[1], 0, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+        // Run 2nd kernel, twice.
+        // 1st run: the first work-item sets the function scope global variable
+        //          func_scope_global_var to 1.
+        // 2nd run: func_scope_global_var is 1, so each work-item saves
+        //          program_scope_global_array[0] to output[work-item-global-id].
+        for(size_t i = 0; i < 2; i++)
+        {
+            err = clEnqueueNDRangeKernel(
+                queue, kernels[1], 1, NULL, &work_size, this->set_local_size() ? &wg_size : NULL, 0, NULL, NULL
+            );
+            RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+            err = clFinish(queue);
+            RETURN_ON_CL_ERROR(err, "clFinish")
+        }
+        return err;
+    }
+
+private:
+    // Value passed to the first kernel and expected in every output element.
+    cl_uint m_test_value;
+};
+
+AUTO_TEST_CASE(test_global_storage)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // RUN_ADDRESS_SPACES_TEST_MACRO is defined outside this file; it presumably
+    // refers to `error` and `last_error` by name, so keep these names as they are.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Run the global storage test for one scalar and several vector types.
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_int16>());
+
+    // Any accumulated failure is reported as -1.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+// ----------------------------
+// ---------- CONSTANT
+// ----------------------------
+
+// Test functor for constant storage: the generated kernel adds a program
+// scope constant (m_test_value - 1) and a function scope constant (1) and
+// every work-item stores the sum in the output buffer.
+template <class T>
+struct constant_storage_test : public address_spaces_test<T>
+{
+    // m_test_value is an arbitrary value chosen for this test.
+    constant_storage_test() : m_test_value(0xdeaddeadU)
+    {
+
+    }
+
+    // Short identifier of this test.
+    std::string str()
+    {
+        return "constant_storage";
+    }
+
+    // Expected value for every work-item: m_test_value converted to the scalar
+    // type of T and handed to detail::make_value<T>.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        using scalar_t = typename scalar_type<T>::type;
+        return detail::make_value<T>(static_cast<scalar_t>(m_test_value));
+    }
+
+    // Builds the kernel source. Every work-item writes m_test_value to
+    // output[work-item-global-id].
+    std::string generate_program()
+    {
+        const auto kernel_name = this->get_kernel_name();
+        const auto elem_type = type_name<T>();
+        const std::string test_value = std::to_string(m_test_value);
+
+        // Development-only OpenCL C variant of the kernel (per the original
+        // note: only for OpenCL 2.2 conformance test development).
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + kernel_name + "(global " + elem_type + " *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + elem_type + ")(" + test_value + ");\n"
+                "}\n";
+
+        #else
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                // Constant at program scope, initialised to (m_test_value - 1)
+                "constant<uint> program_scope_const{ (" + test_value + " - 1) };"
+                "__kernel void " + kernel_name + "(global_ptr<" + elem_type + "[]> output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    typedef " + elem_type + " TYPE;\n"
+                "    static constant<uint> func_scope_const{ 1 };\n"
+                "    constant_ptr<uint> ps_const_ptr = program_scope_const.ptr();\n"
+                // Disabled in the original source:
+                // "    constant_ptr<array<uint, 1>> fs_const_ptr = &func_scope_const;\n"
+                "    output[gid] = TYPE(*ps_const_ptr + func_scope_const);\n"
+                "}\n";
+        #endif
+    }
+private:
+    // Value every output element is expected to hold.
+    cl_uint m_test_value;
+};
+
+AUTO_TEST_CASE(test_constant_storage)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // RUN_ADDRESS_SPACES_TEST_MACRO is defined outside this file; it presumably
+    // refers to `error` and `last_error` by name, so keep these names as they are.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Run the constant storage test for one scalar and several vector types.
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_int16>());
+
+    // Any accumulated failure is reported as -1.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_STORAGE_TYPES_HPP

diff --git a/test_conformance/clcpp/api/CMakeLists.txt b/test_conformance/clcpp/api/CMakeLists.txt
new file mode 100644
index 0000000..30763d6
--- /dev/null
+++ b/test_conformance/clcpp/api/CMakeLists.txt

@@ -0,0 +1,7 @@
+set(MODULE_NAME CPP_API) # also used below as the prefix of the sources variable (CPP_API_SOURCES)
+
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+)
+
+include(../../CMakeCommon.txt) # NOTE(review): presumably consumes MODULE_NAME and ${MODULE_NAME}_SOURCES - confirm

diff --git a/test_conformance/clcpp/api/main.cpp b/test_conformance/clcpp/api/main.cpp
new file mode 100644
index 0000000..89f8f1b
--- /dev/null
+++ b/test_conformance/clcpp/api/main.cpp

@@ -0,0 +1,27 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_spec_consts.hpp"
+#include "test_ctors_dtors.hpp"
+#include "test_ctors.hpp"
+#include "test_dtors.hpp"
+
+int main(int argc, const char *argv[])
+{
+    auto& tests = autotest::test_suite::global_test_suite().test_defs; // test definitions held by the global suite
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0); // NOTE(review): meaning of (false, false, 0) is defined by runTestHarness - confirm
+}

diff --git a/test_conformance/clcpp/api/test_ctors.hpp b/test_conformance/clcpp/api/test_ctors.hpp
new file mode 100644
index 0000000..8cdfc6e
--- /dev/null
+++ b/test_conformance/clcpp/api/test_ctors.hpp

@@ -0,0 +1,481 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_HPP
+#define TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+#include <numeric>
+
+#include "../common.hpp"
+
+// TEST 1
+// Verify that constructors are executed before any kernel is executed.
+// Verify that when present, multiple constructors are executed. The order between
+// constructors is undefined, but they should all execute.
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+const char * kernel_test_ctors_executed =
+    "__kernel void test_ctors_executed(global uint *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+const char * kernel_test_ctors_executed_multiple_ctors =
+    "__kernel void test_ctors_executed_multiple_ctors(global uint *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else
+const char * kernel_test_ctors_executed =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "using namespace cl;\n"
+    "struct ctor_test_class {\n"
+    // non-trivial ctor
+    "   ctor_test_class(int y) { x = y;};\n"
+    "   int x;\n"
+    "};\n"
+    // global scope program variable
+    "ctor_test_class global_var(int(0xbeefbeef));\n"
+    "__kernel void test_ctors_executed(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n"
+    "   if(global_var.x != int(0xbeefbeef)) result = 1;\n"
+    "   output[gid] = result;\n"
+    "}\n"
+;
+const char * kernel_test_ctors_executed_multiple_ctors =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "#include <opencl_limits>\n"
+    "using namespace cl;\n"
+    "template<class T>\n"
+    "struct ctor_test_class {\n"
+    // non-trivial ctor
+    "   ctor_test_class(T y) { x = y;};\n"
+    "   T x;\n"
+    "};\n"
+    // global scope program variables
+    "ctor_test_class<int> global_var0(int(0xbeefbeef));\n"
+    "ctor_test_class<uint> global_var1(uint(0xbeefbeefU));\n"
+    "ctor_test_class<float> global_var2(float(FLT_MAX));\n"
+    "__kernel void test_ctors_executed_multiple_ctors(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n"
+    "   if(global_var0.x != int(0xbeefbeef))   result = 1;\n"
+    "   if(global_var1.x != uint(0xbeefbeefU)) result = 1;\n"
+    "   if(global_var2.x != float(FLT_MAX))    result = 1;\n"
+    "   output[gid] = result;\n"
+    "}\n"
+;
+#endif
+
+int test_ctors_execution(cl_device_id device,
+                         cl_context context,
+                         cl_command_queue queue,
+                         int count,                  // number of work-items launched and of output elements
+                         std::string kernel_name,    // name of the kernel to create and launch
+                         const char * kernel_source) // program source that contains that kernel
+{
+    int error = CL_SUCCESS;
+
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+
+    size_t dim = 1;
+    size_t work_size[1];
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation: build the kernel, then return without running it
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name);
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name, "", false);
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name);
+    RETURN_ON_ERROR(error)
+#endif
+
+    // host vector, size == count, output[0...count-1] == 1 (the kernel is expected to overwrite it)
+    std::vector<cl_uint> output(count, cl_uint(1));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_uint) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    work_size[0] = output.size(); // one work-item per output element
+    error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_uint) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL); // blocking read
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
+    if(sum != 0) // buffer was pre-filled with 1s, so a zero sum requires every element to have been set to 0
+    {
+        error = -1;
+        CHECK_ERROR_MSG(error, "Test %s failed.", kernel_name.c_str());
+    }
+
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error; // CL_SUCCESS, or -1 when the output check failed
+}
+
+AUTO_TEST_CASE(test_global_scope_ctors_executed)
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+{
+    int error = CL_SUCCESS;       // OR of all sub-test results
+    int local_error = CL_SUCCESS; // result of the sub-test that ran last
+
+    local_error = test_ctors_execution( // program with a single global object
+        device, context, queue, count,
+        "test_ctors_executed", kernel_test_ctors_executed
+    );
+    CHECK_ERROR(local_error);
+    error |= local_error;
+
+    local_error = test_ctors_execution( // program with several global objects
+        device, context, queue, count,
+        "test_ctors_executed_multiple_ctors", kernel_test_ctors_executed_multiple_ctors
+    );
+    CHECK_ERROR(local_error);
+    error |= local_error; // the second sub-test runs even if the first one failed
+
+    if(error != CL_SUCCESS) // any failing sub-test is reported as -1
+    {
+        return -1;
+    }
+    return error; // error == CL_SUCCESS on this path
+}
+
+// TEST 2
+// Verify that constructors are only executed once when multiple kernels from a program are executed.
+
+// How: The first kernel (test_ctors_executed_once_set) is run once. It changes values of program scope
+// variables, then the second kernel is run multiple times, each time verifying that global variables
+// have correct values (the second kernel should observe the values assigned by the first kernel, not
+// by the constructors).
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+const char * program_test_ctors_executed_once =
+    "__kernel void test_ctors_executed_once_set()\n"
+    "{\n"
+    "}\n"
+    "__kernel void test_ctors_executed_once_read(global uint *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else
+const char * program_test_ctors_executed_once =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "using namespace cl;\n"
+    // struct template
+    "template<class T>\n"
+    "struct ctor_test_class {\n"
+    // non-trivial ctor
+    "   ctor_test_class(T y) { x = y;};\n"
+    "   T x;\n"
+    "};\n"
+    // global scope program variables
+    "ctor_test_class<int> global_var0(int(0));\n"
+    "ctor_test_class<uint> global_var1(uint(0));\n"
+
+    "__kernel void test_ctors_executed_once_set()\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   if(gid == 0) {\n"
+    "       global_var0.x = int(0xbeefbeef);\n"
+    "       global_var1.x = uint(0xbeefbeefU);\n"
+    "   }\n"
+    "}\n\n"
+
+    "__kernel void test_ctors_executed_once_read(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n"
+    "   if(global_var0.x != int(0xbeefbeef))   result = 1;\n"
+    "   if(global_var1.x != uint(0xbeefbeefU)) result = 1;\n"
+    "   output[gid] = result;\n"
+    "}\n"
+;
+#endif
+
+AUTO_TEST_CASE(test_global_scope_ctors_executed_once)
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+{
+    int error = CL_SUCCESS;  // status of the most recent CL / helper call
+    int result = CL_SUCCESS; // sticky verdict: `error` is overwritten by every CL call in the loop below
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel_set_global_vars;
+    cl_kernel kernel_read_global_vars;
+
+    size_t dim = 1;
+    size_t work_size[1];
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel_set_global_vars,
+        program_test_ctors_executed_once, "test_ctors_executed_once_set"
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel_set_global_vars,
+        program_test_ctors_executed_once, "test_ctors_executed_once_set", "", false
+    );
+    RETURN_ON_ERROR(error)
+    // Get the second kernel
+    kernel_read_global_vars = clCreateKernel(program, "test_ctors_executed_once_read", &error);
+    RETURN_ON_CL_ERROR(error, "clCreateKernel");
+// Normal run
+#else
+    error = create_opencl_kernel(
+        context, &program, &kernel_set_global_vars,
+        program_test_ctors_executed_once, "test_ctors_executed_once_set"
+    );
+    RETURN_ON_ERROR(error)
+    // Get the second kernel
+    kernel_read_global_vars = clCreateKernel(program, "test_ctors_executed_once_read", &error);
+    RETURN_ON_CL_ERROR(error, "clCreateKernel");
+#endif
+
+    // Execute kernel_set_global_vars once; it overwrites the values assigned by the constructors
+
+    work_size[0] = count;
+    error = clEnqueueNDRangeKernel(queue, kernel_set_global_vars, dim, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    // Execute kernel_read_global_vars 4 times, each time we check if
+    // global variables have correct values.
+
+    // host vector, size == count, output[0...count-1] == 1
+    std::vector<cl_uint> output(count, cl_uint(1));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    for(size_t i = 0; i < 4; i++)
+    {
+        std::fill(output.begin(), output.end(), cl_uint(1)); // re-arm: zeros left by the previous pass must not mask a failure
+        error = clEnqueueWriteBuffer(
+            queue, output_buffer, CL_TRUE,
+            0, sizeof(cl_uint) * output.size(),
+            static_cast<void *>(output.data()),
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+
+        error = clSetKernelArg(kernel_read_global_vars, 0, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+        work_size[0] = output.size();
+        error = clEnqueueNDRangeKernel(
+            queue, kernel_read_global_vars,
+            dim, NULL, work_size, NULL,
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+        error = clEnqueueReadBuffer(
+            queue, output_buffer, CL_TRUE,
+            0, sizeof(cl_uint) * output.size(),
+            static_cast<void *>(output.data()),
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+        size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
+        if(sum != 0) // a non-zero element means the read kernel did not report the expected values
+        {
+            result = -1; // kept across passes; a later successful CL call cannot clear it
+            CHECK_ERROR_MSG(result, "Test test_ctors_executed_once failed.");
+        }
+    }
+
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel_set_global_vars);
+    clReleaseKernel(kernel_read_global_vars);
+    clReleaseProgram(program);
+    return result; // CL_SUCCESS only if all 4 verification passes succeeded
+}
+
+// TEST3
+// Verify that when constructor is executed, the ND-range used is (1,1,1).
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+const char * program_test_ctors_ndrange =
+    "__kernel void test_ctors_ndrange(global int *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else
+const char * program_test_ctors_ndrange =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "using namespace cl;\n"
+    // struct whose constructor records the global size it observes
+    "struct ctor_test_class {\n"
+    // non-trivial ctor: stores get_global_size() for dimensions 0, 1 and 2
+    "   ctor_test_class() {\n"
+    "       x = get_global_size(0);\n"
+    "       y = get_global_size(1);\n"
+    "       z = get_global_size(2);\n"
+    "   };\n"
+    "   ulong x;\n"
+    "   ulong y;\n"
+    "   ulong z;\n"
+    // check() returns true if the ND-range used when the ctor was executed
+    // was (1, 1, 1); otherwise false
+    "   bool check() { return (x == 1) && (y == 1) && (z == 1);}"
+    "};\n"
+    // global scope program variables
+    "ctor_test_class global_var0;\n"
+    "ctor_test_class global_var1;\n"
+
+    "__kernel void test_ctors_ndrange(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n"
+    "   if(!global_var0.check()) result = 1;\n"
+    "   if(!global_var1.check()) result = 1;\n"
+    "   output[gid] = result;\n"
+    "}\n"
+;
+#endif
+
+AUTO_TEST_CASE(test_global_scope_ctors_ndrange)
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+{
+    int error = CL_SUCCESS;
+
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+
+    size_t dim = 1;
+    size_t work_size[1];
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation: build the kernel, then return without running it
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_ctors_ndrange, "test_ctors_ndrange"
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_ctors_ndrange, "test_ctors_ndrange", "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_ctors_ndrange, "test_ctors_ndrange"
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    // host vector, size == count, output[0...count-1] == 1 (the kernel is expected to overwrite it)
+    std::vector<cl_uint> output(count, cl_uint(1));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    error = clEnqueueWriteBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    work_size[0] = output.size(); // one work-item per output element
+    error = clEnqueueNDRangeKernel(
+        queue, kernel,
+        dim, NULL, work_size, NULL,
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE, // blocking read
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
+    if(sum != 0) // buffer was pre-filled with 1s, so a zero sum requires every element to have been set to 0
+    {
+        error = -1;
+        CHECK_ERROR_MSG(error, "Test test_ctors_executed_ndrange failed.");
+    }
+
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error; // CL_SUCCESS, or -1 when the output check failed
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_HPP

diff --git a/test_conformance/clcpp/api/test_ctors_dtors.hpp b/test_conformance/clcpp/api/test_ctors_dtors.hpp
new file mode 100644
index 0000000..02838fa
--- /dev/null
+++ b/test_conformance/clcpp/api/test_ctors_dtors.hpp

@@ -0,0 +1,185 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_DTORS_HPP
+#define TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_DTORS_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+#include "../common.hpp"
+
+// Verify queries clGetProgramInfo correctly return the presence of constructors and/or destructors
+// in the program (using option CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT/CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT)
+// (both are present, either one is present, none is present).
+
+// Builds the source of a minimal OpenCL C++ program for the ctor/dtor presence query test.
+// ctor: when true, emit a global object whose class has a non-trivial constructor.
+// dtor: when true, emit a global object whose class has a non-trivial destructor.
+// A test_ctor_dtor kernel with an empty body is always appended last.
+std::string generate_ctor_dtor_program(const bool ctor, const bool dtor)
+{
+    // class with a non-trivial ctor plus one global instance of it
+    const std::string ctor_section =
+        "struct ctor_test_class {\n"
+        "   ctor_test_class(int y) { x = y;};\n"
+        "   int x;\n"
+        "};\n"
+        "ctor_test_class ctor = ctor_test_class(1024);\n";
+    // class with a non-trivial dtor plus one global instance of it
+    const std::string dtor_section =
+        "struct dtor_test_class {\n"
+        "   ~dtor_test_class() { x = -1024; };\n"
+        "   int x;\n"
+        "};\n"
+        "dtor_test_class dtor;\n";
+    // kernel with an empty body
+    const std::string kernel_section = "__kernel void test_ctor_dtor()\n {\n }\n";
+
+    std::string source;
+    if(ctor) source += ctor_section;
+    if(dtor) source += dtor_section;
+    return source + kernel_section;
+}
+
+// Builds a program with or without global-scope ctor/dtor objects (per ctor/dtor) and checks that
+// clGetProgramInfo reports CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT / _DTORS_PRESENT accordingly.
+int test_get_program_info_global_ctors_dtors_present(cl_device_id device, cl_context context,
+                                                     cl_command_queue queue,
+                                                     const bool ctor, const bool dtor)
+{
+    int error = CL_SUCCESS;  // status of the most recent CL / helper call
+    cl_program program;
+    int result = CL_SUCCESS; // sticky verdict: set to -1 by any failed check, never reset by a later CL call
+    // program source and options
+    std::string options = "";
+    std::string source = generate_ctor_dtor_program(ctor, dtor);
+    const char * source_ptr = source.c_str();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    // Create program
+    error = create_openclcpp_program(context, &program, 1, &source_ptr, options.c_str());
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    return CL_SUCCESS;
+// Normal run
+#else
+    // Create program
+    error = create_openclcpp_program(context, &program, 1, &source_ptr, options.c_str());
+    RETURN_ON_ERROR(error)
+#endif
+
+    // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT cl_bool
+    // This indicates that the program object contains non-trivial constructor(s) that will be
+    // executed by runtime before any kernel from the program is executed.
+
+    // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT cl_bool
+    // This indicates that the program object contains non-trivial destructor(s) that will be
+    // executed by runtime when program is destroyed.
+
+    // Query CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT and compare it with `ctor`
+    cl_bool ctors_present = CL_FALSE;
+    size_t cl_bool_size = 0;
+    error = clGetProgramInfo(
+        program,
+        CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT,
+        sizeof(cl_bool),
+        static_cast<void*>(&ctors_present),
+        &cl_bool_size
+    );
+    RETURN_ON_CL_ERROR(error, "clGetProgramInfo")
+    if(cl_bool_size != sizeof(cl_bool))
+    {
+        result = -1;
+        CHECK_ERROR_MSG(-1, "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).", static_cast<unsigned long>(cl_bool_size), static_cast<unsigned long>(sizeof(cl_bool)));
+    }
+    if(ctor && ctors_present != CL_TRUE)
+    {
+        result = -1;
+        CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 0, should be: 1.");
+    }
+    else if(!ctor && ctors_present == CL_TRUE)
+    {
+        result = -1;
+        CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 1, should be: 0.");
+    }
+
+    // Query CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT into its own variable and compare it with `dtor`
+    cl_bool dtors_present = CL_FALSE;
+    error = clGetProgramInfo(
+        program,
+        CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT,
+        sizeof(cl_bool),
+        static_cast<void*>(&dtors_present),
+        &cl_bool_size
+    );
+    RETURN_ON_CL_ERROR(error, "clGetProgramInfo")
+    if(cl_bool_size != sizeof(cl_bool))
+    {
+        result = -1;
+        CHECK_ERROR_MSG(-1, "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).", static_cast<unsigned long>(cl_bool_size), static_cast<unsigned long>(sizeof(cl_bool)));
+    }
+    if(dtor && dtors_present != CL_TRUE)
+    {
+        result = -1;
+        CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 0, should be: 1.");
+    }
+    else if(!dtor && dtors_present == CL_TRUE)
+    {
+        result = -1;
+        CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 1, should be: 0.");
+    }
+
+    clReleaseProgram(program);
+    return result; // CL_SUCCESS only if every check above passed; -1 otherwise
+}
+
+AUTO_TEST_CASE(test_global_scope_ctors_dtors_present)
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+{
+    int error = CL_SUCCESS;      // OR of all sub-test results
+    int last_error = CL_SUCCESS; // result of the sub-test that ran last
+    // both present (ctor == true, dtor == true)
+    last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, true, true);
+    CHECK_ERROR(last_error);
+    error |= last_error;
+    // dtor only
+    last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, false, true);
+    CHECK_ERROR(last_error);
+    error |= last_error;
+    // ctor only
+    last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, true, false);
+    CHECK_ERROR(last_error);
+    error |= last_error;
+    // none present
+    last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, false, false);
+    CHECK_ERROR(last_error);
+    error |= last_error; // every combination runs even if an earlier one failed
+
+    if(error != CL_SUCCESS) // any failing combination is reported as -1
+    {
+        return -1;
+    }
+    return error; // error == CL_SUCCESS on this path
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_DTORS_HPP

diff --git a/test_conformance/clcpp/api/test_dtors.hpp b/test_conformance/clcpp/api/test_dtors.hpp
new file mode 100644
index 0000000..2f4fd0c
--- /dev/null
+++ b/test_conformance/clcpp/api/test_dtors.hpp

@@ -0,0 +1,553 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_DTORS_HPP
+#define TEST_CONFORMANCE_CLCPP_API_TEST_DTORS_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+#include <numeric>
+
+#include "../common.hpp"
+
+// TEST 1
+// Verify that destructor is executed.
+
+// How: destructor of struct dtor_test_class has a side effect: zeroing buffer. If values
+// in buffer are not zeros after releasing program, destructor was not executed.
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+const char * program_test_dtor_is_executed =
+    "__kernel void test_dtor_is_executed(global uint *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else
+const char * program_test_dtor_is_executed =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "using namespace cl;\n"
+    // struct
+    "struct dtor_test_class {\n"
+    // non-trivial dtor
+    // set all values in buffer to 0
+    "   ~dtor_test_class() {\n"
+    "       for(ulong i = 0; i < size; i++)\n"
+    "           buffer[i] = 0;\n"
+    "   };\n"
+    "   global_ptr<uint[]> buffer;\n"
+    "   ulong size;\n"
+    "};\n"
+    // global scope program variable
+    "dtor_test_class global_var;\n"
+
+    // values in output __MUST BE__ greater than 0 for the test to work
+    // correctly
+    "__kernel void test_dtor_is_executed(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    // set buffer and size in global var
+    "   if(gid == 0){\n"
+    "       global_var.buffer = output;\n"
+    "       global_var.size = get_global_size(0);\n"
+    "   }\n"
+    "}\n"
+;
+#endif
+
+AUTO_TEST_CASE(test_global_scope_dtor_is_executed)
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+{
+    int error = CL_SUCCESS;
+
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+
+    size_t dim = 1;
+    size_t work_size[1];
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation: build the kernel, then return without running it
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtor_is_executed, "test_dtor_is_executed"
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtor_is_executed, "test_dtor_is_executed", "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtor_is_executed, "test_dtor_is_executed"
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    // host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023)
+    // values in output __MUST BE__ greater than 0 for the test to work correctly
+    std::vector<cl_uint> output(count, cl_uint(0xbeefbeef));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    error = clEnqueueWriteBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    work_size[0] = output.size(); // one work-item per output element
+    error = clEnqueueNDRangeKernel(
+        queue, kernel,
+        dim, NULL, work_size, NULL,
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    // Release kernel and program
+    // Dtor should be called now
+    error = clReleaseKernel(kernel);
+    RETURN_ON_CL_ERROR(error, "clReleaseKernel")
+    error = clReleaseProgram(program);
+    RETURN_ON_CL_ERROR(error, "clReleaseProgram")
+
+    // Finish: wait for the queued commands before reading the buffer back
+    error = clFinish(queue);
+    RETURN_ON_CL_ERROR(error, "clFinish")
+
+    // Read output buffer (blocking read)
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
+    if(sum != 0) // buffer was filled with 0xbeefbeef, so a zero sum requires every element to have been set to 0
+    {
+        error = -1;
+        CHECK_ERROR_MSG(error, "Test test_dtor_is_executed failed.");
+    }
+
+    clReleaseMemObject(output_buffer);
+    return error; // CL_SUCCESS, or -1 when the buffer was not zeroed
+}
+
+// TEST 2
+// Verify that multiple destructors, if present, are executed. Order between multiple
+// destructors is undefined.
+// Verify that each destructor is executed only once.
+
+// How:
+// 0) dtor_test_class struct has a global pointer to a buffer, it's set by
+// test_dtors_executed_once kernel.
+// 1) Destructors have a side effect: each dtor writes to its part of the buffer. If all
+// dtors are executed, all values in that buffer should be changed.
+// 2) The first time destructors are executed, they set their parts of the buffer to zero.
+// Next time to 1, next time to 2 etc. Since dtors should be executed only once, all
+// values in that buffer should be equal to zero.
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) // development-only build: OpenCL C stand-in kernel
+const char * program_test_dtors_executed_once = // OpenCL C source; each work-item zeroes its own output element
+    "__kernel void test_dtors_executed_once(global uint *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else // every other build: OpenCL C++ kernel source
+const char * program_test_dtors_executed_once = // four program-scope objects; each dtor stamps its own slice of the buffer
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "using namespace cl;\n"
+    // Class with a non-trivial destructor plus the state that destructor reads
+    "struct dtor_test_class {\n"
+    // non-trivial dtor
+    // Set all values in range [start; end - 1] in buffer to counter.
+    // If dtor is executed only once (correct), all values in range
+    // [start; end - 1] in buffer should be equal to zero after releasing
+    // the program
+    "   ~dtor_test_class() {\n"
+    "       for(ulong i = start; i < end; i++){\n"
+    "           buffer[i] = counter;\n"
+    "       };\n"
+    "       counter++;\n" // a second run of the same dtor would therefore write a non-zero value
+    "   };\n"
+    "   global_ptr<uint[]> buffer;\n" // buffer the dtor writes to; assigned by the kernel below
+    "   ulong start;\n" // first index of this object's slice
+    "   ulong end;\n" // one past the last index of this object's slice
+    "   ulong counter;\n" // value the dtor writes; the kernel resets it to 0
+    "};\n"
+    // Four global (program) scope variables, one per quarter of the buffer
+    "dtor_test_class global_var0;\n"
+    "dtor_test_class global_var1;\n"
+    "dtor_test_class global_var2;\n"
+    "dtor_test_class global_var3;\n"
+
+    // values in output __MUST BE__ greater than 0 for the test to work correctly
+    "__kernel void test_dtors_executed_once(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    // Only work-item 0 initializes the global objects: buffer, slice bounds and counter
+    "   if(gid == 0){\n"
+    "       ulong end = get_global_size(0) / 4;" // running slice end; this literal has no newline escape, so the kernel source continues on the same line
+    // global_var0: slice [0; size/4)
+    "       global_var0.buffer = output;\n"
+    "       global_var0.start = 0;\n"
+    "       global_var0.end = end;\n"
+    "       global_var0.counter = 0;\n"
+    // global_var1: next quarter-sized slice
+    "       global_var1.buffer = output;\n"
+    "       global_var1.start = end;\n"
+    "       end += get_global_size(0) / 4;\n"
+    "       global_var1.end = end;\n"
+    "       global_var1.counter = 0;\n"
+    // global_var2: next quarter-sized slice
+    "       global_var2.buffer = output;\n"
+    "       global_var2.start = end;\n"
+    "       end += get_global_size(0) / 4;\n"
+    "       global_var2.end = end;\n"
+    "       global_var2.counter = 0;\n"
+    // global_var3: everything that is left, up to the global size
+    "       global_var3.buffer = output;\n"
+    "       global_var3.start = end;\n"
+    "       global_var3.end = get_global_size(0);\n"
+    "       global_var3.counter = 0;\n"
+    "   }\n"
+    "}\n"
+;
+#endif
+
+AUTO_TEST_CASE(test_global_scope_dtors_executed_once) // TEST 2 host side: run the kernel, then inspect the buffer before and after the final program release
+(cl_device_id device, cl_context context, cl_command_queue queue, int count) // count = number of cl_uint elements / work-items; device is not referenced in the body
+{
+    int error = CL_SUCCESS; // returned at the end; holds the status of the most recent CL call
+
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+
+    size_t dim = 1; // the kernel is enqueued over a 1-D range
+    size_t work_size[1]; // global size; set to output.size() below
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtors_executed_once, "test_dtors_executed_once"
+    );
+    RETURN_ON_ERROR(error)
+    return error; // compilation-only mode: nothing is executed on the device
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtors_executed_once, "test_dtors_executed_once", "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtors_executed_once, "test_dtors_executed_once"
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    // host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023)
+    // values in output __MUST BE__ greater than 0 for the test to work correctly
+    cl_uint init_value = cl_uint(0xbeefbeef); // sentinel: an element still equal to this was never written by a dtor
+    std::vector<cl_uint> output(count, init_value);
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    error = clEnqueueWriteBuffer( // blocking write (CL_TRUE) of the sentinel values
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    work_size[0] = output.size(); // one work-item per buffer element
+    error = clEnqueueNDRangeKernel(
+        queue, kernel,
+        dim, NULL, work_size, NULL,
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+
+    // Increments the program reference count. Twice
+    error = clRetainProgram(program);
+    RETURN_ON_CL_ERROR(error, "clRetainProgram")
+    error = clRetainProgram(program);
+    RETURN_ON_CL_ERROR(error, "clRetainProgram")
+
+    // Should just decrement the program reference count.
+    error = clReleaseProgram(program);
+    RETURN_ON_CL_ERROR(error, "clReleaseProgram")
+    error = clFinish(queue);
+    RETURN_ON_CL_ERROR(error, "clFinish")
+
+    // Should just decrement the program reference count.
+    error = clReleaseProgram(program); // second release balances the second retain above
+    RETURN_ON_CL_ERROR(error, "clReleaseProgram")
+    error = clFinish(queue);
+    RETURN_ON_CL_ERROR(error, "clFinish")
+
+#ifndef USE_OPENCLC_KERNELS // the OpenCL C stand-in kernel zeroes the buffer itself, so this check is compiled out for it
+    // At this point global scope variables should not be destroyed,
+    // values in output buffer should not be modified.
+
+    // Read output buffer
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    for(auto& i : output) // every element must still hold the sentinel
+    {
+        if(i != init_value)
+        {
+            log_error("ERROR: Test test_global_scope_dtors_executed_once failed.");
+            log_error("\tDestructors were executed prematurely.\n");
+            RETURN_ON_ERROR(-1) // presumably returns -1 from the test (macro defined elsewhere); output_buffer is not released on this path
+        }
+    }
+#endif
+
+    // Release kernel and program, destructors should be called now
+    error = clReleaseKernel(kernel);
+    RETURN_ON_CL_ERROR(error, "clReleaseKernel")
+    error = clReleaseProgram(program);
+    RETURN_ON_CL_ERROR(error, "clReleaseProgram")
+
+    // Finish
+    error = clFinish(queue);
+    RETURN_ON_CL_ERROR(error, "clFinish")
+
+    // Read output buffer
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); // 0 only if every element was written with 0, i.e. each dtor ran exactly once
+    if(sum != 0)
+    {
+        log_error("ERROR: Test test_global_scope_dtors_executed_once failed.");
+        // A leftover sentinel value means at least one dtor never wrote its slice
+        for(auto& i : output)
+        {
+            if(i == init_value)
+            {
+                log_error("\tSome dtors were not executed.");
+                break;
+            }
+        }
+        log_error("\n");
+        RETURN_ON_ERROR(-1) // presumably returns -1 from the test (macro defined elsewhere); output_buffer is not released on this path
+    }
+
+    // Clean
+    clReleaseMemObject(output_buffer);
+    return error; // status of the last CL call (the final read) when all checks passed
+}
+
+// TEST3
+// Verify that ND-range during destructor execution is set to (1,1,1)
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) // development-only build: OpenCL C stand-in kernel
+const char * program_test_dtor_ndrange = // OpenCL C source; each work-item zeroes its own output element
+    "__kernel void test_dtor_ndrange(global uint *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else // every other build: OpenCL C++ kernel source
+const char * program_test_dtor_ndrange = // one program-scope object whose dtor zeroes the buffer only when it sees a (1, 1, 1) range
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "using namespace cl;\n"
+    // Class with a non-trivial destructor plus the state that destructor reads
+    "struct dtor_test_class {\n"
+    // non-trivial dtor
+    // set all values in buffer to 0 only if ND-range is (1, 1, 1)
+    "   ~dtor_test_class() {\n"
+    "       if(check()){\n"
+    "           for(ulong i = 0; i < size; i++)\n"
+    "               buffer[i] = 0;\n"
+    "       }\n"
+    "   };\n"
+    // return true if the ND-range is (1, 1, 1); otherwise - false
+    "   bool check() {\n"
+    "       return (get_global_size(0) == 1)"
+              " && (get_global_size(1) == 1)"
+              " && (get_global_size(2) == 1);\n"
+    "   }" // this literal has no newline escape, so the member declaration below continues on the same kernel-source line
+    "   ulong size;\n" // number of elements the dtor clears; set by the kernel
+    "   global_ptr<uint[]> buffer;\n" // buffer the dtor writes to; set by the kernel
+    "};\n"
+    // global scope program variable
+    "dtor_test_class global_var;\n"
+
+    // values in output __MUST BE__ greater than 0 for the test to work correctly
+    "__kernel void test_dtor_ndrange(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    // Only work-item 0 stores the buffer and the kernel's global size in the global object
+    "   if(gid == 0){\n"
+    "       global_var.buffer = output;\n"
+    "       global_var.size = get_global_size(0);\n"
+    "   }\n"
+    "}\n"
+;
+#endif
+
+AUTO_TEST_CASE(test_global_scope_dtor_ndrange) // TEST 3 host side: run the kernel, release kernel and program, expect an all-zero buffer
+(cl_device_id device, cl_context context, cl_command_queue queue, int count) // count = number of cl_uint elements / work-items; device is not referenced in the body
+{
+    int error = CL_SUCCESS; // returned at the end; -1 when the buffer check fails
+
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+
+    size_t dim = 1; // the kernel is enqueued over a 1-D range
+    size_t work_size[1]; // global size; set to output.size() below
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtor_ndrange, "test_dtor_ndrange"
+    );
+    RETURN_ON_ERROR(error)
+    return error; // compilation-only mode: nothing is executed on the device
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtor_ndrange, "test_dtor_ndrange", "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtor_ndrange, "test_dtor_ndrange"
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    // host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023)
+    // values in output __MUST BE__ greater than 0 for the test to work correctly
+    std::vector<cl_uint> output(count, cl_uint(0xbeefbeef));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    error = clEnqueueWriteBuffer( // blocking write (CL_TRUE) of the non-zero initial values
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    work_size[0] = output.size(); // one work-item per buffer element
+    error = clEnqueueNDRangeKernel(
+        queue, kernel,
+        dim, NULL, work_size, NULL,
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    // Release kernel and program
+    // Dtor should be called now
+    error = clReleaseKernel(kernel);
+    RETURN_ON_CL_ERROR(error, "clReleaseKernel")
+    error = clReleaseProgram(program);
+    RETURN_ON_CL_ERROR(error, "clReleaseProgram")
+
+    // Finish
+    error = clFinish(queue);
+    RETURN_ON_CL_ERROR(error, "clFinish")
+
+    // Read output buffer
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); // 0 only if every element was zeroed
+    if(sum != 0)
+    {
+        error = -1; // failure falls through to the cleanup below instead of returning early
+        CHECK_ERROR_MSG(error, "Test test_dtor_ndrange failed.");
+    }
+
+    clReleaseMemObject(output_buffer);
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_API_TEST_DTORS_HPP

diff --git a/test_conformance/clcpp/api/test_spec_consts.hpp b/test_conformance/clcpp/api/test_spec_consts.hpp
new file mode 100644
index 0000000..1d06168
--- /dev/null
+++ b/test_conformance/clcpp/api/test_spec_consts.hpp

@@ -0,0 +1,474 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_SPEC_CONSTS_HPP
+#define TEST_CONFORMANCE_CLCPP_API_TEST_SPEC_CONSTS_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+#include "../common.hpp"
+
+// TEST 1
+// Verify that if left unset the specialization constant defaults to the default value set in SPIR-V (zero).
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) // development-only build: OpenCL C stand-in kernel
+const char * kernel_test_spec_consts_defaults = // OpenCL C source; each work-item writes 0 (the "pass" value) to its output element
+    "__kernel void test_spec_consts_defaults(global int *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else // every other build: OpenCL C++ kernel source
+const char * kernel_test_spec_consts_defaults = // declares spec constants with ids 1..11, all defaulting to zero, and checks each one reads back as zero
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "#include <opencl_spec_constant>\n"
+    "using namespace cl;\n"
+    "spec_constant<char,  1> spec1(0);\n"
+    "spec_constant<uchar, 2> spec2(0);\n"
+    "spec_constant<short, 3> spec3(0);\n"
+    "spec_constant<ushort,4> spec4(0);\n"
+    "spec_constant<int,   5> spec5(0);\n"
+    "spec_constant<uint,  6> spec6(0);\n"
+    "spec_constant<long,  7> spec7(0);\n"
+    "spec_constant<ulong, 8> spec8(0);\n"
+    "spec_constant<float, 9> spec9(0.0f);\n"
+    "#ifdef cl_khr_fp64\n" // double constant only when the kernel compiler defines cl_khr_fp64
+    "spec_constant<double, 10> spec10(0.0);\n"
+    "#endif\n"
+    "#ifdef cl_khr_fp16\n" // half constant only when the kernel compiler defines cl_khr_fp16
+    "spec_constant<half, 11> spec11(0.0h);\n"
+    "#endif\n"
+    "__kernel void test_spec_consts_defaults(global_ptr<int[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n" // stays 0 unless some constant differs from zero
+    "   if(get(spec1) != char(0))   result = 1;\n"
+    "   if(get(spec2) != uchar(0))  result = 1;\n"
+    "   if(get(spec3) != short(0))  result = 1;\n"
+    "   if(get(spec4) != ushort(0)) result = 1;\n"
+    "   if(get(spec5) != int(0))    result = 1;\n"
+    "   if(get(spec6) != uint(0))   result = 1;\n"
+    "   if(get(spec7) != long(0))   result = 1;\n"
+    "   if(get(spec8) != ulong(0))  result = 1;\n"
+    "   if(get(spec9) != float(0))  result = 1;\n"
+    "#ifdef cl_khr_fp64\n"
+    "   if(get(spec10) != double(0)) result = 1;\n"
+    "#endif\n"
+    "#ifdef cl_khr_fp16\n"
+    "   if(get(spec11) != half(0)) result = 1;\n"
+    "#endif\n"
+    "   output[gid] = result;\n" // 0 = all defaults were zero, 1 = at least one mismatch
+    "}\n"
+;
+#endif
+
+AUTO_TEST_CASE(test_spec_consts_defaults) // TEST 1 host side: build without setting any spec constant, expect the kernel to report 0
+(cl_device_id device, cl_context context, cl_command_queue queue, int count) // device is queried for fp16/fp64 support; count is not referenced in the body
+{
+    int error = CL_SUCCESS; // returned at the end; holds the status of the most recent CL call
+
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+
+    size_t dim = 1; // the kernel is enqueued over a 1-D range
+    size_t work_size[1]; // global size; set to output.size() (== 1) below
+
+    std::string options = ""; // build options; extended per extension the device reports
+    if(is_extension_available(device, "cl_khr_fp16"))
+    {
+        options += " -cl-fp16-enable";
+    }
+    if(is_extension_available(device, "cl_khr_fp64"))
+    {
+        options += " -cl-fp64-enable";
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", options);
+    RETURN_ON_ERROR(error)
+    return error; // compilation-only mode: nothing is executed on the device
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", "", false);
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    // Spec constants are NOT set before clBuildProgram (called in create_opencl_kernel), so
+    // they all should default to the default value set in SPIR-V (zero).
+    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", options);
+    RETURN_ON_ERROR(error)
+#endif
+
+    // host vector, size == 1, output[0] == 1
+    std::vector<cl_int> output(1, cl_int(1)); // starts at 1 so an unwritten result cannot look like a pass
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    work_size[0] = output.size(); // a single work-item
+    error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") // API name spelled in full so a reported failure names the real call
+
+    error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL); // blocking read (CL_TRUE)
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    // if output[0] != 0, then some spec constant(s) did not default to zero.
+    if(output[0] != 0)
+    {
+        RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_defaults failed, output[0]: %d.", output[0])
+    }
+
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+}
+
+// TEST 2
+// Verify that setting an existing specialization constant affects only
+// the value of that constant and not of other specialization constants.
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) // development-only build: OpenCL C stand-in kernel
+const char * kernel_test_spec_consts_many_constants = // OpenCL C source; each work-item writes 0 (the "pass" value) to its output element
+    "__kernel void test_spec_consts_many_constants(global int *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else // every other build: OpenCL C++ kernel source
+const char * kernel_test_spec_consts_many_constants = // three int spec constants (ids 1..3) compared against -1024, 0 and 1024
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "#include <opencl_spec_constant>\n"
+    "using namespace cl;\n"
+    "spec_constant<int, 1> spec1(0);\n"
+    "spec_constant<int, 2> spec2(0);\n"
+    "spec_constant<int, 3> spec3(0);\n"
+    "__kernel void test_spec_consts_many_constants(global_ptr<int[]> output)\n" // kernel name matches the one used in the OpenCL C branch above
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n" // bit mask of mismatching constants; 0 means all three matched
+    "   if(get(spec1) != int(-1024)) result += 1;\n"
+    "   if(get(spec2) != int(0))     result += 2;\n"
+    "   if(get(spec3) != int(1024))  result += 4;\n"
+    "   output[gid] = result;\n"
+    "}\n"
+;
+#endif
+
+AUTO_TEST_CASE(test_spec_consts_many_constants) // TEST 2 host side: set spec constants 1 and 3, leave 2 unset, expect the kernel to report 0
+(cl_device_id device, cl_context context, cl_command_queue queue, int count) // device and count are not referenced in the body
+{
+    int error = CL_SUCCESS; // returned at the end; holds the status of the most recent CL call
+
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+
+    size_t dim = 1; // the kernel is enqueued over a 1-D range
+    size_t work_size[1]; // global size; set to output.size() (== 1) below
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants"
+    );
+    RETURN_ON_ERROR(error)
+    return error; // compilation-only mode: nothing is executed on the device
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants", "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    // Create program
+    error = create_openclcpp_program(context, &program, 1, &kernel_test_spec_consts_many_constants);
+    RETURN_ON_ERROR(error)
+
+    // Set specialization constants
+
+    // clSetProgramSpecializationConstant(
+    //     cl_program /* program */, cl_uint /* spec_id */, size_t  /* spec_size */,const void* /* spec_value */
+    // )
+    cl_int spec1 = -1024; // value the kernel expects for spec id 1
+    cl_int spec3 = 1024; // value the kernel expects for spec id 3
+    // Set spec1
+    error = clSetProgramSpecializationConstant(program, cl_uint(1), sizeof(cl_int), static_cast<void*>(&spec1));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Specialization constant spec2 should default to zero
+    // Set spec3
+    error = clSetProgramSpecializationConstant(program, cl_uint(3), sizeof(cl_int), static_cast<void*>(&spec3));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+
+    // Build program and create kernel
+    error = build_program_create_kernel_helper( // the kernel name passed here has to exist in the kernel source string
+        context, &program, &kernel, 1, &kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants"
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    // host vector, size == 1, output[0] == 1
+    std::vector<cl_int> output(1, cl_int(1)); // starts at 1 so an unwritten result cannot look like a pass
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    work_size[0] = output.size(); // a single work-item
+    error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL); // blocking read (CL_TRUE)
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    // if output[0] != 0, then values of spec constants were incorrect
+    if(output[0] != 0) // in the OpenCL C++ kernel the value is a bit mask: 1 = spec1, 2 = spec2, 4 = spec3 mismatched
+    {
+        RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_many_constants failed, output[0]: %d.", output[0]);
+    }
+
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+}
+
+// TEST 3
+// Verify that the API correctly handles the size of a specialization constant by exercising
+// the API for specialization constants of different types (int, bool, float, etc.)
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) // development-only build: OpenCL C stand-in kernel
+const char * kernel_test_spec_consts_different_types = // OpenCL C source; each work-item writes 0 (the "pass" value) to its output element
+    "__kernel void test_spec_consts_different_types(global int *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else // every other build: OpenCL C++ kernel source
+const char * kernel_test_spec_consts_different_types = // spec constants of every scalar type (ids 1..11), each compared against its type's maximum
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "#include <opencl_spec_constant>\n"
+    "#include <opencl_limits>\n"
+    "using namespace cl;\n"
+    "spec_constant<char,  1> spec1(0);\n"
+    "spec_constant<uchar, 2> spec2(0);\n"
+    "spec_constant<short, 3> spec3(0);\n"
+    "spec_constant<ushort,4> spec4(0);\n"
+    "spec_constant<int,   5> spec5(0);\n"
+    "spec_constant<uint,  6> spec6(0);\n"
+    "spec_constant<long,  7> spec7(0);\n"
+    "spec_constant<ulong, 8> spec8(0);\n"
+    "spec_constant<float, 9> spec9(0.0f);\n"
+    "#ifdef cl_khr_fp64\n" // double constant only when the kernel compiler defines cl_khr_fp64
+    "spec_constant<double, 10> spec10(0.0);\n"
+    "#endif\n"
+    "#ifdef cl_khr_fp16\n" // half constant only when the kernel compiler defines cl_khr_fp16
+    "spec_constant<half, 11> spec11(0.0h);\n"
+    "#endif\n"
+    "__kernel void test_spec_consts_different_types(global_ptr<int[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n" // bit mask: each mismatching constant adds its own power of two
+    "   if(get(spec1) != char(CHAR_MAX))    result += 1;\n"
+    "   if(get(spec2) != uchar(UCHAR_MAX))  result += 2;\n"
+    "   if(get(spec3) != short(SHRT_MAX))   result += 4;\n"
+    "   if(get(spec4) != ushort(USHRT_MAX)) result += 8;\n"
+    "   if(get(spec5) != int(INT_MAX))      result += 16;\n"
+    "   if(get(spec6) != uint(UINT_MAX))    result += 32;\n"
+    "   if(get(spec7) != long(LONG_MAX))    result += 64;\n"
+    "   if(get(spec8) != ulong(ULONG_MAX))  result += 128;\n"
+    "   if(get(spec9) != float(FLT_MAX))    result += 256;\n"
+    "#ifdef cl_khr_fp64\n"
+    "   if(get(spec10) != double(DBL_MAX)) result += 512;\n"
+    "#endif\n"
+    "#ifdef cl_khr_fp16\n"
+    "   if(get(spec11) != half(HALF_MAX)) result += 1024;\n"
+    "#endif\n"
+    "   output[gid] = result;\n" // 0 = every constant held the expected maximum
+    "}\n"
+;
+#endif
+
+
+AUTO_TEST_CASE(test_spec_consts_different_types) // TEST 3 host side: set one spec constant per scalar type to its maximum, expect the kernel to report 0
+(cl_device_id device, cl_context context, cl_command_queue queue, int count) // device is queried for fp16/fp64 support; count is not referenced in the body
+{
+    int error = CL_SUCCESS; // returned at the end; holds the status of the most recent CL call
+
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+
+    size_t dim = 1; // the kernel is enqueued over a 1-D range
+    size_t work_size[1]; // global size; set to output.size() (== 1) below
+
+    std::string options = ""; // build options; extended per extension the device reports
+    if(is_extension_available(device, "cl_khr_fp16"))
+    {
+        options += " -cl-fp16-enable";
+    }
+    if(is_extension_available(device, "cl_khr_fp64"))
+    {
+        options += " -cl-fp64-enable";
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_different_types, "test_spec_consts_different_types", options);
+    RETURN_ON_ERROR(error)
+    return error; // compilation-only mode: nothing is executed on the device
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_different_types, "test_spec_consts_different_types", "", false);
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    // Create program
+    error = create_openclcpp_program(context, &program, 1, &kernel_test_spec_consts_different_types, options.c_str());
+    RETURN_ON_ERROR(error)
+
+    // Set specialization constants
+    cl_uint spec_id = 1; // post-incremented below, so the nine unconditional calls use ids 1..9
+
+    cl_char   spec1 = CL_CHAR_MAX;
+    cl_uchar  spec2 = CL_UCHAR_MAX;
+    cl_short  spec3 = CL_SHRT_MAX;
+    cl_ushort spec4 = CL_USHRT_MAX;
+    cl_int    spec5 = CL_INT_MAX;
+    cl_uint   spec6 = CL_UINT_MAX;
+    cl_long   spec7 = CL_LONG_MAX;
+    cl_ulong  spec8 = CL_ULONG_MAX;
+    cl_float  spec9 = CL_FLT_MAX;
+    cl_double spec10 = CL_DBL_MAX;
+    cl_half   spec11 = CL_HALF_MAX; // NOTE(review): if cl_half is an integer bit-pattern type, this conversion may not yield the HALF_MAX encoding - confirm against cl_platform.h
+
+    // Set spec1
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_char), static_cast<void*>(&spec1));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec2
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_uchar), static_cast<void*>(&spec2));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec3
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_short), static_cast<void*>(&spec3));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec4
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_ushort), static_cast<void*>(&spec4));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec5
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_int), static_cast<void*>(&spec5));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec6
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_uint), static_cast<void*>(&spec6));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec7
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_long), static_cast<void*>(&spec7));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec8
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_ulong), static_cast<void*>(&spec8));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec9
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_float), static_cast<void*>(&spec9));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec10 (only when the device reports cl_khr_fp64; the kernel declares it under the same condition)
+    if(is_extension_available(device, "cl_khr_fp64"))
+    {
+        error = clSetProgramSpecializationConstant(program, cl_uint(10), sizeof(cl_double), static_cast<void*>(&spec10));
+        RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    }
+    // Set spec11 (only when the device reports cl_khr_fp16)
+    if(is_extension_available(device, "cl_khr_fp16"))
+    {
+        error = clSetProgramSpecializationConstant(program, cl_uint(11), sizeof(cl_half), static_cast<void*>(&spec11));
+        RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    }
+
+    // Build program and create kernel
+    error = build_program_create_kernel_helper( // source and kernel name must be this test's own, matching the program created above
+        context, &program, &kernel, 1, &kernel_test_spec_consts_different_types, "test_spec_consts_different_types"
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    // Copy output to output_buffer, run kernel, copy output_buffer back to output, check result
+
+    // host vector, size == 1, output[0] == 1
+    std::vector<cl_int> output(1, cl_int(1)); // starts at 1 so an unwritten result cannot look like a pass
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    work_size[0] = output.size(); // a single work-item
+    error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL); // blocking read (CL_TRUE)
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    // if output[0] != 0, then some spec constants had incorrect values
+    if(output[0] != 0) // in the OpenCL C++ kernel the value is a bit mask with one bit per mismatching constant
+    {
+        RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_different_types failed, output[0]: %d.", output[0])
+    }
+
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_API_TEST_SPEC_CONSTS_HPP

diff --git a/test_conformance/clcpp/atomics/CMakeLists.txt b/test_conformance/clcpp/atomics/CMakeLists.txt
new file mode 100644
index 0000000..4fb4bfd
--- /dev/null
+++ b/test_conformance/clcpp/atomics/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Build description for the clcpp/atomics conformance test module.
+set(MODULE_NAME CPP_ATOMICS)
+
+# main.cpp is the only source file listed for this module.
+set(${MODULE_NAME}_SOURCES main.cpp)
+
+# Included after the variables above are set; presumably CMakeCommon.txt
+# consumes MODULE_NAME and ${MODULE_NAME}_SOURCES (verify in that file).
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/atomics/atomic_fetch.hpp b/test_conformance/clcpp/atomics/atomic_fetch.hpp
new file mode 100644
index 0000000..5618375
--- /dev/null
+++ b/test_conformance/clcpp/atomics/atomic_fetch.hpp

@@ -0,0 +1,306 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_ATOMICS_ATOMIC_FETCH_HPP
+#define TEST_CONFORMANCE_CLCPP_ATOMICS_ATOMIC_FETCH_HPP
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+
+// Number of consecutive input elements that are folded into one atomic output
+// slot: the generated kernels address output[gid / atomic_bucket_size], and
+// the host verification walks the input in chunks of this size.
+const size_t atomic_bucket_size = 100;
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the OpenCL C source of kernel "test_<func.str()>". The source starts
+// with func.defs(); each work-item then executes
+//     atomic_<func.str()>(&output[gid / atomic_bucket_size], input[gid]);
+template <class func_type, class type>
+std::string generate_kernel_atomic_fetch(func_type func)
+{
+    const std::string elem_type = type_name<type>();
+    const std::string op_name = func.str();
+    const std::string src_expr = "input[gid]";
+    const std::string dst_expr = "output[gid / " + std::to_string(atomic_bucket_size) + "]";
+
+    std::string source = func.defs();
+    source += "__kernel void test_" + op_name + "(global " + elem_type + " *input, global atomic_" + elem_type + " *output)\n";
+    source += "{\n";
+    source += "    size_t gid = get_global_id(0);\n";
+    source += "    atomic_" + op_name + "(&" + dst_expr + ", " + src_expr + ");\n";
+    source += "}\n";
+    return source;
+}
+#else
+// Builds the OpenCL C++ source of kernel "test_<func.str()>". The source starts
+// with func.defs() and func.headers(), followed by the memory/work-item
+// includes; each work-item then executes
+//     output[gid / atomic_bucket_size].<func.str()>(input[gid]);
+template <class func_type, class type>
+std::string generate_kernel_atomic_fetch(func_type func)
+{
+    const std::string elem_type = type_name<type>();
+    const std::string op_name = func.str();
+    const std::string src_expr = "input[gid]";
+    const std::string dst_expr = "output[gid / " + std::to_string(atomic_bucket_size) + "]";
+
+    std::string source = func.defs();
+    source += func.headers();
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "using namespace cl;\n";
+    // The two parameter declarations are deliberately joined without a space
+    // after the comma, exactly as in the kernel text this function has
+    // always produced.
+    source += "__kernel void test_" + op_name + "(global_ptr<" + elem_type + "[]> input,";
+    source += "global_ptr<atomic<" + elem_type + ">[]> output)\n";
+    source += "{\n";
+    source += "    size_t gid = get_global_id(0);\n";
+    source += "    " + dst_expr + "." + op_name + "(" + src_expr + ");\n";
+    source += "}\n";
+    return source;
+}
+#endif
+
+// Host-side check of the kernel result. For every output slot the expected
+// value is rebuilt by starting from op.init_out() and applying op, in order,
+// to the inputs of that slot's bucket (atomic_bucket_size elements, clipped to
+// in.size()). The first mismatch is reported through print_error_msg and makes
+// the function return false; true means every slot matched.
+template<class TYPE, class atomic_fetch>
+bool verify_atomic_fetch(const std::vector<TYPE> &in, const std::vector<TYPE> &out, atomic_fetch op)
+{
+    for (size_t slot = 0; slot < out.size(); ++slot)
+    {
+        const size_t first = slot * atomic_bucket_size;
+        TYPE reference = op.init_out();
+        for (size_t offset = 0; offset < atomic_bucket_size && first + offset < in.size(); ++offset)
+        {
+            reference = op(reference, in[first + offset]);
+        }
+        if (reference != out[slot])
+        {
+            print_error_msg(reference, out[slot], slot, op);
+            return false;
+        }
+    }
+    return true;
+}
+
+// Runs one atomic fetch operation end to end for the element type
+// atomic_fetch::in_type: builds the kernel "test_<op.str()>", uploads `count`
+// generated inputs, pre-fills the output buffer with op.init_out(), launches
+// `count` work-items and compares the result with verify_atomic_fetch.
+//
+// Returns CL_SUCCESS without running anything when the type is not supported
+// by the device, or when the type is 8 bytes wide and the device lacks either
+// of the two 64-bit atomics extensions. Otherwise returns the last OpenCL
+// status, or goes through RETURN_ON_ERROR_MSG(-1, ...) on a verification
+// mismatch.
+//
+// NOTE(review): the RETURN_ON_* macros are defined elsewhere; if they return
+// immediately (as their names suggest), the clRelease* calls at the end are
+// skipped on every error path - confirm whether that is acceptable here.
+template <class atomic_fetch>
+int test_atomic_fetch_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, atomic_fetch op)
+{
+    cl_mem buffers[2]; // [0] = input values, [1] = atomic output slots
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+
+    typedef typename atomic_fetch::in_type TYPE;
+
+    // Don't run test for unsupported types
+    if (!(type_supported<TYPE>(device)))
+    {
+        return CL_SUCCESS;
+    }
+    // 64-bit element types need both the base and the extended atomics
+    // extension; skip (reporting success) if either one is missing.
+    if (sizeof(TYPE) == 8 &&
+        (!is_extension_available(device, "cl_khr_int64_base_atomics") ||
+         !is_extension_available(device, "cl_khr_int64_extended_atomics")))
+    {
+        return CL_SUCCESS;
+    }
+
+    std::string code_str = generate_kernel_atomic_fetch<atomic_fetch, TYPE>(op);
+    std::string kernel_name("test_"); kernel_name += op.str();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+    // In this configuration the function stops here: nothing is executed.
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+#endif
+
+    std::vector<TYPE> input = generate_input<TYPE>(count, op.min1(), op.max1(), std::vector<TYPE>());
+    // One output slot per bucket: ceil(count / atomic_bucket_size).
+    // NOTE(review): count == 0 would wrap around in (count - 1) - presumably
+    // callers never pass 0; verify against the harness.
+    std::vector<TYPE> output = generate_output<TYPE>((count - 1) / atomic_bucket_size + 1);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer")
+
+    // Every output slot starts from the operation's initial value, the same
+    // value verify_atomic_fetch starts its host-side reference from.
+    const TYPE pattern = op.init_out();
+    err = clEnqueueFillBuffer(queue, buffers[1], &pattern, sizeof(pattern), 0, sizeof(TYPE) * output.size(), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueFillBuffer")
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+    err = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+
+    // One work-item per input element.
+    work_size[0] = count;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
+
+    // Blocking read (CL_TRUE): output is valid once this call returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(TYPE) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer")
+
+    if (!verify_atomic_fetch(input, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1, "test_%s %s failed", op.str().c_str(), type_name<TYPE>().c_str());
+    }
+    log_info("test_%s %s passed\n", op.str().c_str(), type_name<TYPE>().c_str());
+
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+
+
+// Base class shared by all atomic fetch operation descriptors. It publishes
+// the element type as in_type and provides the parts that do not depend on the
+// concrete operation: type name, source preamble, required header and the
+// bounds handed to the input generator.
+template<class TYPE>
+struct atomic_fetch
+{
+    using in_type = TYPE;
+
+    // Name of the element type, as produced by type_name<TYPE>().
+    std::string decl_str() { return type_name<TYPE>(); }
+
+    // Preamble for the kernel source: the two pragmas enabling the 64-bit
+    // atomics extensions when TYPE is 8 bytes wide, otherwise an empty string.
+    std::string defs()
+    {
+        if (sizeof(TYPE) != 8)
+        {
+            return std::string();
+        }
+        return "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
+               "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n";
+    }
+
+    // Include line emitted into OpenCL C++ kernels.
+    std::string headers() { return "#include <opencl_atomic>\n"; }
+
+    // Bounds passed to generate_input by the test driver.
+    TYPE min1() { return 0; }
+    TYPE max1() { return 1000; }
+};
+
+
+// Defines CLASS_NAME<TYPE>, an atomic_fetch<TYPE> descriptor for one operation:
+//   str()      - FUNC_NAME as a string; used for the kernel name and for the
+//                atomic call placed in the generated kernel
+//   init_out() - INIT_OUT, the value every output slot holds before the kernel
+//                runs and the starting value of the host-side reference
+//   operator() - HOST_FUNC_EXPRESSION evaluated on x (running value) and
+//                y (next input element); the host-side reference step
+// No comments are placed inside the macro body because a // comment on a
+// continued line would swallow the following lines.
+#define DEF_ATOMIC_FETCH_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION, INIT_OUT) \
+template<class TYPE> \
+struct CLASS_NAME : public atomic_fetch<TYPE> \
+{ \
+    std::string str() \
+    { \
+        return #FUNC_NAME; \
+    } \
+    \
+    TYPE init_out() \
+    { \
+        return INIT_OUT; \
+    } \
+    \
+    TYPE operator()(const TYPE& x, const TYPE& y) \
+    { \
+        return HOST_FUNC_EXPRESSION; \
+    } \
+};
+
+// Arithmetic operations. (std::numeric_limits<TYPE>::max)() is parenthesized,
+// which keeps a function-like max macro from being expanded here.
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_add, fetch_add, x + y, 0)
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_sub, fetch_sub, x - y, (std::numeric_limits<TYPE>::max)())
+
+// Bitwise operations.
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_and, fetch_and, x & y, (std::numeric_limits<TYPE>::max)())
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_or,  fetch_or,  x | y, 0)
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_xor, fetch_xor, x ^ y, 0)
+
+// Min/max operations.
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_max, fetch_max, (std::max)(x, y), 0)
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_min, fetch_min, (std::min)(x, y), (std::numeric_limits<TYPE>::max)())
+
+#undef DEF_ATOMIC_FETCH_FUNC
+
+
+// Test case: runs test_atomic_fetch_func for each of the seven fetch
+// operations with cl_int, cl_uint, cl_long and cl_ulong elements, using
+// n_elems as the element count. Returns CL_SUCCESS when every run returned
+// CL_SUCCESS and -1 otherwise.
+AUTO_TEST_CASE(test_atomic_fetch)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;      // OR of all results seen so far
+    int last_error = CL_SUCCESS; // result of the most recent run
+
+// Runs one operation/type combination and accumulates its status in `error`.
+// NOTE(review): CHECK_ERROR is defined elsewhere; presumably it only reports
+// the failure and lets the remaining combinations run - confirm.
+#define TEST_ATOMIC_MACRO(TEST_CLASS) \
+    last_error = test_atomic_fetch_func( \
+        device, context, queue, n_elems, TEST_CLASS \
+    ); \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+
+    TEST_ATOMIC_MACRO((atomic_fetch_add<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_add<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_add<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_add<cl_ulong>()))
+
+    TEST_ATOMIC_MACRO((atomic_fetch_sub<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_sub<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_sub<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_sub<cl_ulong>()))
+
+    TEST_ATOMIC_MACRO((atomic_fetch_and<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_and<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_and<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_and<cl_ulong>()))
+
+    TEST_ATOMIC_MACRO((atomic_fetch_or<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_or<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_or<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_or<cl_ulong>()))
+
+    TEST_ATOMIC_MACRO((atomic_fetch_xor<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_xor<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_xor<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_xor<cl_ulong>()))
+
+    TEST_ATOMIC_MACRO((atomic_fetch_max<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_max<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_max<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_max<cl_ulong>()))
+
+    TEST_ATOMIC_MACRO((atomic_fetch_min<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_min<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_min<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_min<cl_ulong>()))
+
+#undef TEST_ATOMIC_MACRO
+
+    // `error` is a bitwise OR of status codes, so it is only meaningful as
+    // "zero / non-zero"; collapse any failure to -1.
+    if (error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_ATOMICS_ATOMIC_FETCH_HPP

diff --git a/test_conformance/clcpp/atomics/main.cpp b/test_conformance/clcpp/atomics/main.cpp
new file mode 100644
index 0000000..7103998
--- /dev/null
+++ b/test_conformance/clcpp/atomics/main.cpp

@@ -0,0 +1,25 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "atomic_fetch.hpp"
+
+
+// Entry point of the atomics test binary: hands every test definition held by
+// the global autotest suite to the conformance test harness and returns the
+// harness result.
+int main(int argc, const char *argv[])
+{
+    auto& suite_tests = autotest::test_suite::global_test_suite().test_defs;
+    const auto test_count = suite_tests.size();
+    return runTestHarness(argc, argv, test_count, suite_tests.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/attributes/CMakeLists.txt b/test_conformance/clcpp/attributes/CMakeLists.txt
new file mode 100644
index 0000000..1b1c15a
--- /dev/null
+++ b/test_conformance/clcpp/attributes/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Build description for the clcpp/attributes conformance test module.
+set(MODULE_NAME CPP_ATTRIBUTES)
+
+# main.cpp is the only source file listed for this module.
+set(${MODULE_NAME}_SOURCES main.cpp)
+
+# Included after the variables above are set; presumably CMakeCommon.txt
+# consumes MODULE_NAME and ${MODULE_NAME}_SOURCES (verify in that file).
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/attributes/main.cpp b/test_conformance/clcpp/attributes/main.cpp
new file mode 100644
index 0000000..765867e
--- /dev/null
+++ b/test_conformance/clcpp/attributes/main.cpp

@@ -0,0 +1,27 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_ivdep.hpp"
+#include "test_max_size.hpp"
+#include "test_required_num_sub_groups.hpp"
+
+
+// Entry point of the attributes test binary: hands every test definition held
+// by the global autotest suite to the conformance test harness and returns the
+// harness result.
+int main(int argc, const char *argv[])
+{
+    auto& suite_tests = autotest::test_suite::global_test_suite().test_defs;
+    const auto test_count = suite_tests.size();
+    return runTestHarness(argc, argv, test_count, suite_tests.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/attributes/test_ivdep.hpp b/test_conformance/clcpp/attributes/test_ivdep.hpp
new file mode 100644
index 0000000..17b1f58
--- /dev/null
+++ b/test_conformance/clcpp/attributes/test_ivdep.hpp

@@ -0,0 +1,418 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_IVDEP_HPP
+#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_IVDEP_HPP
+
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+
+
+namespace test_ivdep {
+
+// Which loop statement generate_source emits for the two test loops.
+enum class loop_kind
+{
+    for_loop,   // for (init; cond; incr) { ... }
+    while_loop, // while (cond) { ... }
+    do_loop     // do { ... } while (cond);
+};
+
+// One configuration of the ivdep test: selects the shape of the generated
+// kernel source and the values used by the kernel and the host reference.
+struct test_options
+{
+    loop_kind loop;        // loop statement used for both generated loops
+    int ivdep_length;      // > 0: OpenCL C++ kernel gets [[cl::ivdep]] on loop #1 and
+                           // [[cl::ivdep(ivdep_length)]] on loop #2; otherwise no attribute
+    int offset1;           // index offset used by loop #1: a[i + offset1] = b[...] * c[...]
+    int offset2;           // write offset used by loop #2: a[i + offset2] = a[i] + b[i]
+    int iter_count;        // loop bound: i runs from 0 while i < iter_count, in steps of 2
+    bool offset1_param;    // true: source refers to kernel argument "offset1" instead of the literal
+    bool offset2_param;    // true: source refers to kernel argument "offset2" instead of the literal
+    bool iter_count_param; // true: source refers to kernel argument "iter_count" instead of the literal
+    bool cond_in_header;   // true: condition sits in the loop header; false: the body starts
+                           // with "if (!(cond)) break;" and the header condition is empty/"true"
+    bool init_in_header;   // true: "i = 0" sits in the for header; false: emitted before the loop
+    bool incr_in_header;   // true: "i += 2" sits in the for header; false: emitted at the end of the body
+};
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C variant of the kernel generator (development builds only).
+// Produces kernel "test" containing two loops over i = 0, 2, 4, ... while
+// i < iter_count:
+//   loop #1: a[i + offset1] = b[i + offset1] * c[i + offset1];
+//   loop #2: a[i + offset2] = a[i] + b[i];
+// The options choose the loop statement, where init/condition/increment are
+// placed, and whether offsets and iteration count are literals or kernel
+// arguments. No ivdep attribute is emitted here; options.ivdep_length is not
+// consulted.
+std::string generate_source(test_options options)
+{
+    // Either the kernel argument name or the literal value, per option.
+    std::string offset1s = options.offset1_param ? "offset1" : std::to_string(options.offset1);
+    std::string offset2s = options.offset2_param ? "offset2" : std::to_string(options.offset2);
+
+    std::string init = "i = 0";
+    std::string cond = std::string("i < ") + (options.iter_count_param ? "iter_count" : std::to_string(options.iter_count));
+    std::string incr = "i += 2";
+
+    std::stringstream s;
+    s << R"(
+    kernel void test(global int *a, global int *b, global int *c, int offset1, int offset2, int iter_count)
+    {
+        int i;
+    )";
+
+    // Loop #1
+    if (!options.init_in_header) s << init << ";" << std::endl;
+    if (options.loop == loop_kind::for_loop)
+        s << "for (" <<
+            (options.init_in_header ? init : "") << ";" <<
+            (options.cond_in_header ? cond : "") << ";" <<
+            (options.incr_in_header ? incr : "") << ")";
+    else if (options.loop == loop_kind::while_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ")";
+    else if (options.loop == loop_kind::do_loop)
+        s << "do";
+    s << "{" << std::endl;
+    // Without a header condition the loop is left through an explicit break.
+    if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl;
+    s << "a[i + " << offset1s << "] = b[i + " << offset1s << "] * c[i + " << offset1s << "];" << std::endl;
+    if (!options.incr_in_header) s << incr << ";" << std::endl;
+    s << "}" << std::endl;
+    // A do-loop carries its condition after the body.
+    if (options.loop == loop_kind::do_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl;
+
+    // Loop #2
+    if (!options.init_in_header) s << init << ";" << std::endl;
+    if (options.loop == loop_kind::for_loop)
+        s << "for (" <<
+            (options.init_in_header ? init : "") << ";" <<
+            (options.cond_in_header ? cond : "") << ";" <<
+            (options.incr_in_header ? incr : "") << ")";
+    else if (options.loop == loop_kind::while_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ")";
+    else if (options.loop == loop_kind::do_loop)
+        s << "do";
+    s << "{" << std::endl;
+    if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl;
+    s << "a[i + " << offset2s << "] = a[i] + b[i];" << std::endl;
+    if (!options.incr_in_header) s << incr << ";" << std::endl;
+    s << "}" << std::endl;
+    if (options.loop == loop_kind::do_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl;
+
+    // Closes the kernel body opened in the raw string above.
+    s << "}" << std::endl;
+
+    return s.str();
+}
+#else
+// OpenCL C++ variant of the kernel generator (the normal build).
+// Produces kernel "test" containing two loops over i = 0, 2, 4, ... while
+// i < iter_count:
+//   loop #1: a[i + offset1] = b[i + offset1] * c[i + offset1];
+//   loop #2: a[i + offset2] = a[i] + b[i];
+// When options.ivdep_length > 0, loop #1 is preceded by [[cl::ivdep]] and
+// loop #2 by [[cl::ivdep(ivdep_length)]]. The remaining options choose the
+// loop statement, where init/condition/increment are placed, and whether
+// offsets and iteration count are literals or kernel arguments.
+std::string generate_source(test_options options)
+{
+    // Either the kernel argument name or the literal value, per option.
+    std::string offset1s = options.offset1_param ? "offset1" : std::to_string(options.offset1);
+    std::string offset2s = options.offset2_param ? "offset2" : std::to_string(options.offset2);
+
+    std::string init = "i = 0";
+    std::string cond = std::string("i < ") + (options.iter_count_param ? "iter_count" : std::to_string(options.iter_count));
+    std::string incr = "i += 2";
+
+    std::stringstream s;
+    s << R"(
+    #include <opencl_memory>
+    #include <opencl_work_item>
+
+    using namespace cl;
+    )";
+    s << R"(
+    kernel void test(global_ptr<int[]> a, global_ptr<int[]> b, global_ptr<int[]> c, int offset1, int offset2, int iter_count)
+    {
+        int i;
+    )";
+
+    // Loop #1
+    if (!options.init_in_header) s << init << ";" << std::endl;
+    // Attribute without an argument, placed directly in front of the loop statement.
+    if (options.ivdep_length > 0) s << "[[cl::ivdep]]" << std::endl;
+    if (options.loop == loop_kind::for_loop)
+        s << "for (" <<
+            (options.init_in_header ? init : "") << ";" <<
+            (options.cond_in_header ? cond : "") << ";" <<
+            (options.incr_in_header ? incr : "") << ")";
+    else if (options.loop == loop_kind::while_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ")";
+    else if (options.loop == loop_kind::do_loop)
+        s << "do";
+    s << "{" << std::endl;
+    // Without a header condition the loop is left through an explicit break.
+    if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl;
+    s << "a[i + " << offset1s << "] = b[i + " << offset1s << "] * c[i + " << offset1s << "];" << std::endl;
+    if (!options.incr_in_header) s << incr << ";" << std::endl;
+    s << "}" << std::endl;
+    // A do-loop carries its condition after the body.
+    if (options.loop == loop_kind::do_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl;
+
+    // Loop #2
+    if (!options.init_in_header) s << init << ";" << std::endl;
+    // Attribute with the explicit length argument.
+    if (options.ivdep_length > 0) s << "[[cl::ivdep(" << options.ivdep_length << ")]]" << std::endl;
+    if (options.loop == loop_kind::for_loop)
+        s << "for (" <<
+            (options.init_in_header ? init : "") << ";" <<
+            (options.cond_in_header ? cond : "") << ";" <<
+            (options.incr_in_header ? incr : "") << ")";
+    else if (options.loop == loop_kind::while_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ")";
+    else if (options.loop == loop_kind::do_loop)
+        s << "do";
+    s << "{" << std::endl;
+    if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl;
+    s << "a[i + " << offset2s << "] = a[i] + b[i];" << std::endl;
+    if (!options.incr_in_header) s << incr << ";" << std::endl;
+    s << "}" << std::endl;
+    if (options.loop == loop_kind::do_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl;
+
+    // Closes the kernel body opened in the raw string above.
+    s << "}" << std::endl;
+
+    return s.str();
+}
+#endif
+
+// Runs one ivdep configuration end to end.
+//
+// Builds the kernel produced by generate_source(options), runs it as a single
+// work-item on three 100-element int buffers (a = 0, b[i] = i, c = 1), reads
+// `a` back and compares it with the same two loops replayed on the host.
+//
+// Returns the last OpenCL status on success; on the first mismatching element
+// it goes through RETURN_ON_ERROR_MSG(-1, ...).
+//
+// NOTE(review): the RETURN_ON_* macros are defined elsewhere; if they return
+// immediately (as their names suggest), the clRelease* calls at the end are
+// skipped on every error path - confirm whether that is acceptable here.
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+{
+    int error = CL_SUCCESS;
+
+    cl_program program;
+    cl_kernel kernel;
+
+    std::string kernel_name = "test";
+    std::string source = generate_source(options);
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    // In this configuration the function stops here: nothing is executed.
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    const size_t count = 100;
+    // The loops run sequentially inside the kernel, so one work-item suffices.
+    const size_t global_size = 1;
+
+    std::vector<int> a(count);
+    std::vector<int> b(count);
+    std::vector<int> c(count);
+    for (size_t i = 0; i < count; i++)
+    {
+        a[i] = 0;
+        b[i] = static_cast<int>(i); // explicit: size_t -> int, i < 100
+        c[i] = 1;
+    }
+
+    // CL_MEM_COPY_HOST_PTR copies the host vectors at creation time, so the
+    // vectors stay free to hold the host-side reference afterwards.
+    cl_mem a_buffer;
+    a_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        sizeof(int) * count, static_cast<void *>(a.data()), &error
+    );
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    cl_mem b_buffer;
+    b_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        sizeof(int) * count, static_cast<void *>(b.data()), &error
+    );
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    cl_mem c_buffer;
+    c_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        sizeof(int) * count, static_cast<void *>(c.data()),&error
+    );
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    // Arguments 3..5 are always set, even when the generated source embeds
+    // the literal values and never reads them.
+    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &b_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &c_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 3, sizeof(cl_int), &options.offset1);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 4, sizeof(cl_int), &options.offset2);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 5, sizeof(cl_int), &options.iter_count);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    // Blocking read (CL_TRUE): a_output is valid once this call returns.
+    std::vector<int> a_output(count);
+    error = clEnqueueReadBuffer(
+        queue, a_buffer, CL_TRUE,
+        0, sizeof(int) * count,
+        static_cast<void *>(a_output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    // Host reference: replay loop #1 and loop #2 of the kernel on `a`.
+    for (int i = 0; i < options.iter_count; i += 2)
+    {
+        a[i + options.offset1] = b[i + options.offset1] * c[i + options.offset1];
+    }
+
+    for (int i = 0; i < options.iter_count; i += 2)
+    {
+        a[i + options.offset2] = a[i] + b[i];
+    }
+
+    for (size_t i = 0; i < count; i++)
+    {
+        const int value = a_output[i];
+        const int expected = a[i];
+        if (value != expected)
+        {
+            // %lu expects unsigned long; size_t is a different type on some
+            // platforms (e.g. 64-bit Windows), so convert explicitly.
+            RETURN_ON_ERROR_MSG(-1,
+                "Test failed. Element %lu: %d should be: %d",
+                static_cast<unsigned long>(i), value, expected
+            );
+        }
+    }
+
+    clReleaseMemObject(a_buffer);
+    clReleaseMemObject(b_buffer);
+    clReleaseMemObject(c_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+}
+
+// Parameter sets shared by the ivdep test cases. The test cases read each
+// tuple as (ivdep_length, offset1, offset2).
+const std::vector<std::tuple<int, int, int>> params{
+    std::make_tuple(-1, 0, 0),
+    std::make_tuple(-1, 3, 4),
+    std::make_tuple(1, 1, 1),
+    std::make_tuple(3, 4, 2),
+    std::make_tuple(3, 4, 3),
+    std::make_tuple(8, 10, 7),
+    std::make_tuple(16, 16, 16)
+};
+
+// Loop bounds exercised for every parameter set.
+const std::vector<int> iter_counts{ 1, 4, 12, 40 };
+
+// Test case: runs test() with a for-loop kernel for every combination of
+// parameter set, iteration count, literal-vs-argument choice for both offsets
+// and the iteration count, and header-vs-body placement of the loop
+// condition, initialization and increment. Stops at the first failing
+// combination (via RETURN_ON_ERROR).
+AUTO_TEST_CASE(test_ivdep_for)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error = CL_SUCCESS;
+
+    for (const auto &combo : params)
+    for (const int iterations : iter_counts)
+    for (const bool offset1_is_arg : { false, true })
+    for (const bool offset2_is_arg : { false, true })
+    for (const bool iterations_is_arg : { false, true })
+    for (const bool cond_header : { false, true })
+    for (const bool init_header : { false, true })
+    for (const bool incr_header : { false, true })
+    {
+        // Aggregate initialization; values are listed in declaration order.
+        const test_options options{
+            loop_kind::for_loop,   // loop
+            std::get<0>(combo),    // ivdep_length
+            std::get<1>(combo),    // offset1
+            std::get<2>(combo),    // offset2
+            iterations,            // iter_count
+            offset1_is_arg,        // offset1_param
+            offset2_is_arg,        // offset2_param
+            iterations_is_arg,     // iter_count_param
+            cond_header,           // cond_in_header
+            init_header,           // init_in_header
+            incr_header            // incr_in_header
+        };
+
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+
+    return error;
+}
+
+// Test case: runs test() with a while-loop kernel for every combination of
+// parameter set, iteration count, literal-vs-argument choice for both offsets
+// and the iteration count, and header-vs-body placement of the loop
+// condition. Initialization and increment are always emitted outside the
+// header. Stops at the first failing combination (via RETURN_ON_ERROR).
+AUTO_TEST_CASE(test_ivdep_while)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error = CL_SUCCESS;
+
+    for (const auto &combo : params)
+    for (const int iterations : iter_counts)
+    for (const bool offset1_is_arg : { false, true })
+    for (const bool offset2_is_arg : { false, true })
+    for (const bool iterations_is_arg : { false, true })
+    for (const bool cond_header : { false, true })
+    {
+        // Aggregate initialization; values are listed in declaration order.
+        const test_options options{
+            loop_kind::while_loop, // loop
+            std::get<0>(combo),    // ivdep_length
+            std::get<1>(combo),    // offset1
+            std::get<2>(combo),    // offset2
+            iterations,            // iter_count
+            offset1_is_arg,        // offset1_param
+            offset2_is_arg,        // offset2_param
+            iterations_is_arg,     // iter_count_param
+            cond_header,           // cond_in_header
+            false,                 // init_in_header
+            false                  // incr_in_header
+        };
+
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+
+    return error;
+}
+
+// Test case: runs test() with a do-while kernel for every combination of
+// parameter set, iteration count, literal-vs-argument choice for both offsets
+// and the iteration count, and header-vs-body placement of the loop
+// condition. Initialization and increment are always emitted outside the
+// header. Stops at the first failing combination (via RETURN_ON_ERROR).
+AUTO_TEST_CASE(test_ivdep_do)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error = CL_SUCCESS;
+
+    for (const auto &combo : params)
+    for (const int iterations : iter_counts)
+    for (const bool offset1_is_arg : { false, true })
+    for (const bool offset2_is_arg : { false, true })
+    for (const bool iterations_is_arg : { false, true })
+    for (const bool cond_header : { false, true })
+    {
+        // Aggregate initialization; values are listed in declaration order.
+        const test_options options{
+            loop_kind::do_loop,    // loop
+            std::get<0>(combo),    // ivdep_length
+            std::get<1>(combo),    // offset1
+            std::get<2>(combo),    // offset2
+            iterations,            // iter_count
+            offset1_is_arg,        // offset1_param
+            offset2_is_arg,        // offset2_param
+            iterations_is_arg,     // iter_count_param
+            cond_header,           // cond_in_header
+            false,                 // init_in_header
+            false                  // incr_in_header
+        };
+
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+
+    return error;
+}
+
+} // namespace test_ivdep
+
+#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_IVDEP_HPP

diff --git a/test_conformance/clcpp/attributes/test_max_size.hpp b/test_conformance/clcpp/attributes/test_max_size.hpp
new file mode 100644
index 0000000..15e7ead
--- /dev/null
+++ b/test_conformance/clcpp/attributes/test_max_size.hpp

@@ -0,0 +1,266 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_MAX_SIZE_HPP
+#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_MAX_SIZE_HPP
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+
+
+namespace test_max_size {
+
+// Address space of the kernel argument that carries the max_size attribute.
+enum class address_space { constant, local };
+
+// How the kernel parameter is spelled in the generated OpenCL C++ source
+// (shown for the constant address space; local uses the same three forms).
+enum class param_kind
+{
+    // constant_ptr<T>
+    ptr_type,
+    // constant<T>*
+    ptr,
+    // constant<T>&
+    ref
+};
+
+// Every parameter spelling, in declaration order, for the test cases to iterate.
+const param_kind param_kinds[] = { param_kind::ptr_type, param_kind::ptr, param_kind::ref };
+
+// Configuration of a single max_size test run.
+struct test_options
+{
+    // Address space of the kernel argument (constant or local).
+    address_space space;
+    // Value used for the cl::max_size attribute, in bytes of argument size.
+    int max_size;
+    // When true the attribute value comes from a specialization constant.
+    bool spec_const;
+    // Spelling of the kernel parameter (see param_kind).
+    param_kind kind;
+    // When true the pointee type is int2[] instead of int2.
+    bool array;
+};
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C variant: an empty kernel taking an int2 pointer in the requested
+// address space. No max_size attribute is emitted in this variant.
+std::string generate_source(test_options options)
+{
+    const char *space_qualifier =
+        (options.space == address_space::constant) ? "constant" : "local";
+
+    std::stringstream kernel_src;
+    kernel_src << "kernel void test(";
+    kernel_src << space_qualifier;
+    kernel_src << " int2 *input) { }" << std::endl;
+    return kernel_src.str();
+}
+#else
+// OpenCL C++ variant: an empty kernel whose single parameter carries the
+// [[cl::max_size(...)]] attribute, spelled according to `options`.
+std::string generate_source(test_options options)
+{
+    // Pointee type of the parameter.
+    const std::string element_type = options.array ? "int2[]" : "int2";
+
+    // Attribute argument: the spec constant's name or the literal size.
+    const std::string max_size_arg =
+        options.spec_const ? "max_size_spec" : std::to_string(options.max_size);
+
+    const std::string space_class =
+        (options.space == address_space::constant) ? "cl::constant" : "cl::local";
+
+    // Text appended to the address-space class name to form the parameter type.
+    std::string type_suffix;
+    switch (options.kind)
+    {
+        case param_kind::ptr_type:
+            type_suffix = "_ptr<" + element_type + ">";
+            break;
+        case param_kind::ptr:
+            type_suffix = "<" + element_type + ">*";
+            break;
+        case param_kind::ref:
+            type_suffix = "<" + element_type + ">&";
+            break;
+    }
+
+    std::stringstream kernel_src;
+    kernel_src << "#include <opencl_memory>" << std::endl;
+    if (options.spec_const)
+    {
+        kernel_src << "#include <opencl_spec_constant>" << std::endl;
+        kernel_src << "cl::spec_constant<int, 1> max_size_spec{ 1234567890 };" << std::endl;
+    }
+
+    kernel_src << "kernel void test(";
+    kernel_src << "[[cl::max_size(" << max_size_arg << ")]] ";
+    kernel_src << space_class << type_suffix;
+    kernel_src << " input) { }" << std::endl;
+
+    return kernel_src.str();
+}
+#endif
+
+// Builds the kernel generated for `options` and checks how clSetKernelArg
+// reacts to argument sizes around options.max_size.
+//
+// Sizes 1, max_size / 2 and max_size must be accepted, after which the kernel
+// is enqueued and the queue finished. Sizes max_size + 1 and max_size * 2 must
+// make clSetKernelArg report CL_MAX_SIZE_RESTRICTION_EXCEEDED.
+//
+// Returns CL_SUCCESS when every size behaved as expected; otherwise the
+// RETURN_ON_* macros return an error status from the failing step.
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+{
+    int error = CL_SUCCESS;
+
+    cl_program program;
+    cl_kernel kernel;
+
+    std::string kernel_name = "test";
+    std::string source = generate_source(options);
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    const char *source_c_str = source.c_str();
+    error = create_openclcpp_program(context, &program, 1, &source_c_str, "");
+    RETURN_ON_ERROR(error)
+
+    if (options.spec_const)
+    {
+        // Replace the placeholder default of spec constant 1 with the real limit.
+        error = clSetProgramSpecializationConstant(program, 1, sizeof(cl_int), &options.max_size);
+        RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    }
+
+    error = build_program_create_kernel_helper(
+        context, &program, &kernel, 1, &source_c_str, kernel_name.c_str()
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    // Argument sizes to probe: three within the limit, two above it.
+    const int max_size = options.max_size;
+    const int sizes[] = {
+        1,
+        max_size / 2,
+        max_size,
+        max_size + 1,
+        max_size * 2
+    };
+
+    for (int size : sizes)
+    {
+        cl_mem const_buffer;
+        if (options.space == address_space::constant)
+        {
+            const_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &error);
+            RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+            error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &const_buffer);
+            // Check the status later (depending on size and max_size values)
+        }
+        else if (options.space == address_space::local)
+        {
+            error = clSetKernelArg(kernel, 0, size, NULL);
+            // Check the status later (depending on size and max_size values)
+        }
+
+        if (size <= max_size)
+        {
+            // Correct value, must not fail
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+            const size_t global_size = 123;
+            error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
+            RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+            error = clFinish(queue);
+            RETURN_ON_CL_ERROR(error, "clFinish")
+        }
+        else
+        {
+            // Incorrect value, must fail
+            if (error != CL_MAX_SIZE_RESTRICTION_EXCEEDED)
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "clSetKernelArg must fail with CL_MAX_SIZE_RESTRICTION_EXCEEDED,"
+                    " but returned %s (%d)", get_cl_error_string(error).c_str(), error
+                );
+            }
+
+            // The failure was the expected outcome. Clear it so that it is not
+            // reported as the result of the test: for the local address space
+            // nothing below overwrites `error`, and the last probed size is
+            // always an over-limit one.
+            error = CL_SUCCESS;
+        }
+
+        if (options.space == address_space::constant)
+        {
+            error = clReleaseMemObject(const_buffer);
+            RETURN_ON_CL_ERROR(error, "clReleaseMemObject")
+        }
+    }
+
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+}
+
+// Runs the max_size test for constant-address-space parameters over every
+// combination of spec-constant usage, parameter spelling and array-ness.
+// The attribute value is half of CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, capped so
+// that it fits the int-based arithmetic used by test().
+AUTO_TEST_CASE(test_max_size_constant)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error = CL_SUCCESS;
+
+    cl_ulong max_size;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(max_size), &max_size, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+
+    // test_options::max_size is an int and test() also probes max_size * 2.
+    // Cap the value at CL_INT_MAX / 2 so that neither the narrowing here nor
+    // the doubling there overflows on devices that report a very large limit.
+    const cl_ulong half_device_limit = max_size / 2;
+    const cl_ulong largest_safe_value = static_cast<cl_ulong>(CL_INT_MAX / 2);
+    const int attribute_max_size = static_cast<int>(
+        half_device_limit < largest_safe_value ? half_device_limit : largest_safe_value);
+
+    for (bool spec_const : { false, true })
+    for (auto kind : param_kinds)
+    for (bool array : { false, true })
+    {
+        test_options options;
+        options.space = address_space::constant;
+        options.max_size = attribute_max_size;
+        options.spec_const = spec_const;
+        options.kind = kind;
+        options.array = array;
+
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+
+    return error;
+}
+
+// Runs the max_size test for local-address-space parameters over every
+// combination of spec-constant usage, parameter spelling and array-ness.
+// The attribute value is half of CL_DEVICE_LOCAL_MEM_SIZE, capped so that it
+// fits the int-based arithmetic used by test().
+AUTO_TEST_CASE(test_max_size_local)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error = CL_SUCCESS;
+
+    cl_ulong max_size;
+    error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_size), &max_size, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+
+    // test_options::max_size is an int and test() also probes max_size * 2.
+    // Cap the value at CL_INT_MAX / 2 so that neither the narrowing here nor
+    // the doubling there can overflow.
+    const cl_ulong half_device_limit = max_size / 2;
+    const cl_ulong largest_safe_value = static_cast<cl_ulong>(CL_INT_MAX / 2);
+    const int attribute_max_size = static_cast<int>(
+        half_device_limit < largest_safe_value ? half_device_limit : largest_safe_value);
+
+    for (bool spec_const : { false, true })
+    for (auto kind : param_kinds)
+    for (bool array : { false, true })
+    {
+        test_options options;
+        options.space = address_space::local;
+        options.max_size = attribute_max_size;
+        options.spec_const = spec_const;
+        options.kind = kind;
+        options.array = array;
+
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+
+    return error;
+}
+
+} // namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_MAX_SIZE_HPP

diff --git a/test_conformance/clcpp/attributes/test_required_num_sub_groups.hpp b/test_conformance/clcpp/attributes/test_required_num_sub_groups.hpp
new file mode 100644
index 0000000..2380eaf
--- /dev/null
+++ b/test_conformance/clcpp/attributes/test_required_num_sub_groups.hpp

@@ -0,0 +1,285 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP
+#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP
+
+#include <sstream>
+#include <string>
+#include <vector>
+#include <random>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+
+
+namespace test_required_num_sub_groups {
+
+// Configuration of a single required_num_sub_groups test run.
+struct test_options
+{
+    // Value used for the cl::required_num_sub_groups attribute.
+    size_t num_sub_groups;
+    // When true the attribute value comes from a specialization constant.
+    bool spec_const;
+    // Upper bound on the number of work-items per enqueue; also sizes the output buffer.
+    size_t max_count;
+    // Number of random ND-range rounds (each round covers 1, 2 and 3 dimensions).
+    size_t num_tests;
+};
+
+// Host-side mirror of the per-work-item record written by the kernel
+// (the kernel-side struct of the same name is declared in source_common).
+struct output_type
+{
+    // Value the kernel stored from get_num_sub_groups().
+    cl_ulong num_sub_groups;
+    // Value the kernel stored from get_enqueued_num_sub_groups().
+    cl_ulong enqueued_num_sub_groups;
+};
+
+// Kernel-side declaration of output_type, shared by both generate_source()
+// variants; its fields must stay in step with the host-side struct above.
+const std::string source_common = R"(
+struct output_type
+{
+    ulong num_sub_groups;
+    ulong enqueued_num_sub_groups;
+};
+)";
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C variant: records both sub-group counts for every work-item.
+// No required_num_sub_groups attribute is emitted in this variant.
+std::string generate_source(test_options options)
+{
+    std::stringstream kernel_src;
+    kernel_src << source_common << R"(
+    #pragma OPENCL EXTENSION cl_khr_subgroups : enable
+
+    kernel void test(global struct output_type *output)
+    {
+        const ulong gid = get_global_linear_id();
+        output[gid].num_sub_groups = get_num_sub_groups();
+        output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups();
+    }
+    )";
+
+    return kernel_src.str();
+}
+#else
+// OpenCL C++ variant: same kernel body, decorated with
+// [[cl::required_num_sub_groups(...)]] taken from `options`.
+std::string generate_source(test_options options)
+{
+    // Attribute argument: the spec constant's name or the literal count.
+    const std::string attribute_arg =
+        options.spec_const ? "num_sub_groups_spec" : std::to_string(options.num_sub_groups);
+
+    std::stringstream kernel_src;
+    kernel_src << R"(
+    #include <opencl_memory>
+    #include <opencl_work_item>
+    using namespace cl;
+    )";
+
+    if (options.spec_const)
+    {
+        kernel_src << "#include <opencl_spec_constant>" << std::endl;
+        kernel_src << "cl::spec_constant<uint, 1> num_sub_groups_spec{ 1234567890 };" << std::endl;
+    }
+
+    kernel_src << source_common << std::endl;
+    kernel_src << "[[cl::required_num_sub_groups(" << attribute_arg << ")]]";
+    kernel_src << R"(
+    kernel void test(global_ptr<output_type[]> output)
+    {
+        const ulong gid = get_global_linear_id();
+        output[gid].num_sub_groups = get_num_sub_groups();
+        output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups();
+    }
+    )";
+
+    return kernel_src.str();
+}
+#endif
+
+// Builds the kernel generated for `options` and verifies the sub-group count.
+//
+// Steps:
+//  1. CL_KERNEL_COMPILE_NUM_SUB_GROUPS must equal options.num_sub_groups.
+//  2. For options.num_tests rounds and for 1, 2 and 3 dimensions, a random
+//     global size (at most options.max_count work-items) is chosen, the local
+//     size is queried with CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT and
+//     cross-checked with CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE.
+//  3. The kernel is run and every work-item's record is checked:
+//     enqueued_num_sub_groups must equal the required count and
+//     num_sub_groups must not exceed it.
+//
+// Returns CL_SUCCESS when all checks pass; otherwise the RETURN_ON_* macros
+// return an error status from the failing step.
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+{
+    int error = CL_SUCCESS;
+
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    if (!is_extension_available(device, "cl_khr_subgroups"))
+    {
+        log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n");
+        return CL_SUCCESS;
+    }
+#endif
+
+    cl_program program;
+    cl_kernel kernel;
+
+    std::string kernel_name = "test";
+    std::string source = generate_source(options);
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "-cl-std=CL2.0", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    const char *source_c_str = source.c_str();
+    error = create_openclcpp_program(context, &program, 1, &source_c_str, "");
+    RETURN_ON_ERROR(error)
+
+    if (options.spec_const)
+    {
+        // The kernel declares the spec constant as uint, so pass a cl_uint.
+        cl_uint spec_num_sub_groups = static_cast<cl_uint>(options.num_sub_groups);
+        error = clSetProgramSpecializationConstant(program, 1, sizeof(cl_uint), &spec_num_sub_groups);
+        RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    }
+
+    error = build_program_create_kernel_helper(
+        context, &program, &kernel, 1, &source_c_str, kernel_name.c_str()
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    size_t compile_num_sub_groups;
+    error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
+        0, NULL,
+        sizeof(size_t), &compile_num_sub_groups, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+    if (compile_num_sub_groups != options.num_sub_groups)
+    {
+        // %zu is the portable conversion for size_t (%lu is wrong on LLP64 targets).
+        RETURN_ON_ERROR_MSG(-1,
+            "CL_KERNEL_COMPILE_NUM_SUB_GROUPS did not return correct value (expected %zu, got %zu)",
+            options.num_sub_groups, compile_num_sub_groups
+        )
+    }
+
+    // One record per work-item; sized for the largest possible enqueue.
+    cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * options.max_count, NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<size_t> count_dis(1, options.max_count);
+
+    for (size_t test = 0; test < options.num_tests; test++)
+    {
+        for (size_t dim = 1; dim <= 3; dim++)
+        {
+            // Draw each used dimension from [1, count^(1/dim)] so that the
+            // product of the dimensions cannot exceed the drawn count.
+            size_t global_size[3] = { 1, 1, 1 };
+            size_t count = count_dis(gen);
+            std::uniform_int_distribution<size_t> global_size_dis(1, static_cast<size_t>(pow(count, 1.0 / dim)));
+            for (size_t d = 0; d < dim; d++)
+            {
+                global_size[d] = global_size_dis(gen);
+            }
+            // Actual number of work-items for this enqueue.
+            count = global_size[0] * global_size[1] * global_size[2];
+
+            size_t local_size[3] = { 1, 1, 1 };
+            error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
+                sizeof(size_t), &options.num_sub_groups,
+                sizeof(size_t) * dim, local_size, NULL);
+            RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+            if (local_size[0] == 0 || local_size[1] != 1 || local_size[2] != 1)
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT did not return correct value"
+                )
+            }
+
+            size_t sub_group_count_for_ndrange;
+            error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
+                sizeof(size_t) * dim, local_size,
+                sizeof(size_t), &sub_group_count_for_ndrange, NULL);
+            RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+            if (sub_group_count_for_ndrange != options.num_sub_groups)
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE did not return correct value (expected %zu, got %zu)",
+                    options.num_sub_groups, sub_group_count_for_ndrange
+                )
+            }
+
+            // Zero the records that this enqueue is going to write.
+            const char pattern = 0;
+            error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL);
+            RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer")
+
+            error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, global_size, local_size, 0, NULL, NULL);
+            RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+            // Blocking read (CL_TRUE) of the records written by the kernel.
+            std::vector<output_type> output(count);
+            error = clEnqueueReadBuffer(
+                queue, output_buffer, CL_TRUE,
+                0, sizeof(output_type) * count,
+                static_cast<void *>(output.data()),
+                0, NULL, NULL
+            );
+            RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+            for (size_t gid = 0; gid < count; gid++)
+            {
+                const output_type &o = output[gid];
+
+                if (o.enqueued_num_sub_groups != options.num_sub_groups)
+                {
+                    RETURN_ON_ERROR_MSG(-1, "get_enqueued_num_sub_groups does not equal to required_num_sub_groups")
+                }
+                if (o.num_sub_groups > options.num_sub_groups)
+                {
+                    RETURN_ON_ERROR_MSG(-1, "get_num_sub_groups did not return correct value")
+                }
+            }
+        }
+    }
+
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+}
+
+// Runs test() for every sub-group count from 1 up to
+// CL_DEVICE_MAX_NUM_SUB_GROUPS, first with a literal attribute value and then
+// with a specialization constant. Stops at the first failure.
+AUTO_TEST_CASE(test_required_num_sub_groups)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error = CL_SUCCESS;
+
+    cl_uint max_num_sub_groups;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS, sizeof(max_num_sub_groups), &max_num_sub_groups, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+
+    for (bool spec_const : { false, true })
+    {
+        size_t num_sub_groups = 1;
+        while (num_sub_groups <= max_num_sub_groups)
+        {
+            test_options options;
+            options.num_sub_groups = num_sub_groups;
+            options.spec_const = spec_const;
+            options.max_count = num_elements;
+            options.num_tests = 100;
+
+            error = test(device, context, queue, options);
+            RETURN_ON_ERROR(error)
+
+            num_sub_groups++;
+        }
+    }
+
+    return error;
+}
+
+} // namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP

diff --git a/test_conformance/clcpp/common.hpp b/test_conformance/clcpp/common.hpp
new file mode 100644
index 0000000..e062002
--- /dev/null
+++ b/test_conformance/clcpp/common.hpp

@@ -0,0 +1,51 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_COMMON_INC_HPP
+#define TEST_CONFORMANCE_CLCPP_COMMON_INC_HPP
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cmath>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+// harness framework
+#include "harness/compat.h"
+#include "harness/testHarness.h"
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+
+// autotest
+#include "autotest/autotest.hpp"
+
+// utils_common
+#include "utils_common/is_vector_type.hpp"
+#include "utils_common/scalar_type.hpp"
+#include "utils_common/make_vector_type.hpp"
+#include "utils_common/type_name.hpp"
+#include "utils_common/type_supported.hpp"
+#include "utils_common/vector_size.hpp"
+#include "utils_common/kernel_helpers.hpp"
+#include "utils_common/errors.hpp"
+#include "utils_common/string.hpp"
+
+// Rounds global_size up to the nearest multiple of local_size.
+//
+// Uses integer arithmetic only, so the result is exact for every size_t value
+// (a round trip through double loses precision above 2^53). A local_size of 0
+// has no multiples; global_size is then returned unchanged instead of dividing
+// by zero. Declared inline because this header may be included by more than
+// one translation unit.
+inline size_t get_uniform_global_size(size_t global_size, size_t local_size)
+{
+    if (local_size == 0)
+    {
+        return global_size;
+    }
+
+    const size_t remainder = global_size % local_size;
+    return (remainder == 0) ? global_size : global_size + (local_size - remainder);
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_COMMON_INC_HPP

diff --git a/test_conformance/clcpp/common_funcs/CMakeLists.txt b/test_conformance/clcpp/common_funcs/CMakeLists.txt
new file mode 100644
index 0000000..5e4d8b0
--- /dev/null
+++ b/test_conformance/clcpp/common_funcs/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Module name; also the prefix of the <MODULE_NAME>_SOURCES variable below.
+set(MODULE_NAME CPP_COMMON_FUNCS)
+
+# Source files of this module.
+set(${MODULE_NAME}_SOURCES main.cpp)
+
+# Common CMake include located two directories up.
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/common_funcs/common_funcs.hpp b/test_conformance/clcpp/common_funcs/common_funcs.hpp
new file mode 100644
index 0000000..d6f8c89
--- /dev/null
+++ b/test_conformance/clcpp/common_funcs/common_funcs.hpp

@@ -0,0 +1,417 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_COMMON_FUNCS_COMMON_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_COMMON_FUNCS_COMMON_FUNCS_HPP
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include <type_traits>
+#include <algorithm>
+
+// Host reference for: floatn clamp(floatn x, floatn min, floatn max) (only scalars)
+//
+// min2()/max2() and min3()/max3() bound the 2nd and 3rd argument (presumably
+// consumed by the test utilities when generating inputs). Since
+// max2() = MAX / 4000 and min3() = 1 + MAX / 4000, minval stays below maxval.
+template<class IN1, class IN2, class IN3, class OUT1>
+struct common_func_clamp : public ternary_func<IN1, IN2, IN3, OUT1>
+{
+    // Name of the tested function.
+    std::string str()
+    {
+        return "clamp";
+    }
+
+    // Header line required by the tested function.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+
+    // Reference result: x limited to [minval, maxval].
+    OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, IN3>::value
+                && std::is_same<IN3, OUT1>::value,
+            "All types must be the same"
+        );
+        // Raise to the lower bound first, then cap at the upper bound.
+        const IN1& raised = (std::max)(x, minval);
+        return (std::min)(raised, maxval);
+    }
+
+    // Lower bound for minval.
+    IN2 min2()
+    {
+        return (std::numeric_limits<IN2>::min)();
+    }
+
+    // Upper bound for minval.
+    IN2 max2()
+    {
+        const IN2 largest = (std::numeric_limits<IN2>::max)();
+        return largest / IN2(4000.0f);
+    }
+
+    // Lower bound for maxval.
+    IN3 min3()
+    {
+        const IN3 largest = (std::numeric_limits<IN3>::max)();
+        return IN3(1) + (largest / IN3(4000.0f));
+    }
+
+    // Upper bound for maxval.
+    IN3 max3()
+    {
+        const IN3 largest = (std::numeric_limits<IN3>::max)();
+        return largest / IN3(2000.0f);
+    }
+
+    // Allowed error in ULPs: the result must be exact.
+    float ulp()
+    {
+        return 0.0f;
+    }
+};
+
+// Host reference for: floatn degrees(floatn t)
+// The reference value is computed in the REFERENCE type.
+template<class IN1, class OUT1, class REFERENCE>
+struct common_func_degrees : public unary_func<IN1, OUT1>
+{
+    // Name of the tested function.
+    std::string str()
+    {
+        return "degrees";
+    }
+
+    // Header line required by the tested function.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+
+    // Reference result: (180 / pi) * x.
+    REFERENCE operator()(const IN1& x)
+    {
+        static_assert(
+            std::is_same<IN1, OUT1>::value,
+            "All types must be the same"
+        );
+        const auto degrees_per_radian = REFERENCE(180.0) / CL_M_PI;
+        return degrees_per_radian * static_cast<REFERENCE>(x);
+    }
+
+    // Allowed error in ULPs.
+    float ulp()
+    {
+        return 2.5f;
+    }
+};
+
+// Host reference for: floatn max(floatn x, floatn y)
+template<class IN1, class IN2, class OUT1>
+struct common_func_max : public binary_func<IN1, IN2, OUT1>
+{
+    // Name of the tested function.
+    std::string str()
+    {
+        return "max";
+    }
+
+    // Header line required by the tested function.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+
+    // Reference result: the larger of x and y.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "All types must be the same"
+        );
+        const IN1& larger = (std::max)(x, y);
+        return larger;
+    }
+
+    // Allowed error in ULPs: the result must be exact.
+    float ulp()
+    {
+        return 0.0f;
+    }
+};
+
+// Host reference for: floatn min(floatn x, floatn y)
+template<class IN1, class IN2, class OUT1>
+struct common_func_min : public binary_func<IN1, IN2, OUT1>
+{
+    // Name of the tested function.
+    std::string str()
+    {
+        return "min";
+    }
+
+    // Header line required by the tested function.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+
+    // Reference result: the smaller of x and y.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "All types must be the same"
+        );
+        const IN1& smaller = (std::min)(x, y);
+        return smaller;
+    }
+
+    // Allowed error in ULPs: the result must be exact.
+    float ulp()
+    {
+        return 0.0f;
+    }
+};
+
+// Host reference for: floatn mix(floatn x, floatn y, floatn a);
+// The interpolation is evaluated in double and converted to OUT1 on return.
+template<class IN1, class IN2, class IN3, class OUT1>
+struct common_func_mix : public ternary_func<IN1, IN2, IN3, OUT1>
+{
+    // Name of the tested function.
+    std::string str()
+    {
+        return "mix";
+    }
+
+    // Header line required by the tested function.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+
+    // Reference result: x + (y - x) * a.
+    OUT1 operator()(const IN1& x, const IN2& y, const IN3& a)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, IN3>::value
+                && std::is_same<IN3, OUT1>::value,
+            "All types must be the same"
+        );
+        const double from = static_cast<double>(x);
+        const double to = static_cast<double>(y);
+        const double weight = static_cast<double>(a);
+        return from + ((to - from) * weight);
+    }
+
+    // Lower bound for a: just above 0.
+    IN3 min3()
+    {
+        return IN3(0.0f + CL_FLT_EPSILON);
+    }
+
+    // Upper bound for a: just below 1.
+    IN3 max3()
+    {
+        return IN3(1.0f - CL_FLT_EPSILON);
+    }
+
+    // Results are not compared by ULP distance for this function.
+    bool use_ulp()
+    {
+        return false;
+    }
+};
+
+// Host reference for: floatn radians(floatn t)
+// The reference value is computed in the REFERENCE type.
+template<class IN1, class OUT1, class REFERENCE>
+struct common_func_radians : public unary_func<IN1, OUT1>
+{
+    // Name of the tested function.
+    std::string str()
+    {
+        return "radians";
+    }
+
+    // Header line required by the tested function.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+
+    // Reference result: (pi / 180) * x.
+    REFERENCE operator()(const IN1& x)
+    {
+        static_assert(
+            std::is_same<IN1, OUT1>::value,
+            "All types must be the same"
+        );
+        const auto radians_per_degree = CL_M_PI / REFERENCE(180.0);
+        return radians_per_degree * static_cast<REFERENCE>(x);
+    }
+
+    // Allowed error in ULPs.
+    float ulp()
+    {
+        return 2.5f;
+    }
+};
+
+// Host reference for: floatn step(floatn edge, floatn x)
+template<class IN1, class IN2, class OUT1>
+struct common_func_step : public binary_func<IN1, IN2, OUT1>
+{
+    // Name of the tested function.
+    std::string str()
+    {
+        return "step";
+    }
+
+    // Header line required by the tested function.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+
+    // Reference result: 0 when x is below edge, 1 otherwise.
+    OUT1 operator()(const IN1& edge, const IN2& x)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "All types must be the same"
+        );
+        return (x < edge) ? OUT1(0.0f) : OUT1(1.0f);
+    }
+
+    // Allowed error in ULPs: the result must be exact.
+    float ulp()
+    {
+        return 0.0f;
+    }
+};
+
+// Host reference for: floatn smoothstep(floatn edge0, floatn edge1, floatn x);
+//
+// min1()/max1() bound edge0 and min2()/max2() bound edge1. Since
+// max1() = MAX / 8000 and min2() = 1 + MAX / 4000, edge0 stays below edge1.
+template<class IN1, class IN2, class IN3, class OUT1>
+struct common_func_smoothstep : public ternary_func<IN1, IN2, IN3, OUT1>
+{
+    // Name of the tested function.
+    std::string str()
+    {
+        return "smoothstep";
+    }
+
+    // Header line required by the tested function.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+
+    // Reference result: 0 at or below edge0, 1 at or above edge1, and the
+    // Hermite polynomial t * t * (3 - 2 * t) of the normalized position between.
+    OUT1 operator()(const IN1& edge0, const IN2& edge1, const IN3& x)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, IN3>::value
+                && std::is_same<IN3, OUT1>::value,
+            "All types must be the same"
+        );
+        if(x <= edge0)
+        {
+            return OUT1(0.0f);
+        }
+        if(x >= edge1)
+        {
+            return OUT1(1.0f);
+        }
+        // Position of x between the edges, in (0, 1).
+        OUT1 t = (x - edge0) / (edge1 - edge0);
+        t = t * t * (3.0f - 2.0f * t);
+        return t;
+    }
+
+    // edge0 must be < edge1
+    // Lower bound for edge0.
+    IN1 min1()
+    {
+        return (std::numeric_limits<IN1>::min)();
+    }
+
+    // Upper bound for edge0.
+    IN1 max1()
+    {
+        return (std::numeric_limits<IN1>::max)() / IN1(8000.0f);
+    }
+
+    // Lower bound for edge1. The offset is built as IN2 (the original used
+    // IN3 here, which only matched because all types are required to be equal).
+    IN2 min2()
+    {
+        return IN2(1) + ((std::numeric_limits<IN2>::max)() / IN2(4000.0f));
+    }
+
+    // Upper bound for edge1.
+    IN2 max2()
+    {
+        return (std::numeric_limits<IN2>::max)() / IN2(2000.0f);
+    }
+
+    // Results are not compared by ULP distance for this function.
+    bool use_ulp()
+    {
+        return false;
+    }
+};
+
+// Host reference for: floatn sign(floatn t)
+//
+// Returns 1.0 for x > 0, -1.0 for x < 0, the input itself for +/-0.0 (so the
+// sign of zero is preserved) and 0.0 for NaN.
+template<class IN1, class OUT1>
+struct common_func_sign : public unary_func<IN1, OUT1>
+{
+    // Name of the tested function.
+    std::string str()
+    {
+        return "sign";
+    }
+
+    // Header line required by the tested function.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+
+    // Reference result for x; see the struct comment for the cases.
+    OUT1 operator()(const IN1& x)
+    {
+        static_assert(
+            std::is_same<IN1, OUT1>::value,
+            "All types must be the same"
+        );
+        // NaN is the only value that compares unequal to itself.
+        if(x != x)
+        {
+            return IN1(0.0f);
+        }
+        // -0.0f == +0.0f compares true, so testing against one signed zero
+        // also matches the other. Return the input to keep its own sign.
+        if(x == IN1(0.0f))
+        {
+            return x;
+        }
+        if(x > IN1(0.0f))
+        {
+            return IN1(1.0f);
+        }
+        return IN1(-1.0f);
+    }
+
+    // Results are not compared by ULP distance for this function.
+    bool use_ulp()
+    {
+        return false;
+    }
+
+    // Allowed error in ULPs.
+    float ulp()
+    {
+        return 0.0f;
+    }
+
+    // Extra inputs that are always tested: both signed zeros.
+    std::vector<IN1> in_special_cases()
+    {
+        return { -0.0f, +0.0f };
+    }
+};
+
+// Runs every common-function reference above against the device through the
+// TEST_*_FUNC_MACRO helpers, all instantiated for cl_float.
+// Returns CL_SUCCESS, or -1 when `error` ends up non-zero (presumably set by
+// the macros, which are defined outside this file).
+AUTO_TEST_CASE(test_common_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // clamp(x, min, max)
+    TEST_TERNARY_FUNC_MACRO((common_func_clamp<cl_float, cl_float, cl_float, cl_float>()))
+
+    // degrees(t), reference computed in cl_double
+    TEST_UNARY_FUNC_MACRO((common_func_degrees<cl_float, cl_float, cl_double>()))
+
+    // max(x, y)
+    TEST_BINARY_FUNC_MACRO((common_func_max<cl_float, cl_float, cl_float>()))
+
+    // min(x, y)
+    TEST_BINARY_FUNC_MACRO((common_func_min<cl_float, cl_float, cl_float>()))
+
+    // mix(x, y, a)
+    TEST_TERNARY_FUNC_MACRO((common_func_mix<cl_float, cl_float, cl_float, cl_float>()))
+
+    // radians(t), reference computed in cl_double
+    TEST_UNARY_FUNC_MACRO((common_func_radians<cl_float, cl_float, cl_double>()))
+
+    // step(edge, x)
+    TEST_BINARY_FUNC_MACRO((common_func_step<cl_float, cl_float, cl_float>()))
+
+    // smoothstep(edge0, edge1, x)
+    TEST_TERNARY_FUNC_MACRO((common_func_smoothstep<cl_float, cl_float, cl_float, cl_float>()))
+
+    // sign(t)
+    TEST_UNARY_FUNC_MACRO((common_func_sign<cl_float, cl_float>()))
+
+    // Any non-success status is reported as the generic -1.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_COMMON_FUNCS_COMMON_FUNCS_HPP

diff --git a/test_conformance/clcpp/common_funcs/main.cpp b/test_conformance/clcpp/common_funcs/main.cpp
new file mode 100644
index 0000000..a66d8f2
--- /dev/null
+++ b/test_conformance/clcpp/common_funcs/main.cpp

@@ -0,0 +1,43 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <limits>
+
+#include "../common.hpp"
+
+#include "common_funcs.hpp"
+
+int main(int argc, const char *argv[])
+{
+    // The tests need host cl_float/cl_double to be IEC 559 (IEEE 754) types; stop early if not.
+    const bool float_is_iec559 = std::numeric_limits<cl_float>::is_iec559;
+    if(!float_is_iec559)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
+            "Tests won't run correctly."
+        );
+    }
+    const bool double_is_iec559 = std::numeric_limits<cl_double>::is_iec559;
+    if(!double_is_iec559)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
+            "Tests won't run correctly."
+        );
+    }
+    auto& registered_tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, registered_tests.size(), registered_tests.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/convert/CMakeLists.txt b/test_conformance/clcpp/convert/CMakeLists.txt
new file mode 100644
index 0000000..9f69fea
--- /dev/null
+++ b/test_conformance/clcpp/convert/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Module name; presumably consumed by the shared ../../CMakeCommon.txt included below - confirm.
+set(MODULE_NAME CPP_CONVERT)
+
+# Single translation unit: main.cpp pulls in the test cases from convert_cast.hpp.
+set(${MODULE_NAME}_SOURCES main.cpp)
+
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/convert/convert_cast.hpp b/test_conformance/clcpp/convert/convert_cast.hpp
new file mode 100644
index 0000000..81fcca6
--- /dev/null
+++ b/test_conformance/clcpp/convert/convert_cast.hpp

@@ -0,0 +1,309 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_CONVERT_CONVERT_CAST_HPP
+#define TEST_CONFORMANCE_CLCPP_CONVERT_CONVERT_CAST_HPP
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include <functional>
+
+
+enum class rounding_mode // rounding requested from convert_cast; only affects integral outputs in the host reference
+{
+    def, // no rounding mode is named in the generated kernel call; host reference uses a plain static_cast
+    /*rte, not implemented here */
+    rtz, // host reference uses a plain static_cast, same as def
+    rtp, // host reference applies std::ceil before the cast
+    rtn // host reference applies std::floor before the cast
+};
+
+enum class saturate { def, off, on }; // def: not named in the OpenCL C++ kernel call; only `on` clamps in the host reference
+
+inline std::string rounding_mode_name(rounding_mode rmode) // inline: defined in a header, must be ODR-safe
+{
+    switch (rmode) // spelling used when generating kernel source
+    {
+        case rounding_mode::rtz: return "rtz";
+        case rounding_mode::rtp: return "rtp";
+        case rounding_mode::rtn: return "rtn";
+        default: return ""; // rounding_mode::def (or any other value) has no spelling
+    }
+}
+
+inline std::string saturate_name(saturate smode) // inline: defined in a header, must be ODR-safe
+{
+    switch (smode) // spelling used after "saturate::" in generated kernel source
+    {
+        case saturate::off: return "off";
+        case saturate::on:  return "on";
+        default: return ""; // saturate::def (or any other value) has no spelling
+    }
+}
+
+template<class T> // Limits x to [a, b]: the lower bound a is applied first, then the upper bound b.
+T clamp(T x, T a, T b)
+{
+    return (std::min)(b, (a < x) ? x : a);
+}
+
+template<class IN1, class OUT1>
+struct convert_cast : public unary_func<IN1, OUT1>
+{
+    // Host-side reference for the convert_cast conversion under test: operator()
+    // produces the expected output vector for one input vector.
+    static_assert(vector_size<IN1>::value == vector_size<OUT1>::value, "The operand and result type must have the same number of elements");
+
+    typedef typename scalar_type<IN1>::type in_scalar_type;
+    typedef typename scalar_type<OUT1>::type out_scalar_type;
+
+    in_scalar_type in_min; // lower input bound, returned (via detail::def_limit) by min1()
+    in_scalar_type in_max; // upper input bound, returned (via detail::def_limit) by max1()
+    rounding_mode rmode; // rounding applied when the output element type is integral
+    saturate smode; // saturate::on clamps each input to the output element range first
+
+    convert_cast(in_scalar_type min, in_scalar_type max, rounding_mode rmode, saturate smode)
+        : in_min(min), in_max(max), rmode(rmode), smode(smode)
+    {
+    }
+
+    // Name of the tested function; test_convert_cast_func builds the kernel name from it.
+    std::string str()
+    {
+        return "convert_cast";
+    }
+
+    // Include line that the generated OpenCL C++ kernel source needs for convert_cast.
+    std::string headers()
+    {
+        return "#include <opencl_convert>\n";
+    }
+
+    IN1 min1() { return detail::def_limit<IN1>(in_min); } // lower bound passed to generate_input
+
+    IN1 max1() { return detail::def_limit<IN1>(in_max); } // upper bound passed to generate_input
+
+    // Expected result for x: each element is optionally saturated to the output
+    // element range, rounded (integral outputs only) and cast to the output type.
+    OUT1 operator()(const IN1& x)
+    {
+        OUT1 y;
+        for (size_t i = 0; i < vector_size<IN1>::value; i++)
+        {
+            in_scalar_type v = x.s[i];
+            if (smode == saturate::on)
+            {
+                // lowest(), not min(): for floating-point output types min() is the
+                // smallest positive value, so negative inputs would be clamped to it.
+                // NOTE(review): both bounds are cast to in_scalar_type, which assumes the
+                // output range fits the input type - confirm before adding new cases.
+                v = clamp(v,
+                    static_cast<in_scalar_type>((std::numeric_limits<out_scalar_type>::lowest)()),
+                    static_cast<in_scalar_type>((std::numeric_limits<out_scalar_type>::max)())
+                );
+            }
+
+            // Rounding modes only matter for integral outputs; def and rtz fall
+            // through to the plain static_cast.
+            if (std::is_integral<out_scalar_type>::value && rmode == rounding_mode::rtp)
+            {
+                y.s[i] = static_cast<out_scalar_type>(std::ceil(v));
+            }
+            else if (std::is_integral<out_scalar_type>::value && rmode == rounding_mode::rtn)
+            {
+                y.s[i] = static_cast<out_scalar_type>(std::floor(v));
+            }
+            else
+            {
+                y.s[i] = static_cast<out_scalar_type>(v);
+            }
+        }
+        return y;
+    }
+};
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+template <class func_type, class in_type, class out_type>
+std::string generate_kernel_convert_cast(func_type func)
+{
+    // OpenCL C spelling of the conversion: convert_<type>[_sat][_<rounding mode>](input[gid])
+    std::string call = "convert_" + type_name<out_type>();
+    call += (func.smode == saturate::on) ? "_sat" : "";
+    call += (func.rmode != rounding_mode::def) ? ("_" + rounding_mode_name(func.rmode)) : std::string();
+    call += "(input[gid])";
+
+    std::string source = "__kernel void test_" + func.str();
+    source += "(global " + type_name<in_type>() + " *input, global " + type_name<out_type>() + " *output)\n";
+    source += "{\n";
+    source += "    size_t gid = get_global_id(0);\n";
+    source += "    output[gid] = " + call + ";\n";
+    source += "}\n";
+    return source;
+}
+#else
+template <class func_type, class in_type, class out_type>
+std::string generate_kernel_convert_cast(func_type func)
+{
+    // OpenCL C++ spelling: convert_cast<type[, rounding_mode::<m>][, saturate::<s>]>(input[gid])
+    const std::string headers = func.headers();
+    std::string call = "convert_cast<" + type_name<out_type>();
+    if (func.rmode != rounding_mode::def)
+        call += ", rounding_mode::" + rounding_mode_name(func.rmode);
+    if (func.smode != saturate::def)
+        call += ", saturate::" + saturate_name(func.smode);
+    call += ">(input[gid])";
+
+    // The functor's definitions and headers come first, then the fixed prologue and kernel.
+    std::string source = func.defs() + headers;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_" + func.str() + "(global_ptr<" + type_name<in_type>() + "[]> input,";
+    source += "global_ptr<" + type_name<out_type>() + "[]> output)\n";
+    source += "{\n";
+    source += "    size_t gid = get_global_id(0);\n";
+    source += "    output[gid] = " + call + ";\n";
+    source += "}\n";
+    return source;
+}
+#endif
+
+// Runs one convert_cast configuration on the device and checks it against the host functor `op`.
+template <class convert_cast_op>
+int test_convert_cast_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, convert_cast_op op)
+{
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    int error;
+
+    typedef typename convert_cast_op::in_type INPUT;
+    typedef typename convert_cast_op::out_type OUTPUT;
+
+    // Don't run test for unsupported types
+    if (!(type_supported<INPUT>(device) && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS;
+    }
+
+    std::string code_str = generate_kernel_convert_cast<convert_cast_op, INPUT, OUTPUT>(op);
+    std::string kernel_name("test_"); kernel_name += op.str();
+
+// Development-only variants of the build step (OpenCL 2.2 conformance test development).
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(error)
+#else
+    error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(error)
+#endif
+
+    // NOTE(review): the RETURN_ON_* macros presumably return at once, leaking CL objects created so far - confirm.
+    std::vector<INPUT> input = generate_input<INPUT>(count, op.min1(), op.max1(), op.in_special_cases());
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
+
+    buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(INPUT) * input.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(OUTPUT) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    error = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+
+    error = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    size_t work_size[1] = { count }; // one work-item per generated input element
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    error = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    const bool results_match = verify_unary(input, output, op);
+
+    // Release everything before reporting, so a failed comparison no longer leaks the CL objects.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    if (!results_match)
+    {
+        RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    }
+    log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    return error;
+}
+
+
+AUTO_TEST_CASE(test_convert_cast)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+// Runs one configuration; its result is checked with CHECK_ERROR and OR-ed into `error`,
+// so the remaining configurations are presumably still executed after a failure.
+#define TEST_CONVERT_CAST_MACRO(OP) \
+    last_error = test_convert_cast_func( \
+        device, context, queue, n_elems, OP \
+    ); \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+
+    // Constructor arguments: input range minimum, input range maximum, rounding mode, saturation.
+    // Same element type on both sides (no-op conversions)
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_float2, cl_float2>(-100.0f, +100.0f, rounding_mode::rtn, saturate::def)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_uchar2, cl_uchar2>(0, 255, rounding_mode::def, saturate::def)))
+
+    // Integer to integer
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_int4, cl_short4>(40000, 40000, rounding_mode::def, saturate::on)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_uchar8, cl_char8>(0, 127, rounding_mode::def, saturate::off)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_char8, cl_int8>(-100, 100, rounding_mode::def, saturate::off)))
+
+    // Floating point to integer
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_float2, cl_uchar2>(-100.0f, +400.0f, rounding_mode::def, saturate::on)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_double4, cl_char4>(-127.0, +127.0, rounding_mode::rtp, saturate::off)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_float8, cl_uint8>(-1000.0f, +10000.0f, rounding_mode::rtp, saturate::on)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_float16, cl_ushort16>(-10000.0f, +70000.0f, rounding_mode::rtn, saturate::on)))
+
+    // Integer to floating point
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_short8, cl_float8>(0, 12345, rounding_mode::def, saturate::def)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_long2, cl_float2>(-1000000, +1000000, rounding_mode::rtz, saturate::def)))
+
+#undef TEST_CONVERT_CAST_MACRO
+
+    // Any accumulated failure is reported as -1.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_CONVERT_CONVERT_CAST_HPP

diff --git a/test_conformance/clcpp/convert/main.cpp b/test_conformance/clcpp/convert/main.cpp
new file mode 100644
index 0000000..9f4ed09
--- /dev/null
+++ b/test_conformance/clcpp/convert/main.cpp

@@ -0,0 +1,25 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "convert_cast.hpp"
+
+
+int main(int argc, const char *argv[])
+{
+    auto& registered_tests = autotest::test_suite::global_test_suite().test_defs; // the suite's test definitions
+    return runTestHarness(argc, argv, registered_tests.size(), registered_tests.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/device_queue/CMakeLists.txt b/test_conformance/clcpp/device_queue/CMakeLists.txt
new file mode 100644
index 0000000..0e1b2ee
--- /dev/null
+++ b/test_conformance/clcpp/device_queue/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Module name; presumably consumed by the shared ../../CMakeCommon.txt included below - confirm.
+set(MODULE_NAME CPP_DEVICE_QUEUE)
+
+# Single translation unit: main.cpp pulls in the test cases from test_enqueue.hpp.
+set(${MODULE_NAME}_SOURCES main.cpp)
+
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/device_queue/main.cpp b/test_conformance/clcpp/device_queue/main.cpp
new file mode 100644
index 0000000..1075c78
--- /dev/null
+++ b/test_conformance/clcpp/device_queue/main.cpp

@@ -0,0 +1,25 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_enqueue.hpp"
+
+
+int main(int argc, const char *argv[])
+{
+    auto& registered_tests = autotest::test_suite::global_test_suite().test_defs; // the suite's test definitions
+    return runTestHarness(argc, argv, registered_tests.size(), registered_tests.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/device_queue/test_enqueue.hpp b/test_conformance/clcpp/device_queue/test_enqueue.hpp
new file mode 100644
index 0000000..f5d4e6d
--- /dev/null
+++ b/test_conformance/clcpp/device_queue/test_enqueue.hpp

@@ -0,0 +1,699 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_DEVICE_QUEUE_TEST_ENQUEUE_HPP
+#define TEST_CONFORMANCE_CLCPP_DEVICE_QUEUE_TEST_ENQUEUE_HPP
+
+#include <sstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+
+
+namespace test_enqueue {
+
+struct test_options // parameters of one device-queue test run
+{
+    int test; // selects the kernel variant emitted by generate_source(); values 0..3 are handled there
+};
+
+struct output_type // host-side layout matching the struct of the same name declared in source_common
+{
+    cl_int enqueue_kernel1_success; // kernels store (enqueue status == success) for the first child kernel
+    cl_int enqueue_kernel2_success; // same for the second child kernel; set to 1 by variants that do not enqueue it
+    cl_int enqueue_kernel3_success; // same for the third child kernel; set to 1 by variants that do not enqueue it
+    cl_int enqueue_marker_success; // result of the marker enqueue; set to 1 by variants without a marker
+    cl_int event1_is_valid; // validity of the event returned by the first enqueue
+    cl_int event2_is_valid; // validity of the second event; set to 1 by variants that do not create it
+    cl_int event3_is_valid; // validity of the third event; set to 1 by variants that do not create it
+    cl_int user_event1_is_valid; // validity of the first user event; set to 1 by variants without user events
+    cl_int user_event2_is_valid; // validity of the second user event; set to 1 by variants that do not create it
+    cl_int values[10000]; // per-work-item results written by the enqueued child kernels
+};
+
+const std::string source_common = R"(
+struct output_type
+{
+    int enqueue_kernel1_success;
+    int enqueue_kernel2_success;
+    int enqueue_kernel3_success;
+    int enqueue_marker_success;
+    int event1_is_valid;
+    int event2_is_valid;
+    int event3_is_valid;
+    int user_event1_is_valid;
+    int user_event2_is_valid;
+    int values[10000];
+};
+)"; // device-side declaration of output_type; both generate_source variants stream it into the kernel source
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+std::string generate_source(test_options options) // OpenCL C kernel source for the variant chosen by options.test
+{
+    std::stringstream s;
+    s << source_common; // shared output_type declaration comes first
+    if (options.test == 0) // variant 0: one child kernel on the default queue sets values[gid] = 1
+    {
+        s << R"(
+    kernel void test(queue_t queue, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        if (gid != 0)
+            return;
+
+        output->enqueue_kernel2_success = 1;
+        output->enqueue_kernel3_success = 1;
+        output->enqueue_marker_success = 1;
+        output->event2_is_valid = 1;
+        output->event3_is_valid = 1;
+        output->user_event1_is_valid = 1;
+        output->user_event2_is_valid = 1;
+
+        queue_t default_queue = get_default_queue();
+
+        ndrange_t ndrange1 = ndrange_1D(get_global_size(0));
+        clk_event_t event1;
+        int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange1, 0, NULL, &event1,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[gid] = 1;
+        });
+        output->enqueue_kernel1_success = status1 == CLK_SUCCESS;
+        output->event1_is_valid = is_valid_event(event1);
+
+        release_event(event1);
+    }
+    )";
+    }
+    else if (options.test == 1) // variant 1: two child kernels; the second waits on the first one's event
+    {
+        s << R"(
+    kernel void test(queue_t queue, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        if (gid != 0)
+            return;
+
+        output->enqueue_kernel3_success = 1;
+        output->enqueue_marker_success = 1;
+        output->event3_is_valid = 1;
+        output->user_event1_is_valid = 1;
+        output->user_event2_is_valid = 1;
+
+        queue_t default_queue = get_default_queue();
+
+        ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2);
+        clk_event_t event1;
+        int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange1, 0, NULL, &event1,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[gid * 2] = 1;
+        });
+        output->enqueue_kernel1_success = status1 == CLK_SUCCESS;
+        output->event1_is_valid = is_valid_event(event1);
+
+        ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, 1);
+        clk_event_t event2;
+        int status2 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange2, 1, &event1, &event2,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[(gid - 1) * 2 + 1] = 1;
+        });
+        output->enqueue_kernel2_success = status2 == CLK_SUCCESS;
+        output->event2_is_valid = is_valid_event(event2);
+
+        release_event(event1);
+        release_event(event2);
+    }
+    )";
+    }
+    else if (options.test == 2) // variant 2: user events as dependencies; second child kernel takes local-memory arguments
+    {
+        s << R"(
+    kernel void test(queue_t queue, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        if (gid != 0)
+            return;
+
+        output->enqueue_marker_success = 1;
+        output->event3_is_valid = 1;
+        output->enqueue_kernel3_success = 1;
+
+        queue_t default_queue = get_default_queue();
+
+        clk_event_t user_event1 = create_user_event();
+        retain_event(user_event1);
+        output->user_event1_is_valid = is_valid_event(user_event1);
+
+        ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2);
+        clk_event_t event1;
+        int status1 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange1, 1, &user_event1, &event1,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[gid * 2] = 1;
+        });
+        output->enqueue_kernel1_success = status1 == CLK_SUCCESS;
+        output->event1_is_valid = is_valid_event(event1);
+        release_event(user_event1);
+
+        clk_event_t user_event2 = create_user_event();
+        output->user_event2_is_valid = is_valid_event(user_event2);
+
+        clk_event_t events[2];
+        events[0] = user_event2;
+        events[1] = user_event1;
+
+        ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, get_local_size(0));
+        clk_event_t event2;
+        int status2 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange2, 2, events, &event2,
+        ^(local void *p0, local void *p1, local void *p2) {
+            const ulong gid = get_global_id(0);
+            const ulong lid = get_local_id(0);
+            local int2 *l0 = (local int2 *)p0;
+            local int *l1 = (local int *)p1;
+            local int *l2 = (local int *)p2;
+            l1[get_local_size(0) - lid - 1] = gid > 0 ? 1 : 0;
+            work_group_barrier(CLK_LOCAL_MEM_FENCE);
+            if (lid < 5) l0[lid] = (int2)(3, 4);
+            if (lid < 3) l2[lid] = 5;
+            work_group_barrier(CLK_LOCAL_MEM_FENCE);
+            output->values[(gid - 1) * 2 + 1] = min(l1[lid], min(l0[0].x, l2[0]));
+        }, sizeof(int2) * 5, sizeof(int) * get_local_size(0), sizeof(int) * 3);
+        output->enqueue_kernel2_success = status2 == CLK_SUCCESS;
+        output->event2_is_valid = is_valid_event(event2);
+
+        set_user_event_status(user_event1, CL_COMPLETE);
+        set_user_event_status(user_event2, CL_COMPLETE);
+
+        release_event(user_event1);
+        release_event(user_event2);
+        release_event(event1);
+        release_event(event2);
+    }
+    )";
+    }
+    else if (options.test == 3) // variant 3: a marker over three events gates a final child kernel that divides values by 20
+    {
+        s << R"(
+    kernel void test(queue_t queue, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        if (gid != 0)
+            return;
+
+        output->user_event2_is_valid = 1;
+
+        queue_t default_queue = get_default_queue();
+
+        ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2);
+        clk_event_t event1;
+        int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange1, 0, NULL, &event1,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[gid * 2] = 20;
+        });
+        output->enqueue_kernel1_success = status1 == CLK_SUCCESS;
+        output->event1_is_valid = is_valid_event(event1);
+
+        ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, 1);
+        clk_event_t event2;
+        int status2 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange2, 0, NULL, &event2,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[(gid - 1) * 2 + 1] = 20;
+        });
+        output->enqueue_kernel2_success = status2 == CLK_SUCCESS;
+        output->event2_is_valid = is_valid_event(event2);
+
+        clk_event_t user_event1 = create_user_event();
+        output->user_event1_is_valid = is_valid_event(user_event1);
+
+        clk_event_t events[3];
+        events[0] = event2;
+        events[1] = user_event1;
+        events[2] = event1;
+
+        clk_event_t event3;
+        int status3 = enqueue_marker(queue, 3, events, &event3);
+        output->enqueue_marker_success = status3 == CLK_SUCCESS;
+        output->event3_is_valid = is_valid_event(event3);
+
+        int status4 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange_1D(get_global_size(0)), 1, &event3, NULL,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[gid] /= 20;
+        });
+        output->enqueue_kernel3_success = status4 == CLK_SUCCESS;
+
+        set_user_event_status(user_event1, CL_COMPLETE);
+
+        release_event(user_event1);
+        release_event(event1);
+        release_event(event2);
+        release_event(event3);
+    }
+    )";
+    }
+
+    return s.str(); // only source_common when options.test is not one of 0..3
+}
+#else
+std::string generate_source(test_options options) // OpenCL C++ kernel source for the variant chosen by options.test
+{
+    std::stringstream s;
+    s << R"(
+    #include <opencl_memory>
+    #include <opencl_common>
+    #include <opencl_work_item>
+    #include <opencl_synchronization>
+    #include <opencl_device_queue>
+    using namespace cl;
+    )";
+
+    s << source_common; // shared output_type declaration follows the include prologue
+    if (options.test == 0) // variant 0: one child kernel on the default queue sets values[gid] = 1
+    {
+        s << R"(
+    kernel void test(device_queue queue, global<output_type> *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        if (gid != 0)
+            return;
+
+        output->enqueue_kernel2_success = 1;
+        output->enqueue_kernel3_success = 1;
+        output->enqueue_marker_success = 1;
+        output->event2_is_valid = 1;
+        output->event3_is_valid = 1;
+        output->user_event1_is_valid = 1;
+        output->user_event2_is_valid = 1;
+
+        device_queue default_queue = get_default_device_queue();
+
+        ndrange ndrange1(get_global_size(0));
+        event event1;
+        enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 0, nullptr, &event1, ndrange1,
+        [](global<output_type> *output) {
+            const ulong gid = get_global_id(0);
+            output->values[gid] = 1;
+        }, output);
+        output->enqueue_kernel1_success = status1 == enqueue_status::success;
+        output->event1_is_valid = event1.is_valid();
+
+        event1.release();
+    }
+    )";
+    }
+    else if (options.test == 1) // variant 1: two child kernels; the second waits on the first one's event
+    {
+        s << R"(
+    kernel void test(device_queue queue, global<output_type> *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        if (gid != 0)
+            return;
+
+        output->enqueue_kernel3_success = 1;
+        output->enqueue_marker_success = 1;
+        output->event3_is_valid = 1;
+        output->user_event1_is_valid = 1;
+        output->user_event2_is_valid = 1;
+
+        device_queue default_queue = get_default_device_queue();
+
+        ndrange ndrange1(get_global_size(0) / 2);
+        event event1;
+        enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::wait_work_group, 0, nullptr, &event1, ndrange1,
+        [](global<output_type> *output) {
+            const ulong gid = get_global_id(0);
+            output->values[gid * 2] = 1;
+        }, output);
+        output->enqueue_kernel1_success = status1 == enqueue_status::success;
+        output->event1_is_valid = event1.is_valid();
+
+        ndrange ndrange2(1, get_global_size(0) / 2, 1);
+        event event2;
+        enqueue_status status2 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 1, &event1, &event2, ndrange2,
+        [](global<output_type> *output) {
+            const ulong gid = get_global_id(0);
+            output->values[(gid - 1) * 2 + 1] = 1;
+        }, output);
+        output->enqueue_kernel2_success = status2 == enqueue_status::success;
+        output->event2_is_valid = event2.is_valid();
+
+        event1.release();
+        event2.release();
+    }
+    )";
+    }
+    else if (options.test == 2) // variant 2: user events as dependencies; second child kernel takes local_ptr arguments
+    {
+        s << R"(
+    kernel void test(device_queue queue, global<output_type> *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        if (gid != 0)
+            return;
+
+        output->enqueue_marker_success = 1;
+        output->event3_is_valid = 1;
+        output->enqueue_kernel3_success = 1;
+
+        device_queue default_queue = get_default_device_queue();
+
+        event user_event1 = make_user_event();
+        user_event1.retain();
+        output->user_event1_is_valid = user_event1.is_valid();
+
+        ndrange ndrange1(get_global_size(0) / 2);
+        event event1;
+        enqueue_status status1 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 1, &user_event1, &event1, ndrange1,
+        [](global<output_type> *output){
+            const ulong gid = get_global_id(0);
+            output->values[gid * 2] = 1;
+        }, output);
+        output->enqueue_kernel1_success = status1 == enqueue_status::success;
+        output->event1_is_valid = event1.is_valid();
+        user_event1.release();
+
+        event user_event2 = make_user_event();
+        output->user_event2_is_valid = user_event2.is_valid();
+
+        event events[2];
+        events[0] = user_event2;
+        events[1] = user_event1;
+
+        ndrange ndrange2(1, get_global_size(0) / 2, get_local_size(0));
+        event event2;
+        enqueue_status status2 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 2, events, &event2, ndrange2,
+        [](global<output_type> *output, local_ptr<int2[]> l0, local_ptr<int[]> l1, local_ptr<int[]> l2) {
+            const ulong gid = get_global_id(0);
+            const ulong lid = get_local_id(0);
+            l1[get_local_size(0) - lid - 1] = gid > 0 ? 1 : 0;
+            work_group_barrier(mem_fence::local);
+            if (lid < 5) l0[lid] = int2(3, 4);
+            if (lid < 3) l2[lid] = 5;
+            work_group_barrier(mem_fence::local);
+            output->values[(gid - 1) * 2 + 1] = min(l1[lid], min(l0[0].x, l2[0]));
+        }, output, local_ptr<int2[]>::size_type(5), local_ptr<int[]>::size_type(get_local_size(0)), local_ptr<int[]>::size_type(3));
+        output->enqueue_kernel2_success = status2 == enqueue_status::success;
+        output->event2_is_valid = event2.is_valid();
+
+        user_event1.set_status(event_status::complete);
+        user_event2.set_status(event_status::complete);
+
+        user_event1.release();
+        user_event2.release();
+        event1.release();
+        event2.release();
+    }
+    )";
+    }
+    else if (options.test == 3) // variant 3: a marker over three events gates a final child kernel that divides values by 20
+    {
+        s << R"(
+    kernel void test(device_queue queue, global<output_type> *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        if (gid != 0)
+            return;
+
+        output->user_event2_is_valid = 1;
+
+        device_queue default_queue = get_default_device_queue();
+
+        ndrange ndrange1(get_global_size(0) / 2);
+        event event1;
+        enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::wait_work_group, 0, nullptr, &event1, ndrange1,
+        [](global<output_type> *output) {
+            const ulong gid = get_global_id(0);
+            output->values[gid * 2] = 20;
+        }, output);
+        output->enqueue_kernel1_success = status1 == enqueue_status::success;
+        output->event1_is_valid = event1.is_valid();
+
+        ndrange ndrange2(1, get_global_size(0) / 2, 1);
+        event event2;
+        enqueue_status status2 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 0, nullptr, &event2, ndrange2,
+        [](global<output_type> *output) {
+            const ulong gid = get_global_id(0);
+            output->values[(gid - 1) * 2 + 1] = 20;
+        }, output);
+        output->enqueue_kernel2_success = status2 == enqueue_status::success;
+        output->event2_is_valid = event2.is_valid();
+
+        event user_event1 = make_user_event();
+        output->user_event1_is_valid = user_event1.is_valid();
+
+        event events[3];
+        events[0] = event2;
+        events[1] = user_event1;
+        events[2] = event1;
+
+        event event3;
+        enqueue_status status3 = queue.enqueue_marker(3, events, &event3);
+        output->enqueue_marker_success = status3 == enqueue_status::success;
+        output->event3_is_valid = event3.is_valid();
+
+        enqueue_status status4 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 1, &event3, nullptr, ndrange(get_global_size(0)),
+        [](global<output_type> *output) {
+            const ulong gid = get_global_id(0);
+            output->values[gid] /= 20;
+        }, output);
+        output->enqueue_kernel3_success = status4 == enqueue_status::success;
+
+        user_event1.set_status(event_status::complete);
+
+        user_event1.release();
+        event1.release();
+        event2.release();
+        event3.release();
+    }
+    )";
+    }
+
+    return s.str(); // include prologue plus source_common only, when options.test is not one of 0..3
+}
+#endif
+
+// Builds the "test" kernel from generate_source(options), runs it once on `queue`
+// with on-device queue(s) supplied as arguments, and validates the output_type
+// structure written by the kernel.
+//
+// Parameters:
+//   device, context, queue - OpenCL objects used to build and launch the kernel.
+//   options                - forwarded to generate_source() to choose the kernel source.
+//
+// Returns the last OpenCL status on the success path. Failures are reported through
+// the RETURN_ON_* macros.
+// NOTE(review): those macros appear to return early; if so, the releases at the end
+// of this function are skipped on failure paths - confirm against their definitions.
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+{
+    int error = CL_SUCCESS;
+
+    cl_program program;
+    cl_kernel kernel;
+
+    std::string kernel_name = "test";
+    std::string source = generate_source(options);
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    // In this configuration the function stops here: nothing below is executed.
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "-cl-std=CL2.0", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    // Number of on-device queues decides whether a second device queue is created below.
+    cl_uint max_queues;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(cl_uint), &max_queues, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+
+    // Queried but not otherwise used in this function.
+    cl_uint max_events;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_EVENTS, sizeof(cl_uint), &max_events, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+
+    cl_command_queue device_queue1 = NULL;
+    cl_command_queue device_queue2 = NULL;
+
+    // First queue: out-of-order, on-device, and flagged as the default device queue.
+    cl_queue_properties queue_properties1[] =
+    {
+        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT,
+        0
+    };
+    device_queue1 = clCreateCommandQueueWithProperties(context, device, queue_properties1, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateCommandQueueWithProperties")
+
+    // Second queue (same flags minus the default flag) only when the device reports more than one.
+    if (max_queues > 1)
+    {
+        cl_queue_properties queue_properties2[] =
+        {
+            CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE,
+            0
+        };
+        device_queue2 = clCreateCommandQueueWithProperties(context, device, queue_properties2, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateCommandQueueWithProperties")
+    }
+
+    // Device buffer holding exactly one output_type.
+    cl_mem output_buffer;
+    output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    // Argument 0 is the non-default device queue when it exists, otherwise the default one.
+    error = clSetKernelArg(kernel, 0, sizeof(cl_command_queue), device_queue2 != NULL ? &device_queue2 : &device_queue1);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    // Zero the whole output structure so flags and values start from a known state.
+    const char pattern = 0;
+    error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer")
+
+    size_t max_work_group_size;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+
+    // Work-group size is capped at 256; the global size is the largest multiple of it
+    // that does not exceed 10000.
+    const size_t local_size = (std::min)((size_t)256, max_work_group_size);
+    const size_t global_size = 10000 / local_size * local_size;
+    const size_t count = global_size;
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    // Blocking read (CL_TRUE) of the whole structure back to the host.
+    output_type output;
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(output_type),
+        static_cast<void *>(&output),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    // Every status flag reported by the kernel must be non-zero.
+    if (!output.enqueue_kernel1_success)
+    {
+        RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed")
+    }
+    if (!output.enqueue_kernel2_success)
+    {
+        RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed")
+    }
+    if (!output.enqueue_kernel3_success)
+    {
+        RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed")
+    }
+    if (!output.enqueue_marker_success)
+    {
+        RETURN_ON_ERROR_MSG(-1, "enqueue_marker did not succeed")
+    }
+    if (!output.event1_is_valid)
+    {
+        RETURN_ON_ERROR_MSG(-1, "event1 is not valid")
+    }
+    if (!output.event2_is_valid)
+    {
+        RETURN_ON_ERROR_MSG(-1, "event2 is not valid")
+    }
+    if (!output.event3_is_valid)
+    {
+        RETURN_ON_ERROR_MSG(-1, "event3 is not valid")
+    }
+    if (!output.user_event1_is_valid)
+    {
+        RETURN_ON_ERROR_MSG(-1, "user_event1 is not valid")
+    }
+    if (!output.user_event2_is_valid)
+    {
+        RETURN_ON_ERROR_MSG(-1, "user_event2 is not valid")
+    }
+
+    // Each of the first `count` (== global_size) values must equal 1.
+    // NOTE(review): assumes output_type::values holds at least `count` entries - confirm.
+    for (size_t i = 0; i < count; i++)
+    {
+        const cl_int result = output.values[i];
+        const cl_int expected = 1;
+
+        if (result != expected)
+        {
+            RETURN_ON_ERROR_MSG(-1,
+                "kernel did not return correct value. Expected: %s, got: %s",
+                format_value(expected).c_str(), format_value(result).c_str()
+            )
+        }
+    }
+
+    // Success path cleanup; the second device queue is released only if it was created.
+    clReleaseMemObject(output_buffer);
+    clReleaseCommandQueue(device_queue1);
+    if (device_queue2 != NULL)
+        clReleaseCommandQueue(device_queue2);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+}
+
+// Registers a test case that runs test() with kernel source variant 0.
+AUTO_TEST_CASE(test_enqueue_one_kernel)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    (void) num_elements; // not used by this test
+
+    test_options variant;
+    variant.test = 0;
+
+    return test(device, context, queue, variant);
+}
+
+// Registers a test case that runs test() with kernel source variant 1.
+AUTO_TEST_CASE(test_enqueue_two_kernels)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    (void) num_elements; // not used by this test
+
+    test_options variant;
+    variant.test = 1;
+
+    return test(device, context, queue, variant);
+}
+
+// Registers a test case that runs test() with kernel source variant 2
+// (the variant whose kernel uses user events and local_ptr arguments).
+AUTO_TEST_CASE(test_enqueue_user_events_and_locals)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    (void) num_elements; // not used by this test
+
+    test_options variant;
+    variant.test = 2;
+
+    return test(device, context, queue, variant);
+}
+
+// Registers a test case that runs test() with kernel source variant 3
+// (the variant whose kernel calls enqueue_marker).
+AUTO_TEST_CASE(test_enqueue_marker)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    (void) num_elements; // not used by this test
+
+    test_options variant;
+    variant.test = 3;
+
+    return test(device, context, queue, variant);
+}
+
+} // namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_DEVICE_QUEUE_TEST_ENQUEUE_HPP

diff --git a/test_conformance/clcpp/funcs_test_utils.hpp b/test_conformance/clcpp/funcs_test_utils.hpp
new file mode 100644
index 0000000..e839231
--- /dev/null
+++ b/test_conformance/clcpp/funcs_test_utils.hpp

@@ -0,0 +1,72 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_FUNCS_TEST_UTILS_HPP
+#define TEST_CONFORMANCE_CLCPP_FUNCS_TEST_UTILS_HPP
+
+// This file contains helper classes and functions for testing various unary, binary
+// and ternary OpenCL functions (for example cl::abs(x) or cl::abs_diff(x, y)), 
+// as well as other helper functions/classes.
+
+#include "common.hpp"
+
+// Runs test_unary_func() for TEST_CLASS, stores the result in `last_error`, passes it
+// to CHECK_ERROR and ORs it into `error`.
+// Expands to several statements (the last one already ends with ';') and relies on
+// `device`, `context`, `queue`, `n_elems`, `last_error` and `error` being declared at
+// the expansion site.
+#define TEST_UNARY_FUNC_MACRO(TEST_CLASS) \
+    last_error = test_unary_func(  \
+        device, context, queue, n_elems, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+
+// Runs test_binary_func() for TEST_CLASS, stores the result in `last_error`, passes it
+// to CHECK_ERROR and ORs it into `error`.
+// Expands to several statements (the last one already ends with ';') and relies on
+// `device`, `context`, `queue`, `n_elems`, `last_error` and `error` being declared at
+// the expansion site.
+#define TEST_BINARY_FUNC_MACRO(TEST_CLASS) \
+    last_error = test_binary_func(  \
+        device, context, queue, n_elems, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+
+// Runs test_ternary_func() for TEST_CLASS, stores the result in `last_error`, passes it
+// to CHECK_ERROR and ORs it into `error`.
+// Expands to several statements (the last one already ends with ';') and relies on
+// `device`, `context`, `queue`, `n_elems`, `last_error` and `error` being declared at
+// the expansion site.
+#define TEST_TERNARY_FUNC_MACRO(TEST_CLASS) \
+    last_error = test_ternary_func(  \
+        device, context, queue, n_elems, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+
+#include "utils_test/compare.hpp"
+#include "utils_test/generate_inputs.hpp"
+
+// HOWTO:
+//
+// unary_func, binary_func, ternary_func - base classes wrapping OpenCL functions that
+// you want to test.
+// 
+// To create a wrapper class for given function, you need to create a class derived from correct
+// base class (unary_func, binary_func, ternary_func), and define:
+//
+// * std::string str() method which should return class name in OpenCL ("abs", "abs_diff"),
+// * operator(x), operator(x, y) or operator(x,y,z) depending on arity of the function you wish
+// to test, method should work exactly as the tested function works in OpenCL
+// * if needed, you can overload the min1, max1, min2, max2, min3, max3 methods, which return
+// the min and max values that can be generated for the given input (function argument)
+// [required for vec arguments],
+// * if you want to use vector arguments (for example: cl_int2, cl_ulong16), you should look at
+// how int_func_clamp<> is implemented in integer_funcs/numeric_funcs.hpp.
+//
+// To see how you should use class you've just created see AUTO_TEST_CASE(test_int_numeric_funcs)
+// in integer_funcs/numeric_funcs.hpp.
+#include "utils_test/unary.hpp"
+#include "utils_test/binary.hpp"
+#include "utils_test/ternary.hpp"
+
+#endif // TEST_CONFORMANCE_CLCPP_FUNCS_TEST_UTILS_HPP

diff --git a/test_conformance/clcpp/geometric_funcs/CMakeLists.txt b/test_conformance/clcpp/geometric_funcs/CMakeLists.txt
new file mode 100644
index 0000000..25d05ed
--- /dev/null
+++ b/test_conformance/clcpp/geometric_funcs/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Module name; it also forms the prefix of the sources variable set below.
+set(MODULE_NAME CPP_GEOMETRIC_FUNCS)
+
+# Source list for this module, stored in CPP_GEOMETRIC_FUNCS_SOURCES.
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+)
+
+# Shared build rules; presumably consumes MODULE_NAME and the sources list
+# (confirm in CMakeCommon.txt).
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/geometric_funcs/fast_geometric_funcs.hpp b/test_conformance/clcpp/geometric_funcs/fast_geometric_funcs.hpp
new file mode 100644
index 0000000..c179728
--- /dev/null
+++ b/test_conformance/clcpp/geometric_funcs/fast_geometric_funcs.hpp

@@ -0,0 +1,229 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_FAST_GEOMETRIC_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_FAST_GEOMETRIC_FUNCS_HPP
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include <type_traits>
+
+// Host-side reference for: float fast_distance(float4 p0, float4 p1);
+struct geometric_func_fast_distance : public binary_func<cl_float4, cl_float4, cl_float>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "fast_distance";
+    }
+
+    // Include line for <opencl_geometric>.
+    std::string headers()
+    {
+        return "#include <opencl_geometric>\n";
+    }
+
+    // Euclidean distance over all four components, accumulated in double
+    // precision and narrowed to cl_float on return.
+    cl_float operator()(const cl_float4& p0, const cl_float4& p1)
+    {
+        cl_double sum_of_squares = 0.0f;
+        for(size_t lane = 0; lane < 4; ++lane)
+        {
+            const cl_double diff =
+                static_cast<cl_double>(p0.s[lane]) - static_cast<cl_double>(p1.s[lane]);
+            sum_of_squares += diff * diff;
+        }
+        return static_cast<cl_float>(std::sqrt(sum_of_squares));
+    }
+
+    // Lower limit reported for the first argument.
+    cl_float4 min1()
+    {
+        return detail::def_limit<cl_float4>(-512.0f);
+    }
+
+    // Upper limit reported for the first argument.
+    cl_float4 max1()
+    {
+        return detail::def_limit<cl_float4>(512.0f);
+    }
+
+    // Lower limit reported for the second argument.
+    cl_float4 min2()
+    {
+        return detail::def_limit<cl_float4>(-512.0f);
+    }
+
+    // Upper limit reported for the second argument.
+    cl_float4 max2()
+    {
+        return detail::def_limit<cl_float4>(512.0f);
+    }
+
+    // Tolerance: 1% of the expected value (the product is formed in float).
+    cl_double delta(const cl_float4& p0, const cl_float4& p1, const cl_float& expected)
+    {
+        (void) p0;
+        (void) p1;
+        const cl_float one_percent = 0.01f * expected;
+        return one_percent;
+    }
+
+    // ULP budget, summed from the three contributions below.
+    float ulp()
+    {
+        const float sqrt_error = 8192.0f;          // error in sqrt
+        const float multiply_error = 1.5f * 4.0f;  // cumulative error for multiplications
+        const float add_error = 0.5f * 3.0f;       // cumulative error for additions
+        return sqrt_error + multiply_error + add_error;
+    }
+};
+
+// Host-side reference for: float fast_length(float4 p);
+struct geometric_func_fast_length : public unary_func<cl_float4,cl_float>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "fast_length";
+    }
+
+    // Include line for <opencl_geometric>.
+    std::string headers()
+    {
+        return "#include <opencl_geometric>\n";
+    }
+
+    // Euclidean length over all four components, accumulated in double
+    // precision and narrowed to cl_float on return.
+    cl_float operator()(const cl_float4& p)
+    {
+        cl_double sum_of_squares = 0.0f;
+        for(size_t lane = 0; lane < 4; ++lane)
+        {
+            const cl_double component = static_cast<cl_double>(p.s[lane]);
+            sum_of_squares += component * component;
+        }
+        return static_cast<cl_float>(std::sqrt(sum_of_squares));
+    }
+
+    // Lower limit reported for the argument.
+    cl_float4 min1()
+    {
+        return detail::def_limit<cl_float4>(-512.0f);
+    }
+
+    // Upper limit reported for the argument.
+    cl_float4 max1()
+    {
+        return detail::def_limit<cl_float4>(512.0f);
+    }
+
+    // Tolerance: 1% of the expected value (the product is formed in float).
+    cl_double delta(const cl_float4& p, const cl_float& expected)
+    {
+        (void) p;
+        const cl_float one_percent = 0.01f * expected;
+        return one_percent;
+    }
+
+    // ULP budget: sqrt error plus half of the accumulated multiply/add error.
+    float ulp()
+    {
+        const float sqrt_error = 8192.0f;          // error in sqrt
+        const float multiply_error = 0.5f * 4.0f;  // cumulative error for multiplications
+        const float add_error = 0.5f * 3.0f;       // cumulative error for additions
+        // 0.5f: effect on e of taking sqrt( x + e )
+        return sqrt_error + 0.5f * (multiply_error + add_error);
+    }
+};
+
+// Host-side reference for: float4 fast_normalize(float4 p);
+struct geometric_func_fast_normalize : public unary_func<cl_float4,cl_float4>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "fast_normalize";
+    }
+
+    // Include line for <opencl_geometric>.
+    std::string headers()
+    {
+        return "#include <opencl_geometric>\n";
+    }
+
+    // Divides every component by the vector length (computed in double). A vector
+    // whose squared length is zero yields an all-zero result.
+    cl_float4 operator()(const cl_float4& p)
+    {
+        cl_double length_squared = 0.0f;
+        for(size_t lane = 0; lane < 4; ++lane)
+        {
+            const cl_double component = static_cast<cl_double>(p.s[lane]);
+            length_squared += component * component;
+        }
+
+        cl_float4 normalized;
+        if(length_squared == 0.0f)
+        {
+            for(size_t lane = 0; lane < 4; ++lane)
+            {
+                normalized.s[lane] = 0.0f;
+            }
+            return normalized;
+        }
+
+        const cl_double length = std::sqrt(length_squared);
+        for(size_t lane = 0; lane < 4; ++lane)
+        {
+            normalized.s[lane] = static_cast<cl_double>(p.s[lane]) / length;
+        }
+        return normalized;
+    }
+
+    // Lower limit reported for the argument.
+    cl_float4 min1()
+    {
+        return detail::def_limit<cl_float4>(-512.0f);
+    }
+
+    // Upper limit reported for the argument.
+    cl_float4 max1()
+    {
+        return detail::def_limit<cl_float4>(512.0f);
+    }
+
+    // Extra input to exercise: the all-zero vector.
+    std::vector<cl_float4> in_special_cases()
+    {
+        const cl_float4 zero_vector = {0.0f, 0.0f, 0.0f, 0.0f};
+        return { zero_vector };
+    }
+
+    // Tolerance: 0.01 multiplied with the expected vector via detail::multiply.
+    cl_double4 delta(const cl_float4& p, const cl_float4& expected)
+    {
+        (void) p;
+        auto scale = detail::make_value<cl_double4>(0.01f);
+        return detail::multiply<cl_double4>(scale, expected);
+    }
+
+    // ULP budget, summed from the three contributions below.
+    float ulp()
+    {
+        const float rsqrt_and_multiply_error = 8192.5f; // error in rsqrt + error in multiply
+        const float multiply_error = 0.5f * 4.0f;       // cumulative error for multiplications
+        const float add_error = 0.5f * 3.0f;            // cumulative error for additions
+        return rsqrt_and_multiply_error + multiply_error + add_error;
+    }
+};
+
+// Runs the fast_distance, fast_length and fast_normalize reference checks in turn.
+// The TEST_*_FUNC_MACRO expansions use the local names `error` and `last_error`
+// and the parameters `device`, `context`, `queue` and `n_elems`.
+AUTO_TEST_CASE(test_fast_geometric_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    TEST_BINARY_FUNC_MACRO((geometric_func_fast_distance()))  // float fast_distance(float4 p0, float4 p1)
+    TEST_UNARY_FUNC_MACRO((geometric_func_fast_length()))     // float fast_length(float4 p)
+    TEST_UNARY_FUNC_MACRO((geometric_func_fast_normalize()))  // float4 fast_normalize(float4 p)
+
+    // Any accumulated failure is reported as -1.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_FAST_GEOMETRIC_FUNCS_HPP

diff --git a/test_conformance/clcpp/geometric_funcs/geometric_funcs.hpp b/test_conformance/clcpp/geometric_funcs/geometric_funcs.hpp
new file mode 100644
index 0000000..561f9e9
--- /dev/null
+++ b/test_conformance/clcpp/geometric_funcs/geometric_funcs.hpp

@@ -0,0 +1,389 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include <type_traits>
+
+// Host-side reference for: float4 cross(float4 p0, float4 p1)
+struct geometric_func_cross : public binary_func<cl_float4, cl_float4, cl_float4>
+{
+    // Picks the relative tolerance for `device`.
+    geometric_func_cross(cl_device_id device)
+        // On an embedded device w/ round-to-zero, 3 ulps is the worst-case tolerance for cross product
+        : m_delta(3.0f * CL_FLT_EPSILON)
+    {
+        // RTZ devices accrue approximately double the amount of error per operation.  Allow for that.
+        const bool rounds_to_zero = (get_default_rounding_mode(device) == CL_FP_ROUND_TO_ZERO);
+        if(rounds_to_zero)
+        {
+            m_delta *= 2.0f;
+        }
+    }
+
+    // Name of the function under test.
+    std::string str()
+    {
+        return "cross";
+    }
+
+    // Include line for <opencl_geometric>.
+    std::string headers()
+    {
+        return "#include <opencl_geometric>\n";
+    }
+
+    // Cross product of the first three components, evaluated in float;
+    // the fourth component of the result is 0.
+    cl_float4 operator()(const cl_float4& p0, const cl_float4& p1)
+    {
+        const cl_float *a = p0.s;
+        const cl_float *b = p1.s;
+
+        cl_float4 product;
+        product.s[0] = (a[1] * b[2]) - (a[2] * b[1]);
+        product.s[1] = (a[2] * b[0]) - (a[0] * b[2]);
+        product.s[2] = (a[0] * b[1]) - (a[1] * b[0]);
+        product.s[3] = 0.0f;
+        return product;
+    }
+
+    // Upper limit reported for the first argument.
+    cl_float4 max1()
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+
+    // Upper limit reported for the second argument.
+    cl_float4 max2()
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+
+    // Lower limit reported for the first argument.
+    cl_float4 min1()
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+
+    // Lower limit reported for the second argument.
+    cl_float4 min2()
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+
+    // This function reports false here, so delta() rather than ulp() is the stated tolerance.
+    bool use_ulp()
+    {
+        return false;
+    }
+
+    // Tolerance: m_delta multiplied with the expected vector via detail::multiply.
+    cl_double4 delta(const cl_float4& p0, const cl_float4& p1, const cl_float4& expected)
+    {
+        (void) p0;
+        (void) p1;
+        auto scale = detail::make_value<cl_double4>(m_delta);
+        return detail::multiply<cl_double4>(scale, expected);
+    }
+
+private:
+    // Relative tolerance chosen in the constructor.
+    cl_double m_delta;
+};
+
+// Host-side reference for: float dot(float4 p0, float4 p1);
+struct geometric_func_dot : public binary_func<cl_float4, cl_float4, cl_float>
+{
+
+    // Name of the function under test.
+    std::string str()
+    {
+        return "dot";
+    }
+
+    // Include line for <opencl_geometric>.
+    std::string headers()
+    {
+        return "#include <opencl_geometric>\n";
+    }
+
+    // Sum of the four component-wise products, evaluated in float.
+    cl_float operator()(const cl_float4& p0, const cl_float4& p1)
+    {
+        cl_float r;
+        r = p0.s[0] * p1.s[0];
+        r += p0.s[1] * p1.s[1];
+        r += p0.s[2] * p1.s[2];
+        r += p0.s[3] * p1.s[3];
+        return r;
+    }
+
+    // Upper limit reported for the first argument.
+    cl_float4 max1()
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+
+    // Upper limit reported for the second argument.
+    cl_float4 max2()
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+
+    // Lower limit reported for the first argument.
+    cl_float4 min1()
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+
+    // Lower limit reported for the second argument.
+    cl_float4 min2()
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+
+    // This function reports false here, so delta() rather than ulp() is the stated tolerance.
+    bool use_ulp()
+    {
+        return false;
+    }
+
+    // Tolerance: |expected| scaled by (4 multiplications + 3 additions) * CL_FLT_EPSILON.
+    // The magnitude of `expected` is used because a dot product can be negative and a
+    // tolerance must not be; the absolute value keeps the result non-negative whether
+    // or not the comparison helper normalises the sign itself.
+    cl_double delta(const cl_float4& p0, const cl_float4& p1, cl_float expected)
+    {
+        (void) p0; (void) p1;
+        return std::fabs(expected) * ((4.0f + (4.0f - 1.0f)) * CL_FLT_EPSILON);
+    }
+};
+
+// Host-side reference for: float distance(float4 p0, float4 p1);
+struct geometric_func_distance : public binary_func<cl_float4, cl_float4, cl_float>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "distance";
+    }
+
+    // Include line for <opencl_geometric>.
+    std::string headers()
+    {
+        return "#include <opencl_geometric>\n";
+    }
+
+    // Euclidean distance over all four components, accumulated in double
+    // precision and narrowed to cl_float on return.
+    cl_float operator()(const cl_float4& p0, const cl_float4& p1)
+    {
+        cl_double sum_of_squares = 0.0f;
+        for(size_t lane = 0; lane < 4; ++lane)
+        {
+            const cl_double diff =
+                static_cast<cl_double>(p0.s[lane]) - static_cast<cl_double>(p1.s[lane]);
+            sum_of_squares += diff * diff;
+        }
+        return static_cast<cl_float>(std::sqrt(sum_of_squares));
+    }
+
+    // Upper limit reported for the first argument.
+    cl_float4 max1()
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+
+    // Upper limit reported for the second argument.
+    cl_float4 max2()
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+
+    // Lower limit reported for the first argument.
+    cl_float4 min1()
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+
+    // Lower limit reported for the second argument.
+    cl_float4 min2()
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+
+    // ULP budget, summed from the three contributions below.
+    float ulp()
+    {
+        const float sqrt_error = 3.0f;             // error in sqrt
+        const float multiply_error = 1.5f * 4.0f;  // cumulative error for multiplications
+        const float add_error = 0.5f * 3.0f;       // cumulative error for additions
+        return sqrt_error + multiply_error + add_error;
+    }
+};
+
+// Host-side reference for: float length(float4 p);
+struct geometric_func_length : public unary_func<cl_float4,cl_float>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "length";
+    }
+
+    // Include line for <opencl_geometric>.
+    std::string headers()
+    {
+        return "#include <opencl_geometric>\n";
+    }
+
+    // Euclidean length over all four components, accumulated in double
+    // precision and narrowed to cl_float on return.
+    cl_float operator()(const cl_float4& p)
+    {
+        cl_double sum_of_squares = 0.0f;
+        for(size_t lane = 0; lane < 4; ++lane)
+        {
+            const cl_double component = static_cast<cl_double>(p.s[lane]);
+            sum_of_squares += component * component;
+        }
+        return static_cast<cl_float>(std::sqrt(sum_of_squares));
+    }
+
+    // Upper limit reported for the argument.
+    cl_float4 max1()
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+
+    // Lower limit reported for the argument.
+    cl_float4 min1()
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+
+    // ULP budget: sqrt error plus half of the accumulated multiply/add error.
+    float ulp()
+    {
+        const float sqrt_error = 3.0f;             // error in sqrt
+        const float multiply_error = 0.5f * 4.0f;  // cumulative error for multiplications
+        const float add_error = 0.5f * 3.0f;       // cumulative error for additions
+        // 0.5f: effect on e of taking sqrt( x + e )
+        return sqrt_error + 0.5f * (multiply_error + add_error);
+    }
+};
+
+// Host-side reference for: float4 normalize(float4 p);
+struct geometric_func_normalize : public unary_func<cl_float4,cl_float4>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "normalize";
+    }
+
+    // Include line for <opencl_geometric>.
+    std::string headers()
+    {
+        return "#include <opencl_geometric>\n";
+    }
+
+    // Reference normalize with explicit handling, in this order, of NaN inputs,
+    // infinite inputs and zero-length vectors before the regular division.
+    cl_float4 operator()(const cl_float4& p)
+    {
+        cl_float4 result;
+
+        // A NaN in any lane wins: every lane of the result is the first NaN found.
+        for(size_t lane = 0; lane < 4; ++lane)
+        {
+            if(!(std::isnan)(p.s[lane]))
+            {
+                continue;
+            }
+            for(size_t out = 0; out < 4; ++out)
+            {
+                result.s[out] = p.s[lane];
+            }
+            return result;
+        }
+
+        // If any lane is infinite, proceed as if the input had been replaced by
+        //     v[i] = isinf(v[i]) ? copysign(1.0, v[i]) : 0.0 * v[i];
+        // and normalize that replacement vector instead.
+        bool has_infinity = false;
+        for(size_t lane = 0; lane < 4; ++lane)
+        {
+            has_infinity = has_infinity || (std::isinf)(p.s[lane]);
+        }
+        if(has_infinity)
+        {
+            for(size_t lane = 0; lane < 4; ++lane)
+            {
+                result.s[lane] = (std::isinf)(p.s[lane]) ? (std::copysign)(1.0, p.s[lane]) : 0.0 * p.s[lane];
+            }
+            return (*this)(result);
+        }
+
+        // Squared length, accumulated in double precision.
+        cl_double length_squared = 0.0f;
+        for(size_t lane = 0; lane < 4; ++lane)
+        {
+            const cl_double component = static_cast<cl_double>(p.s[lane]);
+            length_squared += component * component;
+        }
+
+        // A zero-length vector yields an all-zero result.
+        if(length_squared == 0.0f)
+        {
+            for(size_t lane = 0; lane < 4; ++lane)
+            {
+                result.s[lane] = 0.0f;
+            }
+            return result;
+        }
+
+        const cl_double length = std::sqrt(length_squared);
+        for(size_t lane = 0; lane < 4; ++lane)
+        {
+            result.s[lane] = static_cast<cl_double>(p.s[lane]) / length;
+        }
+        return result;
+    }
+
+    // Upper limit reported for the argument.
+    cl_float4 max1()
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+
+    // Lower limit reported for the argument.
+    cl_float4 min1()
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+
+    // Extra inputs to exercise: all zeros, one infinity, all infinities,
+    // infinity mixed with NaN, and an ordinary finite vector.
+    std::vector<cl_float4> in_special_cases()
+    {
+        const float infinity = std::numeric_limits<float>::infinity();
+        const float quiet_nan = std::numeric_limits<float>::quiet_NaN();
+
+        return {
+            {0.0f, 0.0f, 0.0f, 0.0f},
+            {infinity, 0.0f, 0.0f, 0.0f},
+            {infinity, infinity, infinity, infinity},
+            {infinity, 1.0f, 0.0f, quiet_nan},
+            {-1.0f, -1.0f, 0.0f,-300.0f}
+        };
+    }
+
+    // ULP budget, summed from the three contributions below.
+    float ulp()
+    {
+        const float rsqrt_and_multiply_error = 2.5f; // error in rsqrt + error in multiply
+        const float multiply_error = 0.5f * 4.0f;    // cumulative error for multiplications
+        const float add_error = 0.5f * 3.0f;         // cumulative error for additions
+        return rsqrt_and_multiply_error + multiply_error + add_error;
+    }
+};
+
+// Runs the cross, dot, distance, length and normalize reference checks in turn.
+// The TEST_*_FUNC_MACRO expansions use the local names `error` and `last_error`
+// and the parameters `device`, `context`, `queue` and `n_elems`.
+AUTO_TEST_CASE(test_geometric_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    TEST_BINARY_FUNC_MACRO((geometric_func_cross(device)))  // float4 cross(float4 p0, float4 p1)
+    TEST_BINARY_FUNC_MACRO((geometric_func_dot()))          // float dot(float4 p0, float4 p1)
+    TEST_BINARY_FUNC_MACRO((geometric_func_distance()))     // float distance(float4 p0, float4 p1)
+    TEST_UNARY_FUNC_MACRO((geometric_func_length()))        // float length(float4 p)
+    TEST_UNARY_FUNC_MACRO((geometric_func_normalize()))     // float4 normalize(float4 p)
+
+    // Any accumulated failure is reported as -1.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP

diff --git a/test_conformance/clcpp/geometric_funcs/main.cpp b/test_conformance/clcpp/geometric_funcs/main.cpp
new file mode 100644
index 0000000..ee3a51b
--- /dev/null
+++ b/test_conformance/clcpp/geometric_funcs/main.cpp

@@ -0,0 +1,44 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <limits>
+
+#include "../common.hpp"
+
+#include "geometric_funcs.hpp"
+#include "fast_geometric_funcs.hpp"
+
+// Entry point: verifies that the host float types are IEEE 754 (IEC 559) and then
+// hands every registered auto-test to the test harness.
+int main(int argc, const char *argv[])
+{
+    // The tests need cl_float (float) and cl_double (double) to fulfill the
+    // requirements of the IEC 559 (IEEE 754) standard in order to run correctly.
+    const bool float_is_iec559 = std::numeric_limits<cl_float>::is_iec559;
+    const bool double_is_iec559 = std::numeric_limits<cl_double>::is_iec559;
+
+    if(!float_is_iec559)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
+            "Tests won't run correctly."
+        );
+    }
+    if(!double_is_iec559)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
+            "Tests won't run correctly."
+        );
+    }
+
+    // Test definitions collected in the global auto-test suite.
+    auto& registered_tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(
+        argc, argv,
+        registered_tests.size(), registered_tests.data(),
+        false, false, 0
+    );
+}

diff --git a/test_conformance/clcpp/images/CMakeLists.txt b/test_conformance/clcpp/images/CMakeLists.txt
new file mode 100644
index 0000000..3c92ecd
--- /dev/null
+++ b/test_conformance/clcpp/images/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Module name; it also forms the prefix of the sources variable set below.
+set(MODULE_NAME CPP_IMAGES)
+
+# Source list for this module, stored in CPP_IMAGES_SOURCES.
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+)
+
+# Shared build rules; presumably consumes MODULE_NAME and the sources list
+# (confirm in CMakeCommon.txt).
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/images/common.hpp b/test_conformance/clcpp/images/common.hpp
new file mode 100644
index 0000000..957d266
--- /dev/null
+++ b/test_conformance/clcpp/images/common.hpp

@@ -0,0 +1,195 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_COMMON_HPP
+#define TEST_CONFORMANCE_CLCPP_IMAGES_COMMON_HPP
+
+#include <type_traits>
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include "../harness/imageHelpers.h"
+
+
+namespace detail
+{
+
+// Traits describing one image channel data type:
+//  - channel_type:      host type of a single channel value stored in the image,
+//  - element_type:      host 4-component vector type used for pixels in the tests,
+//  - function_suffix(): suffix of the matching OpenCL C image built-in
+//                       ("i", "ui" or "f"),
+//  - channel_min()/channel_max(): bounds of the generated channel values.
+template<cl_channel_type channel_type>
+struct channel_info;
+
+// Common part of all integer channel traits: the generated values cover the
+// whole range of the channel's host type.
+template<class Channel, class Element>
+struct integer_channel_info
+{
+    using channel_type = Channel;
+    using element_type = Element;
+
+    // The parenthesised names cannot be expanded as function-like min/max macros.
+    channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); }
+    channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); }
+};
+
+// Signed integer channels.
+template<>
+struct channel_info<CL_SIGNED_INT8> : integer_channel_info<cl_char, cl_int4>
+{
+    static std::string function_suffix() { return "i"; }
+};
+
+template<>
+struct channel_info<CL_SIGNED_INT16> : integer_channel_info<cl_short, cl_int4>
+{
+    static std::string function_suffix() { return "i"; }
+};
+
+template<>
+struct channel_info<CL_SIGNED_INT32> : integer_channel_info<cl_int, cl_int4>
+{
+    static std::string function_suffix() { return "i"; }
+};
+
+// Unsigned integer channels.
+template<>
+struct channel_info<CL_UNSIGNED_INT8> : integer_channel_info<cl_uchar, cl_uint4>
+{
+    static std::string function_suffix() { return "ui"; }
+};
+
+template<>
+struct channel_info<CL_UNSIGNED_INT16> : integer_channel_info<cl_ushort, cl_uint4>
+{
+    static std::string function_suffix() { return "ui"; }
+};
+
+template<>
+struct channel_info<CL_UNSIGNED_INT32> : integer_channel_info<cl_uint, cl_uint4>
+{
+    static std::string function_suffix() { return "ui"; }
+};
+
+// Floating-point channel.
+template<>
+struct channel_info<CL_FLOAT>
+{
+    using channel_type = cl_float;
+    using element_type = cl_float4;
+    static std::string function_suffix() { return "f"; }
+
+    // NOTE(review): the bounds are asymmetric (-1e-3f vs +1e+3f); confirm
+    // whether the lower bound was meant to be -1e+3f.
+    channel_type channel_min() { return -1e-3f; }
+    channel_type channel_max() { return +1e+3f; }
+};
+
+// Traits describing one image object type:
+//  - image_type_name(): base name of the image type used in generated kernels,
+//  - coord_accessor():  vector components of an int4 coordinate that are
+//                       passed to the image built-in.
+template<cl_mem_object_type image_type>
+struct image_info;
+
+template<>
+struct image_info<CL_MEM_OBJECT_IMAGE1D>
+{
+    static std::string image_type_name() { return "image1d"; }
+    static std::string coord_accessor() { return "x"; }
+};
+
+template<>
+struct image_info<CL_MEM_OBJECT_IMAGE2D>
+{
+    static std::string image_type_name() { return "image2d"; }
+    static std::string coord_accessor() { return "xy"; }
+};
+
+template<>
+struct image_info<CL_MEM_OBJECT_IMAGE3D>
+{
+    static std::string image_type_name() { return "image3d"; }
+
+    // The OpenCL C development kernels take all four components for 3D images,
+    // the OpenCL C++ kernels only three.
+    static std::string coord_accessor()
+    {
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+        return "xyzw";
+#else
+        return "xyz";
+#endif
+    }
+};
+
+} // namespace detail
+
+// Base of the image test fixtures: combines the channel traits selected by
+// ChannelType with the image traits selected by ImageType.
+template<cl_mem_object_type ImageType, cl_channel_type ChannelType>
+struct image_test_base
+    : detail::channel_info<ChannelType>
+    , detail::image_info<ImageType>
+{
+};
+
+// Create image_descriptor (used by harness/imageHelpers functions) from an
+// OpenCL image description and image format.
+//
+// image_desc    Source of the dimensions, pitches, array size, backing memory
+//               object, image type and mip level count.
+// image_format  Stored in the result as a pointer (not copied), so it has to
+//               stay valid for as long as the returned descriptor is used.
+//
+// Declared inline because this header defines the function body: without it,
+// including the header from more than one translation unit would produce
+// multiple definitions at link time.
+inline image_descriptor create_image_descriptor(cl_image_desc &image_desc, cl_image_format *image_format)
+{
+    image_descriptor descriptor;
+    descriptor.width = image_desc.image_width;
+    descriptor.height = image_desc.image_height;
+    descriptor.depth = image_desc.image_depth;
+    descriptor.arraySize = image_desc.image_array_size;
+    descriptor.rowPitch = image_desc.image_row_pitch;
+    descriptor.slicePitch = image_desc.image_slice_pitch;
+    descriptor.format = image_format;
+    descriptor.buffer = image_desc.mem_object;
+    descriptor.type = image_desc.image_type;
+    descriptor.num_mip_levels = image_desc.num_mip_levels;
+    return descriptor;
+}
+
+// Returns the channel orders exercised by the image tests.
+//
+// device  Not consulted at the moment: the same fixed list is returned for
+//         every device.
+//
+// Declared inline because this header defines the function body: without it,
+// including the header from more than one translation unit would produce
+// multiple definitions at link time.
+inline const std::vector<cl_channel_order> get_channel_orders(cl_device_id device)
+{
+    (void)device; // intentionally unused, avoids an unused-parameter warning
+
+    // According to "Minimum List of Supported Image Formats" of OpenCL specification:
+    return { CL_R, CL_RG, CL_RGBA };
+}
+
+// Tells whether the image tests can run on the given device.
+//
+// Returns false (after logging a "SKIPPED" message) only when
+// checkForImageSupport() reports CL_IMAGE_FORMAT_NOT_SUPPORTED; every other
+// result of that call is treated as "supported".
+//
+// Declared inline because this header defines the function body: without it,
+// including the header from more than one translation unit would produce
+// multiple definitions at link time.
+inline bool is_test_supported(cl_device_id device)
+{
+    // Check for image support
+    if (checkForImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
+    {
+        log_info("SKIPPED: Device does not support images. Skipping test.\n");
+        return false;
+    }
+    return true;
+}
+
+// Component-wise equality of two vector values.
+// Returns true only if every one of the vector_size<type>::value components
+// of x compares equal (operator==) to the matching component of y.
+template<class type>
+inline bool are_equal(const type& x,
+                      const type& y)
+{
+    const size_t component_count = vector_size<type>::value;
+
+    // Advance past the leading run of equal components.
+    size_t index = 0;
+    while(index < component_count && x.s[index] == y.s[index])
+    {
+        ++index;
+    }
+
+    // Reaching the end means no component differed.
+    return index == component_count;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_IMAGES_COMMON_HPP

diff --git a/test_conformance/clcpp/images/main.cpp b/test_conformance/clcpp/images/main.cpp
new file mode 100644
index 0000000..8c41bb6
--- /dev/null
+++ b/test_conformance/clcpp/images/main.cpp

@@ -0,0 +1,30 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_read.hpp"
+#include "test_sample.hpp"
+#include "test_write.hpp"
+
+// FIXME: To use certain functions in test_common/harness/imageHelpers.h
+// (for example, generate_random_image_data()), the tests are required to declare
+// a specific global variable (a hangover from code specific to Apple's
+// implementation). No such declaration follows this comment in this file.
+
+// Entry point of the image test binary: hands every test case collected in
+// the global autotest suite to the common test harness.
+int main(int argc, const char *argv[])
+{
+    auto& registered_tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(
+        argc, argv,
+        registered_tests.size(), registered_tests.data(),
+        false, false, 0
+    );
+}

diff --git a/test_conformance/clcpp/images/test_read.hpp b/test_conformance/clcpp/images/test_read.hpp
new file mode 100644
index 0000000..3bc7b5e
--- /dev/null
+++ b/test_conformance/clcpp/images/test_read.hpp

@@ -0,0 +1,307 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_TEST_READ_HPP
+#define TEST_CONFORMANCE_CLCPP_IMAGES_TEST_READ_HPP
+
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "common.hpp"
+
+
+namespace test_images_read {
+
+// Image read test for one image type / channel data type combination.
+// A generated kernel reads one pixel per work-item at host-generated integer
+// coordinates; the host then compares each result with read_image_pixel().
+// The channel order is selected at run time through the constructor.
+template<cl_mem_object_type ImageType, cl_channel_type ChannelType>
+struct image_test : image_test_base<ImageType, ChannelType>
+{
+    // Channel order of the image created by run().
+    cl_channel_order channel_order;
+
+    image_test(cl_channel_order channel_order) :
+        channel_order(channel_order)
+    { }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    // Builds the OpenCL C kernel: output[gid] = read_image{i,ui,f}(img, coords[gid].<accessor>).
+    std::string generate_source()
+    {
+        std::stringstream s;
+        s << R"(
+        typedef )" << type_name<typename image_test::element_type>() << R"( element_type;
+
+        kernel void test(
+            read_only )" << image_test::image_type_name() << R"(_t img,
+            const global int4 *coords,
+            global element_type *output
+        ) {
+            const ulong gid = get_global_linear_id();
+
+            output[gid] = read_image)" << image_test::function_suffix() <<
+                "(img, coords[gid]." << image_test::coord_accessor() << R"();
+        }
+        )";
+
+        return s.str();
+    }
+#else
+    // Builds the OpenCL C++ kernel: output[gid] = img.read(coords[gid].<accessor>).
+    std::string generate_source()
+    {
+        std::stringstream s;
+        s << R"(
+        #include <opencl_memory>
+        #include <opencl_common>
+        #include <opencl_work_item>
+        #include <opencl_image>
+        using namespace cl;
+        )";
+
+        s << R"(
+        typedef )" << type_name<typename image_test::element_type>() <<  R"( element_type;
+
+        kernel void test(
+            const )" << image_test::image_type_name() << R"(<element_type, image_access::read> img,
+            const global_ptr<int4[]> coords,
+            global_ptr<element_type[]> output
+        ) {
+            const ulong gid = get_global_linear_id();
+
+            output[gid] = img.read(coords[gid].)" << image_test::coord_accessor() << R"();
+        }
+        )";
+
+        return s.str();
+    }
+#endif
+
+    // Builds the kernel, creates the image and buffers, runs the kernel over
+    // num_elements coordinates and verifies every pixel that was read.
+    //
+    // device        Device under test (not used directly by this function).
+    // context       Context in which all OpenCL objects are created.
+    // queue         Queue used to launch the kernel and read back the results.
+    // num_elements  Number of coordinates, i.e. the global work size.
+    //
+    // Returns CL_SUCCESS when all pixels match; otherwise the value handed to
+    // the failing RETURN_ON_* macro (-1 for a pixel mismatch).
+    int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    {
+        int error = CL_SUCCESS;
+
+        cl_program program = NULL;
+        cl_kernel kernel = NULL;
+
+        std::string kernel_name = "test";
+        std::string source = generate_source();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name
+        );
+        RETURN_ON_ERROR(error)
+        return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name, "-cl-std=CL2.0", false
+        );
+        RETURN_ON_ERROR(error)
+// Normal run
+#else
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name
+        );
+        RETURN_ON_ERROR(error)
+#endif
+
+        // Owns every OpenCL object used from here on and releases the ones
+        // that were created when run() is left, whichever return is taken.
+        // It is armed only after create_opencl_kernel() succeeded, so the
+        // failure paths above behave exactly as before.
+        struct cl_objects
+        {
+            cl_mem img = NULL;
+            cl_mem coords_buffer = NULL;
+            cl_mem output_buffer = NULL;
+            cl_kernel kernel = NULL;
+            cl_program program = NULL;
+
+            ~cl_objects()
+            {
+                if (img) clReleaseMemObject(img);
+                if (coords_buffer) clReleaseMemObject(coords_buffer);
+                if (output_buffer) clReleaseMemObject(output_buffer);
+                if (kernel) clReleaseKernel(kernel);
+                if (program) clReleaseProgram(program);
+            }
+        } objects;
+        objects.kernel = kernel;
+        objects.program = program;
+
+        using element_type = typename image_test::element_type;
+        using coord_type = cl_int4;
+        using scalar_element_type = typename scalar_type<element_type>::type;
+        using channel_type = typename image_test::channel_type;
+
+        cl_image_format image_format;
+        image_format.image_channel_order = channel_order;
+        image_format.image_channel_data_type = ChannelType;
+
+        const size_t pixel_size = get_pixel_size(&image_format);
+        const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order);
+
+        // Fixed image sizes per image type; unused dimensions are set to 1 so
+        // the element count and coordinate range computed below stay valid.
+        cl_image_desc image_desc;
+        image_desc.image_type = ImageType;
+        if (ImageType == CL_MEM_OBJECT_IMAGE1D)
+        {
+            image_desc.image_width = 2048;
+            image_desc.image_height = 1;
+            image_desc.image_depth = 1;
+        }
+        else if (ImageType == CL_MEM_OBJECT_IMAGE2D)
+        {
+            image_desc.image_width = 256;
+            image_desc.image_height = 256;
+            image_desc.image_depth = 1;
+        }
+        else if (ImageType == CL_MEM_OBJECT_IMAGE3D)
+        {
+            image_desc.image_width = 64;
+            image_desc.image_height = 64;
+            image_desc.image_depth = 64;
+        }
+        image_desc.image_array_size = 0;
+        image_desc.image_row_pitch = image_desc.image_width * pixel_size;
+        image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = NULL;
+
+        // Host-side view of the same image, used for the reference reads.
+        image_descriptor image_info = create_image_descriptor(image_desc, &image_format);
+
+        // Image contents: one generated value per channel of every pixel.
+        std::vector<channel_type> image_values = generate_input(
+            image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count,
+            image_test::channel_min(), image_test::channel_max(),
+            std::vector<channel_type>()
+        );
+
+        const size_t count = num_elements;
+
+        // Coordinates to read from, bounded by (0,0,0,0) and the last pixel.
+        std::vector<coord_type> coords = generate_input(
+            count,
+            detail::make_value<coord_type>(0),
+            coord_type {
+                static_cast<cl_int>(image_desc.image_width - 1),
+                static_cast<cl_int>(image_desc.image_height - 1),
+                static_cast<cl_int>(image_desc.image_depth - 1),
+                0
+            },
+            std::vector<coord_type>()
+        );
+
+        objects.img = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+            &image_format, &image_desc, static_cast<void *>(image_values.data()), &error);
+        RETURN_ON_CL_ERROR(error, "clCreateImage")
+
+        objects.coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+            sizeof(coord_type) * count, static_cast<void *>(coords.data()), &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+        // The kernel writes its results into this buffer, so it must not be
+        // created read-only; the host reads it back after the kernel ran.
+        objects.output_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(element_type) * count, NULL, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &objects.img);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 1, sizeof(objects.coords_buffer), &objects.coords_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 2, sizeof(objects.output_buffer), &objects.output_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+        // One work-item per coordinate.
+        const size_t global_size = count;
+        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
+        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+        // Blocking read of the results.
+        std::vector<element_type> output(count);
+        error = clEnqueueReadBuffer(
+            queue, objects.output_buffer, CL_TRUE,
+            0, sizeof(element_type) * count,
+            static_cast<void *>(output.data()),
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+        // Compare every pixel read by the kernel with the host reference.
+        for (size_t i = 0; i < count; i++)
+        {
+            const coord_type c = coords[i];
+            const element_type result = output[i];
+
+            element_type expected;
+            read_image_pixel<scalar_element_type>(static_cast<void *>(image_values.data()), &image_info,
+                c.s[0], c.s[1], c.s[2],
+                expected.s);
+
+            if (!are_equal(result, expected))
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "Reading from coordinates %s failed. Expected: %s, got: %s",
+                    format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str()
+                );
+            }
+        }
+
+        // All OpenCL objects are released by the destructor of `objects`.
+        return error;
+    }
+};
+
+// Runs the image read test for ImageType with every channel order returned by
+// get_channel_orders() and every tested channel data type.
+// Returns CL_SUCCESS without running anything when is_test_supported()
+// reports that the device cannot run the test; otherwise stops at the first
+// failure signalled through RETURN_ON_ERROR.
+template<cl_mem_object_type ImageType>
+int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    if (is_test_supported(device) == false)
+    {
+        return CL_SUCCESS;
+    }
+
+    int error = CL_SUCCESS;
+
+    const auto channel_orders = get_channel_orders(device);
+    for (const cl_channel_order order : channel_orders)
+    {
+        // Signed integer channel types.
+        image_test<ImageType, CL_SIGNED_INT8> int8_test(order);
+        error = int8_test.run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+
+        image_test<ImageType, CL_SIGNED_INT16> int16_test(order);
+        error = int16_test.run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+
+        image_test<ImageType, CL_SIGNED_INT32> int32_test(order);
+        error = int32_test.run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+
+        // Unsigned integer channel types.
+        image_test<ImageType, CL_UNSIGNED_INT8> uint8_test(order);
+        error = uint8_test.run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+
+        image_test<ImageType, CL_UNSIGNED_INT16> uint16_test(order);
+        error = uint16_test.run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+
+        image_test<ImageType, CL_UNSIGNED_INT32> uint32_test(order);
+        error = uint32_test.run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+
+        // Floating-point channel type.
+        image_test<ImageType, CL_FLOAT> float_test(order);
+        error = float_test.run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+    }
+
+    return error;
+}
+
+
+// Test case entry points declared through AUTO_TEST_CASE. Each one forwards
+// its arguments to run_test_cases() for a single image type.
+
+// Image read test on 1D images (CL_MEM_OBJECT_IMAGE1D).
+AUTO_TEST_CASE(test_images_read_1d)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return run_test_cases<CL_MEM_OBJECT_IMAGE1D>(device, context, queue, num_elements);
+}
+
+// Image read test on 2D images (CL_MEM_OBJECT_IMAGE2D).
+AUTO_TEST_CASE(test_images_read_2d)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return run_test_cases<CL_MEM_OBJECT_IMAGE2D>(device, context, queue, num_elements);
+}
+
+// Image read test on 3D images (CL_MEM_OBJECT_IMAGE3D).
+AUTO_TEST_CASE(test_images_read_3d)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return run_test_cases<CL_MEM_OBJECT_IMAGE3D>(device, context, queue, num_elements);
+}
+
+} // namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_IMAGES_TEST_READ_HPP

diff --git a/test_conformance/clcpp/images/test_sample.hpp b/test_conformance/clcpp/images/test_sample.hpp
new file mode 100644
index 0000000..a96a563
--- /dev/null
+++ b/test_conformance/clcpp/images/test_sample.hpp

@@ -0,0 +1,363 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_TEST_SAMPLE_HPP
+#define TEST_CONFORMANCE_CLCPP_IMAGES_TEST_SAMPLE_HPP
+
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "common.hpp"
+
+
+namespace test_images_sample {
+
+// Selects which sampler the generated kernel uses for sampling:
+//  - param:         the sampler passed to the kernel as an argument
+//                   ("sampler_param" in the generated source),
+//  - program_scope: a sampler declared at program scope in the generated
+//                   source ("sampler_program_scope").
+enum class sampler_source
+{
+    param,
+    program_scope
+};
+
+// All sampler sources; run_test_cases() runs every test once per entry.
+const sampler_source sampler_sources[] = { sampler_source::param, sampler_source::program_scope };
+
+// Image sampling test for one image type / channel data type combination.
+// A generated kernel samples one pixel per work-item at host-generated integer
+// coordinates (nearest filtering, unnormalized coordinates, no addressing
+// mode); the host then compares each result with read_image_pixel().
+// The channel order and the sampler source are selected at run time.
+template<cl_mem_object_type ImageType, cl_channel_type ChannelType>
+struct image_test : image_test_base<ImageType, ChannelType>
+{
+    // Channel order of the image created by run().
+    cl_channel_order channel_order;
+    // Which sampler the generated kernel uses (kernel argument or program scope).
+    sampler_source source;
+
+    image_test(cl_channel_order channel_order, sampler_source source) :
+        channel_order(channel_order),
+        source(source)
+    { }
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    // Builds the OpenCL C kernel:
+    // output[gid] = read_image{i,ui,f}(img, <sampler>, coords[gid].<accessor>).
+    // The kernel always takes a sampler argument, even when the program-scope
+    // sampler is the one that is used.
+    std::string generate_source()
+    {
+        std::stringstream s;
+        s << R"(
+        typedef )" << type_name<typename image_test::element_type>() << R"( element_type;
+        )";
+
+        std::string sampler;
+        if (source == sampler_source::program_scope)
+        {
+            s << R"(
+        constant sampler_t sampler_program_scope = CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE;
+            )";
+            sampler = "sampler_program_scope";
+        }
+        else if (source == sampler_source::param)
+        {
+            sampler = "sampler_param";
+        }
+
+        s << R"(
+        kernel void test(
+            read_only )" << image_test::image_type_name() << R"(_t img,
+            const global int4 *coords,
+            global element_type *output,
+            sampler_t sampler_param
+        ) {
+            const ulong gid = get_global_linear_id();
+
+            output[gid] = read_image)" << image_test::function_suffix() <<
+                "(img, " << sampler << ", coords[gid]." << image_test::coord_accessor() << R"();
+        }
+        )";
+
+        return s.str();
+    }
+#else
+    // Builds the OpenCL C++ kernel:
+    // output[gid] = img.sample(<sampler>, coords[gid].<accessor>).
+    // The kernel always takes a sampler argument, even when the program-scope
+    // sampler is the one that is used.
+    std::string generate_source()
+    {
+        std::stringstream s;
+        s << R"(
+        #include <opencl_memory>
+        #include <opencl_common>
+        #include <opencl_work_item>
+        #include <opencl_image>
+        using namespace cl;
+        )";
+
+        s << R"(
+        typedef )" << type_name<typename image_test::element_type>() <<  R"( element_type;
+        )";
+
+        std::string sampler;
+        if (source == sampler_source::program_scope)
+        {
+            s << R"(
+        sampler sampler_program_scope = make_sampler<addressing_mode::none, normalized_coordinates::unnormalized, filtering_mode::nearest>();
+            )";
+            sampler = "sampler_program_scope";
+        }
+        else if (source == sampler_source::param)
+        {
+            sampler = "sampler_param";
+        }
+
+        s << R"(
+        kernel void test(
+            const )" << image_test::image_type_name() << R"(<element_type, image_access::sample> img,
+            const global_ptr<int4[]> coords,
+            global_ptr<element_type[]> output,
+            sampler sampler_param
+        ) {
+            const ulong gid = get_global_linear_id();
+
+            output[gid] = img.sample()" << sampler << ", coords[gid]." << image_test::coord_accessor() << R"();
+        }
+        )";
+
+        return s.str();
+    }
+#endif
+
+    // Builds the kernel, creates the image, buffers and sampler, runs the
+    // kernel over num_elements coordinates and verifies every sampled pixel.
+    //
+    // device        Device under test (not used directly by this function).
+    // context       Context in which all OpenCL objects are created.
+    // queue         Queue used to launch the kernel and read back the results.
+    // num_elements  Number of coordinates, i.e. the global work size.
+    //
+    // Returns CL_SUCCESS when all pixels match; otherwise the value handed to
+    // the failing RETURN_ON_* macro (-1 for a pixel mismatch).
+    int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    {
+        int error = CL_SUCCESS;
+
+        cl_program program = NULL;
+        cl_kernel kernel = NULL;
+
+        std::string kernel_name = "test";
+        std::string source = generate_source();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name
+        );
+        RETURN_ON_ERROR(error)
+        return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name, "-cl-std=CL2.0", false
+        );
+        RETURN_ON_ERROR(error)
+// Normal run
+#else
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name
+        );
+        RETURN_ON_ERROR(error)
+#endif
+
+        // Owns every OpenCL object used from here on and releases the ones
+        // that were created when run() is left, whichever return is taken.
+        // It is armed only after create_opencl_kernel() succeeded, so the
+        // failure paths above behave exactly as before.
+        struct cl_objects
+        {
+            cl_mem img = NULL;
+            cl_mem coords_buffer = NULL;
+            cl_mem output_buffer = NULL;
+            cl_sampler sampler = NULL;
+            cl_kernel kernel = NULL;
+            cl_program program = NULL;
+
+            ~cl_objects()
+            {
+                if (img) clReleaseMemObject(img);
+                if (coords_buffer) clReleaseMemObject(coords_buffer);
+                if (output_buffer) clReleaseMemObject(output_buffer);
+                if (sampler) clReleaseSampler(sampler);
+                if (kernel) clReleaseKernel(kernel);
+                if (program) clReleaseProgram(program);
+            }
+        } objects;
+        objects.kernel = kernel;
+        objects.program = program;
+
+        using element_type = typename image_test::element_type;
+        using coord_type = cl_int4;
+        using scalar_element_type = typename scalar_type<element_type>::type;
+        using channel_type = typename image_test::channel_type;
+
+        cl_image_format image_format;
+        image_format.image_channel_order = channel_order;
+        image_format.image_channel_data_type = ChannelType;
+
+        const size_t pixel_size = get_pixel_size(&image_format);
+        const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order);
+
+        // Fixed image sizes per image type; unused dimensions are set to 1 so
+        // the element count and coordinate range computed below stay valid.
+        cl_image_desc image_desc;
+        image_desc.image_type = ImageType;
+        if (ImageType == CL_MEM_OBJECT_IMAGE1D)
+        {
+            image_desc.image_width = 2048;
+            image_desc.image_height = 1;
+            image_desc.image_depth = 1;
+        }
+        else if (ImageType == CL_MEM_OBJECT_IMAGE2D)
+        {
+            image_desc.image_width = 256;
+            image_desc.image_height = 256;
+            image_desc.image_depth = 1;
+        }
+        else if (ImageType == CL_MEM_OBJECT_IMAGE3D)
+        {
+            image_desc.image_width = 64;
+            image_desc.image_height = 64;
+            image_desc.image_depth = 64;
+        }
+        image_desc.image_array_size = 0;
+        image_desc.image_row_pitch = image_desc.image_width * pixel_size;
+        image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = NULL;
+
+        // Host-side view of the same image, used for the reference reads.
+        image_descriptor image_info = create_image_descriptor(image_desc, &image_format);
+
+        // Image contents: one generated value per channel of every pixel.
+        std::vector<channel_type> image_values = generate_input(
+            image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count,
+            image_test::channel_min(), image_test::channel_max(),
+            std::vector<channel_type>()
+        );
+
+        const size_t count = num_elements;
+
+        // Coordinates to sample at, bounded by (0,0,0,0) and the last pixel.
+        std::vector<coord_type> coords = generate_input(
+            count,
+            detail::make_value<coord_type>(0),
+            coord_type {
+                static_cast<cl_int>(image_desc.image_width - 1),
+                static_cast<cl_int>(image_desc.image_height - 1),
+                static_cast<cl_int>(image_desc.image_depth - 1),
+                0
+            },
+            std::vector<coord_type>()
+        );
+
+        objects.img = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+            &image_format, &image_desc, static_cast<void *>(image_values.data()), &error);
+        RETURN_ON_CL_ERROR(error, "clCreateImage")
+
+        objects.coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+            sizeof(coord_type) * count, static_cast<void *>(coords.data()), &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+        // The kernel writes its results into this buffer, so it must not be
+        // created read-only; the host reads it back after the kernel ran.
+        objects.output_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(element_type) * count, NULL, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+        // Host-created sampler with the same settings as the program-scope
+        // sampler in the generated source; it is always passed as argument 3.
+        const cl_sampler_properties sampler_properties[] = {
+            CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
+            CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE,
+            CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
+            0
+        };
+        objects.sampler = clCreateSamplerWithProperties(context, sampler_properties, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateSamplerWithProperties")
+
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &objects.img);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 1, sizeof(objects.coords_buffer), &objects.coords_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 2, sizeof(objects.output_buffer), &objects.output_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 3, sizeof(objects.sampler), &objects.sampler);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+        // One work-item per coordinate.
+        const size_t global_size = count;
+        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
+        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+        // Blocking read of the results.
+        std::vector<element_type> output(count);
+        error = clEnqueueReadBuffer(
+            queue, objects.output_buffer, CL_TRUE,
+            0, sizeof(element_type) * count,
+            static_cast<void *>(output.data()),
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+        // Compare every pixel sampled by the kernel with the host reference.
+        for (size_t i = 0; i < count; i++)
+        {
+            const coord_type c = coords[i];
+            const element_type result = output[i];
+
+            element_type expected;
+            read_image_pixel<scalar_element_type>(static_cast<void *>(image_values.data()), &image_info,
+                c.s[0], c.s[1], c.s[2],
+                expected.s);
+
+            if (!are_equal(result, expected))
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "Sampling from coordinates %s failed. Expected: %s, got: %s",
+                    format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str()
+                );
+            }
+        }
+
+        // All OpenCL objects are released by the destructor of `objects`.
+        return error;
+    }
+};
+
+// Runs the image sampling test for ImageType with every combination of
+// channel order (from get_channel_orders()), sampler source (from
+// sampler_sources) and tested channel data type.
+// Returns CL_SUCCESS without running anything when is_test_supported()
+// reports that the device cannot run the test; otherwise stops at the first
+// failure signalled through RETURN_ON_ERROR.
+template<cl_mem_object_type ImageType>
+int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    if (is_test_supported(device) == false)
+    {
+        return CL_SUCCESS;
+    }
+
+    int error = CL_SUCCESS;
+
+    const auto channel_orders = get_channel_orders(device);
+    for (const cl_channel_order order : channel_orders)
+    {
+        for (const sampler_source sampler_from : sampler_sources)
+        {
+            // Signed integer channel types.
+            image_test<ImageType, CL_SIGNED_INT8> int8_test(order, sampler_from);
+            error = int8_test.run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+
+            image_test<ImageType, CL_SIGNED_INT16> int16_test(order, sampler_from);
+            error = int16_test.run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+
+            image_test<ImageType, CL_SIGNED_INT32> int32_test(order, sampler_from);
+            error = int32_test.run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+
+            // Unsigned integer channel types.
+            image_test<ImageType, CL_UNSIGNED_INT8> uint8_test(order, sampler_from);
+            error = uint8_test.run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+
+            image_test<ImageType, CL_UNSIGNED_INT16> uint16_test(order, sampler_from);
+            error = uint16_test.run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+
+            image_test<ImageType, CL_UNSIGNED_INT32> uint32_test(order, sampler_from);
+            error = uint32_test.run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+
+            // Floating-point channel type.
+            image_test<ImageType, CL_FLOAT> float_test(order, sampler_from);
+            error = float_test.run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+        }
+    }
+
+    return error;
+}
+
+
+// Test case entry points declared through AUTO_TEST_CASE. Each one forwards
+// its arguments to run_test_cases() for a single image type.
+
+// Image sampling test on 1D images (CL_MEM_OBJECT_IMAGE1D).
+AUTO_TEST_CASE(test_images_sample_1d)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return run_test_cases<CL_MEM_OBJECT_IMAGE1D>(device, context, queue, num_elements);
+}
+
+// Image sampling test on 2D images (CL_MEM_OBJECT_IMAGE2D).
+AUTO_TEST_CASE(test_images_sample_2d)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return run_test_cases<CL_MEM_OBJECT_IMAGE2D>(device, context, queue, num_elements);
+}
+
+// Image sampling test on 3D images (CL_MEM_OBJECT_IMAGE3D).
+AUTO_TEST_CASE(test_images_sample_3d)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return run_test_cases<CL_MEM_OBJECT_IMAGE3D>(device, context, queue, num_elements);
+}
+
+} // namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_IMAGES_TEST_SAMPLE_HPP

diff --git a/test_conformance/clcpp/images/test_write.hpp b/test_conformance/clcpp/images/test_write.hpp
new file mode 100644
index 0000000..0f54487
--- /dev/null
+++ b/test_conformance/clcpp/images/test_write.hpp

@@ -0,0 +1,327 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_TEST_WRITE_HPP
+#define TEST_CONFORMANCE_CLCPP_IMAGES_TEST_WRITE_HPP
+
+#include <algorithm>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "common.hpp"
+
+
+namespace test_images_write {
+
+template<cl_mem_object_type ImageType, cl_channel_type ChannelType>
+struct image_test : image_test_base<ImageType, ChannelType>
+{
+    cl_channel_order channel_order;
+
+    image_test(cl_channel_order channel_order) : // stored here; run() copies it into the image format
+        channel_order(channel_order)
+    { }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    std::string generate_source() // OpenCL C variant: write_image<suffix>(img, coords[gid].<accessor>, input[gid])
+    {
+        std::stringstream s;
+        s << R"(
+        typedef )" << type_name<typename image_test::element_type>() << R"( element_type;
+
+        kernel void test(
+            write_only )" << image_test::image_type_name() << R"(_t img,
+            const global int4 *coords,
+            const global element_type *input
+        ) {
+            const ulong gid = get_global_linear_id();
+
+            write_image)" << image_test::function_suffix() <<
+                "(img, coords[gid]." << image_test::coord_accessor() << R"(, input[gid]);
+        }
+        )";
+
+        return s.str();
+    }
+#else
+    std::string generate_source() // OpenCL C++ variant: img.write(coords[gid].<accessor>, input[gid])
+    {
+        std::stringstream s;
+        s << R"(
+        #include <opencl_memory>
+        #include <opencl_common>
+        #include <opencl_work_item>
+        #include <opencl_image>
+        using namespace cl;
+        )";
+
+        s << R"(
+        typedef )" << type_name<typename image_test::element_type>() <<  R"( element_type;
+
+        kernel void test(
+            )" << image_test::image_type_name() << R"(<element_type, image_access::write> img,
+            const global_ptr<int4[]> coords,
+            const global_ptr<element_type[]> input
+        ) {
+            const ulong gid = get_global_linear_id();
+
+            img.write(coords[gid].)" << image_test::coord_accessor() << R"(, input[gid]);
+        }
+        )";
+
+        return s.str();
+    }
+#endif
+
+    int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    { // Writes num_elements texels with the test kernel, reads the image back and compares on the host.
+        int error = CL_SUCCESS;
+
+        cl_program program;
+        cl_kernel kernel;
+
+        std::string kernel_name = "test";
+        std::string source = generate_source();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name
+        );
+        RETURN_ON_ERROR(error)
+        return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name, "-cl-std=CL2.0", false
+        );
+        RETURN_ON_ERROR(error)
+// Normal run
+#else
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name
+        );
+        RETURN_ON_ERROR(error)
+#endif
+
+        using element_type = typename image_test::element_type;
+        using coord_type = cl_int4;
+        using scalar_element_type = typename scalar_type<element_type>::type;
+        using channel_type = typename image_test::channel_type;
+
+        cl_image_format image_format;
+        image_format.image_channel_order = channel_order;
+        image_format.image_channel_data_type = ChannelType;
+
+        const size_t pixel_size = get_pixel_size(&image_format);
+        const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order);
+
+        cl_image_desc image_desc; // fixed extents per image type: 2048 / 256x256 / 64x64x64
+        image_desc.image_type = ImageType;
+        if (ImageType == CL_MEM_OBJECT_IMAGE1D)
+        {
+            image_desc.image_width = 2048;
+            image_desc.image_height = 1;
+            image_desc.image_depth = 1;
+        }
+        else if (ImageType == CL_MEM_OBJECT_IMAGE2D)
+        {
+            image_desc.image_width = 256;
+            image_desc.image_height = 256;
+            image_desc.image_depth = 1;
+        }
+        else if (ImageType == CL_MEM_OBJECT_IMAGE3D)
+        {
+            image_desc.image_width = 64;
+            image_desc.image_height = 64;
+            image_desc.image_depth = 64;
+        }
+        image_desc.image_array_size = 0;
+        image_desc.image_row_pitch = image_desc.image_width * pixel_size;
+        image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = NULL;
+
+        image_descriptor image_info = create_image_descriptor(image_desc, &image_format);
+
+        std::vector<channel_type> random_image_values = generate_input(
+            image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count,
+            image_test::channel_min(), image_test::channel_max(),
+            std::vector<channel_type>()
+        );
+
+        const size_t count = num_elements;
+
+        std::vector<coord_type> coords = generate_input( // bounds passed: 0 .. extent - 1 per axis
+            count,
+            detail::make_value<coord_type>(0),
+            coord_type {
+                static_cast<cl_int>(image_desc.image_width - 1),
+                static_cast<cl_int>(image_desc.image_height - 1),
+                static_cast<cl_int>(image_desc.image_depth - 1),
+                0
+            },
+            std::vector<coord_type>()
+        );
+
+        std::vector<element_type> input(count);
+        for (size_t i = 0; i < count; i++)
+        {
+            const coord_type c = coords[i];
+
+            // Use read_image_pixel from harness/imageHelpers to fill input values
+            // (it will deal with correct channels, orders etc.)
+            read_image_pixel<scalar_element_type>(static_cast<void *>(random_image_values.data()), &image_info,
+                c.s[0], c.s[1], c.s[2],
+                input[i].s);
+        }
+
+        // image_row_pitch and image_slice_pitch must be 0, when clCreateImage is used with host_ptr = NULL
+        image_desc.image_row_pitch = 0;
+        image_desc.image_slice_pitch = 0;
+        cl_mem img = clCreateImage(context, CL_MEM_WRITE_ONLY,
+            &image_format, &image_desc, NULL, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateImage")
+
+        cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+            sizeof(coord_type) * count, static_cast<void *>(coords.data()), &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+        // The kernel only reads input[], so the buffer must be readable by kernels (was CL_MEM_WRITE_ONLY).
+        cl_mem input_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+            sizeof(element_type) * count, static_cast<void *>(input.data()), &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 2, sizeof(input_buffer), &input_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+        const size_t global_size = count; // one work-item per generated coordinate
+        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
+        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+        std::vector<channel_type> image_values(image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count);
+
+        const size_t origin[3] = { 0 };
+        const size_t region[3] = { image_desc.image_width, image_desc.image_height, image_desc.image_depth };
+        error = clEnqueueReadImage(
+            queue, img, CL_TRUE,
+            origin, region, 0, 0,
+            static_cast<void *>(image_values.data()),
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueReadImage")
+        // Only the texels addressed by coords[] are compared against the values that were written.
+        for (size_t i = 0; i < count; i++)
+        {
+            const coord_type c = coords[i];
+            const element_type expected = input[i];
+
+            element_type result;
+            read_image_pixel<scalar_element_type>(static_cast<void *>(image_values.data()), &image_info,
+                c.s[0], c.s[1], c.s[2],
+                result.s);
+
+            if (!are_equal(result, expected))
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "Writing to coordinates %s failed. Expected: %s, got: %s",
+                    format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str()
+                );
+            }
+        }
+        // NOTE(review): the RETURN_ON_* macros above presumably return before these releases run - confirm.
+        clReleaseMemObject(img);
+        clReleaseMemObject(coords_buffer);
+        clReleaseMemObject(input_buffer);
+        clReleaseKernel(kernel);
+        clReleaseProgram(program);
+        return error;
+    }
+};
+
+template<cl_mem_object_type ImageType>
+int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{ // Runs image_test for ImageType over every channel order and seven channel data types.
+    if (!is_test_supported(device))
+        return CL_SUCCESS; // unsupported devices report success without running anything
+
+    int error = CL_SUCCESS;
+    // RETURN_ON_ERROR presumably leaves this function on the first failing case - confirm against the macro.
+    for (auto channel_order : get_channel_orders(device))
+    { // signed integer channel types
+        error = image_test<ImageType, CL_SIGNED_INT8>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_SIGNED_INT16>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_SIGNED_INT32>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        // unsigned integer channel types
+        error = image_test<ImageType, CL_UNSIGNED_INT8>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_UNSIGNED_INT16>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_UNSIGNED_INT32>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        // floating-point channel type
+        error = image_test<ImageType, CL_FLOAT>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+    }
+
+    return error;
+}
+
+
+AUTO_TEST_CASE(test_images_write_1d) // forwards to run_test_cases for CL_MEM_OBJECT_IMAGE1D
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return run_test_cases<CL_MEM_OBJECT_IMAGE1D>(device, context, queue, num_elements);
+}
+
+AUTO_TEST_CASE(test_images_write_2d) // forwards to run_test_cases for CL_MEM_OBJECT_IMAGE2D
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return run_test_cases<CL_MEM_OBJECT_IMAGE2D>(device, context, queue, num_elements);
+}
+
+AUTO_TEST_CASE(test_images_write_3d) // forwards to run_test_cases for CL_MEM_OBJECT_IMAGE3D
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return run_test_cases<CL_MEM_OBJECT_IMAGE3D>(device, context, queue, num_elements);
+}
+
+} // namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_IMAGES_TEST_WRITE_HPP

diff --git a/test_conformance/clcpp/integer_funcs/24bit_funcs.hpp b/test_conformance/clcpp/integer_funcs/24bit_funcs.hpp
new file mode 100644
index 0000000..98da450
--- /dev/null
+++ b/test_conformance/clcpp/integer_funcs/24bit_funcs.hpp

@@ -0,0 +1,142 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_24BIT_HPP
+#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_24BIT_HPP
+
+#include "common.hpp"
+#include <type_traits>
+
+template<class IN1, class IN2, class IN3, class OUT1>
+struct int_func_mad24 : public ternary_func<IN1, IN2, IN3, OUT1> // host reference for mad24
+{
+    std::string str() // name of the function under test
+    {
+        return "mad24";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Expected result for one input triple: (x * y) + z, wrapping modulo 2^32.
+    OUT1 operator()(const IN1& x, const IN2& y, const IN3& z)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, IN3>::value
+                && std::is_same<IN3, OUT1>::value,
+            "All types must be the same"
+        );
+        static_assert(
+            std::is_same<cl_uint, IN1>::value || std::is_same<cl_int, IN1>::value,
+            "Function takes only signed/unsigned integers."
+        );
+        // cl_uint arithmetic wraps; (x * y) + z on cl_int can overflow, which is undefined behaviour.
+        return static_cast<OUT1>(static_cast<cl_uint>(x) * static_cast<cl_uint>(y) + static_cast<cl_uint>(z));
+    }
+
+    IN1 min1() // lower bound returned for x
+    {
+        return 0;
+    }
+
+    IN1 max1() // upper bound returned for x: type maximum masked to 0xFFFF
+    {
+        return (std::numeric_limits<IN1>::max)() & IN1(0x00FFFF);
+    }
+
+    IN2 min2() // lower bound returned for y
+    {
+        return 0;
+    }
+
+    IN2 max2() // upper bound returned for y: type maximum masked to 0xFFFF
+    {
+        return (std::numeric_limits<IN2>::max)() & IN2(0x00FFFF);
+    }
+};
+
+template<class IN1, class IN2, class OUT1>
+struct int_func_mul24 : public binary_func<IN1, IN2, OUT1> // host reference for mul24
+{
+    std::string str() // name of the function under test
+    {
+        return "mul24";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Expected result for one input pair: x * y, wrapping modulo 2^32.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "All types must be the same"
+        );
+        static_assert(
+            std::is_same<cl_uint, IN1>::value || std::is_same<cl_int, IN1>::value,
+            "Function takes only signed/unsigned integers."
+        );
+        // cl_uint arithmetic wraps; x * y on cl_int overflows (undefined) for operands near 0xFFFF.
+        return static_cast<OUT1>(static_cast<cl_uint>(x) * static_cast<cl_uint>(y));
+    }
+
+    IN1 min1() // lower bound returned for x
+    {
+        return 0;
+    }
+
+    IN1 max1() // upper bound returned for x: type maximum masked to 0xFFFF
+    {
+        return (std::numeric_limits<IN1>::max)() & IN1(0x00FFFF);
+    }
+
+    IN2 min2() // lower bound returned for y
+    {
+        return 0;
+    }
+
+    IN2 max2() // upper bound returned for y: type maximum masked to 0xFFFF
+    {
+        return (std::numeric_limits<IN2>::max)() & IN2(0x00FFFF);
+    }
+};
+
+AUTO_TEST_CASE(test_int_24bit_funcs) // mad24 and mul24 on cl_int and cl_uint
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{    
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // error / last_error are presumably updated by the TEST_*_FUNC_MACRO expansions - confirm against the macros.
+    // intn mad24(intn x, intn y, intn z);
+    // uintn mad24(uintn x, uintn y, uintn z);
+    TEST_TERNARY_FUNC_MACRO((int_func_mad24<cl_int, cl_int, cl_int, cl_int>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_mad24<cl_uint, cl_uint, cl_uint, cl_uint>()))
+
+    // intn mul24(intn x, intn y);
+    // uintn mul24(uintn x, uintn y);
+    TEST_BINARY_FUNC_MACRO((int_func_mul24<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_mul24<cl_uint, cl_uint, cl_uint>()))
+
+    if(error != CL_SUCCESS) // any non-success code is reported as -1
+    {
+        return -1;
+    }    
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_24BIT_HPP

diff --git a/test_conformance/clcpp/integer_funcs/CMakeLists.txt b/test_conformance/clcpp/integer_funcs/CMakeLists.txt
new file mode 100644
index 0000000..ba4cfe8
--- /dev/null
+++ b/test_conformance/clcpp/integer_funcs/CMakeLists.txt

@@ -0,0 +1,7 @@
+set(MODULE_NAME CPP_INTEGER_FUNCS)
+# main.cpp is the only source file listed; it includes the test headers directly.
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+)
+
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/integer_funcs/bitwise_funcs.hpp b/test_conformance/clcpp/integer_funcs/bitwise_funcs.hpp
new file mode 100644
index 0000000..13ca156
--- /dev/null
+++ b/test_conformance/clcpp/integer_funcs/bitwise_funcs.hpp

@@ -0,0 +1,232 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_BITWISE_HPP
+#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_BITWISE_HPP
+
+#include "common.hpp"
+#include <type_traits>
+
+template<class IN1, class OUT1>
+struct int_func_popcount : public unary_func<IN1, OUT1> // host reference for popcount
+{
+    std::string str() // name of the function under test
+    {
+        return "popcount";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns the number of set bits in x.
+    OUT1 operator()(IN1 x)
+    {
+        // Count on an unsigned copy: x - 1 overflows (undefined) for the minimum signed value.
+        typename std::make_unsigned<IN1>::type bits = static_cast<typename std::make_unsigned<IN1>::type>(x);
+        OUT1 count = 0;
+        for (; bits != 0; count++)
+            bits &= bits - 1; // clears the lowest set bit
+        return count;
+    }
+};
+
+template<class IN1, class OUT1>
+struct int_func_clz : public unary_func<IN1, OUT1> // host reference for clz
+{
+    std::string str() // name of the function under test
+    {
+        return "clz";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns the number of leading zero bits of x; the bit width of IN1 when x == 0.
+    OUT1 operator()(IN1 x)
+    {
+        // Work on an unsigned 64-bit copy for both signed and unsigned IN1:
+        // left-shifting a negative cl_long is undefined before C++20, and
+        // converting to cl_ulong keeps the low sizeof(IN1) * 8 bits intact.
+        const unsigned int bits = 8 * sizeof(x);
+        const cl_ulong top_bit = static_cast<cl_ulong>(1) << 63;
+        // Move the most significant bit of x up to bit 63; any sign-extension
+        // bits above it are shifted out.
+        cl_ulong value = static_cast<cl_ulong>(x);
+        value <<= 8 * sizeof(value) - bits;
+
+        // Count leading zeros, stopping after `bits` positions. Without the
+        // bound the scan never terminates for x == 0; with it clz(0) is the
+        // bit width of IN1.
+        OUT1 count = 0;
+        for(unsigned int i = 0; i < bits && 0 == (value & top_bit); i++)
+        {
+            value <<= 1;
+            count++;
+        }
+        return count;
+    }
+};
+
+template<class IN1, class OUT1>
+struct int_func_ctz : public unary_func<IN1, OUT1> // host reference for ctz
+{
+    std::string str() // name of the function under test
+    {
+        return "ctz";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns the number of trailing zero bits of x; the bit width of IN1 when x == 0.
+    OUT1 operator()(IN1 x)
+    {
+        if(x == 0)
+            return sizeof(x) * 8; // width in bits; the original returned the size in bytes
+        // x != 0 from here on, so the scan below always reaches a set bit.
+        OUT1 count = 0;
+        IN1 value = x;
+        for(count = 0; 0 == (value & 0x1); count++)
+        {
+            value >>= 1;
+        }
+        return count;
+    }
+};
+
+template<class IN1, class IN2, class OUT1>
+struct int_func_rotate : public binary_func<IN1, IN2, OUT1> // host reference for rotate
+{
+    std::string str() // name of the function under test
+    {
+        return "rotate";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Rotates value left by shift bits; bits pushed off the top re-enter at the bottom.
+    OUT1 operator()(IN1 value, IN2 shift)
+    {
+        static_assert(std::is_unsigned<IN1>::value, "Only unsigned integers are supported");
+        // Reduce the shift modulo the bit width; a zero shift returns the input unchanged.
+        shift &= sizeof(value)*8 - 1;
+        if (shift == 0)
+            return value;
+        const IN1 wrapped = value >> (sizeof(value)*8 - shift);
+        return (value << shift) | wrapped;
+    }
+
+    IN2 min2() // lower bound returned for the shift operand
+    {
+        return 0;
+    }
+
+    IN2 max2() // upper bound returned for the shift operand: the bit width of IN1
+    {
+        return sizeof(IN1) * 8;
+    }
+};
+
+template<class IN1, class IN2, class OUT1>
+struct int_func_upsample : public binary_func<IN1, IN2, OUT1> // host reference for upsample
+{
+    std::string str() // name of the function under test
+    {
+        return "upsample";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns hi placed in the upper half of OUT1 and lo in the lower half.
+    OUT1 operator()(IN1 hi, IN2 lo)
+    {
+        static_assert(sizeof(IN1) == sizeof(IN2), "sizeof(IN1) != sizeof(IN2)");
+        static_assert(
+            sizeof(OUT1) == 2 * sizeof(IN1),
+            "sizeof(OUT1) != 2 * sizeof(IN1)"
+        );
+        static_assert(
+            std::is_unsigned<IN2>::value,
+            "IN2 type must be unsigned"
+        );
+        // Shift in the unsigned counterpart of OUT1: hi may be negative, and
+        // left-shifting a negative signed value is undefined before C++20.
+        typedef typename std::make_unsigned<OUT1>::type UOUT1;
+        return static_cast<OUT1>((static_cast<UOUT1>(hi) << (8*sizeof(IN1))) | lo);
+    }
+
+    IN2 min2() // lower bound returned for lo
+    {
+        return 0;
+    }
+
+    IN2 max2() // NOTE(review): same bound as rotate's shift count; confirm lo should not span all of IN2
+    {
+        return sizeof(IN1) * 8;
+    }
+};
+
+AUTO_TEST_CASE(test_int_bitwise_funcs) // popcount, clz, ctz, rotate and upsample cases
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{    
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // error / last_error are presumably updated by the TEST_*_FUNC_MACRO expansions - confirm against the macros.
+    TEST_UNARY_FUNC_MACRO((int_func_popcount<cl_int, cl_int>()))
+    TEST_UNARY_FUNC_MACRO((int_func_popcount<cl_uint, cl_uint>()))
+    TEST_UNARY_FUNC_MACRO((int_func_popcount<cl_long, cl_long>()))
+    TEST_UNARY_FUNC_MACRO((int_func_popcount<cl_ulong, cl_ulong>()))
+
+    TEST_UNARY_FUNC_MACRO((int_func_clz<cl_int, cl_int>()))
+    TEST_UNARY_FUNC_MACRO((int_func_clz<cl_uint, cl_uint>()))
+    TEST_UNARY_FUNC_MACRO((int_func_clz<cl_long, cl_long>()))
+    TEST_UNARY_FUNC_MACRO((int_func_clz<cl_ulong, cl_ulong>()))
+
+    TEST_UNARY_FUNC_MACRO((int_func_ctz<cl_int, cl_int>()))
+    TEST_UNARY_FUNC_MACRO((int_func_ctz<cl_uint, cl_uint>()))
+    TEST_UNARY_FUNC_MACRO((int_func_ctz<cl_long, cl_long>()))
+    TEST_UNARY_FUNC_MACRO((int_func_ctz<cl_ulong, cl_ulong>()))
+    // rotate is instantiated for unsigned operands only
+    TEST_BINARY_FUNC_MACRO((int_func_rotate<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_rotate<cl_ulong, cl_ulong, cl_ulong>()))
+
+    // shortn upsample(charn hi, ucharn lo);
+    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_char, cl_uchar, cl_short>()))
+    // ushortn upsample(ucharn hi, ucharn lo);
+    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_uchar, cl_uchar, cl_ushort>()))
+    // intn upsample(shortn hi, ushortn lo);
+    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_short, cl_ushort, cl_int>()))
+    // uintn upsample(ushortn hi, ushortn lo);
+    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_ushort, cl_ushort, cl_uint>()))
+    // longn upsample(intn hi, uintn lo);
+    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_int, cl_uint, cl_long>()))
+    // ulongn upsample(uintn hi, uintn lo);
+    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_uint, cl_uint, cl_ulong>()))
+
+    if(error != CL_SUCCESS) // any non-success code is reported as -1
+    {
+        return -1;
+    }    
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_BITWISE_HPP

diff --git a/test_conformance/clcpp/integer_funcs/common.hpp b/test_conformance/clcpp/integer_funcs/common.hpp
new file mode 100644
index 0000000..f04811e
--- /dev/null
+++ b/test_conformance/clcpp/integer_funcs/common.hpp

@@ -0,0 +1,26 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_COMMON_HPP
+#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_COMMON_HPP
+
+#include <random>
+#include <limits>
+#include <algorithm>
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_COMMON_HPP

diff --git a/test_conformance/clcpp/integer_funcs/main.cpp b/test_conformance/clcpp/integer_funcs/main.cpp
new file mode 100644
index 0000000..ab2664a
--- /dev/null
+++ b/test_conformance/clcpp/integer_funcs/main.cpp

@@ -0,0 +1,26 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "bitwise_funcs.hpp"
+#include "numeric_funcs.hpp"
+#include "24bit_funcs.hpp"
+
+int main(int argc, const char *argv[])
+{   // Passes every test definition held by the global autotest suite to the harness.
+    auto& registered = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, registered.size(), registered.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/integer_funcs/numeric_funcs.hpp b/test_conformance/clcpp/integer_funcs/numeric_funcs.hpp
new file mode 100644
index 0000000..21d75c5
--- /dev/null
+++ b/test_conformance/clcpp/integer_funcs/numeric_funcs.hpp

@@ -0,0 +1,703 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_NUMERIC_HPP
+#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_NUMERIC_HPP
+
+#include "common.hpp"
+#include <type_traits>
+
+template<class IN1, class OUT1>
+struct int_func_abs : public unary_func<IN1, OUT1> // host reference for abs
+{
+    std::string str() // name of the function under test
+    {
+        return "abs";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns |x| as the unsigned result type.
+    OUT1 operator()(const IN1& x)
+    {
+        static_assert(std::is_unsigned<OUT1>::value, "OUT1 type must be unsigned");
+        // Negate after converting to the unsigned result type: -x overflows
+        // (undefined behaviour) when x is the minimum of a signed int/long,
+        // whereas unsigned negation wraps to the correct magnitude.
+        if(x < IN1(0))
+            return static_cast<OUT1>(OUT1(0) - static_cast<OUT1>(x));
+        return static_cast<OUT1>(x);
+    }
+};
+
+template<class IN1, class IN2, class OUT1>
+struct int_func_abs_diff : public binary_func<IN1, IN2, OUT1> // host reference for abs_diff
+{
+    std::string str() // name of the function under test
+    {
+        return "abs_diff";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns |x - y| as the unsigned result type.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(std::is_same<IN1, IN2>::value, "IN1 must be IN2");
+        static_assert(
+            std::is_unsigned<OUT1>::value,
+            "OUT1 type must be unsigned"
+        );
+        // Subtract in the unsigned result type. For signed int/long inputs the
+        // difference can exceed the signed range (e.g. max - min), which is
+        // undefined behaviour; the unsigned subtraction wraps to |x - y|.
+        if(x < y)
+            return static_cast<OUT1>(static_cast<OUT1>(y) - static_cast<OUT1>(x));
+        return static_cast<OUT1>(static_cast<OUT1>(x) - static_cast<OUT1>(y));
+    }
+};
+
+template<class IN1, class IN2, class OUT1>
+struct int_func_add_sat : public binary_func<IN1, IN2, OUT1> // host reference for add_sat
+{
+    std::string str() // name of the function under test
+    {
+        return "add_sat";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns x + y clamped to the range of OUT1.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value,
+            "IN1 must be IN2"
+        );
+        static_assert(
+            std::is_same<OUT1, IN2>::value,
+            "OUT1 must be IN2"
+        );
+        // sat unsigned integers
+        if(std::is_unsigned<OUT1>::value)
+        {
+            OUT1 z = x + y;
+            if(z < x || z < y)
+                return (std::numeric_limits<OUT1>::max)();
+            return z;
+        }
+        // sat signed integers: compare against the limits before adding, since a
+        // signed x + y that overflows is undefined behaviour.
+        const OUT1 hi = (std::numeric_limits<OUT1>::max)();
+        const OUT1 lo = (std::numeric_limits<OUT1>::min)();
+        if(y > 0)
+        {
+            if(x > hi - y)
+                return hi;
+        }
+        else if(x < lo - y)
+            return lo;
+        // The sum is representable from here on.
+        return x + y;
+    }
+};
+
+template<class IN1, class IN2, class OUT1>
+struct int_func_hadd : public binary_func<IN1, IN2, OUT1> // host reference for hadd
+{
+    std::string str() // name of the function under test
+    {
+        return "hadd";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns (x >> 1) + (y >> 1) + (x & y & 1).
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(std::is_same<IN1, IN2>::value, "IN1 must be IN2");
+        static_assert(std::is_same<OUT1, IN2>::value, "OUT1 must be IN2");
+
+        // Halve each operand first, then add back the carry that is lost only
+        // when both low bits are set.
+        const OUT1 half_x = x >> OUT1(1);
+        const OUT1 half_y = y >> OUT1(1);
+        const OUT1 carry = x & y & OUT1(1);
+        return half_x + half_y + carry;
+    }
+};
+
+template<class IN1, class IN2, class OUT1>
+struct int_func_rhadd : public binary_func<IN1, IN2, OUT1> // host reference for rhadd
+{
+    std::string str() // name of the function under test
+    {
+        return "rhadd";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns (x >> 1) + (y >> 1) + ((x | y) & 1).
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(std::is_same<IN1, IN2>::value, "IN1 must be IN2");
+        static_assert(std::is_same<OUT1, IN2>::value, "OUT1 must be IN2");
+
+        // Halve each operand first, then add one when either low bit is set,
+        // which is what distinguishes this from hadd's (x & y & 1) term.
+        const OUT1 half_x = x >> OUT1(1);
+        const OUT1 half_y = y >> OUT1(1);
+        const OUT1 round_up = (x | y) & OUT1(1);
+        return half_x + half_y + round_up;
+    }
+};
+
+// Host reference for clamp on scalar operands.
+template<class IN1, class IN2, class IN3, class OUT1, class Enable = void>
+struct int_func_clamp : public ternary_func<IN1, IN2, IN3, OUT1>
+{
+    std::string str() // name of the function under test
+    {
+        return "clamp";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns min(max(x, minval), maxval).
+    OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval)
+    {
+        static_assert(std::is_same<IN2, IN3>::value, "IN3 must be IN2");
+        static_assert(
+            std::is_same<OUT1, IN1>::value,
+            "OUT1 must be IN1"
+        );
+        // Raise x to the lower bound first, then cap the result at the upper
+        // bound.
+        const OUT1 raised = (std::max)(x, minval);
+        return (std::min)(raised, maxval);
+    }
+
+    IN2 min2() // minval is drawn from [type minimum, type maximum / 2] ...
+    {
+        return (std::numeric_limits<IN2>::min)();
+    }
+
+    IN2 max2()
+    {
+        return (std::numeric_limits<IN2>::max)() / IN2(2);
+    }
+
+    IN3 min3() // ... and maxval from [type maximum / 2 + 1, type maximum], so minval < maxval
+    {
+        return IN3(1) + ((std::numeric_limits<IN3>::max)() / IN3(2));
+    }
+
+    IN3 max3()
+    {
+        return (std::numeric_limits<IN3>::max)();
+    }
+};
+
+// gentype clamp(gentype x, scalar minval, scalar maxval);
+template<class IN1, class IN2, class IN3, class OUT1>
+struct int_func_clamp<IN1, IN2, IN3, OUT1, typename std::enable_if<is_vector_type<OUT1>::value>::type> : public ternary_func<IN1, IN2, IN3, OUT1>
+{
+    std::string str() // name of the function under test
+    {
+        return "clamp";
+    }
+
+    std::string headers() // include directive returned for this function
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns x with every component clamped to [minval, maxval].
+    OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval)
+    {
+        static_assert(std::is_same<IN2, IN3>::value, "IN3 must be IN2");
+        static_assert(
+            !is_vector_type<IN2>::value && !is_vector_type<IN3>::value,
+            "IN3 and IN2 must be scalar"
+        );
+        static_assert(std::is_same<OUT1, IN1>::value, "OUT1 must be IN1");
+
+        // Clamp each component of x against the same pair of scalar bounds:
+        // raise it to minval first, then cap it at maxval.
+        OUT1 clamped;
+        size_t lane = 0;
+        while(lane < vector_size<OUT1>::value)
+        {
+            const auto raised = (std::max)(x.s[lane], minval);
+            clamped.s[lane] = (std::min)(raised, maxval);
+            ++lane;
+        }
+        return clamped;
+    }
+
+    IN1 min1()
+    {
+        // Every component is set to the smallest value of the scalar type.
+        typedef typename scalar_type<IN1>::type SCALAR1;
+        const SCALAR1 lowest = (std::numeric_limits<SCALAR1>::min)();
+        IN1 bounds;
+        for(size_t lane = 0; lane < vector_size<IN1>::value; ++lane)
+            bounds.s[lane] = lowest;
+        return bounds;
+    }
+
+    IN1 max1()
+    {
+        // Every component is set to the largest value of the scalar type.
+        typedef typename scalar_type<IN1>::type SCALAR1;
+        const SCALAR1 highest = (std::numeric_limits<SCALAR1>::max)();
+        IN1 bounds;
+        for(size_t lane = 0; lane < vector_size<IN1>::value; ++lane)
+            bounds.s[lane] = highest;
+        return bounds;
+    }
+
+    IN2 min2() // minval is drawn from [type minimum, type maximum / 2] ...
+    {
+        return (std::numeric_limits<IN2>::min)();
+    }
+
+    IN2 max2()
+    {
+        return (std::numeric_limits<IN2>::max)() / IN2(2);
+    }
+
+    IN3 min3() // ... and maxval from [type maximum / 2 + 1, type maximum], so minval < maxval
+    {
+        return IN3(1) + ((std::numeric_limits<IN3>::max)() / IN3(2));
+    }
+
+    IN3 max3()
+    {
+        return (std::numeric_limits<IN3>::max)();
+    }
+};
+
+// Host-side reference functor for:
+//   gentype mul_hi(gentype x, gentype y);
+// Computes the full product in a 64-bit intermediate and returns its upper
+// half (the bits above the width of OUT1). Only scalar types narrower than
+// 64 bits are supported; 64-bit scalars are rejected at compile time.
+template<class IN1, class IN2, class OUT1>
+struct int_func_mul_hi : public binary_func<IN1, IN2, OUT1>
+{
+    // Returns the function name "mul_hi".
+    std::string str()
+    {
+        return "mul_hi";
+    }
+
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+
+    // Expected value: high sizeof(OUT1) * 8 bits of the exact product x * y.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, OUT1>::value,
+            "Types must be the same"
+        );
+        static_assert(
+            !std::is_same<IN1, cl_long>::value && !std::is_same<IN1, cl_ulong>::value,
+            "Operation unimplemented for 64-bit scalars"
+        );
+        // Widen to a 64-bit type of the SAME signedness as the operands.
+        // A cl_uint product can reach (2^32 - 1)^2, which does not fit in a
+        // signed cl_long; multiplying as cl_long would overflow (undefined
+        // behaviour), whereas cl_ulong holds every such product exactly.
+        typedef typename std::conditional<
+            std::is_unsigned<OUT1>::value, cl_ulong, cl_long
+        >::type WIDE;
+        const WIDE wide_x = static_cast<WIDE>(x);
+        const WIDE wide_y = static_cast<WIDE>(y);
+        return static_cast<OUT1>((wide_x * wide_y) >> (8 * sizeof(OUT1)));
+    }
+};
+
+// Host-side reference functor for:
+//   gentype mad_hi(gentype a, gentype b, gentype c);
+// Computed as mul_hi(x, y) + z with wrap-around on overflow.
+template<class IN1, class IN2, class IN3, class OUT1>
+struct int_func_mad_hi : public ternary_func<IN1, IN2, IN3, OUT1>
+{
+    // Returns the function name "mad_hi".
+    std::string str()
+    {
+        return "mad_hi";
+    }
+
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+
+    // Expected value: int_func_mul_hi(x, y) + z, reduced modulo 2^bits.
+    OUT1 operator()(const IN1& x, const IN2& y, const IN3& z)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, IN3>::value
+                && std::is_same<IN3, OUT1>::value,
+            "Types must be the same"
+        );
+        const OUT1 high_part = int_func_mul_hi<IN1, IN2, OUT1>()(x, y);
+        // Add in the unsigned counterpart of OUT1: for cl_int the signed sum
+        // high_part + z can overflow, which is undefined behaviour in C++,
+        // while unsigned arithmetic wraps and yields the same low bits.
+        typedef typename std::make_unsigned<OUT1>::type UOUT1;
+        const UOUT1 wrapped_sum = static_cast<UOUT1>(
+            static_cast<UOUT1>(high_part) + static_cast<UOUT1>(z)
+        );
+        return static_cast<OUT1>(wrapped_sum);
+    }
+};
+
+// Host-side reference functor for:
+//   gentype mad_sat(gentype a, gentype b, gentype c);
+// This test is implemented only for unsigned integers: the result is
+// x * y + z, saturated to the maximum value of OUT1.
+template<class IN1, class IN2, class IN3, class OUT1>
+struct int_func_mad_sat : public ternary_func<IN1, IN2, IN3, OUT1>
+{
+    // Returns the function name "mad_sat".
+    std::string str()
+    {
+        return "mad_sat";
+    }
+
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+
+    // Expected value: min(x * y + z, numeric maximum of OUT1).
+    OUT1 operator()(const IN1& x, const IN2& y, const IN3& z)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, IN3>::value
+                && std::is_same<IN3, OUT1>::value,
+            "Types must be the same"
+        );
+        static_assert(
+            std::is_unsigned<OUT1>::value,
+            "Test operation is not implemented for signed integers"
+        );
+        const OUT1 max_value = (std::numeric_limits<OUT1>::max)();
+
+        // Detect a saturating product BEFORE multiplying. For cl_ushort the
+        // operands are promoted to signed int, so computing x * y first could
+        // overflow int (undefined behaviour) for large inputs.
+        if(x != 0 && y > max_value / x)
+            return max_value;
+        // The product is now known to fit in OUT1.
+        const OUT1 product = static_cast<OUT1>(x * y);
+
+        // Same idea for the addition: saturate when z exceeds the headroom.
+        if(z > max_value - product)
+            return max_value;
+        return static_cast<OUT1>(product + z);
+    }
+};
+
+// Host-side reference functor for:
+//   gentype sub_sat(gentype x, gentype y);
+// Returns x - y, saturated to the numeric range of OUT1. Works for both
+// signed and unsigned integer types.
+template<class IN1, class IN2, class OUT1>
+struct int_func_sub_sat : public binary_func<IN1, IN2, OUT1>
+{
+    // Returns the function name "sub_sat".
+    std::string str()
+    {
+        return "sub_sat";
+    }
+
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+
+    // Expected value: x - y clamped to [numeric minimum, numeric maximum].
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "IN1, IN2 and OUT1 must be the same types"
+        );
+        const OUT1 lowest = (std::numeric_limits<OUT1>::min)();
+        const OUT1 highest = (std::numeric_limits<OUT1>::max)();
+
+        // The range checks are done BEFORE subtracting: evaluating x - y
+        // first and inspecting the result afterwards relies on signed
+        // overflow, which is undefined behaviour in C++.
+        if(y > 0)
+        {
+            // Subtracting a positive value can only underflow. For unsigned
+            // types lowest is 0, so this is simply "x < y".
+            if(x < lowest + y)
+                return lowest;
+        }
+        else if(y < 0)
+        {
+            // Subtracting a negative value can only overflow (signed types
+            // only; never taken for unsigned types).
+            if(x > highest + y)
+                return highest;
+        }
+        return static_cast<OUT1>(x - y);
+    }
+};
+
+// Host-side reference functor for the same-type overload:
+//   gentype max(gentype x, gentype y);
+template<class IN1, class IN2, class OUT1, class Enable = void>
+struct int_func_max : public binary_func<IN1, IN2, OUT1>
+{
+    // Returns the function name "max".
+    std::string str()
+    {
+        return "max";
+    }
+
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+
+    // Expected value: the larger of x and y (x when they compare equal).
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "IN1, IN2 and OUT1 must be the same types"
+        );
+        return (x < y) ? y : x;
+    }
+};
+
+// Host-side reference functor for the vector/scalar overload:
+//   gentype max(gentype x, scalar y);
+// Selected when OUT1 is a vector type.
+template<class IN1, class IN2, class OUT1>
+struct int_func_max<IN1, IN2, OUT1, typename std::enable_if<is_vector_type<OUT1>::value>::type> : public binary_func<IN1, IN2, OUT1>
+{
+    // Returns the function name "max".
+    std::string str()
+    {
+        return "max";
+    }
+
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+
+    // Vector whose components are all the scalar type's numeric minimum.
+    IN1 min1()
+    {
+        return filled_in1((std::numeric_limits<typename scalar_type<IN1>::type>::min)());
+    }
+
+    // Vector whose components are all the scalar type's numeric maximum.
+    IN1 max1()
+    {
+        return filled_in1((std::numeric_limits<typename scalar_type<IN1>::type>::max)());
+    }
+
+    // Expected value: each component of x replaced by y where y is larger.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, OUT1>::value,
+            "IN1 and OUT1 must be the same types"
+        );
+        static_assert(
+            !is_vector_type<IN2>::value,
+            "IN2 must be scalar"
+        );
+        static_assert(
+            std::is_same<typename scalar_type<OUT1>::type, IN2>::value,
+            "IN2 must match with OUT1 and IN1"
+        );
+        const size_t lanes = vector_size<IN1>::value;
+        IN1 picked = x;
+        for(size_t lane = 0; lane < lanes; ++lane)
+        {
+            if(picked.s[lane] < y)
+            {
+                picked.s[lane] = y;
+            }
+        }
+        return picked;
+    }
+
+private:
+    // Builds an IN1 whose every component equals value.
+    static IN1 filled_in1(const typename scalar_type<IN1>::type value)
+    {
+        IN1 filled;
+        for(size_t lane = 0; lane < vector_size<IN1>::value; ++lane)
+        {
+            filled.s[lane] = value;
+        }
+        return filled;
+    }
+};
+
+// Host-side reference functor for the same-type overload:
+//   gentype min(gentype x, gentype y);
+template<class IN1, class IN2, class OUT1, class Enable = void>
+struct int_func_min : public binary_func<IN1, IN2, OUT1>
+{
+    // Returns the function name "min".
+    std::string str()
+    {
+        return "min";
+    }
+
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+
+    // Expected value: the smaller of x and y (x when they compare equal).
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "IN1, IN2 and OUT1 must be the same types"
+        );
+        return (y < x) ? y : x;
+    }
+};
+
+// Host-side reference functor for the vector/scalar overload:
+//   gentype min(gentype x, scalar y);
+// Selected when OUT1 is a vector type.
+template<class IN1, class IN2, class OUT1>
+struct int_func_min<IN1, IN2, OUT1, typename std::enable_if<is_vector_type<OUT1>::value>::type> : public binary_func<IN1, IN2, OUT1>
+{
+    // Returns the function name "min".
+    std::string str()
+    {
+        return "min";
+    }
+
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+
+    // Vector whose components are all the scalar type's numeric minimum.
+    IN1 min1()
+    {
+        return filled_in1((std::numeric_limits<typename scalar_type<IN1>::type>::min)());
+    }
+
+    // Vector whose components are all the scalar type's numeric maximum.
+    IN1 max1()
+    {
+        return filled_in1((std::numeric_limits<typename scalar_type<IN1>::type>::max)());
+    }
+
+    // Expected value: each component of x replaced by y where y is smaller.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, OUT1>::value,
+            "IN1 and OUT1 must be the same types"
+        );
+        static_assert(
+            !is_vector_type<IN2>::value,
+            "IN2 must be scalar"
+        );
+        static_assert(
+            std::is_same<typename scalar_type<OUT1>::type, IN2>::value,
+            "IN2 must match with OUT1 and IN1"
+        );
+        const size_t lanes = vector_size<IN1>::value;
+        IN1 picked = x;
+        for(size_t lane = 0; lane < lanes; ++lane)
+        {
+            if(y < picked.s[lane])
+            {
+                picked.s[lane] = y;
+            }
+        }
+        return picked;
+    }
+
+private:
+    // Builds an IN1 whose every component equals value.
+    static IN1 filled_in1(const typename scalar_type<IN1>::type value)
+    {
+        IN1 filled;
+        for(size_t lane = 0; lane < vector_size<IN1>::value; ++lane)
+        {
+            filled.s[lane] = value;
+        }
+        return filled;
+    }
+};
+
+// Test case covering the integer numeric built-ins. Each TEST_*_FUNC_MACRO
+// line runs one reference functor defined in this file for one combination of
+// input/output types.
+// NOTE(review): the macros are defined elsewhere; they presumably use the
+// locals `error` / `last_error` and the function parameters by name, so these
+// names must not be changed - confirm against the macro definitions.
+// Returns -1 if `error` ended up different from CL_SUCCESS, otherwise `error`.
+AUTO_TEST_CASE(test_int_numeric_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{    
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // ugentype abs(gentype x);
+    TEST_UNARY_FUNC_MACRO((int_func_abs<cl_int, cl_uint>()))
+    TEST_UNARY_FUNC_MACRO((int_func_abs<cl_uint, cl_uint>()))
+    TEST_UNARY_FUNC_MACRO((int_func_abs<cl_long, cl_ulong>()))
+    TEST_UNARY_FUNC_MACRO((int_func_abs<cl_ulong, cl_ulong>()))
+
+    // ugentype abs_diff(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_abs_diff<cl_int, cl_int, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_abs_diff<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_abs_diff<cl_long, cl_long, cl_ulong>()))
+    TEST_BINARY_FUNC_MACRO((int_func_abs_diff<cl_ulong, cl_ulong, cl_ulong>()))
+
+    // gentype add_sat(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_add_sat<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_add_sat<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_add_sat<cl_long, cl_long, cl_long>()))
+    TEST_BINARY_FUNC_MACRO((int_func_add_sat<cl_ulong, cl_ulong, cl_ulong>()))
+
+    // gentype hadd(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_hadd<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_hadd<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_hadd<cl_long, cl_long, cl_long>()))
+    TEST_BINARY_FUNC_MACRO((int_func_hadd<cl_ulong, cl_ulong, cl_ulong>()))
+
+    // gentype rhadd(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_rhadd<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_rhadd<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_rhadd<cl_long, cl_long, cl_long>()))
+    TEST_BINARY_FUNC_MACRO((int_func_rhadd<cl_ulong, cl_ulong, cl_ulong>()))
+
+    // gentype clamp(gentype x, gentype minval, gentype maxval);
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_int, cl_int, cl_int, cl_int>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_uint, cl_uint, cl_uint, cl_uint>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_long, cl_long, cl_long, cl_long>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_ulong, cl_ulong, cl_ulong, cl_ulong>()))
+
+    // gentype clamp(gentype x, scalar minval, scalar maxval);
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_int2, cl_int, cl_int, cl_int2>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_uint4, cl_uint, cl_uint, cl_uint4>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_long8, cl_long, cl_long, cl_long8>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_ulong16, cl_ulong, cl_ulong, cl_ulong16>()))
+
+    // gentype mad_hi(gentype a, gentype b, gentype c);
+    // Only 16- and 32-bit types: the int_func_mul_hi reference that mad_hi
+    // builds on static_asserts against 64-bit scalars.
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_hi<cl_short, cl_short, cl_short, cl_short>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_hi<cl_ushort, cl_ushort, cl_ushort, cl_ushort>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_hi<cl_int, cl_int, cl_int, cl_int>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_hi<cl_uint, cl_uint, cl_uint, cl_uint>()))
+
+    // gentype mad_sat(gentype a, gentype b, gentype c);
+    // Only unsigned types: int_func_mad_sat static_asserts std::is_unsigned.
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_sat<cl_ushort, cl_ushort, cl_ushort, cl_ushort>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_sat<cl_uint, cl_uint, cl_uint, cl_uint>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_sat<cl_ulong, cl_ulong, cl_ulong, cl_ulong>()))
+
+    // gentype max(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_long, cl_long, cl_long>()))
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_ulong, cl_ulong, cl_ulong>()))
+
+    // gentype max(gentype x, scalar y);
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_int2, cl_int, cl_int2>()))
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_uint4, cl_uint, cl_uint4>()))
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_long8, cl_long, cl_long8>()))
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_ulong16, cl_ulong, cl_ulong16>()))
+
+    // gentype min(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_long, cl_long, cl_long>()))
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_ulong, cl_ulong, cl_ulong>()))
+
+    // gentype min(gentype x, scalar y);
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_int2, cl_int, cl_int2>()))
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_uint4, cl_uint, cl_uint4>()))
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_long8, cl_long, cl_long8>()))
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_ulong16, cl_ulong, cl_ulong16>()))
+
+    // gentype mul_hi(gentype x, gentype y);
+    // Only 16- and 32-bit types: int_func_mul_hi rejects 64-bit scalars.
+    TEST_BINARY_FUNC_MACRO((int_func_mul_hi<cl_short, cl_short, cl_short>()))
+    TEST_BINARY_FUNC_MACRO((int_func_mul_hi<cl_ushort, cl_ushort, cl_ushort>())) 
+    TEST_BINARY_FUNC_MACRO((int_func_mul_hi<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_mul_hi<cl_uint, cl_uint, cl_uint>()))
+
+    // gentype sub_sat(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_sub_sat<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_sub_sat<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_sub_sat<cl_long, cl_long, cl_long>()))
+    TEST_BINARY_FUNC_MACRO((int_func_sub_sat<cl_ulong, cl_ulong, cl_ulong>()))
+
+    // Collapse any failure code to -1 for the caller.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_NUMERIC_HPP

diff --git a/test_conformance/clcpp/math_funcs/CMakeLists.txt b/test_conformance/clcpp/math_funcs/CMakeLists.txt
new file mode 100644
index 0000000..c3b56c1
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Name of this test module; also used as the prefix of the source-list variable below.
+set(MODULE_NAME CPP_MATH_FUNCS)
+
+# Source files of the module (expands to CPP_MATH_FUNCS_SOURCES).
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+)
+
+# Shared build rules; presumably consumes MODULE_NAME and ${MODULE_NAME}_SOURCES
+# to define the target - confirm in CMakeCommon.txt.
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/math_funcs/common.hpp b/test_conformance/clcpp/math_funcs/common.hpp
new file mode 100644
index 0000000..3224905
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/common.hpp

@@ -0,0 +1,347 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMMON_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMMON_FUNCS_HPP
+
+#include <cmath>
+#include <limits>
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include "reference.hpp"
+
+// Builds the functor struct name by token-pasting: MATH_FUNCS_CLASS_NAME(x, y)
+// expands to x_func_y (e.g. comparison, fdim -> comparison_func_fdim).
+// Guarded by #ifndef so a definition made before this header takes precedence.
+#ifndef MATH_FUNCS_CLASS_NAME
+    #define MATH_FUNCS_CLASS_NAME(x, y) x ## _func_ ## y        
+#endif 
+
+// Defines a struct named MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) that derives
+// from unary_func<cl_float, cl_float> and describes one single-argument math
+// function.
+//   GROUP_NAME, NAME  - combined by MATH_FUNCS_CLASS_NAME into the struct name.
+//   OCL_FUNC          - stringized; returned by str().
+//   HOST_FUNC         - reference implementation; operator() calls it on the
+//                       input converted to cl_double and returns cl_double.
+//   USE_ULP           - value returned by use_ulp().
+//   ULP, ULP_EMBEDDED - value returned by ulp(); ULP_EMBEDDED is used when the
+//                       constructor was given is_embedded == true.
+//   DELTA             - delta() passes a vector built from DELTA together with
+//                       the expected value to detail::multiply (presumably a
+//                       component-wise product - confirm in detail::multiply).
+//   MIN1, MAX1        - values returned by min1() / max1().
+// in1_special_cases() returns a fixed list: +0, -0, +1, -1, +2, -2, +infinity,
+// -infinity and a quiet NaN.
+#define MATH_FUNCS_DEFINE_UNARY_FUNC1(GROUP_NAME, NAME, OCL_FUNC, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1) \
+struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public unary_func<cl_float, cl_float> \
+{ \
+    MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded)  \
+    { \
+    \
+    } \
+    \
+    std::string str() \
+    { \
+        return #OCL_FUNC; \
+    } \
+    \
+    std::string headers()  \
+    { \
+        return "#include <opencl_math>\n"; \
+    } \
+    /* Reference value type is cl_double */ \
+    cl_double operator()(const cl_float& x)  \
+    { \
+        return (HOST_FUNC)(static_cast<cl_double>(x)); \
+    } \
+    \
+    cl_float min1() \
+    { \
+        return MIN1; \
+    } \
+    \
+    cl_float max1() \
+    { \
+        return MAX1; \
+    } \
+    \
+    std::vector<cl_float> in1_special_cases() \
+    { \
+        return {  \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+            cl_float(1.0f), \
+            cl_float(-1.0f), \
+            cl_float(2.0f), \
+            cl_float(-2.0f), \
+            std::numeric_limits<cl_float>::infinity(), \
+            -std::numeric_limits<cl_float>::infinity(), \
+            std::numeric_limits<cl_float>::quiet_NaN() \
+        }; \
+    } \
+    \
+    bool use_ulp() \
+    { \
+        return USE_ULP; \
+    } \
+    \
+    template<class T> \
+    typename make_vector_type<cl_double, vector_size<T>::value>::type \
+    delta(const cl_float& in1, const T& expected) \
+    { \
+        typedef  \
+            typename make_vector_type<cl_double, vector_size<T>::value>::type \
+            delta_vector_type; \
+        (void) in1; \
+        auto e = detail::make_value<delta_vector_type>(DELTA); \
+        return detail::multiply<delta_vector_type>(e, expected); \
+    } \
+    \
+    float ulp() \
+    { \
+        if(m_is_embedded) \
+        { \
+            return ULP_EMBEDDED; \
+        } \
+        return ULP; \
+    } \
+private: \
+    bool m_is_embedded; \
+};
+
+// Defines a struct named MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) that derives
+// from binary_func<cl_float, cl_float, cl_float> and describes one
+// two-argument math function.
+//   GROUP_NAME, NAME  - combined by MATH_FUNCS_CLASS_NAME into the struct name.
+//   OCL_NAME          - stringized; returned by str().
+//   HOST_FUNC         - reference implementation; operator() calls it directly
+//                       on the two cl_float inputs and returns cl_float (no
+//                       widening to cl_double is performed here).
+//   USE_ULP           - value returned by use_ulp().
+//   ULP, ULP_EMBEDDED - value returned by ulp(); ULP_EMBEDDED is used when the
+//                       constructor was given is_embedded == true.
+//   DELTA             - delta() passes a vector built from DELTA together with
+//                       the expected value to detail::multiply (presumably a
+//                       component-wise product - confirm in detail::multiply).
+//   MIN1, MAX1        - values returned by min1() / max1().
+//   MIN2, MAX2        - values returned by min2() / max2().
+// in1_special_cases() and in2_special_cases() return the same fixed list:
+// +0, -0, +1, -1, +2, -2, +infinity, -infinity and a quiet NaN.
+#define MATH_FUNCS_DEFINE_BINARY_FUNC1(GROUP_NAME, NAME, OCL_NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2) \
+struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public binary_func<cl_float, cl_float, cl_float> \
+{ \
+    MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded)  \
+    { \
+    \
+    } \
+    \
+    std::string str() \
+    { \
+        return #OCL_NAME; \
+    } \
+    \
+    std::string headers()  \
+    { \
+        return "#include <opencl_math>\n"; \
+    } \
+    \
+    cl_float operator()(const cl_float& x, const cl_float& y)  \
+    { \
+        return (HOST_FUNC)(x, y); \
+    } \
+    \
+    cl_float min1() \
+    { \
+        return MIN1; \
+    } \
+    \
+    cl_float max1() \
+    { \
+        return MAX1; \
+    } \
+    \
+    cl_float min2() \
+    { \
+        return MIN2; \
+    } \
+    \
+    cl_float max2() \
+    { \
+        return MAX2; \
+    } \
+    \
+    std::vector<cl_float> in1_special_cases() \
+    { \
+        return {  \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+            cl_float(1.0f), \
+            cl_float(-1.0f), \
+            cl_float(2.0f), \
+            cl_float(-2.0f), \
+            std::numeric_limits<cl_float>::infinity(), \
+            -std::numeric_limits<cl_float>::infinity(), \
+            std::numeric_limits<cl_float>::quiet_NaN() \
+        }; \
+    } \
+    \
+    std::vector<cl_float> in2_special_cases() \
+    { \
+        return {  \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+            cl_float(1.0f), \
+            cl_float(-1.0f), \
+            cl_float(2.0f), \
+            cl_float(-2.0f), \
+            std::numeric_limits<cl_float>::infinity(), \
+            -std::numeric_limits<cl_float>::infinity(), \
+            std::numeric_limits<cl_float>::quiet_NaN() \
+        }; \
+    } \
+    \
+    template<class T> \
+    typename make_vector_type<cl_double, vector_size<T>::value>::type \
+    delta(const cl_float& in1, const cl_float& in2, const T& expected) \
+    { \
+        typedef \
+            typename make_vector_type<cl_double, vector_size<T>::value>::type \
+            delta_vector_type; \
+        (void) in1; \
+        (void) in2; \
+        auto e = detail::make_value<delta_vector_type>(DELTA); \
+        return detail::multiply<delta_vector_type>(e, expected); \
+    } \
+    \
+    bool use_ulp() \
+    { \
+        return USE_ULP; \
+    } \
+    \
+    float ulp() \
+    { \
+        if(m_is_embedded) \
+        { \
+            return ULP_EMBEDDED; \
+        } \
+        return ULP; \
+    } \
+private: \
+    bool m_is_embedded; \
+};
+
+// Defines a struct named MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) that derives
+// from ternary_func<cl_float, cl_float, cl_float, cl_float> and describes one
+// three-argument math function.
+//   GROUP_NAME, NAME  - combined by MATH_FUNCS_CLASS_NAME into the struct name.
+//   OCL_NAME          - stringized; returned by str().
+//   HOST_FUNC         - reference implementation; operator() calls it on the
+//                       three inputs converted to cl_double and returns
+//                       cl_double.
+//   USE_ULP           - value returned by use_ulp().
+//   ULP, ULP_EMBEDDED - value returned by ulp(); ULP_EMBEDDED is used when the
+//                       constructor was given is_embedded == true.
+//   DELTA             - delta() passes a vector built from DELTA together with
+//                       the expected value to detail::multiply (presumably a
+//                       component-wise product - confirm in detail::multiply).
+//   MIN1..MAX3        - values returned by min1()/max1(), min2()/max2() and
+//                       min3()/max3() respectively.
+// in1_/in2_/in3_special_cases() all return the same fixed list: +0, -0, +1,
+// -1, +2, -2, +infinity, -infinity and a quiet NaN.
+#define MATH_FUNCS_DEFINE_TERNARY_FUNC1(GROUP_NAME, NAME, OCL_NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2, MIN3, MAX3) \
+struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public ternary_func<cl_float, cl_float, cl_float, cl_float> \
+{ \
+    MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded)  \
+    { \
+    \
+    } \
+    \
+    std::string str() \
+    { \
+        return #OCL_NAME; \
+    } \
+    \
+    std::string headers() \
+    { \
+        return "#include <opencl_math>\n"; \
+    } \
+    \
+    cl_double operator()(const cl_float& x, const cl_float& y, const cl_float& z)  \
+    { \
+        return (HOST_FUNC)(static_cast<cl_double>(x), static_cast<cl_double>(y), static_cast<cl_double>(z)); \
+    } \
+    \
+    cl_float min1() \
+    { \
+        return MIN1; \
+    } \
+    \
+    cl_float max1() \
+    { \
+        return MAX1; \
+    } \
+    \
+    cl_float min2() \
+    { \
+        return MIN2; \
+    } \
+    \
+    cl_float max2() \
+    { \
+        return MAX2; \
+    } \
+    \
+    cl_float min3() \
+    { \
+        return MIN3; \
+    } \
+    \
+    cl_float max3() \
+    { \
+        return MAX3; \
+    } \
+    \
+    std::vector<cl_float> in1_special_cases() \
+    { \
+        return {  \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+            cl_float(1.0f), \
+            cl_float(-1.0f), \
+            cl_float(2.0f), \
+            cl_float(-2.0f), \
+            std::numeric_limits<cl_float>::infinity(), \
+            -std::numeric_limits<cl_float>::infinity(), \
+            std::numeric_limits<cl_float>::quiet_NaN() \
+        }; \
+    } \
+    \
+    std::vector<cl_float> in2_special_cases() \
+    { \
+        return {  \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+            cl_float(1.0f), \
+            cl_float(-1.0f), \
+            cl_float(2.0f), \
+            cl_float(-2.0f), \
+            std::numeric_limits<cl_float>::infinity(), \
+            -std::numeric_limits<cl_float>::infinity(), \
+            std::numeric_limits<cl_float>::quiet_NaN() \
+        }; \
+    } \
+    \
+    std::vector<cl_float> in3_special_cases() \
+    { \
+        return {  \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+            cl_float(1.0f), \
+            cl_float(-1.0f), \
+            cl_float(2.0f), \
+            cl_float(-2.0f), \
+            std::numeric_limits<cl_float>::infinity(), \
+            -std::numeric_limits<cl_float>::infinity(), \
+            std::numeric_limits<cl_float>::quiet_NaN() \
+        }; \
+    } \
+    \
+    template<class T> \
+    typename make_vector_type<cl_double, vector_size<T>::value>::type \
+    delta(const cl_float& in1, const cl_float& in2, const cl_float& in3, const T& expected) \
+    { \
+        typedef \
+            typename make_vector_type<cl_double, vector_size<T>::value>::type \
+            delta_vector_type; \
+        (void) in1; \
+        (void) in2; \
+        (void) in3; \
+        auto e = detail::make_value<delta_vector_type>(DELTA); \
+        return detail::multiply<delta_vector_type>(e, expected); \
+    } \
+    \
+    bool use_ulp() \
+    { \
+        return USE_ULP; \
+    } \
+    \
+    float ulp() \
+    { \
+        if(m_is_embedded) \
+        { \
+            return ULP_EMBEDDED; \
+        } \
+        return ULP; \
+    } \
+private: \
+    bool m_is_embedded; \
+};
+
+// Convenience forms of the *_FUNC1 macros for the common case where the
+// functor name and the stringized function name are the same: NAME is
+// forwarded twice, once as NAME and once as the OCL_FUNC / OCL_NAME argument.
+// All remaining arguments are forwarded unchanged.
+#define MATH_FUNCS_DEFINE_UNARY_FUNC(GROUP_NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1) \
+    MATH_FUNCS_DEFINE_UNARY_FUNC1(GROUP_NAME, NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1)
+#define MATH_FUNCS_DEFINE_BINARY_FUNC(GROUP_NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2) \
+    MATH_FUNCS_DEFINE_BINARY_FUNC1(GROUP_NAME, NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2)
+#define MATH_FUNCS_DEFINE_TERNARY_FUNC(GROUP_NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2, MIN3, MAX3) \
+    MATH_FUNCS_DEFINE_TERNARY_FUNC1(GROUP_NAME, NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2, MIN3, MAX3)
+
+#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMMON_FUNCS_HPP

diff --git a/test_conformance/clcpp/math_funcs/comparison_funcs.hpp b/test_conformance/clcpp/math_funcs/comparison_funcs.hpp
new file mode 100644
index 0000000..0bd6ff9
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/comparison_funcs.hpp

@@ -0,0 +1,59 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMPARISON_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMPARISON_FUNCS_HPP
+
+#include <type_traits>
+#include <cmath>
+
+#include "common.hpp"
+
+// Defines the functors comparison_func_fdim, _fmax, _fmin, _maxmag and _minmag.
+// Macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+// Every entry sets use_ulp to true with ulp 0.0f for both profiles, and both
+// inputs range over [-1000, 1000].
+MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fdim, std::fdim, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fmax, std::fmax, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fmin, std::fmin, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, maxmag, reference::maxmag, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, minmag, reference::minmag, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+
+// Test case for the comparison math functions (fdim, fmax, fmin, maxmag,
+// minmag). Returns early through RETURN_ON_CL_ERROR if the device profile
+// cannot be queried; otherwise returns -1 when `error` is not CL_SUCCESS.
+AUTO_TEST_CASE(test_comparison_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Ask the device for its profile string; the functors pick their ULP
+    // value depending on whether it is "EMBEDDED_PROFILE".
+    bool is_embedded_profile = false;
+    char profile[128];
+    last_error = clGetDeviceInfo(
+        device, CL_DEVICE_PROFILE, sizeof(profile), static_cast<void *>(profile), NULL
+    );
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    is_embedded_profile = (std::strcmp(profile, "EMBEDDED_PROFILE") == 0);
+
+    TEST_BINARY_FUNC_MACRO((comparison_func_fdim(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((comparison_func_fmax(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((comparison_func_fmin(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((comparison_func_maxmag(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((comparison_func_minmag(is_embedded_profile)))
+
+    // Collapse any failure code to -1 for the caller.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMPARISON_FUNCS_HPP

diff --git a/test_conformance/clcpp/math_funcs/exponential_funcs.hpp b/test_conformance/clcpp/math_funcs/exponential_funcs.hpp
new file mode 100644
index 0000000..82a8247
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/exponential_funcs.hpp

@@ -0,0 +1,139 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_EXP_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_EXP_FUNCS_HPP
+
+#include <type_traits>
+#include <cmath>
+
+#include "common.hpp"
+
+// Defines the functors exponential_func_exp, _expm1, _exp2 and _exp10.
+// Macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+// Every entry sets use_ulp to true with ulp 3.0f (4.0f for the embedded
+// profile), and the input ranges over [-1000, 1000].
+MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp, std::exp, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, expm1, std::expm1, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp2, std::exp2, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp10, reference::exp10, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f)
+
+// Host-side reference functor for ldexp(float x, int k). Written out by hand
+// because its second argument is cl_int rather than cl_float.
+struct exponential_func_ldexp : public binary_func<cl_float, cl_int, cl_float>
+{
+    // is_embedded selects which value ulp() reports.
+    exponential_func_ldexp(bool is_embedded) : m_is_embedded(is_embedded)
+    {
+    }
+
+    // Returns the function name "ldexp".
+    std::string str()
+    {
+        return "ldexp";
+    }
+
+    // Returns the #include line for <opencl_math>.
+    std::string headers()
+    {
+        return "#include <opencl_math>\n";
+    }
+
+    // Reference value, computed and returned in double precision.
+    cl_double operator()(const cl_float& x, const cl_int& y)
+    {
+        const cl_double widened = static_cast<cl_double>(x);
+        return (std::ldexp)(widened, y);
+    }
+
+    // First argument is bounded by [-1000, 1000].
+    cl_float min1()
+    {
+        return -1000.0f;
+    }
+
+    cl_float max1()
+    {
+        return 1000.0f;
+    }
+
+    // Second argument (the exponent) is bounded by [-8, 8].
+    cl_int min2()
+    {
+        return -8;
+    }
+
+    cl_int max2()
+    {
+        return 8;
+    }
+
+    // Fixed list of first-argument values: +/-0, +/-1, +/-2, +/-infinity, NaN.
+    std::vector<cl_float> in1_special_cases()
+    {
+        typedef std::numeric_limits<cl_float> float_limits;
+        const cl_float positive_inf = float_limits::infinity();
+        return {
+            cl_float(0.0f),
+            cl_float(-0.0f),
+            cl_float(1.0f),
+            cl_float(-1.0f),
+            cl_float(2.0f),
+            cl_float(-2.0f),
+            positive_inf,
+            -positive_inf,
+            float_limits::quiet_NaN()
+        };
+    }
+
+    bool use_ulp()
+    {
+        return true;
+    }
+
+    // 0.0f for both the embedded and the full profile.
+    float ulp()
+    {
+        return m_is_embedded ? 0.0f : 0.0f;
+    }
+private:
+    bool m_is_embedded;
+};
+
+// Test case for the exponential math functions (exp, expm1, exp2, exp10,
+// ldexp). Returns early through RETURN_ON_CL_ERROR if the device profile
+// cannot be queried; otherwise returns -1 when `error` is not CL_SUCCESS.
+AUTO_TEST_CASE(test_exponential_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Ask the device for its profile string; the functors pick their ULP
+    // value depending on whether it is "EMBEDDED_PROFILE".
+    bool is_embedded_profile = false;
+    char profile[128];
+    last_error = clGetDeviceInfo(
+        device, CL_DEVICE_PROFILE, sizeof(profile), static_cast<void *>(profile), NULL
+    );
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    is_embedded_profile = (std::strcmp(profile, "EMBEDDED_PROFILE") == 0);
+
+    // auto exp(gentype x);
+    TEST_UNARY_FUNC_MACRO((exponential_func_exp(is_embedded_profile)))
+    // auto expm1(gentype x);
+    TEST_UNARY_FUNC_MACRO((exponential_func_expm1(is_embedded_profile)))
+    // auto exp2(gentype x);
+    TEST_UNARY_FUNC_MACRO((exponential_func_exp2(is_embedded_profile)))
+    // auto exp10(gentype x);
+    TEST_UNARY_FUNC_MACRO((exponential_func_exp10(is_embedded_profile)))
+
+    // auto ldexp(gentype x, intn k);
+    TEST_BINARY_FUNC_MACRO((exponential_func_ldexp(is_embedded_profile)))
+
+    // Collapse any failure code to -1 for the caller.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_EXP_FUNCS_HPP

diff --git a/test_conformance/clcpp/math_funcs/floating_point_funcs.hpp b/test_conformance/clcpp/math_funcs/floating_point_funcs.hpp
new file mode 100644
index 0000000..63b4c23
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/floating_point_funcs.hpp

@@ -0,0 +1,733 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_FP_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_FP_FUNCS_HPP
+
+#include <cmath>
+#include <cstring>
+#include <type_traits>
+
+#include "common.hpp"
+
+// -------------- UNARY FUNCTIONS
+
+// gentype ceil(gentype x);
+// gentype floor(gentype x);
+// gentype rint(gentype x);
+// gentype round(gentype x);
+// gentype trunc(gentype x);
+// Test functors for the rounding functions. Each invocation is used further down in
+// test_fp_funcs() under the name fp_func_<func_name> (e.g. fp_func_ceil).
+// Column order of the macro arguments:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+// All five rows compare against the host <cmath> function with use_ulp = true and
+// ulp = ulp_for_embedded = 0.0f; min1/max1 are -1000/1000 (presumably the range of
+// generated inputs - confirm against the macro definition in common.hpp).
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, ceil, std::ceil, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, floor, std::floor, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, rint, std::rint, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, round, std::round, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, trunc, std::trunc, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+
+// floatn nan(uintn nancode);
+struct fp_func_nan : public unary_func<cl_uint, cl_float>
+{
+    // Test descriptor for nan(uintn nancode): input type cl_uint, output type cl_float.
+
+    // Name of the function under test.
+    std::string str()
+    {
+        return "nan";
+    }
+
+    // Include line placed in the generated kernel source.
+    std::string headers()
+    {
+        return "#include <opencl_math>\n";
+    }
+
+    // Host reference: ORs the nancode into the bit pattern 0x7fc00000 and
+    // returns those 32 bits reinterpreted as a cl_float.
+    cl_float operator()(const cl_uint& x)
+    {
+        static_assert(sizeof(cl_float) == sizeof(cl_uint),
+                      "cl_float and cl_uint must have the same size");
+        const cl_uint bits = x | 0x7fc00000U;
+        // Copy the bytes instead of dereferencing a reinterpret_cast'ed pointer:
+        // reading a cl_uint object through a cl_float lvalue violates the
+        // strict-aliasing rule and is undefined behaviour.
+        cl_float rf;
+        std::memcpy(&rf, &bits, sizeof(rf));
+        return rf;
+    }
+
+    // Lower bound for the nancode argument.
+    cl_uint min1()
+    {
+        return 0;
+    }
+
+    // Upper bound for the nancode argument.
+    cl_uint max1()
+    {
+        return 100;
+    }
+
+    // Nancodes that are always included: 0 and 1.
+    std::vector<cl_uint> in1_special_cases()
+    {
+        return {
+            0, 1
+        };
+    }
+};
+
+// -------------- UNARY FUNCTIONS, 2ND ARG IS POINTER
+
+// gentype fract(gentype x, gentype* iptr);
+//
+// Function fract() returns an additional value via a pointer (2nd argument). In order to
+// test that value as well, the output buffer type is cl_float2. In the first component we
+// store what fract() returns, and in the 2nd component we store what is returned via its
+// 2nd argument (gentype* iptr).
+struct fp_func_fract : public unary_func<cl_float, cl_float2>
+{
+    // Test descriptor for fract(): provides the function name, the kernel
+    // include line, the host reference value, the input values and the ULP
+    // tolerance. is_embedded records whether the device is EMBEDDED_PROFILE.
+    fp_func_fract(bool is_embedded) : m_is_embedded(is_embedded) {}
+
+    // Name of the function under test.
+    std::string str() { return "fract"; }
+
+    // Include line placed in the generated kernel source.
+    std::string headers() { return "#include <opencl_math>\n"; }
+
+    // Host reference: widens x to double and delegates to reference::fract().
+    cl_double2 operator()(const cl_float& x)
+    {
+        const cl_double wide_x = static_cast<cl_double>(x);
+        return reference::fract(wide_x);
+    }
+
+    // Bounds for the first argument.
+    cl_float min1() { return -1000.0f; }
+    cl_float max1() { return 1000.0f; }
+
+    // Hand-picked inputs: signed zeros, +/-1, +/-2, +/-infinity and a quiet NaN.
+    std::vector<cl_float> in1_special_cases()
+    {
+        const cl_float inf = std::numeric_limits<cl_float>::infinity();
+        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
+        return { 0.0f, -0.0f, 1.0f, -1.0f, 2.0f, -2.0f, inf, -inf, qnan };
+    }
+
+    // Results are compared by ULP distance.
+    bool use_ulp() { return true; }
+
+    // Allowed error in ULPs; both profiles currently use 0.
+    float ulp() { return m_is_embedded ? 0.0f : 0.0f; }
+
+private:
+    bool m_is_embedded;
+};
+
+// We need to specialize generate_kernel_unary<>() function template for fp_func_fract.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C variant of the fract() test kernel. `func` is not consulted here.
+template <>
+std::string generate_kernel_unary<fp_func_fract, cl_float, cl_float2>(fp_func_fract func)
+{
+    // result.x holds fract()'s return value, result.y the value written via iptr.
+    const char* const kernel_source =
+        "__kernel void test_fract(global float *input, global float2 *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    float itpr = 0;\n"
+        "    result.x = fract(input[gid], &itpr);\n"
+        "    result.y = itpr;\n"
+        "    output[gid] = result;\n"
+        "}\n";
+    return kernel_source;
+}
+#else
+// OpenCL C++ variant: func.defs() and func.headers() are emitted first,
+// followed by the fixed kernel text.
+template <>
+std::string generate_kernel_unary<fp_func_fract, cl_float, cl_float2>(fp_func_fract func)
+{
+    std::string kernel_source = func.defs();
+    kernel_source += func.headers();
+    // result.x holds fract()'s return value, result.y the value written via iptr.
+    kernel_source +=
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_fract(global_ptr<float[]> input, global_ptr<float2[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    float itpr = 0;\n"
+        "    result.x = fract(input[gid], &itpr);\n"
+        "    result.y = itpr;\n"
+        "    output[gid] = result;\n"
+        "}\n";
+    return kernel_source;
+}
+#endif
+
+// gentype modf(gentype x, gentype* iptr);
+//
+// Function modf() returns an additional value via a pointer (2nd argument). In order to
+// test that value as well, the output buffer type is cl_float2. In the first component we
+// store what modf() returns, and in the 2nd component we store what is returned via its
+// 2nd argument (gentype* iptr).
+struct fp_func_modf : public unary_func<cl_float, cl_float2>
+{
+    // Test descriptor for modf(): provides the function name, the kernel
+    // include line, the host reference value, the input values and the ULP
+    // tolerance. is_embedded records whether the device is EMBEDDED_PROFILE.
+    fp_func_modf(bool is_embedded) : m_is_embedded(is_embedded) {}
+
+    // Name of the function under test.
+    std::string str() { return "modf"; }
+
+    // Include line placed in the generated kernel source.
+    std::string headers() { return "#include <opencl_math>\n"; }
+
+    // Host reference via std::modf in double precision:
+    // s[0] = value returned by std::modf, s[1] = value stored through its pointer argument.
+    cl_double2 operator()(const cl_float& x)
+    {
+        cl_double2 result;
+        cl_double* const stored_part = &(result.s[1]);
+        const cl_double returned_part = (std::modf)(static_cast<cl_double>(x), stored_part);
+        result.s[0] = returned_part;
+        return result;
+    }
+
+    // Bounds for the first argument.
+    cl_float min1() { return -1000.0f; }
+    cl_float max1() { return 1000.0f; }
+
+    // Hand-picked inputs: signed zeros, +/-1, +/-2, +/-infinity and a quiet NaN.
+    std::vector<cl_float> in1_special_cases()
+    {
+        const cl_float inf = std::numeric_limits<cl_float>::infinity();
+        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
+        return { 0.0f, -0.0f, 1.0f, -1.0f, 2.0f, -2.0f, inf, -inf, qnan };
+    }
+
+    // Results are compared by ULP distance.
+    bool use_ulp() { return true; }
+
+    // Allowed error in ULPs; both profiles currently use 0.
+    float ulp() { return m_is_embedded ? 0.0f : 0.0f; }
+
+private:
+    bool m_is_embedded;
+};
+
+// We need to specialize generate_kernel_unary<>() function template for fp_func_modf.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C variant of the modf() test kernel. `func` is not consulted here.
+template <>
+std::string generate_kernel_unary<fp_func_modf, cl_float, cl_float2>(fp_func_modf func)
+{
+    // result.x holds modf()'s return value, result.y the value written via iptr.
+    const char* const kernel_source =
+        "__kernel void test_modf(global float *input, global float2 *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    float itpr = 0;\n"
+        "    result.x = modf(input[gid], &itpr);\n"
+        "    result.y = itpr;\n"
+        "    output[gid] = result;\n"
+        "}\n";
+    return kernel_source;
+}
+#else
+// OpenCL C++ variant: func.defs() and func.headers() are emitted first,
+// followed by the fixed kernel text.
+template <>
+std::string generate_kernel_unary<fp_func_modf, cl_float, cl_float2>(fp_func_modf func)
+{
+    std::string kernel_source = func.defs();
+    kernel_source += func.headers();
+    // result.x holds modf()'s return value, result.y the value written via iptr.
+    kernel_source +=
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_modf(global_ptr<float[]> input, global_ptr<float2[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    float itpr = 0;\n"
+        "    result.x = modf(input[gid], &itpr);\n"
+        "    result.y = itpr;\n"
+        "    output[gid] = result;\n"
+        "}\n";
+    return kernel_source;
+}
+#endif
+
+// gentype frexp(gentype x, intn* exp);
+//
+// Function frexp() returns an additional value via a pointer (2nd argument). In order to
+// test that value as well, the output buffer type is cl_float2. In the first component we
+// store what frexp() returns, and in the 2nd component we store what is returned via its
+// 2nd argument (intn* exp).
+struct fp_func_frexp : public unary_func<cl_float, cl_float2>
+{
+    // Test descriptor for frexp(): provides the function name, the kernel
+    // include line, the host reference value, the input values and the ULP
+    // tolerance. is_embedded records whether the device is EMBEDDED_PROFILE.
+    fp_func_frexp(bool is_embedded) : m_is_embedded(is_embedded) {}
+
+    // Name of the function under test.
+    std::string str() { return "frexp"; }
+
+    // Include line placed in the generated kernel source.
+    std::string headers() { return "#include <opencl_math>\n"; }
+
+    // Host reference via std::frexp in double precision:
+    // s[0] = value returned by std::frexp, s[1] = the exponent it stored,
+    // converted to floating point so both fit in one cl_double2.
+    cl_double2 operator()(const cl_float& x)
+    {
+        cl_int exponent;
+        const cl_double mantissa = (std::frexp)(static_cast<cl_double>(x), &exponent);
+        cl_double2 result;
+        result.s[0] = mantissa;
+        result.s[1] = static_cast<cl_float>(exponent);
+        return result;
+    }
+
+    // Bounds for the first argument.
+    cl_float min1() { return -1000.0f; }
+    cl_float max1() { return 1000.0f; }
+
+    // Hand-picked inputs: signed zeros, +/-1, +/-2, +/-infinity and a quiet NaN.
+    std::vector<cl_float> in1_special_cases()
+    {
+        const cl_float inf = std::numeric_limits<cl_float>::infinity();
+        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
+        return { 0.0f, -0.0f, 1.0f, -1.0f, 2.0f, -2.0f, inf, -inf, qnan };
+    }
+
+    // Results are compared by ULP distance.
+    bool use_ulp() { return true; }
+
+    // Allowed error in ULPs; both profiles currently use 0.
+    float ulp() { return m_is_embedded ? 0.0f : 0.0f; }
+
+private:
+    bool m_is_embedded;
+};
+
+// We need to specialize generate_kernel_unary<>() function template for fp_func_frexp.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C variant of the frexp() test kernel. `func` is not consulted here.
+template <>
+std::string generate_kernel_unary<fp_func_frexp, cl_float, cl_float2>(fp_func_frexp func)
+{
+    // result.x holds frexp()'s return value, result.y the int exponent
+    // (converted to float by the assignment inside the kernel).
+    const char* const kernel_source =
+        "__kernel void test_frexp(global float *input, global float2 *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    int itpr = 0;\n"
+        "    result.x = frexp(input[gid], &itpr);\n"
+        "    result.y = itpr;\n"
+        "    output[gid] = result;\n"
+        "}\n";
+    return kernel_source;
+}
+#else
+// OpenCL C++ variant: func.defs() and func.headers() are emitted first,
+// followed by the fixed kernel text.
+template <>
+std::string generate_kernel_unary<fp_func_frexp, cl_float, cl_float2>(fp_func_frexp func)
+{
+    std::string kernel_source = func.defs();
+    kernel_source += func.headers();
+    // result.x holds frexp()'s return value, result.y the int exponent
+    // (converted to float by the assignment inside the kernel).
+    kernel_source +=
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_frexp(global_ptr<float[]> input, global_ptr<float2[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    int itpr = 0;\n"
+        "    result.x = frexp(input[gid], &itpr);\n"
+        "    result.y = itpr;\n"
+        "    output[gid] = result;\n"
+        "}\n";
+    return kernel_source;
+}
+#endif
+
+// -------------- BINARY FUNCTIONS
+
+// gentype copysign(gentype x, gentype y);
+// gentype fmod(gentype x, gentype y);
+// gentype remainder(gentype x, gentype y);
+// Test functors for copysign, fmod and remainder; used in test_fp_funcs() under the
+// name fp_func_<func_name>. Column order of the macro arguments:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+// All three rows use ulp = ulp_for_embedded = 0.0f and max_delta = 0.001f. The remainder
+// row previously had ulp_for_embedded and max_delta transposed (0.001f, 0.0f), which
+// disagreed with the column order above and with the sibling rows.
+MATH_FUNCS_DEFINE_BINARY_FUNC(fp, copysign, std::copysign, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(fp, fmod, std::fmod, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(fp, remainder, std::remainder, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f)
+
+// In case of function float nextafter(float, float) reference function must
+// operate on floats and return float.
+struct fp_func_nextafter : public binary_func<cl_float, cl_float, cl_float>
+{
+    // Test descriptor for nextafter(): provides the function name, the kernel
+    // include line, the host reference value, the input values and the ULP
+    // tolerance. is_embedded records whether the device is EMBEDDED_PROFILE.
+    fp_func_nextafter(bool is_embedded) : m_is_embedded(is_embedded) {}
+
+    // Name of the function under test.
+    std::string str() { return "nextafter"; }
+
+    // Include line placed in the generated kernel source.
+    std::string headers() { return "#include <opencl_math>\n"; }
+
+    // Host reference. The computation is deliberately done on cl_float (not
+    // widened to double) and the result type MUST stay cl_float.
+    cl_float operator()(const cl_float& x, const cl_float& y)
+    {
+        const cl_float next_value = (std::nextafter)(x, y);
+        return next_value;
+    }
+
+    // Bounds for the first argument.
+    cl_float min1() { return -1000.0f; }
+    cl_float max1() { return 500.0f; }
+
+    // Bounds for the second argument.
+    cl_float min2() { return 501.0f; }
+    cl_float max2() { return 1000.0f; }
+
+    // Hand-picked first arguments: signed zeros, +/-1, +/-2, +/-infinity and a quiet NaN.
+    std::vector<cl_float> in1_special_cases()
+    {
+        const cl_float inf = std::numeric_limits<cl_float>::infinity();
+        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
+        return { 0.0f, -0.0f, 1.0f, -1.0f, 2.0f, -2.0f, inf, -inf, qnan };
+    }
+
+    // Hand-picked second arguments: the same list as for the first argument.
+    std::vector<cl_float> in2_special_cases()
+    {
+        const cl_float inf = std::numeric_limits<cl_float>::infinity();
+        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
+        return { 0.0f, -0.0f, 1.0f, -1.0f, 2.0f, -2.0f, inf, -inf, qnan };
+    }
+
+    // Results are compared by ULP distance.
+    bool use_ulp() { return true; }
+
+    // Allowed error in ULPs; both profiles currently use 0.
+    float ulp() { return m_is_embedded ? 0.0f : 0.0f; }
+
+private:
+    bool m_is_embedded;
+};
+
+// gentype remquo(gentype x, gentype y, intn* quo);
+struct fp_func_remquo : public binary_func<cl_float, cl_float, cl_float2>
+{
+    // Test descriptor for remquo(): provides the function name, the kernel
+    // include line, the host reference value, the input values and the ULP
+    // tolerance. is_embedded records whether the device is EMBEDDED_PROFILE.
+    fp_func_remquo(bool is_embedded) : m_is_embedded(is_embedded) {}
+
+    // Name of the function under test.
+    std::string str() { return "remquo"; }
+
+    // Include line placed in the generated kernel source.
+    std::string headers() { return "#include <opencl_math>\n"; }
+
+    // Host reference: widens both operands to double and delegates to
+    // reference::remquo().
+    cl_double2 operator()(const cl_float& x, const cl_float& y)
+    {
+        const cl_double wide_x = static_cast<cl_double>(x);
+        const cl_double wide_y = static_cast<cl_double>(y);
+        return reference::remquo(wide_x, wide_y);
+    }
+
+    // Bounds for the first argument.
+    cl_float min1() { return -1000.0f; }
+    cl_float max1() { return 1000.0f; }
+
+    // Bounds for the second argument.
+    cl_float min2() { return -1000.0f; }
+    cl_float max2() { return 1000.0f; }
+
+    // Hand-picked first arguments: signed zeros, +/-1, +/-infinity and a quiet NaN.
+    std::vector<cl_float> in1_special_cases()
+    {
+        const cl_float inf = std::numeric_limits<cl_float>::infinity();
+        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
+        return { 0.0f, -0.0f, 1.0f, -1.0f, inf, -inf, qnan };
+    }
+
+    // Hand-picked second arguments: the same list as for the first argument.
+    std::vector<cl_float> in2_special_cases()
+    {
+        const cl_float inf = std::numeric_limits<cl_float>::infinity();
+        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
+        return { 0.0f, -0.0f, 1.0f, -1.0f, inf, -inf, qnan };
+    }
+
+    // Results are compared by ULP distance.
+    bool use_ulp() { return true; }
+
+    // Allowed error in ULPs; both profiles currently use 0.
+    float ulp() { return m_is_embedded ? 0.0f : 0.0f; }
+
+private:
+    bool m_is_embedded;
+};
+
+
+// We need to specialize generate_kernel_binary<>() function template for fp_func_remquo.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C variant of the remquo() test kernel. `func` is not consulted here.
+template <>
+std::string generate_kernel_binary<fp_func_remquo, cl_float, cl_float, cl_float2>(fp_func_remquo func)
+{
+    // result.x holds remquo()'s return value, result.y the normalised quotient.
+    const char* const kernel_source =
+        "__kernel void test_remquo(global float *input1, global float *input2, global float2 *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    int quo = 0;\n"
+        "    int sign = 0;\n"
+        "    result.x = remquo(input1[gid], input2[gid], &quo);\n"
+        // The specification says:
+        // "remquo also calculates the lower seven bits of the integral quotient x/y,
+        // and gives that value the same sign as x/y. It stores this signed value in
+        // the object pointed to by quo."
+        // An implementation may store more than seven bits in quo, so the kernel
+        // keeps only the low seven bits of |quo| and then re-applies the sign.
+        "    sign = (quo < 0) ? -1 : 1;\n"
+        "    quo = (quo < 0) ? -quo : quo;\n"
+        "    quo &= 0x0000007f;\n"
+        "    result.y = (sign < 0) ? -quo : quo;\n"
+        "    output[gid] = result;\n"
+        "}\n";
+    return kernel_source;
+}
+#else
+// OpenCL C++ variant: func.defs() and func.headers() are emitted first,
+// followed by the fixed kernel text.
+template <>
+std::string generate_kernel_binary<fp_func_remquo, cl_float, cl_float, cl_float2>(fp_func_remquo func)
+{
+    std::string kernel_source = func.defs();
+    kernel_source += func.headers();
+    // result.x holds remquo()'s return value, result.y the normalised quotient.
+    kernel_source +=
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_remquo(global_ptr<float[]> input1, global_ptr<float[]> input2, global_ptr<float2[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    int quo = 0;\n"
+        "    int sign = 0;\n"
+        "    result.x = remquo(input1[gid], input2[gid], &quo);\n"
+        // The specification says:
+        // "remquo also calculates the lower seven bits of the integral quotient x/y,
+        // and gives that value the same sign as x/y. It stores this signed value in
+        // the object pointed to by quo."
+        // An implementation may store more than seven bits in quo, so the kernel
+        // keeps only the low seven bits of |quo| and then re-applies the sign.
+        "    sign = (quo < 0) ? -1 : 1;\n"
+        "    quo = (quo < 0) ? -quo : quo;\n"
+        "    quo &= 0x0000007f;\n"
+        "    result.y = (sign < 0) ? -quo : quo;\n"
+        "    output[gid] = result;\n"
+        "}\n";
+    return kernel_source;
+}
+#endif
+
+// -------------- TERNARY FUNCTIONS
+
+// gentype fma(gentype a, gentype b, gentype c);
+// Test functor for fma; used in test_fp_funcs() as fp_func_fma.
+// Column order of the macro arguments:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2, min3, max3
+// fma is checked against std::fma with use_ulp = true and ulp = ulp_for_embedded = 0.0f;
+// each of the three (min, max) pairs is (-1000, 1000).
+MATH_FUNCS_DEFINE_TERNARY_FUNC(fp, fma, std::fma, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+
+// floating point functions
+AUTO_TEST_CASE(test_fp_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Runs every floating-point-function sub-test in turn and returns `error`
+    // when it is still CL_SUCCESS, or -1 otherwise.
+    // NOTE(review): `error` and `last_error` are presumably read and written by
+    // the TEST_*_FUNC_MACRO / RETURN_ON_CL_ERROR macros (defined elsewhere), so
+    // their names must stay exactly as they are - confirm against common.hpp.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Ask the device for its profile string; the functors below take a flag
+    // telling them whether the device reports EMBEDDED_PROFILE.
+    char profile[128];
+    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    bool is_embedded_profile = (std::strcmp(profile, "EMBEDDED_PROFILE") == 0);
+
+    // --- Rounding functions: gentype f(gentype x) ---
+    // ceil
+    TEST_UNARY_FUNC_MACRO((fp_func_ceil(is_embedded_profile)))
+    // floor
+    TEST_UNARY_FUNC_MACRO((fp_func_floor(is_embedded_profile)))
+    // rint
+    TEST_UNARY_FUNC_MACRO((fp_func_rint(is_embedded_profile)))
+    // round
+    TEST_UNARY_FUNC_MACRO((fp_func_round(is_embedded_profile)))
+    // trunc
+    TEST_UNARY_FUNC_MACRO((fp_func_trunc(is_embedded_profile)))
+
+    // --- floatn nan(uintn nancode); this functor takes no profile flag ---
+    TEST_UNARY_FUNC_MACRO((fp_func_nan()))
+
+    // --- Unary functions that also return a value through a pointer ---
+    // gentype fract(gentype x, gentype* iptr);
+    TEST_UNARY_FUNC_MACRO((fp_func_fract(is_embedded_profile)))
+    // gentype modf(gentype x, gentype* iptr);
+    TEST_UNARY_FUNC_MACRO((fp_func_modf(is_embedded_profile)))
+    // gentype frexp(gentype x, intn* exp);
+    TEST_UNARY_FUNC_MACRO((fp_func_frexp(is_embedded_profile)))
+
+    // --- Binary functions: gentype f(gentype x, gentype y) ---
+    // remainder
+    TEST_BINARY_FUNC_MACRO((fp_func_remainder(is_embedded_profile)))
+    // copysign
+    TEST_BINARY_FUNC_MACRO((fp_func_copysign(is_embedded_profile)))
+    // fmod
+    TEST_BINARY_FUNC_MACRO((fp_func_fmod(is_embedded_profile)))
+    // nextafter
+    TEST_BINARY_FUNC_MACRO((fp_func_nextafter(is_embedded_profile)))
+
+    // --- gentype remquo(gentype x, gentype y, intn* quo); ---
+    TEST_BINARY_FUNC_MACRO((fp_func_remquo(is_embedded_profile)))
+
+    // --- gentype fma(gentype a, gentype b, gentype c); ---
+    TEST_TERNARY_FUNC_MACRO((fp_func_fma(is_embedded_profile)))
+
+    // Collapse any non-success code into -1.
+    return (error == CL_SUCCESS) ? error : -1;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_FP_FUNCS_HPP

diff --git a/test_conformance/clcpp/math_funcs/half_math_funcs.hpp b/test_conformance/clcpp/math_funcs/half_math_funcs.hpp
new file mode 100644
index 0000000..d72d717
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/half_math_funcs.hpp

@@ -0,0 +1,106 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_HALF_MATH_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_HALF_MATH_FUNCS_HPP
+
+#include <type_traits>
+#include <cmath>
+
+#include "common.hpp"
+
+// Test functors for the half_math functions. The two branches differ only in the third
+// macro argument: half_<name> in the DEVELOPMENT + USE_OPENCLC_KERNELS build and
+// half_math::<name> otherwise (presumably the spelling of the call emitted into the
+// kernel source - confirm against the FUNC1 macro definitions in common.hpp).
+// Every row uses use_ulp = true with ulp = ulp_for_embedded = 8192.0f.
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)  
+// group_name, func_name, <third argument, see above>, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, cos, half_cos, std::cos, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sin, half_sin, std::sin, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, tan, half_tan, std::tan, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
+
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp, half_exp, std::exp, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp2, half_exp2, std::exp2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp10, half_exp10, reference::exp10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log, half_log, std::log, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log2, half_log2, std::log2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log10, half_log10, std::log10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, rsqrt, half_rsqrt, reference::rsqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sqrt, half_sqrt, std::sqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, recip, half_recip, reference::recip, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+
+// group_name, func_name, <third argument, see above>, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, divide, half_divide, reference::divide, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, powr, half_powr, reference::powr, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f)
+#else
+// group_name, func_name, <third argument, see above>, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, cos, half_math::cos, std::cos, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sin, half_math::sin, std::sin, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, tan, half_math::tan, std::tan, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
+
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp, half_math::exp, std::exp, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp2, half_math::exp2, std::exp2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp10, half_math::exp10, reference::exp10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log, half_math::log, std::log, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log2, half_math::log2, std::log2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log10, half_math::log10, std::log10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, rsqrt, half_math::rsqrt, reference::rsqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sqrt, half_math::sqrt, std::sqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, recip, half_math::recip, reference::recip, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+
+// group_name, func_name, <third argument, see above>, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, divide, half_math::divide, reference::divide, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, powr, half_math::powr, reference::powr, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f)
+#endif
+
+// half_math functions
+AUTO_TEST_CASE(test_half_math_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Runs the half_math sub-tests in turn and returns `error` when it is
+    // still CL_SUCCESS, or -1 otherwise.
+    // NOTE(review): `error` and `last_error` are presumably read and written by
+    // the TEST_*_FUNC_MACRO / RETURN_ON_CL_ERROR macros (defined elsewhere), so
+    // their names must stay exactly as they are - confirm against common.hpp.
+    // NOTE(review): functors for rsqrt, sqrt, recip and powr are defined in
+    // this header but are not run here - confirm whether that is intentional.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Ask the device for its profile string; the functors below take a flag
+    // telling them whether the device reports EMBEDDED_PROFILE.
+    char profile[128];
+    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    bool is_embedded_profile = (std::strcmp(profile, "EMBEDDED_PROFILE") == 0);
+
+    // Trigonometric: cos, sin, tan
+    TEST_UNARY_FUNC_MACRO((half_math_func_cos(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_sin(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_tan(is_embedded_profile)))
+
+    // Exponential: exp, exp2, exp10
+    TEST_UNARY_FUNC_MACRO((half_math_func_exp(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_exp2(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_exp10(is_embedded_profile)))
+
+    // Logarithmic: log, log2, log10
+    TEST_UNARY_FUNC_MACRO((half_math_func_log(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_log2(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_log10(is_embedded_profile)))
+
+    // Binary: divide
+    TEST_BINARY_FUNC_MACRO((half_math_func_divide(is_embedded_profile)))
+
+    // Collapse any non-success code into -1.
+    return (error == CL_SUCCESS) ? error : -1;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_HALF_MATH_FUNCS_HPP

diff --git a/test_conformance/clcpp/math_funcs/logarithmic_funcs.hpp b/test_conformance/clcpp/math_funcs/logarithmic_funcs.hpp
new file mode 100644
index 0000000..cd25d8e
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/logarithmic_funcs.hpp

@@ -0,0 +1,260 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_LOG_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_LOG_FUNCS_HPP
+
+#include <type_traits>
+#include <cmath>
+
+#include "common.hpp"
+
+namespace detail
+{
+
+// This function reads values of FP_ILOGB0 and FP_ILOGBNAN macros defined on the device.
+// OpenCL C++ Spec:
+// The value of FP_ILOGB0 shall be either {INT_MIN} or {INT_MAX}. The value of FP_ILOGBNAN
+// shall be either {INT_MAX} or {INT_MIN}.
+//
+// A one-work-item kernel stores the two macros into a two-element int buffer,
+// which is then read back with a blocking read.
+//
+// Outputs: ilogb_nan receives FP_ILOGBNAN and ilogb_zero receives FP_ILOGB0.
+// Returns the last OpenCL status code (CL_SUCCESS when every step succeeded).
+//
+// NOTE(review): if the RETURN_ON_* macros return early (as their names
+// suggest), the buffer, kernel and program created before the failing call
+// are not released on that path - confirm and consider RAII wrappers.
+int get_ilogb_nan_zero(cl_device_id device, cl_context context, cl_command_queue queue, cl_int& ilogb_nan, cl_int& ilogb_zero)
+{
+    cl_mem buffers[1];
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+
+    // The kernel source passed to create_opencl_kernel with -cl-std=CL2.0;
+    // out[0] and out[1] match the order decoded after the read-back below.
+    std::string code_str =
+        "__kernel void get_ilogb_nan_zero(__global int *out)\n"
+        "{\n"
+        "   out[0] = FP_ILOGB0;\n"
+        "   out[1] = FP_ILOGBNAN;\n"
+        "}\n";
+    std::string kernel_name("get_ilogb_nan_zero");
+
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+
+    // Host-side staging storage for the two values.
+    std::vector<cl_int> output = generate_output<cl_int>(2);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    // A single work-item is enough: the kernel writes both values itself.
+    work_size[0] = 1;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read (CL_TRUE), so output is valid as soon as the call returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_int) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // Save
+    ilogb_zero = output[0];
+    ilogb_nan = output[1];
+
+    clReleaseMemObject(buffers[0]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+
+} // namespace detail
+
+// Host reference functor for ilogb: maps a cl_float input to a cl_int result.
+// The results for NaN and zero inputs are supplied by the caller, because
+// they are implementation-defined values read from the device.
+struct logarithmic_func_ilogb : public unary_func<cl_float, cl_int>
+{
+    logarithmic_func_ilogb(cl_int ilogb_nan, cl_int ilogb_zero)
+        : m_ilogb_nan(ilogb_nan), m_ilogb_zero(ilogb_zero)
+    {
+    }
+
+    // Name of the function under test.
+    std::string str()
+    {
+        return std::string("ilogb");
+    }
+
+    // Header line the generated kernel source needs for this function.
+    std::string headers()
+    {
+        return std::string("#include <opencl_math>\n");
+    }
+
+    // Reference result for a single input value.
+    cl_int operator()(const cl_float& x)
+    {
+        static_assert(
+            sizeof(cl_int) == sizeof(int),
+            "Tests assumes that sizeof(cl_int) == sizeof(int)"
+        );
+
+        if((std::isnan)(x))
+            return m_ilogb_nan;
+
+        // +0.0 and -0.0 compare equal, so one comparison covers both zeros.
+        if(x == 0.0)
+            return m_ilogb_zero;
+
+        return (std::ilogb)(x);
+    }
+
+    // Lower bound of the generated input range.
+    cl_float min1()
+    {
+        return -100.0f;
+    }
+
+    // Upper bound of the generated input range.
+    cl_float max1()
+    {
+        return 1000.0f;
+    }
+
+    // Inputs that are always exercised: zeros, small magnitudes,
+    // infinities and NaN.
+    std::vector<cl_float> in1_special_cases()
+    {
+        const cl_float inf = std::numeric_limits<cl_float>::infinity();
+        const cl_float nan = std::numeric_limits<cl_float>::quiet_NaN();
+
+        std::vector<cl_float> cases;
+        cases.push_back(cl_float(0.0f));
+        cases.push_back(cl_float(-0.0f));
+        cases.push_back(cl_float(1.0f));
+        cases.push_back(cl_float(-1.0f));
+        cases.push_back(cl_float(2.0f));
+        cases.push_back(cl_float(-2.0f));
+        cases.push_back(inf);
+        cases.push_back(-inf);
+        cases.push_back(nan);
+        return cases;
+    }
+private:
+    cl_int m_ilogb_nan;  // expected result for NaN input (FP_ILOGBNAN)
+    cl_int m_ilogb_zero; // expected result for +-0 input (FP_ILOGB0)
+};
+
+// gentype log(gentype x);
+// gentype logb(gentype x);
+// gentype log2(gentype x);
+// gentype log10(gentype x);
+// gentype log1p(gentype x);
+// Macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log, std::log, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, logb, std::logb, true, 0.0f, 0.0f, 0.001f, -10.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log2, std::log2, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log10, std::log10, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log1p, std::log1p, true, 2.0f, 4.0f, 0.001f, -10.0f, 1000.0f)
+
+// gentype lgamma(gentype x);
+// OpenCL C++ Spec.:
+// The ULP values for the built-in math functions lgamma and lgamma_r are currently undefined.
+// Because of that we don't check ULP and set the acceptable delta to 0.2f (20%).
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, lgamma, std::lgamma, false, 0.0f, 0.0f, 0.2f, -10.0f, 1000.0f)
+
+// gentype lgamma_r(gentype x, intn* signp);
+// OpenCL C++ Spec.:
+// The ULP values for the built-in math functions lgamma and lgamma_r are currently undefined.
+// Because of that we don't check ULP and set the acceptable delta to 0.2f (20%).
+//
+// Note:
+// We DO NOT test whether the sign of the gamma function returned by lgamma_r is correct.
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, lgamma_r, std::lgamma, false, 0.0f, 0.0f, 0.2f, -10.0f, 1000.0f)
+
+// We need to specialize the generate_kernel_unary<>() function template for
+// logarithmic_func_lgamma_r because lgamma_r takes two arguments, but only the first one
+// is an input; the second one is used to return the sign of the gamma function.
+// Both variants below store that sign in a local variable and discard it.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C flavour of the kernel; func is not consulted in this variant.
+template <>
+std::string generate_kernel_unary<logarithmic_func_lgamma_r, cl_float, cl_float>(logarithmic_func_lgamma_r func)
+{
+    return
+        "__kernel void test_lgamma_r(global float *input, global float *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    int sign;\n"
+        "    output[gid] = lgamma_r(input[gid], &sign);\n"
+        "}\n";
+}
+#else
+// OpenCL C++ flavour of the kernel; func.defs() and func.headers() are
+// prepended to the generated source.
+template <>
+std::string generate_kernel_unary<logarithmic_func_lgamma_r, cl_float, cl_float>(logarithmic_func_lgamma_r func)
+{
+    return
+        "" + func.defs() +
+        "" + func.headers() +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_lgamma_r(global_ptr<float[]> input, global_ptr<float[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    int sign;\n"
+        "    output[gid] = lgamma_r(input[gid], &sign);\n"
+        "}\n";
+}
+#endif
+
+// logarithmic functions
+// Runs ilogb, the log family, lgamma and lgamma_r through the unary test macro.
+AUTO_TEST_CASE(test_logarithmic_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Check for EMBEDDED_PROFILE
+    // The flag is handed to the functors below (presumably to select the
+    // embedded-profile ULP tolerance - confirm against the functor macros).
+    bool is_embedded_profile = false;
+    char profile[128];
+    error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        is_embedded_profile = true;
+
+    // Write values of FP_ILOGB0 and FP_ILOGBNAN, which are macros defined on the device, to
+    // ilogb_zero and ilogb_nan.
+    cl_int ilogb_nan = 0;
+    cl_int ilogb_zero = 0;
+    error = detail::get_ilogb_nan_zero(device, context, queue, ilogb_nan, ilogb_zero);
+    RETURN_ON_ERROR_MSG(error, "detail::get_ilogb_nan_zero function failed");
+
+    // intn ilogb(gentype x);
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_ilogb(ilogb_nan, ilogb_zero)))
+
+    // gentype log(gentype x);
+    // gentype logb(gentype x);
+    // gentype log2(gentype x);
+    // gentype log10(gentype x);
+    // gentype log1p(gentype x);
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_log(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_logb(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_log2(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_log10(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_log1p(is_embedded_profile)))
+
+    // gentype lgamma(gentype x);
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_lgamma(is_embedded_profile)))
+
+    // gentype lgamma_r(gentype x, intn* signp);
+    //
+    // Note:
+    // We DO NOT test whether the sign of the gamma function returned by lgamma_r is correct
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_lgamma_r(is_embedded_profile)))
+
+    // Any non-success value left in error is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_LOG_FUNCS_HPP

diff --git a/test_conformance/clcpp/math_funcs/main.cpp b/test_conformance/clcpp/math_funcs/main.cpp
new file mode 100644
index 0000000..aada85f
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/main.cpp

@@ -0,0 +1,50 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <limits>
+
+#include "../common.hpp"
+
+#include "comparison_funcs.hpp"
+#include "exponential_funcs.hpp"
+#include "floating_point_funcs.hpp"
+#include "half_math_funcs.hpp"
+#include "logarithmic_funcs.hpp"
+#include "other_funcs.hpp"
+#include "power_funcs.hpp"
+#include "trigonometric_funcs.hpp"
+
+// Entry point: verifies the host floating-point types, then hands every
+// auto-registered test case to the shared test harness.
+int main(int argc, const char *argv[])
+{
+    // The tests need cl_float (float) and cl_double (double) to fulfill the
+    // requirements of IEC 559 (IEEE 754); otherwise they cannot run correctly.
+    const bool float_is_iec559 = std::numeric_limits<cl_float>::is_iec559;
+    const bool double_is_iec559 = std::numeric_limits<cl_double>::is_iec559;
+
+    if(!float_is_iec559)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
+            "Tests won't run correctly."
+        );
+    }
+
+    if(!double_is_iec559)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
+            "Tests won't run correctly."
+        );
+    }
+
+    // Test definitions collected by the AUTO_TEST_CASE registrations.
+    auto& registered_tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(
+        argc, argv,
+        registered_tests.size(), registered_tests.data(),
+        false, false, 0
+    );
+}

diff --git a/test_conformance/clcpp/math_funcs/other_funcs.hpp b/test_conformance/clcpp/math_funcs/other_funcs.hpp
new file mode 100644
index 0000000..f939a56
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/other_funcs.hpp

@@ -0,0 +1,75 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_OTHER_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_OTHER_FUNCS_HPP
+
+#include <type_traits>
+#include <cmath>
+
+#include "common.hpp"
+
+// Unary functors. Macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC(other, erfc, std::erfc, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(other, erf, std::erf, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(other, fabs, std::fabs, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(other, tgamma, std::tgamma, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f)
+
+// Binary functors. Macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC(other, hypot, std::hypot, true, 4.0f, 4.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+
+// Ternary functors. Macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2, min3, max3
+// mad passes use_ulp = false, so it is given a max_delta (0.1f) instead of a ULP bound.
+MATH_FUNCS_DEFINE_TERNARY_FUNC(other, mad, reference::mad, false, 0.0f, 0.0f, 0.1f, -10.0f, 10.0f, -10.0f, 10.0f, -10.0f, 10.0f)
+
+// other functions
+// Runs erfc, erf, fabs, tgamma, hypot and mad through the test macros.
+AUTO_TEST_CASE(test_other_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{    
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Check for EMBEDDED_PROFILE
+    // The flag is handed to every functor below (presumably to select the
+    // embedded-profile ULP tolerance - confirm against the functor macros).
+    bool is_embedded_profile = false;
+    char profile[128];
+    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        is_embedded_profile = true;
+
+    // gentype erfc(gentype x);
+    // gentype erf(gentype x);
+    TEST_UNARY_FUNC_MACRO((other_func_erfc(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((other_func_erf(is_embedded_profile)))
+
+    // gentype fabs(gentype x);
+    TEST_UNARY_FUNC_MACRO((other_func_fabs(is_embedded_profile)))
+
+    // gentype tgamma(gentype x);
+    TEST_UNARY_FUNC_MACRO((other_func_tgamma(is_embedded_profile)))
+
+    // gentype hypot(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((other_func_hypot(is_embedded_profile)))
+
+    // gentype mad(gentype a, gentype b, gentype c);
+    TEST_TERNARY_FUNC_MACRO((other_func_mad(is_embedded_profile)))
+
+    // Any non-success value left in error is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_OTHER_FUNCS_HPP

diff --git a/test_conformance/clcpp/math_funcs/power_funcs.hpp b/test_conformance/clcpp/math_funcs/power_funcs.hpp
new file mode 100644
index 0000000..2ace9b3
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/power_funcs.hpp

@@ -0,0 +1,153 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_POWER_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_POWER_FUNCS_HPP
+
+#include <limits>
+#include <type_traits>
+#include <cmath>
+
+#include "common.hpp"
+
+// Defines the functor power_func_<NAME> for a power function taking a float
+// first argument and an int second argument (binary_func<cl_float, cl_int, cl_float>).
+//   NAME          - function name; also the string returned by str()
+//   HOST_FUNC     - host reference, called as HOST_FUNC(double(x), y)
+//   USE_ULP       - value returned by use_ulp()
+//   ULP           - value returned by ulp() when is_embedded is false
+//   ULP_EMBEDDED  - value returned by ulp() when is_embedded is true
+//   MIN1, MAX1    - values returned by min1() / max1() (float argument)
+//   MIN2, MAX2    - values returned by min2() / max2() (int argument)
+#define DEFINE_BINARY_POWER_FUNC_INT(NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, MIN1, MAX1, MIN2, MAX2) \
+struct power_func_ ## NAME : public binary_func<cl_float, cl_int, cl_float> \
+{ \
+    power_func_ ## NAME(bool is_embedded) : m_is_embedded(is_embedded)  \
+    { \
+    \
+    } \
+    \
+    std::string str() \
+    { \
+        return #NAME; \
+    } \
+    \
+    std::string headers()  \
+    { \
+        return "#include <opencl_math>\n"; \
+    } \
+    /* Reference value type is cl_double */ \
+    cl_double operator()(const cl_float& x, const cl_int& y)  \
+    { \
+        return (HOST_FUNC)(static_cast<cl_double>(x), y); \
+    } \
+    \
+    cl_float min1() \
+    { \
+        return MIN1; \
+    } \
+    \
+    cl_float max1() \
+    { \
+        return MAX1; \
+    } \
+    \
+    cl_int min2() \
+    { \
+        return MIN2; \
+    } \
+    \
+    cl_int max2() \
+    { \
+        return MAX2; \
+    } \
+    \
+    std::vector<cl_float> in1_special_cases() \
+    { \
+        return {  \
+            cl_float(-1.0f), \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+        }; \
+    } \
+    \
+    std::vector<cl_int> in2_special_cases() \
+    { \
+        return {  \
+            2, 3, -1, 1, -2, 2 \
+        }; \
+    } \
+    \
+    bool use_ulp() \
+    { \
+        return USE_ULP; \
+    } \
+    \
+    float ulp() \
+    { \
+        if(m_is_embedded) \
+        { \
+            return ULP_EMBEDDED; \
+        } \
+        return ULP; \
+    } \
+private: \
+    bool m_is_embedded; \
+};
+
+// Unary functors. Macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+// NOTE(review): cbrt is sampled from [-1000, -9] only, i.e. never from
+// positive values - confirm that this range is intentional.
+MATH_FUNCS_DEFINE_UNARY_FUNC(power, cbrt, std::cbrt, true, 2.0f, 4.0f, 0.001f, -1000.0f, -9.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(power, rsqrt, reference::rsqrt, true, 2.0f, 4.0f, 0.001f, 1.0f, 100.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(power, sqrt, std::sqrt, true, 3.0f, 4.0f, 0.001f, 1.0f, 100.0f)
+
+// Binary functors (float, float). Macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC(power, pow, std::pow, true, 16.0f, 16.0f, 0.001f, 1.0f, 100.0f, 1.0f, 10.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(power, powr, reference::powr, true, 16.0f, 16.0f, 0.001f, 1.0f, 100.0f, 1.0f, 10.0f)
+
+// Binary functors (float, int). Macro arguments, in order:
+// func_name, reference_func, use_ulp, ulp, ulp_for_embedded, min1, max1, min2, max2
+DEFINE_BINARY_POWER_FUNC_INT(pown, std::pow, true, 16.0f, 16.0f, 1.0f, 100.0f, 1, 10)
+DEFINE_BINARY_POWER_FUNC_INT(rootn, reference::rootn, true, 16.0f, 16.0f, -100.0f, 100.0f, -10, 10)
+
+// power functions
+// Runs cbrt, sqrt, rsqrt, pow, powr, pown and rootn through the test macros.
+AUTO_TEST_CASE(test_power_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{    
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    // Check for EMBEDDED_PROFILE
+    // The flag is handed to every functor below; the pown/rootn functors use
+    // it to choose between their regular and embedded ULP values.
+    bool is_embedded_profile = false;
+    char profile[128];
+    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        is_embedded_profile = true;
+
+    // gentype cbrt(gentype x);
+    // gentype sqrt(gentype x);
+    // gentype rsqrt(gentype x);
+    TEST_UNARY_FUNC_MACRO((power_func_cbrt(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((power_func_sqrt(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((power_func_rsqrt(is_embedded_profile)))
+
+    // gentype pow(gentype x, gentype y);
+    // gentype powr(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((power_func_pow(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((power_func_powr(is_embedded_profile)))
+
+    // gentype pown(gentype x, intn y);
+    // gentype rootn(gentype x, intn y);
+    TEST_BINARY_FUNC_MACRO((power_func_pown(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((power_func_rootn(is_embedded_profile)))
+
+    // Any non-success value left in error is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_POWER_FUNCS_HPP

diff --git a/test_conformance/clcpp/math_funcs/reference.hpp b/test_conformance/clcpp/math_funcs/reference.hpp
new file mode 100644
index 0000000..0f5fc2f
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/reference.hpp

@@ -0,0 +1,315 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_REFERENCE_HPP
+#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_REFERENCE_HPP
+
+#include <type_traits>
+#include <cmath>
+#include <limits>
+
+#include "../common.hpp"
+
+namespace reference
+{
+    // Reference functions for OpenCL comparison functions that
+    // are not already defined in STL.
+    // maxmag: returns whichever argument has the larger magnitude. When
+    // neither magnitude is strictly larger, the result is std::fmax(x, y).
+    cl_float maxmag(const cl_float& x, const cl_float& y)
+    {
+        const cl_float mag_x = (std::abs)(x);
+        const cl_float mag_y = (std::abs)(y);
+
+        if(mag_x > mag_y)
+            return x;
+        if(mag_y > mag_x)
+            return y;
+        return (std::fmax)(x, y);
+    }
+
+    // minmag: returns whichever argument has the smaller magnitude. When
+    // neither magnitude is strictly smaller, the result is std::fmin(x, y).
+    cl_float minmag(const cl_float& x, const cl_float& y)
+    {
+        const cl_float mag_x = (std::abs)(x);
+        const cl_float mag_y = (std::abs)(y);
+
+        if(mag_x < mag_y)
+            return x;
+        if(mag_y < mag_x)
+            return y;
+        return (std::fmin)(x, y);
+    }
+
+    // Reference functions for OpenCL exp functions that
+    // are not already defined in STL.
+    // exp10: computes 10^x as exp2(x * log2(10)). The product is formed in
+    // long double and rounded to cl_double before the exp2 call.
+    cl_double exp10(const cl_double& x)
+    {
+        const auto log2_of_ten = (std::log2)(static_cast<long double>(10.0));
+        const cl_double exponent = static_cast<cl_double>(x * log2_of_ten);
+        return (std::exp2)(exponent);
+    }
+
+    // Reference functions for OpenCL floating point functions that
+    // are not already defined in STL.
+    // fract: s[0] receives the fractional part and s[1] the matching whole
+    // part of x. A negative fractional part is shifted up by one (and the
+    // whole part down by one); a result of exactly 1 is replaced by
+    // HEX_FLT(+, 1, fffffe, -, 1). NaN input yields NaN in both fields.
+    // Logic follows math_brute_force/reference_math.c.
+    cl_double2 fract(cl_double x)
+    {
+        cl_double2 parts;
+        if((std::isnan)(x))
+        {
+            const cl_double nan = std::numeric_limits<cl_double>::quiet_NaN();
+            parts.s[0] = nan;
+            parts.s[1] = nan;
+            return parts;
+        }
+
+        parts.s[0] = (std::modf)(x, &(parts.s[1]));
+        if(!(parts.s[0] < 0.0))
+            return parts;
+
+        // Negative fraction: move it into the non-negative range.
+        parts.s[0] = 1.0f + parts.s[0];
+        parts.s[1] -= 1.0f;
+        if(parts.s[0] == 1.0f)
+        {
+            parts.s[0] = HEX_FLT(+, 1, fffffe, -, 1);
+        }
+        return parts;
+    }
+
+    // remquo: s[0] is the value returned by std::remainder(x, y); s[1] is
+    // the low seven bits of the magnitude of the integral quotient, carrying
+    // the sign of that quotient.
+    //
+    // When the quotient is not finite (y == 0, infinite x, or a NaN
+    // argument) s[1] is 0. The quotient is never converted to an integer
+    // type, because such a conversion is undefined for NaN, infinite or
+    // out-of-range values.
+    cl_double2 remquo(cl_double x, cl_double y)
+    {
+        cl_double2 r;
+        // remquo returns the same value that is returned by the
+        // remainder function
+        r.s[0] = (std::remainder)(x, y);
+        r.s[1] = 0;
+
+        // calculate quo
+        const cl_double x_y = (x - r.s[0]) / y;
+        if(!(std::isfinite)(x_y))
+        {
+            // Covers y == 0, x == +-inf and NaN arguments.
+            return r;
+        }
+
+        // fmod is exact, so truncating fmod(|q|, 128) yields the same low
+        // seven bits as truncating |q| first, without any range limit.
+        const cl_double low_bits = (std::trunc)((std::fmod)((std::abs)(x_y), 128.0));
+        r.s[1] = low_bits;
+        if(x_y < 0.0)
+            r.s[1] = -r.s[1];
+        return r;
+    }
+
+    // Reference functions for OpenCL half_math:: functions that
+    // are not already defined in STL.
+    // divide: plain double-precision quotient x / y.
+    cl_double divide(cl_double x, cl_double y)
+    {
+        const cl_double quotient = x / y;
+        return quotient;
+    }
+
+    // recip: double-precision reciprocal 1 / x.
+    cl_double recip(cl_double x)
+    {
+        const cl_double reciprocal = 1.0 / x;
+        return reciprocal;
+    }
+
+    // Reference functions for OpenCL other functions that
+    // are not already defined in STL.
+    // mad: multiply-add x * y + z, written as a separate product and sum.
+    cl_double mad(cl_double x, cl_double y, cl_double z)
+    {
+        const cl_double product = x * y;
+        return product + z;
+    }
+
+    // Reference functions for OpenCL power functions that
+    // are not already defined in STL.
+    // rsqrt: reciprocal square root, 1 / sqrt(x).
+    cl_double rsqrt(const cl_double& x)
+    {
+        const cl_double root = (std::sqrt)(x);
+        return cl_double(1.0) / root;
+    }
+
+    // powr: reference for the OpenCL powr built-in (x raised to y for x >= 0).
+    // The special cases are checked in a fixed order and each later check
+    // relies on the earlier ones having returned; only inputs that pass all
+    // of them reach the final std::pow call.
+    cl_double powr(const cl_double& x, const cl_double& y)
+    {
+        //powr(x, y) returns NaN for x < 0.
+        if( x < 0.0 )
+            return std::numeric_limits<cl_double>::quiet_NaN();
+
+        //powr ( x, NaN ) returns the NaN for x >= 0.
+        //powr ( NaN, y ) returns the NaN.
+        if((std::isnan)(x) || (std::isnan)(y) )
+            return std::numeric_limits<cl_double>::quiet_NaN();
+
+        if( x == 1.0 )
+        {
+            //powr ( +1, +-inf ) returns NaN.
+            if((std::abs)(y) == INFINITY )
+                return std::numeric_limits<cl_double>::quiet_NaN();
+
+            //powr ( +1, y ) is 1 for finite y. (NaN handled above)
+            return 1.0;
+        }
+
+        if( y == 0.0 )
+        {
+            //powr ( +inf, +-0 ) returns NaN.
+            //powr ( +-0, +-0 ) returns NaN.
+            if( x == 0.0 || x == std::numeric_limits<cl_double>::infinity())
+                return std::numeric_limits<cl_double>::quiet_NaN();
+
+            //powr ( x, +-0 ) is 1 for finite x > 0.  (x <= 0, NaN, INF already handled above)
+            return 1.0;
+        }
+
+        if( x == 0.0 )
+        {
+            //powr ( +-0, -inf) is +inf.
+            //powr ( +-0, y ) is +inf for finite y < 0.
+            if( y < 0.0 )
+                return std::numeric_limits<cl_double>::infinity();
+
+            //powr ( +-0, y ) is +0 for y > 0.    (NaN, y==0 handled above)
+            return 0.0;
+        }
+
+        // x = +inf (negative x, including -inf, already returned NaN above)
+        if( (std::isinf)(x) )
+        {
+            if( y < 0 )
+                return 0;
+            return std::numeric_limits<cl_double>::infinity();
+        }
+
+        double fabsx = (std::abs)(x);
+        double fabsy = (std::abs)(y);
+
+        //y = +-inf cases
+        // x is finite, positive and not 1 here, so only |x| < 1 vs |x| > 1 matters.
+        if( (std::isinf)(fabsy) )
+        {
+            if( y < 0.0 )
+            {
+                if( fabsx < 1.0 )
+                    return std::numeric_limits<cl_double>::infinity();
+                return 0;
+            }
+            if( fabsx < 1.0 )
+                return 0.0;
+            return std::numeric_limits<cl_double>::infinity();
+        }        
+        // Ordinary case: finite positive x and finite non-zero y.
+        return (std::pow)(x, y);
+    }
+
+    // rootn: reference for the OpenCL rootn built-in, i.e. x^(1/n).
+    //
+    // Special cases handled explicitly:
+    //   rootn(x, 0)              -> NaN
+    //   rootn(x < 0, even n)     -> NaN
+    //   rootn(+-0, even n > 0)   -> +0
+    //   rootn(+-0, odd  n > 0)   -> +-0
+    //   rootn(+-0, even n < 0)   -> +inf
+    //   rootn(+-0, odd  n < 0)   -> +-inf
+    // Everything else is computed as exp2(log2(|x|) / n) with the sign of x.
+    //
+    // The parity of n is taken from (n & 1) directly; negating n first would
+    // overflow for the most negative cl_int and does not change the parity.
+    cl_double rootn(const cl_double& x, const cl_int n)
+    {
+        //rootn (x, 0) returns a NaN.
+        if(n == 0)
+            return std::numeric_limits<cl_double>::quiet_NaN();
+
+        const bool n_is_even = (0 == (n & 1));
+
+        //rootn ( x, n )  returns a NaN for x < 0 and n is even.
+        if(x < 0 && n_is_even)
+            return std::numeric_limits<cl_double>::quiet_NaN();
+
+        if(x == 0.0)
+        {
+            if(n > 0)
+            {
+                //rootn ( +-0,  n ) is +0 for even n > 0.
+                //rootn ( +-0,  n ) is +-0 for odd n > 0.
+                return n_is_even ? cl_double(0.0) : x;
+            }
+
+            //rootn ( +-0,  n ) is +inf for even n < 0.
+            if(n_is_even)
+                return std::numeric_limits<cl_double>::infinity();
+
+            //rootn ( +-0,  n ) is +-inf for odd n < 0.
+            return (std::copysign)(
+                std::numeric_limits<cl_double>::infinity(), x
+            );
+        }
+
+        cl_double r = (std::abs)(x);
+        r = (std::exp2)((std::log2)(r) / static_cast<cl_double>(n));
+        return (std::copysign)(r, x);
+    }
+
+    // Reference functions for OpenCL trigonometric functions that
+    // are not already defined in STL.
+    // acospi: acos(x) / pi.
+    cl_double acospi(cl_double x)
+    {
+        const cl_double radians = (std::acos)(x);
+        return radians / CL_M_PI;
+    }
+
+    // asinpi: asin(x) / pi.
+    cl_double asinpi(cl_double x)
+    {
+        const cl_double radians = (std::asin)(x);
+        return radians / CL_M_PI;
+    }
+
+    // atanpi: atan(x) / pi.
+    cl_double atanpi(cl_double x)
+    {
+        const cl_double radians = (std::atan)(x);
+        return radians / CL_M_PI;
+    }
+
+    // cospi: cos(pi * x), with the argument scaled by CL_M_PI on the host.
+    cl_double cospi(cl_double x)
+    {
+        const cl_double radians = x * CL_M_PI;
+        return (std::cos)(radians);
+    }
+
+    // sinpi: sin(pi * x), with the argument scaled by CL_M_PI on the host.
+    cl_double sinpi(cl_double x)
+    {
+        const cl_double radians = x * CL_M_PI;
+        return (std::sin)(radians);
+    }
+
+    // tanpi: tan(pi * x), with the argument scaled by CL_M_PI on the host.
+    cl_double tanpi(cl_double x)
+    {
+        const cl_double radians = x * CL_M_PI;
+        return (std::tan)(radians);
+    }
+
+    // atan2: forwards to std::atan2(x, y), so x is the first (ordinate)
+    // argument. On Windows builds the case where both arguments are infinite
+    // is answered here instead of by the runtime library.
+    cl_double atan2(cl_double x, cl_double y)
+    {
+    #if defined(WIN32) || defined(_WIN32)
+        // Fix edge cases for Windows
+        const bool both_infinite = (std::isinf)(x) && (std::isinf)(y);
+        if (both_infinite) {
+            // pi/4 when y is +inf, 3*pi/4 when y is -inf; sign follows x.
+            cl_double magnitude = 3.f * CL_M_PI_4;
+            if (y > 0) {
+                magnitude = CL_M_PI_4;
+            }
+            if (x > 0) {
+                return magnitude;
+            }
+            return -magnitude;
+        }
+    #endif // defined(WIN32) || defined(_WIN32)
+        return (std::atan2)(x, y);
+    }
+
+    // atan2pi: reference::atan2(x, y) / pi.
+    cl_double atan2pi(cl_double x, cl_double y)
+    {
+        const cl_double radians = ::reference::atan2(x, y);
+        return radians / CL_M_PI;
+    }
+
+    // sincos: returns sin(x) in s[0] and cos(x) in s[1].
+    cl_double2 sincos(cl_double x)
+    {
+        const cl_double sine = (std::sin)(x);
+        const cl_double cosine = (std::cos)(x);
+
+        cl_double2 result;
+        result.s[0] = sine;
+        result.s[1] = cosine;
+        return result;
+    }
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_REFERENCE_HPP

diff --git a/test_conformance/clcpp/math_funcs/trigonometric_funcs.hpp b/test_conformance/clcpp/math_funcs/trigonometric_funcs.hpp
new file mode 100644
index 0000000..343024a
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/trigonometric_funcs.hpp

@@ -0,0 +1,222 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_TRI_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_TRI_FUNCS_HPP
+
+#include <type_traits>
+#include <cmath>
+
+#include "common.hpp"
+
+// Macro arguments: group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acos, std::acos, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acosh, std::acosh, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f) // NOTE(review): std::acosh is only defined for x >= 1 - confirm this range is intended
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acospi, reference::acospi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asin, std::asin, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asinh, std::asinh, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asinpi, reference::asinpi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atan, std::atan, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atanh, std::atanh, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atanpi, reference::atanpi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
+
+// The (sin/cos/tan)pi functions take inputs in [-0.24, 0.24], so the magnitude of
+// (CL_M_PI * x) passed to the reference implementation never exceeds CL_M_PI_F.
+// Macro arguments: group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cos, std::cos, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cosh, std::cosh, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cospi, reference::cospi, true, 4.0f, 4.0f, 0.001f, -0.24f, 0.24f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sin, std::sin, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sinh, std::sinh, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sinpi, reference::sinpi, true, 4.0f, 4.0f, 0.001f, -0.24f, 0.24f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tan, std::tan, true, 5.0f, 5.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tanh, std::tanh, true, 5.0f, 5.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tanpi, reference::tanpi, true, 6.0f, 6.0f, 0.001f, -0.24f, 0.24f)
+
+// Macro arguments: group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC(trigonometric, atan2, reference::atan2, true, 6.0f, 6.0f, 0.001f, -1.0f, 1.0f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(trigonometric, atan2pi, reference::atan2pi, true, 6.0f, 6.0f, 0.001f, -1.0f, 1.0f, -1.0f, 1.0f)
+
+// gentype sincos(gentype x, gentype * cosval);
+//
+// The fact that the second argument is a pointer is inconvenient.
+//
+// We don't want to modify all helper functions defined in funcs_test_utils.hpp
+// that run test kernels generated based on this class and check if results are
+// correct, so instead of having two output cl_float buffers, one for sines and
+// one for cosines values, we use one cl_float2 output buffer (first component is
+// sine, second is cosine).
+//
+// Below we also define a specialization of the generate_kernel_unary function
+// template for trigonometric_func_sincos.
+struct trigonometric_func_sincos : public unary_func<cl_float, cl_float2>
+{
+    // is_embedded chooses which profile's ULP bound ulp() reports.
+    trigonometric_func_sincos(bool is_embedded)
+        : m_is_embedded(is_embedded)
+    {
+    }
+
+    // Name of the built-in function under test.
+    std::string str()
+    {
+        return std::string("sincos");
+    }
+
+    // Include line emitted into the generated OpenCL C++ kernel source.
+    std::string headers()
+    {
+        return std::string("#include <opencl_math>\n");
+    }
+
+    // Host-side reference, evaluated in double precision: (sin(x), cos(x)).
+    cl_double2 operator()(const cl_float& x)
+    {
+        const cl_double widened = static_cast<cl_double>(x);
+        return (reference::sincos)(widened);
+    }
+
+    // Lower and upper bound of the input range: [-pi, pi].
+    cl_float min1() { return -CL_M_PI_F; }
+    cl_float max1() { return CL_M_PI_F; }
+
+    // The error bound for this function is expressed in ULPs.
+    bool use_ulp()
+    {
+        return true;
+    }
+
+    // Both profiles currently share the same 4 ULP bound.
+    float ulp()
+    {
+        const float full_profile_ulp = 4.0f;
+        const float embedded_profile_ulp = 4.0f;
+        return m_is_embedded ? embedded_profile_ulp : full_profile_ulp;
+    }
+
+private:
+    // True when the embedded-profile bound should be reported.
+    bool m_is_embedded;
+};
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+template <>
+std::string generate_kernel_unary<trigonometric_func_sincos, cl_float, cl_float2>(trigonometric_func_sincos func)
+{
+    return std::string( // OpenCL C kernel text; func is not consulted in this variant
+        "__kernel void test_sincos(global float *input, global float2 *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 sine_cosine_of_x;\n"
+        "    float cosine_of_x = 0;\n"
+        "    sine_cosine_of_x.x = sincos(input[gid], &(cosine_of_x));\n"
+        "    sine_cosine_of_x.y = cosine_of_x;\n"
+        "    output[gid] = sine_cosine_of_x;\n"
+        "}\n");
+}
+#else
+template <>
+std::string generate_kernel_unary<trigonometric_func_sincos, cl_float, cl_float2>(trigonometric_func_sincos func)
+{
+    std::string kernel_source = func.defs() + func.headers(); // prologue supplied by the functor
+    kernel_source +=
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_sincos(global_ptr<float[]> input, global_ptr<float2[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 sine_cosine_of_x;\n"
+        "    float cosine_of_x = 0;\n"
+        "    sine_cosine_of_x.x = sincos(input[gid], &(cosine_of_x));\n"
+        "    sine_cosine_of_x.y = cosine_of_x;\n"
+        "    output[gid] = sine_cosine_of_x;\n"
+        "}\n";
+    return kernel_source;
+}
+#endif
+
+// Runs every trigonometric built-in defined above (unary, binary and sincos) on the given device.
+AUTO_TEST_CASE(test_trigonometric_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;      // overall status; presumably updated by the TEST_*_FUNC_MACRO invocations - confirm
+    int last_error = CL_SUCCESS; // status of the most recent call
+
+    // Check for EMBEDDED_PROFILE
+    bool is_embedded_profile = false;
+    char profile[128];
+    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        is_embedded_profile = true; // passed to every functor constructed below
+
+    // gentype acos(gentype x);
+    // gentype acosh(gentype x);
+    // gentype acospi(gentype x);
+    // gentype asin(gentype x);
+    // gentype asinh(gentype x);
+    // gentype asinpi(gentype x);
+    // gentype atan(gentype x);
+    // gentype atanh(gentype x);
+    // gentype atanpi(gentype x);
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_acos(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_acosh(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_acospi(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_asin(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_asinh(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_asinpi(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_atan(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_atanh(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_atanpi(is_embedded_profile)))
+
+    // gentype cos(gentype x);
+    // gentype cosh(gentype x);
+    // gentype cospi(gentype x);
+    // gentype sin(gentype x);
+    // gentype sinh(gentype x);
+    // gentype sinpi(gentype x);
+    // gentype tan(gentype x);
+    // gentype tanh(gentype x);
+    // gentype tanpi(gentype x);
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_cos(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_cosh(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_cospi(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_sin(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_sinh(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_sinpi(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_tan(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_tanh(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_tanpi(is_embedded_profile)))
+
+    // gentype atan2(gentype y, gentype x);
+    // gentype atan2pi(gentype y, gentype x);
+    TEST_BINARY_FUNC_MACRO((trigonometric_func_atan2(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((trigonometric_func_atan2pi(is_embedded_profile)))
+
+    // gentype sincos(gentype x, gentype * cosval);
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_sincos(is_embedded_profile)))
+
+    if(error != CL_SUCCESS) // any non-success status is reported to the caller as -1
+    {
+        return -1;
+    }
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_TRI_FUNCS_HPP

diff --git a/test_conformance/clcpp/pipes/CMakeLists.txt b/test_conformance/clcpp/pipes/CMakeLists.txt
new file mode 100644
index 0000000..65daae9
--- /dev/null
+++ b/test_conformance/clcpp/pipes/CMakeLists.txt

@@ -0,0 +1,7 @@
+set(MODULE_NAME CPP_PIPES)  # module identifier; also the prefix of the _SOURCES variable below
+
+set(${MODULE_NAME}_SOURCES
+    main.cpp  # test_pipes.hpp is header-only and is pulled in by main.cpp
+)
+
+include(../../CMakeCommon.txt)  # shared build logic; presumably consumes the variables set above

diff --git a/test_conformance/clcpp/pipes/main.cpp b/test_conformance/clcpp/pipes/main.cpp
new file mode 100644
index 0000000..de6e622
--- /dev/null
+++ b/test_conformance/clcpp/pipes/main.cpp

@@ -0,0 +1,25 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_pipes.hpp"
+
+
+int main(int argc, const char *argv[])
+{
+    auto& registered = autotest::test_suite::global_test_suite().test_defs; // globally registered test definitions
+    return runTestHarness(argc, argv, registered.size(), registered.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/pipes/test_pipes.hpp b/test_conformance/clcpp/pipes/test_pipes.hpp
new file mode 100644
index 0000000..3fc30dc
--- /dev/null
+++ b/test_conformance/clcpp/pipes/test_pipes.hpp

@@ -0,0 +1,632 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_PIPES_TEST_PIPES_HPP
+#define TEST_CONFORMANCE_CLCPP_PIPES_TEST_PIPES_HPP
+
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+
+
+namespace test_pipes {
+
+enum class pipe_source // where the generated kernels obtain their pipe
+{
+    param,   // the pipe passed in as the kernel argument (pipe_param)
+    storage  // a program-scope pipe_storage object declared in the kernel source
+};
+
+enum class pipe_operation // how the generated kernels write to and read from the pipe
+{
+    work_item,               // direct write/read per work-item, no reservation
+    work_item_reservation,   // each even work-item reserves two packets
+    work_group_reservation,  // work-group reservation of get_local_size(0) packets
+    sub_group_reservation    // sub-group reservation of get_sub_group_size() packets
+};
+
+struct test_options // parameters of one producer/consumer run
+{
+    pipe_operation operation; // access pattern used by the generated kernels
+    pipe_source source;       // kernel-argument pipe or pipe_storage
+    int max_packets;          // pipe capacity (clCreatePipe / pipe_storage size)
+    int num_packets;          // packets transferred; also used as the global work size
+};
+
+struct output_type // per-work-item results; mirrors the kernel-side struct in source_common
+{
+    cl_uint write_reservation_is_valid; // producer: reservation validity (set to 1 when no reservation is used)
+    cl_uint write_success;              // producer: non-zero when the write succeeded
+
+    cl_uint num_packets;                // consumer: packet count reported by the pipe
+    cl_uint max_packets;                // consumer: capacity reported by the pipe
+    cl_uint read_reservation_is_valid;  // consumer: reservation validity (set to 1 when no reservation is used)
+    cl_uint read_success;               // consumer: non-zero when the read succeeded
+
+    cl_uint value;                      // consumer: payload of the packet that was read
+};
+
+const std::string source_common /* kernel-side declaration of output_type, prepended to every generated program */ = R"(
+struct output_type
+{
+    uint write_reservation_is_valid;
+    uint write_success;
+
+    uint num_packets;
+    uint max_packets;
+    uint read_reservation_is_valid;
+    uint read_success;
+
+    uint value;
+};
+)"; // field order must stay in sync with the host-side output_type struct
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+std::string generate_source(test_options options) // OpenCL C variant: builds producer/consumer kernels; options.source is not consulted
+{
+    std::stringstream s;
+    s << source_common; // shared output_type declaration
+    if (options.operation == pipe_operation::work_item) // plain write_pipe / read_pipe per work-item
+    {
+        s << R"(
+    kernel void producer(write_only pipe uint out_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        output[gid].write_reservation_is_valid = 1;
+
+        uint value = gid;
+        output[gid].write_success = write_pipe(out_pipe, &value) == 0;
+    }
+
+    kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        output[gid].num_packets = get_pipe_num_packets(in_pipe);
+        output[gid].max_packets = get_pipe_max_packets(in_pipe);
+
+        output[gid].read_reservation_is_valid = 1;
+
+        uint value;
+        output[gid].read_success = read_pipe(in_pipe, &value) == 0;
+        output[gid].value = value;
+    }
+    )";
+    }
+    else if (options.operation == pipe_operation::work_item_reservation) // even work-items reserve and handle two packets
+    {
+        s << R"(
+    kernel void producer(write_only pipe uint out_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        if (gid % 2 == 1) return;
+
+        reserve_id_t reservation = reserve_write_pipe(out_pipe, 2);
+        output[gid + 0].write_reservation_is_valid = is_valid_reserve_id(reservation);
+        output[gid + 1].write_reservation_is_valid = is_valid_reserve_id(reservation);
+
+        uint value0 = gid + 0;
+        uint value1 = gid + 1;
+        output[gid + 0].write_success = write_pipe(out_pipe, reservation, 0, &value0) == 0;
+        output[gid + 1].write_success = write_pipe(out_pipe, reservation, 1, &value1) == 0;
+        commit_write_pipe(out_pipe, reservation);
+    }
+
+    kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        if (gid % 2 == 1) return;
+
+        output[gid + 0].num_packets = get_pipe_num_packets(in_pipe);
+        output[gid + 0].max_packets = get_pipe_max_packets(in_pipe);
+        output[gid + 1].num_packets = get_pipe_num_packets(in_pipe);
+        output[gid + 1].max_packets = get_pipe_max_packets(in_pipe);
+
+        reserve_id_t reservation = reserve_read_pipe(in_pipe, 2);
+        output[gid + 0].read_reservation_is_valid = is_valid_reserve_id(reservation);
+        output[gid + 1].read_reservation_is_valid = is_valid_reserve_id(reservation);
+
+        uint value0;
+        uint value1;
+        output[gid + 0].read_success = read_pipe(in_pipe, reservation, 1, &value0) == 0;
+        output[gid + 1].read_success = read_pipe(in_pipe, reservation, 0, &value1) == 0;
+        commit_read_pipe(in_pipe, reservation);
+        output[gid + 0].value = value0;
+        output[gid + 1].value = value1;
+    }
+    )";
+    }
+    else if (options.operation == pipe_operation::work_group_reservation) // one reservation per work-group; reads use reversed indices
+    {
+        s << R"(
+    kernel void producer(write_only pipe uint out_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        reserve_id_t reservation = work_group_reserve_write_pipe(out_pipe, get_local_size(0));
+        output[gid].write_reservation_is_valid = is_valid_reserve_id(reservation);
+
+        uint value = gid;
+        output[gid].write_success = write_pipe(out_pipe, reservation, get_local_id(0), &value) == 0;
+        work_group_commit_write_pipe(out_pipe, reservation);
+    }
+
+    kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        output[gid].num_packets = get_pipe_num_packets(in_pipe);
+        output[gid].max_packets = get_pipe_max_packets(in_pipe);
+
+        reserve_id_t reservation = work_group_reserve_read_pipe(in_pipe, get_local_size(0));
+        output[gid].read_reservation_is_valid = is_valid_reserve_id(reservation);
+
+        uint value;
+        output[gid].read_success = read_pipe(in_pipe, reservation, get_local_size(0) - 1 - get_local_id(0), &value) == 0;
+        work_group_commit_read_pipe(in_pipe, reservation);
+        output[gid].value = value;
+    }
+    )";
+    }
+    else if (options.operation == pipe_operation::sub_group_reservation) // one reservation per sub-group; enables cl_khr_subgroups
+    {
+        s << R"(
+    #pragma OPENCL EXTENSION cl_khr_subgroups : enable
+
+    kernel void producer(write_only pipe uint out_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        reserve_id_t reservation = sub_group_reserve_write_pipe(out_pipe, get_sub_group_size());
+        output[gid].write_reservation_is_valid = is_valid_reserve_id(reservation);
+
+        uint value = gid;
+        output[gid].write_success = write_pipe(out_pipe, reservation, get_sub_group_local_id(), &value) == 0;
+        sub_group_commit_write_pipe(out_pipe, reservation);
+    }
+
+    kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+
+        output[gid].num_packets = get_pipe_num_packets(in_pipe);
+        output[gid].max_packets = get_pipe_max_packets(in_pipe);
+
+        reserve_id_t reservation = sub_group_reserve_read_pipe(in_pipe, get_sub_group_size());
+        output[gid].read_reservation_is_valid = is_valid_reserve_id(reservation);
+
+        uint value;
+        output[gid].read_success = read_pipe(in_pipe, reservation, get_sub_group_size() - 1 - get_sub_group_local_id(), &value) == 0;
+        sub_group_commit_read_pipe(in_pipe, reservation);
+        output[gid].value = value;
+    }
+    )";
+    }
+
+    return s.str(); // contains only source_common if no operation matched
+}
+#else
+std::string generate_source(test_options options) // OpenCL C++ variant: builds producer/consumer kernels for the given options
+{
+    std::stringstream s;
+    s << R"(
+    #include <opencl_memory>
+    #include <opencl_common>
+    #include <opencl_work_item>
+    #include <opencl_synchronization>
+    #include <opencl_pipe>
+    using namespace cl;
+    )";
+
+    s << source_common; // shared output_type declaration
+
+    std::string init_out_pipe; // kernel statement that defines out_pipe in the producer
+    std::string init_in_pipe;  // kernel statement that defines in_pipe in the consumer
+    if (options.source == pipe_source::param) // use the pipe passed as the kernel argument
+    {
+        init_out_pipe = "auto out_pipe = pipe_param;";
+        init_in_pipe = "auto in_pipe = pipe_param;";
+    }
+    else if (options.source == pipe_source::storage) // use a program-scope pipe_storage sized to max_packets
+    {
+        s << "pipe_storage<uint, " << std::to_string(options.max_packets) << "> storage;";
+        init_out_pipe = "auto out_pipe = storage.get<pipe_access::write>();";
+        init_in_pipe = "auto in_pipe = make_pipe(storage);";
+    }
+
+    if (options.operation == pipe_operation::work_item) // plain write / read per work-item
+    {
+        s << R"(
+    kernel void producer(pipe<uint, pipe_access::write> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_out_pipe << R"(
+        const ulong gid = get_global_id(0);
+
+        output[gid].write_reservation_is_valid = 1;
+
+        uint value = gid;
+        output[gid].write_success = out_pipe.write(value);
+    }
+
+    kernel void consumer(pipe<uint, pipe_access::read> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_in_pipe << R"(
+        const ulong gid = get_global_id(0);
+
+        output[gid].num_packets = in_pipe.num_packets();
+        output[gid].max_packets = in_pipe.max_packets();
+
+        output[gid].read_reservation_is_valid = 1;
+
+        uint value;
+        output[gid].read_success = in_pipe.read(value);
+        output[gid].value = value;
+    }
+    )";
+    }
+    else if (options.operation == pipe_operation::work_item_reservation) // even work-items reserve and handle two packets
+    {
+        s << R"(
+    kernel void producer(pipe<uint, pipe_access::write> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_out_pipe << R"(
+        const ulong gid = get_global_id(0);
+        if (gid % 2 == 1) return;
+
+        auto reservation = out_pipe.reserve(2);
+        output[gid + 0].write_reservation_is_valid = reservation.is_valid();
+        output[gid + 1].write_reservation_is_valid = reservation.is_valid();
+
+        uint value0 = gid + 0;
+        uint value1 = gid + 1;
+        output[gid + 0].write_success = reservation.write(0, value0);
+        output[gid + 1].write_success = reservation.write(1, value1);
+        reservation.commit();
+    }
+
+    kernel void consumer(pipe<uint, pipe_access::read> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_in_pipe << R"(
+        const ulong gid = get_global_id(0);
+        if (gid % 2 == 1) return;
+
+        output[gid + 0].num_packets = in_pipe.num_packets();
+        output[gid + 0].max_packets = in_pipe.max_packets();
+        output[gid + 1].num_packets = in_pipe.num_packets();
+        output[gid + 1].max_packets = in_pipe.max_packets();
+
+        auto reservation = in_pipe.reserve(2);
+        output[gid + 0].read_reservation_is_valid = reservation.is_valid();
+        output[gid + 1].read_reservation_is_valid = reservation.is_valid();
+
+        uint value0;
+        uint value1;
+        output[gid + 0].read_success = reservation.read(1, value0);
+        output[gid + 1].read_success = reservation.read(0, value1);
+        reservation.commit();
+        output[gid + 0].value = value0;
+        output[gid + 1].value = value1;
+    }
+    )";
+    }
+    else if (options.operation == pipe_operation::work_group_reservation) // one reservation per work-group; reads use reversed indices
+    {
+        s << R"(
+    kernel void producer(pipe<uint, pipe_access::write> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_out_pipe << R"(
+        const ulong gid = get_global_id(0);
+
+        auto reservation = out_pipe.work_group_reserve(get_local_size(0));
+        output[gid].write_reservation_is_valid = reservation.is_valid();
+
+        uint value = gid;
+        output[gid].write_success = reservation.write(get_local_id(0), value);
+        reservation.commit();
+    }
+
+    kernel void consumer(pipe<uint, pipe_access::read> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_in_pipe << R"(
+        const ulong gid = get_global_id(0);
+
+        output[gid].num_packets = in_pipe.num_packets();
+        output[gid].max_packets = in_pipe.max_packets();
+
+        auto reservation = in_pipe.work_group_reserve(get_local_size(0));
+        output[gid].read_reservation_is_valid = reservation.is_valid();
+
+        uint value;
+        output[gid].read_success = reservation.read(get_local_size(0) - 1 - get_local_id(0), value);
+        reservation.commit();
+        output[gid].value = value;
+    }
+    )";
+    }
+    else if (options.operation == pipe_operation::sub_group_reservation) // one reservation per sub-group; reads use reversed indices
+    {
+        s << R"(
+    kernel void producer(pipe<uint, pipe_access::write> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_out_pipe << R"(
+        const ulong gid = get_global_id(0);
+
+        auto reservation = out_pipe.sub_group_reserve(get_sub_group_size());
+        output[gid].write_reservation_is_valid = reservation.is_valid();
+
+        uint value = gid;
+        output[gid].write_success = reservation.write(get_sub_group_local_id(), value);
+        reservation.commit();
+    }
+
+    kernel void consumer(pipe<uint, pipe_access::read> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_in_pipe << R"(
+        const ulong gid = get_global_id(0);
+
+        output[gid].num_packets = in_pipe.num_packets();
+        output[gid].max_packets = in_pipe.max_packets();
+
+        auto reservation = in_pipe.sub_group_reserve(get_sub_group_size());
+        output[gid].read_reservation_is_valid = reservation.is_valid();
+
+        uint value;
+        output[gid].read_success = reservation.read(get_sub_group_size() - 1 - get_sub_group_local_id(), value);
+        reservation.commit();
+        output[gid].value = value;
+    }
+    )";
+    }
+
+    return s.str(); // contains no kernels if no operation matched
+}
+#endif
+
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) // runs producer then consumer through one pipe and validates the per-work-item output
+{
+    int error = CL_SUCCESS;
+
+    if (options.num_packets % 2 != 0 || options.max_packets < options.num_packets) // packet count must be even and fit in the pipe
+    {
+        RETURN_ON_ERROR_MSG(-1, "Invalid test options")
+    }
+
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    if (options.operation == pipe_operation::sub_group_reservation && !is_extension_available(device, "cl_khr_subgroups"))
+    {
+        log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n");
+        return CL_SUCCESS;
+    }
+#endif
+
+    cl_program program;
+    cl_kernel producer_kernel;
+    cl_kernel consumer_kernel;
+
+    std::string producer_kernel_name = "producer";
+    std::string consumer_kernel_name = "consumer";
+    std::string source = generate_source(options); // both kernels live in the same program source
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &producer_kernel,
+        source, producer_kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error; // compilation-only mode: nothing is executed
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &producer_kernel,
+        source, producer_kernel_name, "-cl-std=CL2.0", false
+    );
+    RETURN_ON_ERROR(error)
+    consumer_kernel = clCreateKernel(program, consumer_kernel_name.c_str(), &error);
+    RETURN_ON_CL_ERROR(error, "clCreateKernel")
+// Normal run
+#else
+    error = create_opencl_kernel(
+        context, &program, &producer_kernel,
+        source, producer_kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    consumer_kernel = clCreateKernel(program, consumer_kernel_name.c_str(), &error);
+    RETURN_ON_CL_ERROR(error, "clCreateKernel")
+#endif
+
+    size_t max_work_group_size;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+
+    const size_t count = options.num_packets; // one work-item and one output record per packet
+    const size_t local_size = (std::min)((size_t)256, max_work_group_size); // only passed to the consumer launch below
+    const size_t global_size = count;
+
+    const cl_uint packet_size = sizeof(cl_uint);
+
+    cl_mem pipe = clCreatePipe(context, 0, packet_size, options.max_packets, NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreatePipe")
+
+    cl_mem output_buffer;
+    output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * count, NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    const char pattern = 0; // zero-fill so fields a kernel never writes read back as 0
+    error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer")
+
+    error = clSetKernelArg(producer_kernel, 0, sizeof(cl_mem), &pipe);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(producer_kernel, 1, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    error = clEnqueueNDRangeKernel(queue, producer_kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); // local size left to the implementation
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    error = clSetKernelArg(consumer_kernel, 0, sizeof(cl_mem), &pipe);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(consumer_kernel, 1, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    error = clEnqueueNDRangeKernel(queue, consumer_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); // NOTE(review): global_size need not be a multiple of local_size - confirm non-uniform work-groups are intended
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    std::vector<output_type> output(count);
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(output_type) * count,
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    std::vector<bool> existing_values(count, false); // existing_values[v] is set once payload v has been seen
+    for (size_t gid = 0; gid < count; gid++)
+    {
+        const output_type &o = output[gid];
+
+        if (!o.write_reservation_is_valid)
+        {
+            RETURN_ON_ERROR_MSG(-1, "write reservation is not valid")
+        }
+        if (!o.write_success)
+        {
+            RETURN_ON_ERROR_MSG(-1, "write did not succeed")
+        }
+
+        if (o.num_packets == 0 || o.num_packets > options.num_packets)
+        {
+            RETURN_ON_ERROR_MSG(-1, "num_packets did not return correct value")
+        }
+        if (o.max_packets != options.max_packets)
+        {
+            RETURN_ON_ERROR_MSG(-1, "max_packets did not return correct value")
+        }
+        if (!o.read_reservation_is_valid)
+        {
+            RETURN_ON_ERROR_MSG(-1, "read reservation is not valid")
+        }
+        if (!o.read_success)
+        {
+            RETURN_ON_ERROR_MSG(-1, "read did not succeed")
+        }
+
+        // Every value must be present exactly once, in any order
+        if (o.value >= count || existing_values[o.value])
+        {
+            RETURN_ON_ERROR_MSG(-1, "kernel did not return correct value")
+        }
+        existing_values[o.value] = true;
+    }
+
+    clReleaseMemObject(pipe); // NOTE(review): the RETURN_ON_* macros above presumably return early, which would skip these releases - confirm
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(producer_kernel);
+    clReleaseKernel(consumer_kernel);
+    clReleaseProgram(program);
+    return error;
+}
+
+const pipe_operation pipe_operations[] = { // every access pattern exercised by the test cases below
+    pipe_operation::work_item,
+    pipe_operation::work_item_reservation,
+    pipe_operation::work_group_reservation,
+    pipe_operation::sub_group_reservation
+};
+
+const std::tuple<int, int> max_and_num_packets[] = { // (max_packets, num_packets) pairs; test() requires num_packets to be even
+    std::make_tuple<int, int>(2, 2),
+    std::make_tuple<int, int>(10, 8),
+    std::make_tuple<int, int>(256, 254),
+    std::make_tuple<int, int>(1 << 16, 1 << 16),
+    std::make_tuple<int, int>((1 << 16) + 5, 1 << 16),
+    std::make_tuple<int, int>(12345, 12344),
+    std::make_tuple<int, int>(1 << 18, 1 << 18)
+};
+
+AUTO_TEST_CASE(test_pipes_pipe)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Keep the predefined (max_packets, num_packets) pairs whose capacity is
+    // below num_elements, then append one pair sized exactly num_elements.
+    std::vector<std::tuple<int, int>> packet_configs;
+    for (const auto &candidate : max_and_num_packets)
+        if (std::get<0>(candidate) < num_elements)
+            packet_configs.push_back(candidate);
+    packet_configs.emplace_back(num_elements, num_elements);
+
+    // Exercise every pipe operation with every configuration (kernel-argument pipe).
+    int error = CL_SUCCESS;
+    for (const auto operation : pipe_operations)
+    {
+        for (const auto &config : packet_configs)
+        {
+            test_options options;
+            options.operation = operation;
+            options.source = pipe_source::param;
+            std::tie(options.max_packets, options.num_packets) = config;
+
+            error = test(device, context, queue, options);
+            RETURN_ON_ERROR(error)
+        }
+    }
+    return error;
+}
+
+AUTO_TEST_CASE(test_pipes_pipe_storage)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Keep the predefined (max_packets, num_packets) pairs whose capacity is
+    // below num_elements, then append one pair sized exactly num_elements.
+    std::vector<std::tuple<int, int>> packet_configs;
+    for (const auto &candidate : max_and_num_packets)
+        if (std::get<0>(candidate) < num_elements)
+            packet_configs.push_back(candidate);
+    packet_configs.emplace_back(num_elements, num_elements);
+
+    // Exercise every pipe operation with every configuration (pipe_storage pipe).
+    int error = CL_SUCCESS;
+    for (const auto operation : pipe_operations)
+    {
+        for (const auto &config : packet_configs)
+        {
+            test_options options;
+            options.operation = operation;
+            options.source = pipe_source::storage;
+            std::tie(options.max_packets, options.num_packets) = config;
+
+            error = test(device, context, queue, options);
+            RETURN_ON_ERROR(error)
+        }
+    }
+    return error;
+}
+
+} // namespace test_pipes
+
+#endif // TEST_CONFORMANCE_CLCPP_PIPES_TEST_PIPES_HPP

diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/CMakeLists.txt b/test_conformance/clcpp/program_scope_ctors_dtors/CMakeLists.txt
new file mode 100644
index 0000000..fd36d30
--- /dev/null
+++ b/test_conformance/clcpp/program_scope_ctors_dtors/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Name of this conformance test module.
+set(MODULE_NAME CPP_PROGRAM_SCOPE_CTORS_DTORS)
+
+# Source files of the module; the tests themselves live in headers that
+# main.cpp includes.
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+)
+
+# Shared build logic; presumably consumes MODULE_NAME and
+# ${MODULE_NAME}_SOURCES -- confirm in CMakeCommon.txt.
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/common.hpp b/test_conformance/clcpp/program_scope_ctors_dtors/common.hpp
new file mode 100644
index 0000000..35bf81c
--- /dev/null
+++ b/test_conformance/clcpp/program_scope_ctors_dtors/common.hpp

@@ -0,0 +1,284 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_COMMON_HPP
+#define TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_COMMON_HPP
+
+#include <algorithm>
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+// Runs run_ps_ctor_dtor_test() for the test object TEST_CLASS and folds the
+// result into the caller's error accumulator.
+// The expansion site must have device, context, queue, count, last_error and
+// error in scope. The expansion ends with its own ';', so the macro is used
+// without a trailing semicolon.
+// NOTE(review): CHECK_ERROR is defined elsewhere; it presumably only reports
+// the error, since execution is expected to continue to "error |= ..." --
+// confirm.
+#define RUN_PS_CTORS_DTORS_TEST_MACRO(TEST_CLASS) \
+    last_error = run_ps_ctor_dtor_test(  \
+        device, context, queue, count, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+
+// Base class for all tests for kernels with program scope object with
+// non-trivial ctors and/or dtors.
+//
+// A derived test provides the test name, the program source and the expected
+// output values. This class provides a default kernel list, a default way of
+// executing the kernels, and a check of the clGetProgramInfo() queries
+// CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT / CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT
+// against the flags given to the constructor.
+struct ps_ctors_dtors_test_base : public detail::base_func_type<cl_uint>
+{
+    // ctor is true, if and only if OpenCL program of this test contains program
+    // scope variable with non-trivial ctor.
+    // dtor is true, if and only if OpenCL program of this test contains program
+    // scope variable with non-trivial dtor.
+    ps_ctors_dtors_test_base(const bool ctor,
+                             const bool dtor)
+        : m_ctor(ctor), m_dtor(dtor)
+    {
+
+    }
+    virtual ~ps_ctors_dtors_test_base() { }
+    // Returns test name
+    virtual std::string str() = 0;
+    // Returns OpenCL program source
+    virtual std::string generate_program() = 0;
+    // Returns kernel names IN ORDER
+    virtual std::vector<std::string> get_kernel_names()
+    {
+        // Typical case, that is, only one kernel
+        return { this->get_kernel_name() };
+    }
+    // Returns value that is expected to be in output_buffer[i]
+    virtual cl_uint operator()(size_t i) = 0;
+    // Executes kernels
+    // Typical case: execute every kernel once, in the given order, every
+    // kernel has only one argument, that is, output buffer.
+    // Returns CL_SUCCESS when every call succeeded (also for an empty kernel
+    // list); RETURN_ON_CL_ERROR is expected to return the failing code early.
+    virtual cl_int execute(const std::vector<cl_kernel>& kernels,
+                           cl_mem& output_buffer,
+                           cl_command_queue& queue,
+                           size_t work_size)
+    {
+        // Initialized so that an empty kernel list yields a defined result.
+        cl_int err = CL_SUCCESS;
+        for(auto& k : kernels)
+        {
+            err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer);
+            RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+            err = clEnqueueNDRangeKernel(
+                queue, k, 1,
+                NULL, &work_size, NULL,
+                0, NULL, NULL
+            );
+            RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+        }
+        return err;
+    }
+    // This method checks if queries for CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT
+    // and CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT using clGetProgramInfo()
+    // return correct values, i.e. values matching the ctor/dtor flags passed
+    // to the constructor of this class.
+    // Returns CL_SUCCESS if both queries succeed and report the expected
+    // values, the clGetProgramInfo() error if a query fails, and -1 if a
+    // returned size or value is not the expected one.
+    // When both DEVELOPMENT and USE_OPENCLC_KERNELS are defined the queries
+    // are skipped and CL_SUCCESS is returned.
+    virtual cl_int ctors_dtors_present_queries(cl_program program)
+    {
+        cl_int error = CL_SUCCESS;
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return error;
+        #else
+            // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT cl_bool
+            // This indicates that the program object contains non-trivial constructor(s) that will be
+            // executed by runtime before any kernel from the program is executed.
+
+            // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT cl_bool
+            // This indicates that the program object contains non-trivial destructor(s) that will be
+            // executed by runtime when program is destroyed.
+
+            // Result of the API calls is kept apart from 'error' so that a
+            // failure detected after the first query is not overwritten by
+            // the return code of the second query.
+            cl_int query_error = CL_SUCCESS;
+            size_t cl_bool_size = 0;
+
+            // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT
+            cl_bool ctors_present = 0;
+            query_error = clGetProgramInfo(
+                program,
+                CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT,
+                sizeof(cl_bool),
+                static_cast<void*>(&ctors_present),
+                &cl_bool_size
+            );
+            RETURN_ON_CL_ERROR(query_error, "clGetProgramInfo")
+            if(cl_bool_size != sizeof(cl_bool))
+            {
+                error = -1;
+                CHECK_ERROR_MSG(
+                    error,
+                    "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).\n",
+                    cl_bool_size,
+                    sizeof(cl_bool)
+                );
+            }
+
+            // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT
+            // The result must be stored in dtors_present (not ctors_present),
+            // otherwise both checks below look at the wrong value.
+            cl_bool dtors_present = 0;
+            query_error = clGetProgramInfo(
+                program,
+                CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT,
+                sizeof(cl_bool),
+                static_cast<void*>(&dtors_present),
+                &cl_bool_size
+            );
+            RETURN_ON_CL_ERROR(query_error, "clGetProgramInfo")
+            if(cl_bool_size != sizeof(cl_bool))
+            {
+                error = -1;
+                CHECK_ERROR_MSG(
+                    error,
+                    "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).\n",
+                    cl_bool_size,
+                    sizeof(cl_bool)
+                );
+            }
+
+            // check constructors
+            if(m_ctor && ctors_present != CL_TRUE)
+            {
+                error = -1;
+                CHECK_ERROR_MSG(
+                    error,
+                    "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 0, should be: 1.\n"
+                );
+            }
+            else if(!m_ctor && ctors_present == CL_TRUE)
+            {
+                error = -1;
+                CHECK_ERROR_MSG(
+                    error,
+                    "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 1, should be: 0.\n"
+                );
+            }
+
+            // check destructors
+            if(m_dtor && dtors_present != CL_TRUE)
+            {
+                error = -1;
+                CHECK_ERROR_MSG(
+                    error,
+                    "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 0, should be: 1.\n"
+                );
+            }
+            else if(!m_dtor && dtors_present == CL_TRUE)
+            {
+                error = -1;
+                CHECK_ERROR_MSG(
+                    error,
+                    "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 1, should be: 0.\n"
+                );
+            }
+            return error;
+        #endif
+    }
+
+private:
+    // Expected answer of CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT
+    bool m_ctor;
+    // Expected answer of CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT
+    bool m_dtor;
+};
+
+// Runs one program scope ctor/dtor test described by op:
+//  1. builds the program returned by op.generate_program() and creates the
+//     kernels listed by op.get_kernel_names(),
+//  2. runs them through op.execute() on a buffer of count cl_uint elements,
+//  3. checks the program queries via op.ctors_dtors_present_queries(),
+//  4. releases the kernels and the program (destructors of program scope
+//     variables should run at that point), then reads the buffer back and
+//     compares every element with op(i).
+// Returns CL_SUCCESS when all steps succeed. On failure the RETURN_ON_*
+// macros are expected to return the failing code (-1 for an empty kernel
+// list or an output mismatch).
+// The output buffer is released on every exit path.
+// NOTE(review): kernels and program are still not released when a step
+// before their explicit release fails; the state create_opencl_kernel()
+// leaves them in on failure is not visible here.
+template <class ps_ctor_dtor_test>
+int run_ps_ctor_dtor_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, ps_ctor_dtor_test op)
+{
+    // Owns the output buffer so that the early returns hidden inside the
+    // RETURN_ON_* macros do not leak it.
+    struct buffer_releaser
+    {
+        cl_mem mem;
+        buffer_releaser() : mem(NULL) { }
+        ~buffer_releaser()
+        {
+            if(mem != NULL)
+            {
+                clReleaseMemObject(mem);
+            }
+        }
+    };
+
+    buffer_releaser output_buffer;
+    cl_program program;
+    std::vector<cl_kernel> kernels;
+    cl_int err;
+
+    std::string code_str = op.generate_program();
+    std::vector<std::string> kernel_names = op.get_kernel_names();
+    if(kernel_names.empty())
+    {
+        RETURN_ON_ERROR_MSG(-1, "No kernel to run");
+    }
+    kernels.resize(kernel_names.size());
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+    // The first kernel comes from create_opencl_kernel(); create the rest.
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+#else
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]);
+    RETURN_ON_ERROR(err)
+    // The first kernel comes from create_opencl_kernel(); create the rest.
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+#endif
+
+    const size_t work_size = count;
+    // host output vector
+    std::vector<cl_uint> output = generate_output<cl_uint>(work_size, 9999);
+
+    // device output buffer
+    output_buffer.mem = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    // Execute test
+    err = op.execute(kernels, output_buffer.mem, queue, work_size);
+    RETURN_ON_ERROR(err)
+
+    // Check if queries returns correct values
+    err = op.ctors_dtors_present_queries(program);
+    RETURN_ON_ERROR(err);
+
+    // Release kernels and program
+    // Destructors should be called now
+    for(auto& k : kernels)
+    {
+        err = clReleaseKernel(k);
+        RETURN_ON_CL_ERROR(err, "clReleaseKernel");
+    }
+    err = clReleaseProgram(program);
+    RETURN_ON_CL_ERROR(err, "clReleaseProgram");
+
+    // Finish
+    err = clFinish(queue);
+    RETURN_ON_CL_ERROR(err, "clFinish");
+
+    // The buffer is read only after the program was released, so that the
+    // writes made by program scope destructors are part of the result.
+    err = clEnqueueReadBuffer(
+        queue, output_buffer.mem, CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // Check output values
+    for(size_t i = 0; i < output.size(); i++)
+    {
+        cl_uint v = op(i);
+        if(!(are_equal(v, output[i], detail::make_value<cl_uint>(0), op)))
+        {
+            RETURN_ON_ERROR_MSG(-1,
+                "test_%s(%s) failed. Expected: %s, got: %s", op.str().c_str(), type_name<cl_uint>().c_str(),
+                format_value(v).c_str(), format_value(output[i]).c_str()
+            );
+        }
+    }
+    log_info("test_%s(%s) passed\n", op.str().c_str(), type_name<cl_uint>().c_str());
+
+    // output_buffer is released by its destructor.
+    return err;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_COMMON_HPP

diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/main.cpp b/test_conformance/clcpp/program_scope_ctors_dtors/main.cpp
new file mode 100644
index 0000000..08c1908
--- /dev/null
+++ b/test_conformance/clcpp/program_scope_ctors_dtors/main.cpp

@@ -0,0 +1,24 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_ctors_dtors.hpp"
+
+// Entry point: passes every test registered in the global autotest suite to
+// runTestHarness() and returns its result.
+// NOTE(review): the meaning of the trailing (false, false, 0) arguments is
+// defined by runTestHarness(), which is not visible here.
+int main(int argc, const char *argv[])
+{
+    auto& registered_tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(
+        argc, argv,
+        registered_tests.size(), registered_tests.data(),
+        false, false, 0
+    );
+}

diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/test_ctors_dtors.hpp b/test_conformance/clcpp/program_scope_ctors_dtors/test_ctors_dtors.hpp
new file mode 100644
index 0000000..c9ac082
--- /dev/null
+++ b/test_conformance/clcpp/program_scope_ctors_dtors/test_ctors_dtors.hpp

@@ -0,0 +1,324 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_TEST_CTORS_DTORS_HPP
+#define TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_TEST_CTORS_DTORS_HPP
+
+#include "common.hpp"
+
+// Test for program scope variable with non-trivial ctor.
+//
+// The program has two kernels, "<name>_set" and "<name>_read", run in that
+// order. Expected output: test_value at even indices, 0xbeefbeef at odd ones.
+struct ps_ctor_test : public ps_ctors_dtors_test_base
+{
+    // test_value is the value expected at even indices of the output.
+    ps_ctor_test(const cl_uint test_value)
+        : ps_ctors_dtors_test_base(true, false),
+          m_test_value(test_value)
+    {
+    }
+
+    // Returns test name
+    std::string str() override
+    {
+        return "ps_ctor_test";
+    }
+
+    // Returns kernel names in execution order: the "set" kernel, then the
+    // "read" kernel.
+    std::vector<std::string> get_kernel_names() override
+    {
+        const std::string base_name = this->str();
+        std::vector<std::string> names;
+        names.push_back(base_name + "_set");
+        names.push_back(base_name + "_read");
+        return names;
+    }
+
+    // Returns value that is expected to be in output_buffer[i]
+    cl_uint operator()(size_t i) override
+    {
+        return (i % 2 == 0) ? m_test_value : cl_uint(0xbeefbeef);
+    }
+
+    // In 1st kernel 0th work-item sets member m_x of program scope variable global_var to
+    // m_test_value and m_y to 0xbeefbeef.
+    // In 2nd kernel:
+    // 1) if global id is even, then work-item reads global_var.m_x and writes it to output[its-global-id];
+    // 2) otherwise, work-item reads global_var.m_y and writes it to output[its-global-id].
+    std::string generate_program() override
+    {
+        const std::vector<std::string> names = this->get_kernel_names();
+        const std::string value_str = std::to_string(m_test_value);
+
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            // OpenCL C stand-in: 1st kernel fills the output with 0xbeefbeef,
+            // 2nd kernel overwrites the even elements with the test value.
+            const std::string set_kernel =
+                "__kernel void " + names[0] + "(global uint *output)\n"
+                "{\n"
+                "   size_t gid = get_global_id(0);\n"
+                "   output[gid] = 0xbeefbeef;\n"
+                "}\n";
+            const std::string read_kernel =
+                "__kernel void " + names[1] + "(global uint *output)\n"
+                "{\n"
+                "   size_t gid = get_global_id(0);\n"
+                "   if(gid % 2 == 0)\n"
+                "      output[gid] = " + value_str + ";\n"
+                "}\n";
+            return set_kernel + read_kernel;
+        #else
+            // Class hierarchy with non-trivial ctors and the program scope
+            // variable built from it.
+            const std::string definitions =
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "template<class T>\n"
+                "struct ctor_test_class_base {\n"
+                "   ctor_test_class_base(T x) { m_x = x;};\n"
+                "   T m_x;\n"
+                "};\n"
+                "template<class T>\n"
+                "struct ctor_test_class : public ctor_test_class_base<T> {\n"
+                "   ctor_test_class(T x, T y) : ctor_test_class_base<T>(x), m_y(y) { };\n"
+                "   T m_y;\n"
+                "};\n"
+                "ctor_test_class<uint> global_var(uint(0), uint(0));\n";
+            // 1st kernel: work-item 0 stores the values in global_var.
+            const std::string set_kernel =
+                "__kernel void " + names[0] + "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "   size_t gid = get_global_id(0);\n"
+                "   if(gid == 0) {\n"
+                "       global_var.m_x = " + value_str + ";\n"
+                "       global_var.m_y = 0xbeefbeef;\n"
+                "   }\n"
+                "}\n";
+            // 2nd kernel: copies m_x (even ids) or m_y (odd ids) to output.
+            const std::string read_kernel =
+                "__kernel void " + names[1] + "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "   size_t gid = get_global_id(0);\n"
+                "   if(gid % 2 == 0)\n"
+                "      output[gid] = global_var.m_x;\n"
+                "   else\n"
+                "      output[gid] = global_var.m_y;\n"
+                "}\n";
+            return definitions + set_kernel + read_kernel;
+        #endif
+    }
+
+private:
+    // Value expected at even indices of the output buffer
+    cl_uint m_test_value;
+};
+
+// Test for program scope variable with non-trivial dtor.
+//
+// Expected output: test_value at even indices, 1 at odd indices.
+struct ps_dtor_test : public ps_ctors_dtors_test_base
+{
+    // test_value is the value expected at even indices of the output.
+    ps_dtor_test(const cl_uint test_value)
+        : ps_ctors_dtors_test_base(false, true),
+          m_test_value(test_value)
+    {
+    }
+
+    // Returns test name
+    std::string str() override
+    {
+        return "ps_dtor_test";
+    }
+
+    // Returns value that is expected to be in output_buffer[i]
+    cl_uint operator()(size_t i) override
+    {
+        return (i % 2 == 0) ? m_test_value : 1;
+    }
+
+    // In the kernel 0th work-item saves pointer to output buffer and its size in program scope
+    // variable global_var, it also sets counter to 1;
+    // After global_var is destroyed all even elements of output buffer should equal m_test_value,
+    // and all odd should equal 1.
+    // If odd elements of output buffer are >1 it means dtor was executed more than once.
+    std::string generate_program() override
+    {
+        const std::string kernel_name = this->get_kernel_name();
+        const std::string value_str = std::to_string(m_test_value);
+
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            // OpenCL C stand-in: writes the expected values directly.
+            return
+                "__kernel void " + kernel_name + "(global uint *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    if(gid % 2 == 0)\n"
+                "        output[gid] = " + value_str + ";\n"
+                "    else\n"
+                "        output[gid] = 1;\n"
+                "}\n";
+        #else
+            // Base class: its dtor writes counter to all odd elements of the
+            // buffer and then increments counter.
+            const std::string base_class =
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "template<class T>\n"
+                "struct dtor_test_class_base {\n"
+                "   ~dtor_test_class_base() {\n"
+                "       for(size_t i = 1; i < this->size; i+=2)\n"
+                "       {\n"
+                "           this->buffer[i] = counter;\n"
+                "       }\n"
+                "       counter++;\n"
+                "   };\n"
+                "   global_ptr<uint[]> buffer;\n"
+                "   size_t size;\n"
+                "   T counter;\n"
+                "};\n";
+            // Derived class: its dtor writes m_test_value to all even elements
+            // of the buffer. When global_var is being destroyed, first dtor
+            // ~dtor_test_class is called, and then ~dtor_test_class_base.
+            const std::string derived_class =
+                "struct dtor_test_class : public dtor_test_class_base<uint> {\n"
+                "   ~dtor_test_class() {\n"
+                "       for(size_t i = 0; i < this->size; i+=2)\n"
+                "           this->buffer[i] = " + value_str + ";\n"
+                "   };\n"
+                "};\n"
+                "dtor_test_class global_var;\n";
+            // Kernel: work-item 0 stores buffer, size and counter in global_var.
+            const std::string kernel =
+                "__kernel void " + kernel_name + "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "   size_t gid = get_global_id(0);\n"
+                "   if(gid == 0){\n"
+                "       global_var.buffer = output;\n"
+                "       global_var.size = get_global_size(0);\n"
+                "       global_var.counter = 1;\n"
+                "   }\n"
+                "}\n";
+            return base_class + derived_class + kernel;
+        #endif
+    }
+
+private:
+    // Value expected at even indices of the output buffer
+    cl_uint m_test_value;
+};
+
+// Test for program scope variable with both non-trivial ctor
+// and non-trivial dtor.
+//
+// Expected output: every element equals test_value.
+struct ps_ctor_dtor_test : public ps_ctors_dtors_test_base
+{
+    // test_value is passed to the ctor of the program scope variable and is
+    // the value expected in every element of the output.
+    // The program contains both a non-trivial program scope ctor and a
+    // non-trivial program scope dtor, so both flags of the base are true.
+    ps_ctor_dtor_test(const cl_uint test_value)
+        : ps_ctors_dtors_test_base(true, true),
+          m_test_value(test_value)
+    {
+
+    }
+
+    // Returns test name
+    std::string str()
+    {
+        return "ps_ctor_dtor_test";
+    }
+
+    // Returns value that is expected to be in output_buffer[i]
+    cl_uint operator()(size_t i)
+    {
+        return m_test_value;
+    }
+
+    // The ctor of program scope variable global_var stores m_test_value in it.
+    // In the kernel 0th work-item saves pointer to output buffer and its size
+    // in global_var.
+    // After global_var is destroyed all elements of output buffer should
+    // equal m_test_value.
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + this->get_kernel_name() + "(global uint *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = " + std::to_string(m_test_value) + ";\n"
+                "}\n";
+        #else
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                // struct template
+                "template<class T>\n"
+                "struct ctor_test_class {\n"
+                // non-trivial ctor
+                "   ctor_test_class(T value) : m_value(value) { };\n"
+                "   T m_value;\n"
+                "};\n\n"
+                // struct
+                "struct ctor_dtor_test_class {\n"
+                // non-trivial ctor
+                "   ctor_dtor_test_class(uint value) : ctor_test(value) { } \n"
+                // non-trivial dtor
+                // set all values in buffer to the value stored by the ctor
+                "   ~ctor_dtor_test_class() {\n"
+                "       for(size_t i = 0; i < this->size; i++)\n"
+                "       {\n"
+                "          this->buffer[i] = ctor_test.m_value;\n"
+                "       }\n"
+                "   };\n"
+                "   ctor_test_class<uint> ctor_test;\n"
+                "   global_ptr<uint[]> buffer;\n"
+                "   size_t size;\n"
+                "};\n"
+                // global scope program variable
+                "ctor_dtor_test_class global_var(" + std::to_string(m_test_value) + ");\n"
+
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "   size_t gid = get_global_id(0);\n"
+                // set buffer and size in global var
+                "   if(gid == 0){\n"
+                "       global_var.buffer = output;\n"
+                "       global_var.size = get_global_size(0);\n"
+                "   }\n"
+                "}\n";
+        #endif
+    }
+
+private:
+    // Value expected in every element of the output buffer
+    cl_uint m_test_value;
+};
+
+// This contains tests for program scope (global) constructors and destructors, more
+// detailed tests are also in clcpp/api.
+// Runs the ctor, dtor and ctor+dtor tests one after another and returns
+// CL_SUCCESS only if the accumulated result of all three is CL_SUCCESS,
+// -1 otherwise.
+AUTO_TEST_CASE(test_program_scope_ctors_dtors)
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+{
+    // Both variables are referenced by name from inside
+    // RUN_PS_CTORS_DTORS_TEST_MACRO.
+    int last_error = CL_SUCCESS;
+    int error = CL_SUCCESS;
+
+    RUN_PS_CTORS_DTORS_TEST_MACRO(ps_ctor_test(0xdeadbeefU))
+    RUN_PS_CTORS_DTORS_TEST_MACRO(ps_dtor_test(0xbeefdeadU))
+    RUN_PS_CTORS_DTORS_TEST_MACRO(ps_ctor_dtor_test(0xdeaddeadU))
+
+    // Any accumulated failure is reported as -1.
+    return (error == CL_SUCCESS) ? error : -1;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_TEST_CTORS_DTORS_HPP

diff --git a/test_conformance/clcpp/reinterpret/CMakeLists.txt b/test_conformance/clcpp/reinterpret/CMakeLists.txt
new file mode 100644
index 0000000..ed02c56
--- /dev/null
+++ b/test_conformance/clcpp/reinterpret/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Name of this conformance test module.
+set(MODULE_NAME CPP_REINTERPRET)
+
+# Source files of the module; the tests themselves live in headers that
+# main.cpp includes.
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+)
+
+# Shared build logic; presumably consumes MODULE_NAME and
+# ${MODULE_NAME}_SOURCES -- confirm in CMakeCommon.txt.
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/reinterpret/as_type.hpp b/test_conformance/clcpp/reinterpret/as_type.hpp
new file mode 100644
index 0000000..da088cf
--- /dev/null
+++ b/test_conformance/clcpp/reinterpret/as_type.hpp

@@ -0,0 +1,223 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_REINTERPRET_AS_TYPE_HPP
+#define TEST_CONFORMANCE_CLCPP_REINTERPRET_AS_TYPE_HPP
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include <cstring>
+
+
+// Host-side reference for the OpenCL C++ as_type<T>() operator: reinterprets
+// the bytes of a value of type IN1 as a value of type OUT1.
+// IN1 and OUT1 must have the same size (checked at compile time).
+template<class IN1, class OUT1>
+struct as_type : public unary_func<IN1, OUT1>
+{
+    static_assert(sizeof(IN1) == sizeof(OUT1), "It is an error to use the as_type<T> operator to reinterpret data to a type of a different number of bytes");
+
+    // Returns the name of the tested function.
+    std::string str()
+    {
+        return "as_type";
+    }
+
+    // Returns the include line(s) the generated kernel needs for as_type.
+    std::string headers()
+    {
+        return "#include <opencl_reinterpret>\n";
+    }
+
+    // Returns x with its object representation reinterpreted as OUT1.
+    // The bytes are copied with std::memcpy instead of reading x through a
+    // reinterpret_cast'ed pointer: accessing an object through a pointer to
+    // an unrelated type violates the aliasing rules, a byte copy does not.
+    OUT1 operator()(const IN1& x)
+    {
+        OUT1 result;
+        std::memcpy(&result, &x, sizeof(OUT1));
+        return result;
+    }
+};
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Returns the source of an OpenCL C kernel named "test_" + func.str() that
+// writes as_<out_type>(input[gid]) to output[gid].
+template <class func_type, class in_type, class out_type>
+std::string generate_kernel_as_type(func_type func)
+{
+    std::string in1_value = "input[gid]";
+    // No trailing ';' here: the statement is terminated where the call is
+    // inserted below (previously this produced an empty statement, ";;").
+    std::string function_call = "as_" + type_name<out_type>() + "(" + in1_value + ")";
+    return
+        "__kernel void test_" + func.str() + "(global " + type_name<in_type>() + " *input, global " + type_name<out_type>() + " *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+}
+#else
+// Returns the source of an OpenCL C++ kernel named "test_" + func.str() that
+// writes as_type<out_type>(input[gid]) to output[gid]. The source starts with
+// func.defs() and func.headers().
+template <class func_type, class in_type, class out_type>
+std::string generate_kernel_as_type(func_type func)
+{
+    std::string headers = func.headers();
+    std::string in1_value = "input[gid]";
+    std::string function_call = "as_type<" + type_name<out_type>() + ">(" + in1_value + ")";
+    return
+        "" + func.defs() +
+        "" + headers +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_" + func.str() + "(global_ptr<" + type_name<in_type>() +  "[]> input,"
+                                              "global_ptr<" + type_name<out_type>() + "[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+}
+#endif
+
+// Compares the device results in out with the host reference op(in[i]),
+// byte by byte. Returns false (after print_error_msg() for the offending
+// element) on the first mismatch, true otherwise.
+template<class INPUT, class OUTPUT, class as_type_op>
+bool verify_as_type(const std::vector<INPUT> &in, const std::vector<OUTPUT> &out, as_type_op op)
+{
+    // When the operand and result type contain a different number of elements, the result is
+    // implementation-defined, i.e. any result is correct and nothing is compared.
+    if (vector_size<INPUT>::value != vector_size<OUTPUT>::value)
+    {
+        return true;
+    }
+
+    for (size_t idx = 0; idx != in.size(); ++idx)
+    {
+        auto expected = op(in[idx]);
+        const bool same_bytes = (std::memcmp(&expected, &out[idx], sizeof(expected)) == 0);
+        if (!same_bytes)
+        {
+            print_error_msg(expected, out[idx], idx, op);
+            return false;
+        }
+    }
+    return true;
+}
+
+// Runs the as_type test described by op on count elements:
+// builds the kernel from generate_kernel_as_type(), uploads generated input,
+// runs the kernel, reads the output back and checks it with verify_as_type().
+// Returns CL_SUCCESS if the test passed or if the input/output type is not
+// supported by the device (the test is then skipped). On failure the
+// RETURN_ON_* macros are expected to return the failing code (-1 for a
+// verification failure).
+// Buffers, kernel and program are released on every exit path taken after
+// they have been created.
+template <class as_type_op>
+int test_as_type_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, as_type_op op)
+{
+    // Owns the OpenCL objects of one test run, so that the early returns
+    // hidden inside the RETURN_ON_* macros do not leak them. Members that
+    // are still NULL are skipped.
+    struct cl_resources
+    {
+        cl_mem buffers[2];
+        cl_kernel kernel;
+        cl_program program;
+
+        cl_resources() : kernel(NULL), program(NULL)
+        {
+            buffers[0] = NULL;
+            buffers[1] = NULL;
+        }
+        ~cl_resources()
+        {
+            if (buffers[0] != NULL) clReleaseMemObject(buffers[0]);
+            if (buffers[1] != NULL) clReleaseMemObject(buffers[1]);
+            if (kernel != NULL) clReleaseKernel(kernel);
+            if (program != NULL) clReleaseProgram(program);
+        }
+    };
+
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int error;
+
+    typedef typename as_type_op::in_type INPUT;
+    typedef typename as_type_op::out_type OUTPUT;
+
+    // Don't run test for unsupported types
+    if (!(type_supported<INPUT>(device) && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS;
+    }
+
+    std::string code_str = generate_kernel_as_type<as_type_op, INPUT, OUTPUT>(op);
+    std::string kernel_name("test_"); kernel_name += op.str();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(error)
+#else
+    error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(error)
+#endif
+
+    // Ownership starts only here, after create_opencl_kernel() succeeded:
+    // the state of kernel/program after a failed call is not known.
+    cl_resources res;
+    res.kernel = kernel;
+    res.program = program;
+
+    std::vector<INPUT> input = generate_input<INPUT>(count, op.min1(), op.max1(), op.in_special_cases());
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
+
+    res.buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(INPUT) * input.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    res.buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(OUTPUT) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+    error = clEnqueueWriteBuffer(
+        queue, res.buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+
+    error = clSetKernelArg(kernel, 0, sizeof(res.buffers[0]), &res.buffers[0]);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 1, sizeof(res.buffers[1]), &res.buffers[1]);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+    work_size[0] = count;
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+    // Blocking read (CL_TRUE): output is complete when the call returns.
+    error = clEnqueueReadBuffer(
+        queue, res.buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+    if (!verify_as_type(input, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    }
+    log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+
+    // Buffers, kernel and program are released by the destructor of res.
+    return error;
+}
+
+// Runs test_as_type_func() for a fixed list of (input type, output type)
+// pairs and accumulates the results. Returns CL_SUCCESS only if the
+// accumulated result is CL_SUCCESS, -1 otherwise.
+AUTO_TEST_CASE(test_as_type)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Both variables are referenced by name from inside TEST_AS_TYPE_MACRO.
+    int last_error = CL_SUCCESS;
+    int error = CL_SUCCESS;
+
+// Tests as_type from TYPE1 to TYPE2 and folds the result into error.
+// The expansion ends with its own ';', so no semicolon follows a use.
+#define TEST_AS_TYPE_MACRO(TYPE1, TYPE2) \
+    last_error = test_as_type_func( \
+        device, context, queue, n_elems, as_type<TYPE1, TYPE2>() \
+    ); \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+
+    TEST_AS_TYPE_MACRO(cl_int, cl_int)
+    TEST_AS_TYPE_MACRO(cl_uint, cl_int)
+    TEST_AS_TYPE_MACRO(cl_int, cl_ushort2)
+    TEST_AS_TYPE_MACRO(cl_uchar, cl_uchar)
+    TEST_AS_TYPE_MACRO(cl_char4, cl_ushort2)
+    TEST_AS_TYPE_MACRO(cl_uchar16, cl_char16)
+    TEST_AS_TYPE_MACRO(cl_short8, cl_uchar16)
+    TEST_AS_TYPE_MACRO(cl_float4, cl_uint4)
+    TEST_AS_TYPE_MACRO(cl_float16, cl_int16)
+    TEST_AS_TYPE_MACRO(cl_long2, cl_float4)
+    TEST_AS_TYPE_MACRO(cl_ulong, cl_long)
+    TEST_AS_TYPE_MACRO(cl_ulong16, cl_double16)
+    TEST_AS_TYPE_MACRO(cl_uchar16, cl_double2)
+    TEST_AS_TYPE_MACRO(cl_ulong4, cl_short16)
+
+#undef TEST_AS_TYPE_MACRO
+
+    // Any accumulated failure is reported as -1.
+    return (error == CL_SUCCESS) ? error : -1;
+}
+
+
+#endif // TEST_CONFORMANCE_CLCPP_REINTERPRET_AS_TYPE_HPP

diff --git a/test_conformance/clcpp/reinterpret/main.cpp b/test_conformance/clcpp/reinterpret/main.cpp
new file mode 100644
index 0000000..8eddf1d
--- /dev/null
+++ b/test_conformance/clcpp/reinterpret/main.cpp

@@ -0,0 +1,25 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "as_type.hpp"
+
+// Entry point: passes every test definition held by the global autotest suite to runTestHarness.
+int main(int argc, const char *argv[])
+{
+    auto& suite = autotest::test_suite::global_test_suite();
+    return runTestHarness(argc, argv, suite.test_defs.size(), suite.test_defs.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/relational_funcs/CMakeLists.txt b/test_conformance/clcpp/relational_funcs/CMakeLists.txt
new file mode 100644
index 0000000..3a8389c
--- /dev/null
+++ b/test_conformance/clcpp/relational_funcs/CMakeLists.txt

@@ -0,0 +1,7 @@
+set(MODULE_NAME CPP_RELATIONAL_FUNCS)  # module name; also the prefix of the sources variable below
+
+set(${MODULE_NAME}_SOURCES  # expands to CPP_RELATIONAL_FUNCS_SOURCES
+    main.cpp
+)
+
+include(../../CMakeCommon.txt)  # NOTE(review): presumably builds the target from MODULE_NAME and the sources list - confirm

diff --git a/test_conformance/clcpp/relational_funcs/common.hpp b/test_conformance/clcpp/relational_funcs/common.hpp
new file mode 100644
index 0000000..a13f7ba
--- /dev/null
+++ b/test_conformance/clcpp/relational_funcs/common.hpp

@@ -0,0 +1,112 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP
+#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include <type_traits>
+#include <cmath>
+
+// Ternary form, vector result: calls func once per component of OUT1 with the matching components of in1, in2 and in3.
+template<class IN1, class IN2, class IN3, class OUT1, class F>
+OUT1 perform_function(const IN1& in1, const IN2& in2, const IN3& in3, F func, typename std::enable_if<is_vector_type<OUT1>::value>::type* = 0)
+{
+    OUT1 out;
+    const size_t lanes = vector_size<OUT1>::value;
+    for(size_t lane = 0; lane < lanes; ++lane)
+        out.s[lane] = func(in1.s[lane], in2.s[lane], in3.s[lane]);
+    return out;
+}
+
+// Ternary form, non-vector result: forwards the three inputs to func in a single call.
+template<class IN1, class IN2, class IN3, class OUT1, class F>
+OUT1 perform_function(const IN1& in1, const IN2& in2, const IN3& in3, F func, typename std::enable_if<!is_vector_type<OUT1>::value>::type* = 0)
+{
+    return func(in1, in2, in3);
+}
+
+
+// Binary form, vector result: calls func once per component of OUT1 with the matching components of in1 and in2.
+template<class IN1, class IN2, class OUT1, class F>
+OUT1 perform_function(const IN1& in1, const IN2& in2, F func, typename std::enable_if<is_vector_type<OUT1>::value>::type* = 0)
+{
+    OUT1 out;
+    const size_t lanes = vector_size<OUT1>::value;
+    for(size_t lane = 0; lane < lanes; ++lane)
+        out.s[lane] = func(in1.s[lane], in2.s[lane]);
+    return out;
+}
+
+// Binary form, non-vector result: forwards both inputs to func in a single call.
+template<class IN1, class IN2, class OUT1, class F>
+OUT1 perform_function(const IN1& in1, const IN2& in2, F func, typename std::enable_if<!is_vector_type<OUT1>::value>::type* = 0)
+{
+    return func(in1, in2);
+}
+
+// Unary form, vector result: calls func once per component of OUT1 with the matching component of in1.
+template<class IN1, class OUT1, class F>
+OUT1 perform_function(const IN1& in1, F func, typename std::enable_if<is_vector_type<OUT1>::value>::type* = 0)
+{
+    OUT1 out;
+    const size_t lanes = vector_size<OUT1>::value;
+    for(size_t lane = 0; lane < lanes; ++lane)
+        out.s[lane] = func(in1.s[lane]);
+    return out;
+}
+
+// Unary form, non-vector result: forwards the input to func in a single call.
+template<class IN1, class OUT1, class F>
+OUT1 perform_function(const IN1& in1, F func, typename std::enable_if<!is_vector_type<OUT1>::value>::type* = 0)
+{
+    return func(in1);
+}
+
+// Host reference for all() on a vector input: 1 when every component is non-zero, 0 otherwise.
+template<class IN1>
+cl_int perform_all_function(const IN1& in1, typename std::enable_if<is_vector_type<IN1>::value>::type* = 0)
+{
+    for(size_t lane = 0; lane < vector_size<IN1>::value; ++lane)
+    {
+        if(in1.s[lane] == 0) return cl_int(0);
+    }
+    return cl_int(1);
+}
+
+// Scalar overload of the all() reference: returns 1 when in1 is non-zero and 0 otherwise.
+// Declared inline: a non-template function defined in a header would otherwise be defined once per including translation unit.
+inline cl_int perform_all_function(const cl_int& in1, typename std::enable_if<!is_vector_type<cl_int>::value>::type* = 0)
+{ return (in1 != 0) ? cl_int(1) : cl_int(0); }
+
+// Host reference for any() on a vector input: 1 when at least one component is non-zero, 0 otherwise.
+template<class IN1>
+cl_int perform_any_function(const IN1& in1, typename std::enable_if<is_vector_type<IN1>::value>::type* = 0)
+{
+    for(size_t lane = 0; lane < vector_size<IN1>::value; ++lane)
+    {
+        if(in1.s[lane] != 0) return cl_int(1);
+    }
+    return cl_int(0);
+}
+
+// Scalar overload of the any() reference: returns 1 when in1 is non-zero and 0 otherwise.
+// Declared inline: a non-template function defined in a header would otherwise be defined once per including translation unit.
+inline cl_int perform_any_function(const cl_int& in1, typename std::enable_if<!is_vector_type<cl_int>::value>::type* = 0)
+{ return (in1 != 0) ? cl_int(1) : cl_int(0); }
+
+#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP

diff --git a/test_conformance/clcpp/relational_funcs/comparison_funcs.hpp b/test_conformance/clcpp/relational_funcs/comparison_funcs.hpp
new file mode 100644
index 0000000..980d67c
--- /dev/null
+++ b/test_conformance/clcpp/relational_funcs/comparison_funcs.hpp

@@ -0,0 +1,150 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMPARISON_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMPARISON_FUNCS_HPP
+
+#include "common.hpp"
+
+// This macro defines a class template that wraps one comparison function under test: CLASS_NAME<N> takes two cl_floatN inputs and yields a cl_intN result. HOST_FUNC_EXPRESSION is the host-side reference, written in terms of the scalar operands a and b.
+#define DEF_COMPARISION_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) \
+template <cl_int N /* Vector size */> \
+struct CLASS_NAME : public binary_func< \
+                                    typename make_vector_type<cl_float, N>::type, /* create cl_floatN type */ \
+                                    typename make_vector_type<cl_float, N>::type, /* create cl_floatN type */ \
+                                    typename make_vector_type<cl_int, N>::type /* create cl_intN type */ \
+                                 > \
+{ \
+    typedef typename make_vector_type<cl_float, N>::type input_type; \
+    typedef typename make_vector_type<cl_int, N>::type result_type; \
+    /* Returns FUNC_NAME turned into a string by the preprocessor. */ \
+    std::string str() \
+    { \
+        return #FUNC_NAME; \
+    } \
+    /* Returns the include directive for <opencl_relational>. */ \
+    std::string headers() \
+    { \
+        return "#include <opencl_relational>\n"; \
+    } \
+    /* Host reference: per component pair, 1 where HOST_FUNC_EXPRESSION holds and 0 where it does not. */ \
+    result_type operator()(const input_type& x, const input_type& y) \
+    {    \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return perform_function<input_type, input_type, result_type>( \
+            x, y, \
+            [](const SCALAR& a, const SCALAR& b) \
+            { \
+                if(HOST_FUNC_EXPRESSION) \
+                { \
+                    return cl_int(1); \
+                } \
+                return cl_int(0); \
+            } \
+        ); \
+    } \
+    /* NOTE(review): presumably tells the harness that the result is a boolean value - confirm against binary_func. */ \
+    bool is_out_bool() \
+    { \
+        return true; \
+    } \
+    /* NOTE(review): min1/max1/min2/max2 presumably bound the generated values of the first and second input - confirm against binary_func. */ \
+    input_type min1() \
+    { \
+        return detail::def_limit<input_type>(-10000.0f); \
+    } \
+    \
+    input_type max1() \
+    { \
+        return detail::def_limit<input_type>(10000.0f); \
+    } \
+    \
+    input_type min2() \
+    { \
+        return detail::def_limit<input_type>(-10000.0f); \
+    } \
+    \
+    input_type max2() \
+    { \
+        return detail::def_limit<input_type>(10000.0f); \
+    } \
+    /* Special values for the first input: +infinity, -infinity, quiet NaN, +0 and -0. */ \
+    std::vector<input_type> in1_special_cases() \
+    { \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return {  \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
+            detail::make_value<input_type>(0.0f), \
+            detail::make_value<input_type>(-0.0f) \
+        }; \
+    } \
+    /* Special values for the second input: the same five values as for the first input. */ \
+    std::vector<input_type> in2_special_cases() \
+    { \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return {  \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
+            detail::make_value<input_type>(0.0f), \
+            detail::make_value<input_type>(-0.0f) \
+        }; \
+    } \
+};
+
+DEF_COMPARISION_FUNC(comparison_func_isequal, isequal, (a == b)) // reference: plain equality
+DEF_COMPARISION_FUNC(comparison_func_isnotequal, isnotequal, !(a == b)) // negated equality, so a NaN operand gives 1
+DEF_COMPARISION_FUNC(comparison_func_isgreater, isgreater, (std::isgreater)(a, b))
+DEF_COMPARISION_FUNC(comparison_func_isgreaterequal, isgreaterequal, ((std::isgreater)(a, b) || a == b)) // greater, or equal
+DEF_COMPARISION_FUNC(comparison_func_isless, isless, (std::isless)(a, b))
+DEF_COMPARISION_FUNC(comparison_func_islessequal, islessequal, ((std::isless)(a, b) || a == b)) // less, or equal
+DEF_COMPARISION_FUNC(comparison_func_islessgreater, islessgreater, ((a < b) || (a > b))) // both comparisons are false for a NaN operand, so the result is 0
+
+#undef DEF_COMPARISION_FUNC // the generator macro is not used after the definitions above
+
+AUTO_TEST_CASE(test_relational_comparison_funcs) // Runs every comparison wrapper class for vector sizes 1, 2, 4, 8 and 16.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{    
+    int error = CL_SUCCESS; // NOTE(review): presumably updated by TEST_BINARY_FUNC_MACRO, which is defined outside this file - confirm
+    int last_error = CL_SUCCESS; // NOTE(review): presumably also used by TEST_BINARY_FUNC_MACRO - confirm
+
+// Helper macro, so we don't have to repeat the same code for every vector size.
+#define TEST_BINARY_REL_FUNC_MACRO(CLASS_NAME) \
+    TEST_BINARY_FUNC_MACRO(CLASS_NAME<1>()) \
+    TEST_BINARY_FUNC_MACRO(CLASS_NAME<2>()) \
+    TEST_BINARY_FUNC_MACRO(CLASS_NAME<4>()) \
+    TEST_BINARY_FUNC_MACRO(CLASS_NAME<8>()) \
+    TEST_BINARY_FUNC_MACRO(CLASS_NAME<16>())
+
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isequal)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isnotequal)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isgreater)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isgreaterequal)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isless)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_islessequal)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_islessgreater)
+
+#undef TEST_BINARY_REL_FUNC_MACRO
+
+    if(error != CL_SUCCESS) // any status other than CL_SUCCESS is reported as -1
+    {
+        return -1;
+    }    
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMPARISON_FUNCS_HPP

diff --git a/test_conformance/clcpp/relational_funcs/main.cpp b/test_conformance/clcpp/relational_funcs/main.cpp
new file mode 100644
index 0000000..99b0e5a
--- /dev/null
+++ b/test_conformance/clcpp/relational_funcs/main.cpp

@@ -0,0 +1,26 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "comparison_funcs.hpp"
+#include "select_funcs.hpp"
+#include "test_funcs.hpp"
+// Entry point: passes every test definition held by the global autotest suite to runTestHarness.
+int main(int argc, const char *argv[])
+{
+    auto& suite = autotest::test_suite::global_test_suite();
+    return runTestHarness(argc, argv, suite.test_defs.size(), suite.test_defs.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/relational_funcs/select_funcs.hpp b/test_conformance/clcpp/relational_funcs/select_funcs.hpp
new file mode 100644
index 0000000..2e6f6bd
--- /dev/null
+++ b/test_conformance/clcpp/relational_funcs/select_funcs.hpp

@@ -0,0 +1,158 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_SELECT_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_SELECT_FUNCS_HPP
+
+#include "common.hpp"
+
+// Host-side reference for select(a, b, c). The first two inputs and the result use the
+// IN1N type built by make_vector_type<IN1, N>; the third input, the selector, uses the
+// cl_intN type built by make_vector_type<cl_int, N>.
+template <class IN1, cl_int N /* Vector size */>
+struct select_func_select : public ternary_func<
+                                    typename make_vector_type<IN1, N>::type,    /* first input: IN1N */
+                                    typename make_vector_type<IN1, N>::type,    /* second input: IN1N */
+                                    typename make_vector_type<cl_int, N>::type, /* selector: cl_intN */
+                                    typename make_vector_type<IN1, N>::type     /* result: IN1N */
+                                 >
+{
+    using input1_type = typename make_vector_type<IN1, N>::type;
+    using input2_type = typename make_vector_type<IN1, N>::type;
+    using input3_type = typename make_vector_type<cl_int, N>::type;
+    using result_type = typename make_vector_type<IN1, N>::type;
+
+    // Name of the function under test.
+    std::string str() { return "select"; }
+
+    // Include directive returned for this test.
+    std::string headers() { return "#include <opencl_relational>\n"; }
+
+    // Computes the expected result: each component comes from y where the matching
+    // component of z is non-zero and from x otherwise.
+    result_type operator()(const input1_type& x, const input2_type& y, const input3_type& z)
+    {
+        using first_scalar = typename scalar_type<input1_type>::type;
+        using second_scalar = typename scalar_type<input2_type>::type;
+        using selector_scalar = typename scalar_type<input3_type>::type;
+
+        auto pick = [](const first_scalar& a, const second_scalar& b, const selector_scalar& c)
+        {
+            if(c != 0) return b;
+            return a;
+        };
+        return perform_function<input1_type, input2_type, input3_type, result_type>(x, y, z, pick);
+    }
+
+    // NOTE(review): presumably tells the harness that the third input holds boolean
+    // values - confirm against the ternary_func base class.
+    bool is_in3_bool() { return true; }
+
+    // Special selector values: zero and three non-zero values, one of them negative.
+    std::vector<input3_type> in3_special_cases()
+    {
+        std::vector<input3_type> cases;
+        cases.push_back(detail::make_value<input3_type>(0));
+        cases.push_back(detail::make_value<input3_type>(1));
+        cases.push_back(detail::make_value<input3_type>(12));
+        cases.push_back(detail::make_value<input3_type>(-12));
+        return cases;
+    }
+};
+
+// Host-side reference for bitselect(a, b, c). All three inputs and the result use the
+// IN1N type built by make_vector_type<IN1, N>.
+template <class IN1, cl_int N /* Vector size */>
+struct select_func_bitselect : public ternary_func<
+                                    typename make_vector_type<IN1, N>::type, /* first input: IN1N */
+                                    typename make_vector_type<IN1, N>::type, /* second input: IN1N */
+                                    typename make_vector_type<IN1, N>::type, /* bit mask: IN1N */
+                                    typename make_vector_type<IN1, N>::type  /* result: IN1N */
+                                 >
+{
+    using input1_type = typename make_vector_type<IN1, N>::type;
+    using input2_type = typename make_vector_type<IN1, N>::type;
+    using input3_type = typename make_vector_type<IN1, N>::type;
+    using result_type = typename make_vector_type<IN1, N>::type;
+
+    // Name of the function under test.
+    std::string str() { return "bitselect"; }
+
+    // Include directive returned for this test.
+    std::string headers() { return "#include <opencl_relational>\n"; }
+
+    // Computes the expected result: each result bit is taken from y where the matching
+    // bit of z is set and from x where it is clear.
+    result_type operator()(const input1_type& x, const input2_type& y, const input3_type& z)
+    {
+        // This reference only accepts unsigned integer element types.
+        static_assert(std::is_integral<IN1>::value, "bitselect test is implemented only for integers.");
+        static_assert(
+            std::is_unsigned<IN1>::value,
+            "IN1 type should be unsigned, bitwise operations on signed int may cause problems."
+        );
+
+        using first_scalar = typename scalar_type<input1_type>::type;
+        using second_scalar = typename scalar_type<input2_type>::type;
+        using mask_scalar = typename scalar_type<input3_type>::type;
+
+        // Bits clear in c keep the bit of a; bits set in c take the bit of b.
+        auto merge_bits = [](const first_scalar& a, const second_scalar& b, const mask_scalar& c)
+        {
+            const auto from_a = ~c & a;
+            const auto from_b = c & b;
+            return from_a | from_b;
+        };
+        return perform_function<input1_type, input2_type, input3_type, result_type>(x, y, z, merge_bits);
+    }
+};
+
+AUTO_TEST_CASE(test_relational_select_funcs) // Runs the select and bitselect reference wrappers for several element types and vector sizes.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{    
+    int error = CL_SUCCESS; // NOTE(review): presumably updated by TEST_TERNARY_FUNC_MACRO, which is defined outside this file - confirm
+    int last_error = CL_SUCCESS; // NOTE(review): presumably also used by TEST_TERNARY_FUNC_MACRO - confirm
+
+// Tests for select(gentype a, gentype b, booln c) are not run in USE_OPENCLC_KERNELS
+// mode, because this function in OpenCL C requires a different reference function on the host
+// than its equivalent in OpenCL C++.
+// (In OpenCL C the result of select(), when gentype is a vector type, is based on the most
+// significant bits of the components of c.)
+#ifndef USE_OPENCLC_KERNELS
+    // gentype select(gentype a, gentype b, booln c)
+    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_uint,  1>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_float, 2>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_short, 4>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_uint,  8>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_uint,  16>()))
+#else
+    log_info("WARNING:\n\tTests for select(gentype a, gentype b, booln c) are not run in USE_OPENCLC_KERNELS mode\n");
+#endif
+
+    // gentype bitselect(gentype a, gentype b, gentype c); only unsigned element types are used here
+    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_uint, 1>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_ushort, 2>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_uchar, 4>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_ushort, 8>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_uint, 16>()))
+
+    if(error != CL_SUCCESS) // any status other than CL_SUCCESS is reported as -1
+    {
+        return -1;
+    }    
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_SELECT_FUNCS_HPP

diff --git a/test_conformance/clcpp/relational_funcs/test_funcs.hpp b/test_conformance/clcpp/relational_funcs/test_funcs.hpp
new file mode 100644
index 0000000..77e3d87
--- /dev/null
+++ b/test_conformance/clcpp/relational_funcs/test_funcs.hpp

@@ -0,0 +1,336 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_TEST_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_TEST_FUNCS_HPP
+
+#include "common.hpp"
+
+// This macro defines a class template that wraps one unary test function: CLASS_NAME<N> takes a cl_floatN input and yields a cl_intN result. HOST_FUNC_EXPRESSION is the host-side reference, written in terms of the scalar operand a.
+#define DEF_UNARY_TEST_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) \
+template <cl_int N /* Vector size */> \
+struct CLASS_NAME : public unary_func< \
+                                    typename make_vector_type<cl_float, N>::type, /* create cl_floatN type */ \
+                                    typename make_vector_type<cl_int, N>::type /* create cl_intN type */ \
+                                 > \
+{ \
+    typedef typename make_vector_type<cl_float, N>::type input_type; \
+    typedef typename make_vector_type<cl_int, N>::type result_type; \
+    /* Returns FUNC_NAME turned into a string by the preprocessor. */ \
+    std::string str() \
+    { \
+        return #FUNC_NAME; \
+    } \
+    /* Returns the include directive for <opencl_relational>. */ \
+    std::string headers() \
+    { \
+        return "#include <opencl_relational>\n"; \
+    } \
+    /* Host reference: per component, 1 where HOST_FUNC_EXPRESSION holds and 0 where it does not. */ \
+    result_type operator()(const input_type& x) \
+    {    \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return perform_function<input_type, result_type>( \
+            x, \
+            [](const SCALAR& a) \
+            { \
+                if(HOST_FUNC_EXPRESSION) \
+                { \
+                    return cl_int(1); \
+                } \
+                return cl_int(0); \
+            } \
+        ); \
+    } \
+    /* NOTE(review): presumably tells the harness that the result is a boolean value - confirm against unary_func. */ \
+    bool is_out_bool() \
+    { \
+        return true; \
+    } \
+    /* NOTE(review): min1/max1 presumably bound the generated input values - confirm against unary_func. */ \
+    input_type min1() \
+    { \
+        return detail::def_limit<input_type>(-10000.0f); \
+    } \
+    \
+    input_type max1() \
+    { \
+        return detail::def_limit<input_type>(10000.0f); \
+    } \
+    /* Special input values: +infinity, -infinity, quiet NaN, signaling NaN, denorm_min, +0 and -0. */ \
+    std::vector<input_type> in1_special_cases() \
+    { \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return {  \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::signaling_NaN()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::denorm_min()), \
+            detail::make_value<input_type>(0.0f), \
+            detail::make_value<input_type>(-0.0f) \
+        }; \
+    } \
+};
+
+// This macro defines a class template that wraps one binary test function: CLASS_NAME<N> takes two cl_floatN inputs and yields a cl_intN result. HOST_FUNC_EXPRESSION is the host-side reference, written in terms of the scalar operands a and b.
+#define DEF_BINARY_TEST_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) \
+template <cl_int N /* Vector size */> \
+struct CLASS_NAME : public binary_func< \
+                                    typename make_vector_type<cl_float, N>::type, /* create cl_floatN type */ \
+                                    typename make_vector_type<cl_float, N>::type, /* create cl_floatN type */ \
+                                    typename make_vector_type<cl_int, N>::type /* create cl_intN type */ \
+                                 > \
+{ \
+    typedef typename make_vector_type<cl_float, N>::type input_type; \
+    typedef typename make_vector_type<cl_int, N>::type result_type; \
+    /* Returns FUNC_NAME turned into a string by the preprocessor. */ \
+    std::string str() \
+    { \
+        return #FUNC_NAME; \
+    } \
+    /* Returns the include directive for <opencl_relational>. */ \
+    std::string headers() \
+    { \
+        return "#include <opencl_relational>\n"; \
+    } \
+    /* Host reference: per component pair, 1 where HOST_FUNC_EXPRESSION holds and 0 where it does not. */ \
+    result_type operator()(const input_type& x, const input_type& y) \
+    {    \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return perform_function<input_type, input_type, result_type>( \
+            x, y, \
+            [](const SCALAR& a, const SCALAR& b) \
+            { \
+                if(HOST_FUNC_EXPRESSION) \
+                { \
+                    return cl_int(1); \
+                } \
+                return cl_int(0); \
+            } \
+        ); \
+    } \
+    /* NOTE(review): presumably tells the harness that the result is a boolean value - confirm against binary_func. */ \
+    bool is_out_bool() \
+    { \
+        return true; \
+    } \
+    /* NOTE(review): min1/max1/min2/max2 presumably bound the generated values of the first and second input - confirm against binary_func. */ \
+    input_type min1() \
+    { \
+        return detail::def_limit<input_type>(-10000.0f); \
+    } \
+    \
+    input_type max1() \
+    { \
+        return detail::def_limit<input_type>(10000.0f); \
+    } \
+    \
+    input_type min2() \
+    { \
+        return detail::def_limit<input_type>(-10000.0f); \
+    } \
+    \
+    input_type max2() \
+    { \
+        return detail::def_limit<input_type>(10000.0f); \
+    } \
+    /* Special values for the first input: +infinity, -infinity, quiet NaN, signaling NaN, denorm_min, +0 and -0. */ \
+    std::vector<input_type> in1_special_cases() \
+    { \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return {  \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::signaling_NaN()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::denorm_min()), \
+            detail::make_value<input_type>(0.0f), \
+            detail::make_value<input_type>(-0.0f) \
+        }; \
+    } \
+    /* Special values for the second input: the same seven values as for the first input. */ \
+    std::vector<input_type> in2_special_cases() \
+    { \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return {  \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::signaling_NaN()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::denorm_min()), \
+            detail::make_value<input_type>(0.0f), \
+            detail::make_value<input_type>(-0.0f) \
+        }; \
+    } \
+};
+
+DEF_UNARY_TEST_FUNC(test_func_isfinite, isfinite, (std::isfinite)(a)) // NOTE(review): the parentheses around the std:: name presumably guard against a function-like macro of the same name - confirm
+DEF_UNARY_TEST_FUNC(test_func_isinf, isinf, (std::isinf)(a))
+DEF_UNARY_TEST_FUNC(test_func_isnan, isnan, (std::isnan)(a))
+DEF_UNARY_TEST_FUNC(test_func_isnormal, isnormal, (std::isnormal)(a))
+DEF_UNARY_TEST_FUNC(test_func_signbit, signbit , (std::signbit)(a))
+
+DEF_BINARY_TEST_FUNC(test_func_isordered, isordered, !(std::isunordered)(a, b)) // ordered is expressed as the negation of unordered
+DEF_BINARY_TEST_FUNC(test_func_isunordered, isunordered, (std::isunordered)(a, b))
+
+#undef DEF_UNARY_TEST_FUNC // the generator macros are not used after the definitions above
+#undef DEF_BINARY_TEST_FUNC
+
+// Host-side reference for all(): the input uses the cl_intN type built by
+// make_vector_type<cl_int, N>, the result is a scalar cl_int.
+template <cl_int N /* Vector size */>
+struct test_func_all : public unary_func<
+                                    typename make_vector_type<cl_int, N>::type, /* input: cl_intN */
+                                    cl_int /* result: scalar cl_int */
+                                 >
+{
+    using input_type = typename make_vector_type<cl_int, N>::type;
+    using result_type = cl_int;
+
+    // Name of the function under test.
+    std::string str() { return "all"; }
+
+    // Include directive returned for this test.
+    std::string headers() { return "#include <opencl_relational>\n"; }
+
+    // Computes the expected result by delegating to perform_all_function,
+    // which is called with the whole input value.
+    result_type operator()(const input_type& x)
+    {
+        return perform_all_function(x);
+    }
+
+    // NOTE(review): presumably tells the harness that the result is a boolean value -
+    // confirm against the unary_func base class.
+    bool is_out_bool() { return true; }
+
+    // NOTE(review): presumably tells the harness that the input holds boolean values -
+    // confirm against the unary_func base class.
+    bool is_in1_bool() { return true; }
+
+    // Special input values: zero and three non-zero values, one of them negative.
+    // Each value is passed through detail::make_value<input_type>.
+    std::vector<input_type> in1_special_cases()
+    {
+        std::vector<input_type> cases;
+        cases.push_back(detail::make_value<input_type>(0));
+        cases.push_back(detail::make_value<input_type>(1));
+        cases.push_back(detail::make_value<input_type>(12));
+        cases.push_back(detail::make_value<input_type>(-12));
+        return cases;
+    }
+};
+
+// Host-side reference for any(): the input uses the cl_intN type built by
+// make_vector_type<cl_int, N>, the result is a scalar cl_int.
+template <cl_int N /* Vector size */>
+struct test_func_any : public unary_func<
+                                    typename make_vector_type<cl_int, N>::type, /* input: cl_intN */
+                                    cl_int /* result: scalar cl_int */
+                                 >
+{
+    using input_type = typename make_vector_type<cl_int, N>::type;
+    using result_type = cl_int;
+
+    // Name of the function under test.
+    std::string str() { return "any"; }
+
+    // Include directive returned for this test.
+    std::string headers() { return "#include <opencl_relational>\n"; }
+
+    // Computes the expected result by delegating to perform_any_function,
+    // which is called with the whole input value.
+    result_type operator()(const input_type& x)
+    {
+        return perform_any_function(x);
+    }
+
+    // NOTE(review): presumably tells the harness that the result is a boolean value -
+    // confirm against the unary_func base class.
+    bool is_out_bool() { return true; }
+
+    // NOTE(review): presumably tells the harness that the input holds boolean values -
+    // confirm against the unary_func base class.
+    bool is_in1_bool() { return true; }
+
+    // Special input values: zero and three non-zero values, one of them negative.
+    // Each value is passed through detail::make_value<input_type>.
+    std::vector<input_type> in1_special_cases()
+    {
+        std::vector<input_type> cases;
+        cases.push_back(detail::make_value<input_type>(0));
+        cases.push_back(detail::make_value<input_type>(1));
+        cases.push_back(detail::make_value<input_type>(12));
+        cases.push_back(detail::make_value<input_type>(-12));
+        return cases;
+    }
+};
+
+AUTO_TEST_CASE(test_relational_test_funcs) // Runs the unary and binary test-function wrappers for vector sizes 1, 2, 4, 8 and 16.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS; // NOTE(review): presumably updated by TEST_UNARY_FUNC_MACRO / TEST_BINARY_FUNC_MACRO, which are defined outside this file - confirm
+    int last_error = CL_SUCCESS; // NOTE(review): presumably also used by those macros - confirm
+
+// Helper macro, so we don't have to repeat the same code for every vector size.
+#define TEST_UNARY_REL_FUNC_MACRO(CLASS_NAME) \
+    TEST_UNARY_FUNC_MACRO(CLASS_NAME<1>()) \
+    TEST_UNARY_FUNC_MACRO(CLASS_NAME<2>()) \
+    TEST_UNARY_FUNC_MACRO(CLASS_NAME<4>()) \
+    TEST_UNARY_FUNC_MACRO(CLASS_NAME<8>()) \
+    TEST_UNARY_FUNC_MACRO(CLASS_NAME<16>())
+
+    TEST_UNARY_REL_FUNC_MACRO(test_func_isfinite)
+    TEST_UNARY_REL_FUNC_MACRO(test_func_isinf)
+    TEST_UNARY_REL_FUNC_MACRO(test_func_isnan)
+    TEST_UNARY_REL_FUNC_MACRO(test_func_isnormal)
+    TEST_UNARY_REL_FUNC_MACRO(test_func_signbit)
+
+// Tests for all(booln x) and any(booln x) are not run in USE_OPENCLC_KERNELS mode,
+// because those functions in OpenCL C require different reference functions on the host
+// than their equivalents in OpenCL C++.
+// (In OpenCL C those functions return true/false based on the most significant bits
+// of any/all components of x.)
+#ifndef USE_OPENCLC_KERNELS
+    TEST_UNARY_REL_FUNC_MACRO(test_func_all)
+    TEST_UNARY_REL_FUNC_MACRO(test_func_any)
+#else
+    log_info("WARNING:\n\tTests for bool all(booln x) are not run in USE_OPENCLC_KERNELS mode\n");
+    log_info("WARNING:\n\tTests for bool any(booln x) are not run in USE_OPENCLC_KERNELS mode\n");
+#endif
+
+#undef TEST_UNARY_REL_FUNC_MACRO
+// Same helper as above, for the wrappers that take two inputs.
+#define TEST_BINARY_REL_FUNC_MACRO(CLASS_NAME) \
+    TEST_BINARY_FUNC_MACRO(CLASS_NAME<1>()) \
+    TEST_BINARY_FUNC_MACRO(CLASS_NAME<2>()) \
+    TEST_BINARY_FUNC_MACRO(CLASS_NAME<4>()) \
+    TEST_BINARY_FUNC_MACRO(CLASS_NAME<8>()) \
+    TEST_BINARY_FUNC_MACRO(CLASS_NAME<16>())
+
+    TEST_BINARY_REL_FUNC_MACRO(test_func_isordered)
+    TEST_BINARY_REL_FUNC_MACRO(test_func_isunordered)
+
+#undef TEST_BINARY_REL_FUNC_MACRO
+
+    if(error != CL_SUCCESS) // any status other than CL_SUCCESS is reported as -1
+    {
+        return -1;
+    }
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_TEST_FUNCS_HPP

diff --git a/test_conformance/clcpp/spec_constants/CMakeLists.txt b/test_conformance/clcpp/spec_constants/CMakeLists.txt
new file mode 100644
index 0000000..3488a5a
--- /dev/null
+++ b/test_conformance/clcpp/spec_constants/CMakeLists.txt

@@ -0,0 +1,7 @@
+set(MODULE_NAME CPP_SPEC_CONSTANTS)  # module name; also the prefix of the sources variable below
+
+set(${MODULE_NAME}_SOURCES  # expands to CPP_SPEC_CONSTANTS_SOURCES
+    main.cpp
+)
+
+include(../../CMakeCommon.txt)  # NOTE(review): presumably builds the target from MODULE_NAME and the sources list - confirm

diff --git a/test_conformance/clcpp/spec_constants/common.hpp b/test_conformance/clcpp/spec_constants/common.hpp
new file mode 100644
index 0000000..3846fe8
--- /dev/null
+++ b/test_conformance/clcpp/spec_constants/common.hpp

@@ -0,0 +1,257 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_COMMON_HPP
+#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_COMMON_HPP
+
+#include <algorithm>
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#define RUN_SPEC_CONSTANTS_TEST_MACRO(TEST_CLASS) \
+    last_error = run_spec_constants_test(  \
+        device, context, queue, n_elems, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+
+// Base class for all tests of cl::spec_constant
+template <class T>
+struct spec_constants_test : public detail::base_func_type<T>
+{
+    // Output buffer type
+    typedef T type;
+
+    virtual ~spec_constants_test() {};
+    // Returns test name
+    virtual std::string str() = 0;
+    // Returns OpenCL program source
+    virtual std::string generate_program() = 0;
+
+    // Return names of test's kernels, in order.
+    // Typical case: one kernel.
+    virtual std::vector<std::string> get_kernel_names()
+    {
+        // Typical case, that is, only one kernel
+        return { this->get_kernel_name() };
+    }
+
+    // If local size has to be set in clEnqueueNDRangeKernel()
+    // this should return true; otherwise - false;
+    virtual bool set_local_size()
+    {
+        return false;
+    }
+
+    // Calculates maximal work-group size (one dim)
+    virtual size_t get_max_local_size(const std::vector<cl_kernel>& kernels,
+                                      cl_device_id device,
+                                      size_t work_group_size, // default work-group size
+                                      cl_int& error)
+    {
+        size_t wg_size = work_group_size;
+        for(auto& k : kernels)
+        {
+            size_t max_wg_size;
+            error = clGetKernelWorkGroupInfo(
+                k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL
+            );
+            RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+            wg_size = (std::min)(wg_size, max_wg_size);
+        }
+        return wg_size;
+    }
+
+    // Sets spec constants
+    // Typical case: no spec constants to set
+    virtual cl_int set_spec_constants(const cl_program& program)
+    {
+        return CL_SUCCESS;
+    }
+
+    // Default execution, covering the typical case: each kernel is enqueued once,
+    // in order, and its only argument (index 0) is output_buffer. The local size is
+    // passed to clEnqueueNDRangeKernel only when set_local_size() returns true.
+    virtual cl_int execute(const std::vector<cl_kernel>& kernels,
+                           cl_mem& output_buffer,
+                           cl_command_queue& queue,
+                           size_t work_size,
+                           size_t work_group_size)
+    {
+        cl_int err = CL_SUCCESS; // defined result even when kernels is empty
+        for(auto& k : kernels)
+        {
+            err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer);
+            RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+            err = clEnqueueNDRangeKernel(
+                queue, k, 1,
+                NULL, &work_size, this->set_local_size() ? &work_group_size : NULL,
+                0, NULL, NULL
+            );
+            RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+        }
+        return err;
+    }
+
+    // This is a function which performs additional queries and checks
+    // if the results are correct. This method is run after checking that
+    // test results (output values) are correct.
+    virtual cl_int check_queries(const std::vector<cl_kernel>& kernels,
+                                 cl_device_id device,
+                                 cl_context context,
+                                 cl_command_queue queue)
+    {
+        (void) kernels;
+        (void) device;
+        (void) context;
+        (void) queue;
+        return CL_SUCCESS;
+    }
+};
+
+// Runs one spec-constant test: builds op's program, executes its kernels over count
+// work-items and compares each output element with the host reference op(i, wg_size).
+template <class spec_constants_test>
+int run_spec_constants_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, spec_constants_test op)
+{
+    cl_mem buffers[1];
+    cl_program program;
+    std::vector<cl_kernel> kernels;
+    size_t wg_size;
+    size_t work_size[1];
+    cl_int err;
+    typedef typename spec_constants_test::type TYPE;
+
+    // Don't run test for unsupported types
+    if(!(type_supported<TYPE>(device)))
+    {
+        return CL_SUCCESS;
+    }
+
+    std::string code_str = op.generate_program();
+    std::vector<std::string> kernel_names = op.get_kernel_names();
+    if(kernel_names.empty())
+    {
+        RETURN_ON_ERROR_MSG(-1, "No kernel to run");
+    }
+    kernels.resize(kernel_names.size());
+
+    std::string options = "";
+    if(is_extension_available(device, "cl_khr_fp16"))
+    {
+        options += " -cl-fp16-enable";
+    }
+    if(is_extension_available(device, "cl_khr_fp64"))
+    {
+        options += " -cl-fp64-enable";
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], options);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+#else
+    const char * code_c_str = code_str.c_str();
+    err = create_openclcpp_program(context, &program, 1, &(code_c_str), options.c_str());
+    RETURN_ON_ERROR_MSG(err, "Creating OpenCL C++ program failed")
+
+    // Set spec constants
+    err = op.set_spec_constants(program);
+    RETURN_ON_ERROR_MSG(err, "Setting Spec Constants failed")
+
+    // Build program and create 1st kernel
+    err = build_program_create_kernel_helper(
+        context, &program, &(kernels[0]), 1, &(code_c_str), kernel_names[0].c_str()
+    );
+    RETURN_ON_ERROR_MSG(err, "Unable to build program or to create kernel")
+    // Create other kernels
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+#endif
+
+    // Find the max possible wg size among all the kernels
+    wg_size = op.get_max_local_size(kernels, device, 1024, err);
+    RETURN_ON_ERROR(err);
+
+    work_size[0] = count;
+    if(op.set_local_size())
+    {
+        // Round up to a whole number of work-groups (integer ceiling division)
+        size_t wg_number = (count + wg_size - 1) / wg_size;
+        work_size[0] = wg_number * wg_size;
+    }
+
+    // host output vector
+    std::vector<TYPE> output = generate_output<TYPE>(work_size[0], 9999);
+
+    // device output buffer
+    buffers[0] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(TYPE) * output.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    // Execute test
+    err = op.execute(kernels, buffers[0], queue, work_size[0], wg_size);
+    RETURN_ON_ERROR(err)
+
+    err = clEnqueueReadBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // Check output values
+    for(size_t i = 0; i < output.size(); i++)
+    {
+        TYPE v = op(i, wg_size);
+        if(!(are_equal(v, output[i], detail::make_value<TYPE>(0), op)))
+        {
+            RETURN_ON_ERROR_MSG(-1,
+                "test_%s(%s) failed. Expected: %s, got: %s", op.str().c_str(), type_name<TYPE>().c_str(),
+                format_value(v).c_str(), format_value(output[i]).c_str()
+            );
+        }
+    }
+
+    // Check if queries return correct values
+    err = op.check_queries(kernels, device, context, queue);
+    RETURN_ON_ERROR(err);
+
+    log_info("test_%s(%s) passed\n", op.str().c_str(), type_name<TYPE>().c_str());
+
+    clReleaseMemObject(buffers[0]);
+    for(auto& k : kernels)
+        clReleaseKernel(k);
+    clReleaseProgram(program);
+    return err;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_COMMON_HPP

diff --git a/test_conformance/clcpp/spec_constants/main.cpp b/test_conformance/clcpp/spec_constants/main.cpp
new file mode 100644
index 0000000..0582ed5
--- /dev/null
+++ b/test_conformance/clcpp/spec_constants/main.cpp

@@ -0,0 +1,26 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_spec_consts_attributes.hpp"
+#include "test_spec_consts_if.hpp"
+#include "test_spec_consts_init_vars.hpp"
+
+int main(int argc, const char *argv[])
+{
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/spec_constants/test_spec_consts_attributes.hpp b/test_conformance/clcpp/spec_constants/test_spec_consts_attributes.hpp
new file mode 100644
index 0000000..539167f
--- /dev/null
+++ b/test_conformance/clcpp/spec_constants/test_spec_consts_attributes.hpp

@@ -0,0 +1,281 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_ATTRIBUTES_HPP
+#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_ATTRIBUTES_HPP
+
+#include <type_traits>
+
+#include "common.hpp"
+
+// In this test we check if specialization constant can be successfully used
+// in kernel attribute cl::required_work_group_size(X, Y, Z).
+struct spec_const_required_work_group_size_test : public spec_constants_test<cl_uint>
+{
+    // See generate_program() to know what set_spec_constant is for.
+    spec_const_required_work_group_size_test(const bool set_spec_constant,
+                                             const cl_uint work_group_size_0)
+        : m_set_spec_constant(set_spec_constant),
+          m_work_group_size_0(work_group_size_0)
+    {
+
+    }
+
+    std::string str()
+    {
+        if(m_set_spec_constant)
+            return "spec_const_in_required_work_group_size_" + std::to_string(m_work_group_size_0);
+        else
+            return "spec_const_in_required_work_group_size_not_set";
+    }
+
+    bool set_local_size()
+    {
+        return true;
+    }
+
+    size_t get_max_local_size(const std::vector<cl_kernel>& kernels,
+                              cl_device_id device,
+                              size_t work_group_size, // default work-group size
+                              cl_int& error)
+    {
+        if(m_set_spec_constant)
+        {
+            return m_work_group_size_0;
+        }
+        return size_t(1);
+    }
+
+    cl_uint operator()(size_t i, size_t work_group_size)
+    {
+        (void) work_group_size;
+        if(m_set_spec_constant)
+        {
+            return m_work_group_size_0;
+        }
+        return cl_uint(1);
+    }
+
+    // Queries CL_KERNEL_COMPILE_WORK_GROUP_SIZE using clGetKernelWorkGroupInfo and
+    // checks that it returns the work-group size specified by the
+    // cl::required_work_group_size(X, Y, Z) qualifier. Returns -1 on a mismatch.
+    cl_int check_queries(const std::vector<cl_kernel>& kernels,
+                         cl_device_id device,
+                         cl_context context,
+                         cl_command_queue queue)
+    {
+        (void) context; // only kernels and device are needed for the query
+        (void) queue;
+        size_t compile_wg_size[] = { 1, 1, 1 };
+        cl_int error = clGetKernelWorkGroupInfo(
+            kernels[0], device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
+            sizeof(compile_wg_size), compile_wg_size, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+        if(m_set_spec_constant)
+        {
+            if(compile_wg_size[0] != m_work_group_size_0
+               || compile_wg_size[1] != 1
+               || compile_wg_size[2] != 1)
+            {
+                error = -1;
+            }
+        }
+        else // spec constant not set: expect (1, 1, 1)
+        {
+            if(compile_wg_size[0] != 1
+               || compile_wg_size[1] != 1
+               || compile_wg_size[2] != 1)
+            {
+                error = -1;
+            }
+        }
+        return error;
+    }
+
+    // Sets spec constant
+    cl_int set_spec_constants(const cl_program& program)
+    {
+        cl_int error = CL_SUCCESS;
+        if(m_set_spec_constant)
+        {
+            error = clSetProgramSpecializationConstant(
+                program, cl_uint(1), sizeof(cl_uint), static_cast<void*>(&m_work_group_size_0)
+            );
+            RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+        }
+        return error;
+    }
+
+    // Each work-item writes get_local_size(0) to output[work-item-global-id]
+    std::string generate_program(bool with_attribute)
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            std::string att = " ";
+            if(with_attribute)
+            {
+                std::string work_group_size_0 = "1";
+                if(m_set_spec_constant)
+                {
+                    work_group_size_0 = std::to_string(m_work_group_size_0);
+                }
+                att = "\n__attribute__((reqd_work_group_size(" + work_group_size_0 + ",1,1)))\n";
+            }
+            return
+                "__kernel" + att + "void " + this->get_kernel_name() + "(global uint *output)\n"
+                "{\n"
+                "    uint gid = get_global_id(0);\n"
+                "    output[gid] = get_local_size(0);\n"
+                "}\n";
+
+        #else
+            std::string att = "";
+            if(with_attribute)
+            {
+                att = "[[cl::required_work_group_size(spec1, 1, 1)]]\n";
+            }
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_spec_constant>\n"
+                "using namespace cl;\n"
+                "spec_constant<uint, 1> spec1{1};\n"
+                + att +
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "    uint gid = get_global_id(0);\n"
+                "    output[gid] = get_local_size(0);\n"
+                "}\n";
+        #endif
+    }
+
+    // Each work-item writes get_local_size(0) to output[work-item-global-id]
+    std::string generate_program()
+    {
+        return generate_program(true);
+    }
+
+private:
+    bool m_set_spec_constant;
+    cl_uint m_work_group_size_0;
+};
+
+// Builds source and returns the largest work-group size, capped at work_group_size,
+// that every kernel in kernel_names can be launched with. Failures are reported via err.
+size_t get_max_wg_size(const std::string& source,
+                       const std::vector<std::string>& kernel_names,
+                       size_t work_group_size, // max wg size we want to have
+                       cl_device_id device,
+                       cl_context context,
+                       cl_command_queue queue,
+                       cl_int& err)
+{
+    cl_program program;
+    std::vector<cl_kernel> kernels;
+    if(kernel_names.empty())
+    {
+        err = -1; // callers check err, not the returned size
+        RETURN_ON_ERROR_MSG(-1, "No kernel to run");
+    }
+    kernels.resize(kernel_names.size());
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &(kernels[0]), source, kernel_names[0], "-cl-std=CL2.0", false);
+#else
+    err = create_opencl_kernel(context, &program, &(kernels[0]), source, kernel_names[0]);
+#endif
+    RETURN_ON_ERROR(err)
+    // The remaining kernels come from the program built above
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+    size_t wg_size = work_group_size;
+    for(auto& k : kernels)
+    {
+        size_t max_wg_size;
+        err = clGetKernelWorkGroupInfo(
+            k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL
+        );
+        RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+        wg_size = (std::min)(wg_size, max_wg_size);
+    }
+    // The probe objects are no longer needed; release them so they are not leaked
+    for(auto& k : kernels)
+        clReleaseKernel(k);
+    clReleaseProgram(program);
+    return wg_size;
+}
+
+AUTO_TEST_CASE(test_spec_constants_in_kernel_attributes)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// If ONLY_SPIRV_COMPILATION is defined we can't check the max work-group size for the
+// kernel because OpenCL kernel object is never created in that mode.
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    const size_t max_wg_size = 16;
+#else
+    // Get max work-group size that can be used in [[cl::required_work_group_size(X, 1, 1)]]
+    // We do this by building kernel without this attribute and checking what is the max
+    // work-group size we can use with it.
+    auto test = spec_const_required_work_group_size_test(true, 1);
+    const size_t max_wg_size = get_max_wg_size(
+        test.generate_program(false), test.get_kernel_names(),
+        1024, // max wg size we want to test
+        device, context, queue,
+        error
+    );
+    RETURN_ON_ERROR_MSG(error, "Can't get max work-group size");
+#endif
+
+    // Run tests when specialization constant spec1 is set (kernel
+    // attribute is [[cl::required_work_group_size(spec1, 1, 1)]]).
+    for(size_t i = 1; i <= max_wg_size; i *=2)
+    {
+        RUN_SPEC_CONSTANTS_TEST_MACRO(
+            spec_const_required_work_group_size_test(
+                true, i
+            )
+        );
+    }
+    // This test does not set spec constant
+    RUN_SPEC_CONSTANTS_TEST_MACRO(
+        spec_const_required_work_group_size_test(
+            false, 9999999 // This value is incorrect, but won't be set and kernel
+                           // attribute should be [[cl::required_work_group_size(1, 1, 1)]]
+        )
+    );
+
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_ATTRIBUTES_HPP

diff --git a/test_conformance/clcpp/spec_constants/test_spec_consts_if.hpp b/test_conformance/clcpp/spec_constants/test_spec_consts_if.hpp
new file mode 100644
index 0000000..1c7cec2
--- /dev/null
+++ b/test_conformance/clcpp/spec_constants/test_spec_consts_if.hpp

@@ -0,0 +1,161 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_IF_HPP
+#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_IF_HPP
+
+#include <type_traits>
+
+#include "common.hpp"
+
+// This class tests using specialization constant in if statement
+template <class T /* spec constant type*/>
+struct spec_const_in_if_test : public spec_constants_test<cl_uint>
+{
+    // See generate_program() to know what set_spec_constant is for.
+    spec_const_in_if_test(const bool set_spec_constant)
+        : m_set_spec_constant(set_spec_constant)
+    {
+        static_assert(
+            is_vector_type<T>::value == false,
+            "Specialization constant can be only scalar int or float type"
+        );
+        switch (sizeof(T))
+        {
+            case 1:
+                m_test_value = T(127);
+                break;
+            case 2:
+                m_test_value = T(0xdeadU);
+                break;
+            // 4 and 8
+            default:
+                m_test_value = T(0xdeaddeadU);
+                break;
+        }
+    }
+
+    std::string str()
+    {
+        return "spec_const_in_if_" + type_name<T>();
+    }
+
+    cl_uint operator()(size_t i, size_t work_group_size)
+    {
+        (void) work_group_size;
+        if(m_set_spec_constant)
+        {
+            return m_test_value;
+        }
+        return static_cast<cl_uint>(i);
+    }
+
+    // Sets spec constant
+    cl_int set_spec_constants(const cl_program& program)
+    {
+        cl_int error = CL_SUCCESS;
+        if(m_set_spec_constant)
+        {
+            T spec1 = static_cast<T>(m_test_value);
+            error = clSetProgramSpecializationConstant(
+                program, cl_uint(1), sizeof(T), static_cast<void*>(&spec1)
+            );
+            RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+        }
+        return error;
+    }
+
+    // IF set_spec_constant == true:
+    // each work-item writes T(m_test_value) to output[work-item-global-id]
+    // Otherwise:
+    // each work-item writes T(get_global_id(0)) to output[work-item-global-id]
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) 
+            std::string result = "gid";
+            if(m_set_spec_constant)
+                result = std::to_string(m_test_value);
+            return 
+                "__kernel void " + this->get_kernel_name() + "(global uint *output)\n"
+                "{\n"
+                "    uint gid = get_global_id(0);\n"
+                "    output[gid] = " + result + ";\n"
+                "}\n";
+
+        #else
+            return         
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_spec_constant>\n"
+                "using namespace cl;\n"
+                "typedef " + type_name<T>() + " TYPE;\n"
+                "spec_constant<TYPE,  1> spec1{TYPE(0)};\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "    uint gid = get_global_id(0);\n"
+                "    if(get(spec1) == TYPE(" + std::to_string(m_test_value) +"))\n"
+                "    {\n"
+                "        output[gid] = " + std::to_string(m_test_value) +";\n"
+                "    }\n"
+                "    else\n"
+                "    {\n"
+                "        output[gid] = gid;\n"
+                "    }\n"
+                "}\n";        
+        #endif
+    }
+
+private:
+    bool m_set_spec_constant;
+    cl_uint m_test_value;
+};
+
+AUTO_TEST_CASE(test_spec_constants_in_if_statement)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{    
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    const std::vector<bool> set_spec_const_options { true, false };
+    for(auto option : set_spec_const_options)
+    {        
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_char>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_uchar>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_int>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_uint>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_long>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_ulong>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_float>(option));
+        if(is_extension_available(device, "cl_khr_fp16"))
+        {
+            RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_half>(option));
+        }
+        if(is_extension_available(device, "cl_khr_fp64"))
+        {
+            RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_double>(option));
+        }
+    }
+
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_IF_HPP

diff --git a/test_conformance/clcpp/spec_constants/test_spec_consts_init_vars.hpp b/test_conformance/clcpp/spec_constants/test_spec_consts_init_vars.hpp
new file mode 100644
index 0000000..20bbff0
--- /dev/null
+++ b/test_conformance/clcpp/spec_constants/test_spec_consts_init_vars.hpp

@@ -0,0 +1,174 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_INIT_VARS_HPP
+#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_INIT_VARS_HPP
+
+#include <type_traits>
+
+#include "common.hpp"
+
+// This class tests initializing variables with a specialization constant value.
+template <class T /* spec constant type*/>
+struct spec_const_init_var : public spec_constants_test<cl_uint>
+{
+    // See generate_program() to know what set_spec_constant is for.
+    spec_const_init_var(const bool set_spec_constant)
+        : m_set_spec_constant(set_spec_constant)
+    {
+        static_assert(
+            is_vector_type<T>::value == false,
+            "Specialization constant can be only scalar int or float type"
+        );
+        switch (sizeof(T))
+        {
+            case 1:
+                m_test_value = T(127);
+                break;
+            case 2:
+                m_test_value = T(0xdeadU);
+                break;
+            // 4 and 8
+            default:
+                m_test_value = T(0xdeaddeadU);
+                break;
+        }
+    }
+
+    std::string str()
+    {
+        return "spec_const_init_var_" + type_name<T>();
+    }
+
+    cl_uint operator()(size_t i, size_t work_group_size)
+    {
+        (void) work_group_size;
+        if(m_set_spec_constant)
+        {
+            return m_test_value;
+        }
+        return static_cast<cl_uint>(i);
+    }
+
+    // Sets spec constant
+    cl_int set_spec_constants(const cl_program& program)
+    {
+        cl_int error = CL_SUCCESS;
+        if(m_set_spec_constant)
+        {
+            T spec = static_cast<T>(m_test_value);
+            // spec1
+            error = clSetProgramSpecializationConstant(
+                program, cl_uint(1), sizeof(T), static_cast<void*>(&spec)
+            );
+            RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+
+            // spec2
+            error = clSetProgramSpecializationConstant(
+                program, cl_uint(2), sizeof(T), static_cast<void*>(&spec)
+            );
+            RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+        }
+        return error;
+    }
+
+    // IF set_spec_constant == true:
+    // each work-item writes T(m_test_value) to output[work-item-global-id]
+    // Otherwise:
+    // each work-item writes T(get_global_id(0)) to output[work-item-global-id]
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) 
+            std::string result = "gid";
+            if(m_set_spec_constant)
+                result = std::to_string(m_test_value);
+            return 
+                "__kernel void " + this->get_kernel_name() + "(global uint *output)\n"
+                "{\n"
+                "    uint gid = get_global_id(0);\n"
+                "    output[gid] = " + result + ";\n"
+                "}\n";
+
+        #else
+            return         
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_spec_constant>\n"
+                "using namespace cl;\n"
+                "typedef " + type_name<T>() + " TYPE;\n"
+                "spec_constant<TYPE,  1> spec1{TYPE(0)};\n"
+                "spec_constant<TYPE,  2> spec2{TYPE(0)};\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "    uint gid = get_global_id(0);\n"
+                "    TYPE var1(spec1.get());\n"
+                "    TYPE var2(spec2);\n"
+                "    TYPE var3; var3 = spec2;\n"
+                "    if((var1 == TYPE(" + std::to_string(m_test_value) +")) "
+                       "&& (var2 == TYPE(" + std::to_string(m_test_value) +"))\n"
+                       "&& (var3 == TYPE(" + std::to_string(m_test_value) +")))\n"
+                "    {\n"
+                "        output[gid] = " + std::to_string(m_test_value) +";\n"
+                "    }\n"
+                "    else\n"
+                "    {\n"
+                "        output[gid] = gid;\n"
+                "    }\n"
+                "}\n";        
+        #endif
+    }
+
+private:
+    bool m_set_spec_constant;
+    cl_uint m_test_value;
+};
+
+AUTO_TEST_CASE(test_spec_constants_initializing_variables)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{    
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+    const std::vector<bool> set_spec_const_options { true, false };
+    for(auto option : set_spec_const_options)
+    {        
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_char>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_uchar>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_int>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_uint>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_long>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_ulong>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_float>(option));
+        if(is_extension_available(device, "cl_khr_fp16"))
+        {
+            RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_half>(option));
+        }
+        if(is_extension_available(device, "cl_khr_fp64"))
+        {
+            RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_double>(option));
+        }
+    }
+
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_INIT_VARS_HPP

diff --git a/test_conformance/clcpp/spirv10_2016.04.27.7z b/test_conformance/clcpp/spirv10_2016.04.27.7z
new file mode 100644
index 0000000..306be24
--- /dev/null
+++ b/test_conformance/clcpp/spirv10_2016.04.27.7z

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe4f34d616ed7ef70e870c22078f60655f68b0c5191c8d8b9d045dd0e7390bc2
+size 5529152

diff --git a/test_conformance/clcpp/subgroups/CMakeLists.txt b/test_conformance/clcpp/subgroups/CMakeLists.txt
new file mode 100644
index 0000000..c8307d2
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Build definition for the CPP_SUBGROUPS module: a single source file,
+# main.cpp. The shared rules are pulled in from CMakeCommon.txt, which
+# presumably consumes MODULE_NAME and ${MODULE_NAME}_SOURCES -- confirm there.
+set(MODULE_NAME CPP_SUBGROUPS)
+
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+)
+
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/subgroups/common.hpp b/test_conformance/clcpp/subgroups/common.hpp
new file mode 100644
index 0000000..2b05a3c
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/common.hpp

@@ -0,0 +1,97 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP
+#define TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP
+
+#include <string>
+#include <vector>
+#include <limits>
+
+// Selects the group operation a test exercises. It is used as a template
+// argument by generate_input()/generate_output() in this header and is turned
+// into its enumerator spelling by to_string().
+enum class work_group_op : int {
+    add, min, max
+};
+
+// Returns the enumerator spelling of `op` ("add", "min" or "max"), or an
+// empty string when `op` holds a value outside the enumeration.
+//
+// The function is defined in a header, so it is declared inline: without it
+// every translation unit that includes this header would emit its own
+// definition and the program would fail to link with a multiple-definition
+// error.
+inline std::string to_string(work_group_op op)
+{
+    switch (op)
+    {
+        case work_group_op::add:
+            return "add";
+        case work_group_op::min:
+            return "min";
+        case work_group_op::max:
+            return "max";
+        default:
+            break;
+    }
+    return "";
+}
+
+// Builds `count` input values for a group-operation test:
+//   add - every element is 1
+//   min - elements count down wg_size, wg_size - 1, ..., 1 and then restart
+//   max - elements count up 0, 1, ..., wg_size - 1 and then restart
+// Any other `op` value yields the same all-ones vector as add.
+template <class CL_INT_TYPE, work_group_op op>
+std::vector<CL_INT_TYPE> generate_input(size_t count, size_t wg_size)
+{
+    std::vector<CL_INT_TYPE> values(count, CL_INT_TYPE(1));
+    if (op == work_group_op::min)
+    {
+        size_t next = wg_size;
+        for (auto it = values.begin(); it != values.end(); ++it)
+        {
+            *it = static_cast<CL_INT_TYPE>(next);
+            // Step down; restart from wg_size once the counter reaches zero.
+            next = (next - 1 == 0) ? wg_size : next - 1;
+        }
+    }
+    else if (op == work_group_op::max)
+    {
+        size_t next = 0;
+        for (auto it = values.begin(); it != values.end(); ++it)
+        {
+            *it = static_cast<CL_INT_TYPE>(next);
+            // Step up; restart from zero once the counter reaches wg_size.
+            next = (next + 1 == wg_size) ? 0 : next + 1;
+        }
+    }
+    return values;
+}
+
+// Builds the initial contents of the output buffer: `count` copies of a value
+// chosen per operation -- 0 for add, the largest representable value for min
+// and the smallest representable value for max. Any other `op` value yields
+// zeros. `wg_size` is not used.
+template <class CL_INT_TYPE, work_group_op op>
+std::vector<CL_INT_TYPE> generate_output(size_t count, size_t wg_size)
+{
+    CL_INT_TYPE fill_value = CL_INT_TYPE(0);
+    if (op == work_group_op::min)
+    {
+        // Parenthesised to stay clear of any function-like max() macro.
+        fill_value = (std::numeric_limits<CL_INT_TYPE>::max)();
+    }
+    else if (op == work_group_op::max)
+    {
+        fill_value = (std::numeric_limits<CL_INT_TYPE>::min)();
+    }
+    return std::vector<CL_INT_TYPE>(count, fill_value);
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP

diff --git a/test_conformance/clcpp/subgroups/main.cpp b/test_conformance/clcpp/subgroups/main.cpp
new file mode 100644
index 0000000..c026228
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/main.cpp

@@ -0,0 +1,29 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_sg_all.hpp"
+#include "test_sg_any.hpp"
+#include "test_sg_broadcast.hpp"
+#include "test_sg_reduce.hpp"
+#include "test_sg_scan_inclusive.hpp"
+#include "test_sg_scan_exclusive.hpp"
+
+// Entry point of the sub-group test binary: passes the test definitions held
+// by the global autotest suite (presumably filled by the AUTO_TEST_CASE
+// definitions in the headers included above) to runTestHarness.
+int main(int argc, const char *argv[])
+{
+    auto& registered_tests = autotest::test_suite::global_test_suite().test_defs;
+    const auto test_count = registered_tests.size();
+    const auto test_table = registered_tests.data();
+    return runTestHarness(argc, argv, test_count, test_table, false, false, 0);
+}

diff --git a/test_conformance/clcpp/subgroups/test_sg_all.hpp b/test_conformance/clcpp/subgroups/test_sg_all.hpp
new file mode 100644
index 0000000..01d6638
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/test_sg_all.hpp

@@ -0,0 +1,219 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP
+#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+
+// Returns the OpenCL C++ source of the test_sg_all kernel. Each work-item
+// evaluates input[tid] < input[tid+1], combines it with sub_group_all and
+// stores 1 when the combined result is true, 0 otherwise.
+std::string generate_sg_all_kernel_code()
+{
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_sg_all(global_ptr<uint[]> input, global_ptr<uint[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    bool result = sub_group_all(input[tid] < input[tid+1]);\n";
+    source += "    if(!result) {\n";
+    source += "        output[tid] = 0;\n";
+    source += "        return;\n";
+    source += "    }\n";
+    source += "    output[tid] = 1;\n";
+    source += "}\n";
+    return source;
+}
+
+// Host-side reference check for the test_sg_all kernel.
+//
+//   in      - kernel input; in[idx + 1] is read for every checked idx, so it
+//             must hold one element more than `count`
+//   out     - values the kernel stored (expected: 1 when in[idx] < in[idx+1]
+//             holds for every work-item of the sub-group, 0 otherwise)
+//   count   - number of work-items to check
+//   wg_size - work-group size used to partition `count`
+//   sg_size - sub-group size used to partition each work-group
+//
+// Returns CL_SUCCESS when every checked element matches, or -1 after logging
+// the first mismatch.
+int verify_sg_all(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size)
+{
+    for (size_t i = 0; i < count; i += wg_size)
+    {
+        // The last work-group may be partial when count is not a multiple
+        // of wg_size.
+        const size_t wg_items = std::min(wg_size, count - i);
+        for (size_t j = 0; j < wg_items; j += sg_size)
+        {
+            // Bound the sub-group by the work-items that actually exist so a
+            // partial work-group is never read past `count`.
+            const size_t sg_items = std::min(sg_size, wg_items - j);
+
+            // sub-group all
+            bool all = true;
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                if(!(in[i+j+k] < in[i+j+k+1]))
+                {
+                    all = false;
+                    break;
+                }
+            }
+
+            // Convert bool to uint
+            const cl_uint all_uint = all ? 1 : 0;
+            // Check if all work-items in sub-group stored correct value
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                if (all_uint != out[i + j + k])
+                {
+                    // %lu expects unsigned long; size_t is a different width
+                    // on LLP64 targets, hence the explicit casts. The index
+                    // reported is the element that actually mismatched.
+                    log_info(
+                        "sub_group_all %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<cl_uint>().c_str(),
+                        static_cast<unsigned long>(i + j + k),
+                        static_cast<unsigned long>(all_uint),
+                        static_cast<unsigned long>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Builds the input for test_sg_all: element idx holds idx, except that once
+// per wg_size-long run, in the part of the data past count / 2, one element
+// repeats its predecessor. At that spot the kernel predicate
+// input[tid] < input[tid+1] is false, so sub_group_all must report false for
+// the sub-group containing it.
+std::vector<cl_uint> generate_input_sg_all(size_t count, size_t wg_size)
+{
+    std::vector<cl_uint> values(count, cl_uint(0));
+    const size_t half_wg = wg_size / 2;
+    const size_t half_count = count / 2;
+    size_t countdown = wg_size;
+    for (size_t idx = 0; idx < count; ++idx)
+    {
+        const bool repeat_previous = (countdown == half_wg) && (idx > half_count);
+        // idx > half_count guarantees idx >= 1, so idx - 1 is a valid index.
+        values[idx] = repeat_previous ? values[idx - 1] : static_cast<cl_uint>(idx);
+
+        // Count down through the run and restart when it is exhausted.
+        --countdown;
+        if (countdown == 0)
+        {
+            countdown = wg_size;
+        }
+    }
+    return values;
+}
+
+// Returns the initial host-side output vector for test_sg_all: `count`
+// elements, all set to 1. `wg_size` is accepted but not used.
+std::vector<cl_uint> generate_output_sg_all(size_t count, size_t wg_size)
+{
+    static_cast<void>(wg_size);
+    std::vector<cl_uint> initial_output(count, cl_uint(1));
+    return initial_output;
+}
+
+// Builds the test_sg_all kernel, runs it over at least `count` work-items
+// (rounded up to a whole number of work-groups) and checks the result with
+// verify_sg_all().
+//
+// Returns CL_SUCCESS on success; on failure the RETURN_ON_* macros
+// (defined elsewhere, presumably returning from this function) report the
+// error.
+// NOTE(review): the early error returns taken before the final check still do
+// not release the OpenCL objects created so far.
+int sub_group_all(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t sg_max_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_sg_all_kernel_code();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_all");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+    return CL_SUCCESS;
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_all");
+    RETURN_ON_ERROR(err)
+#endif
+
+    // The kernel's maximum work-group size is used as the local size.
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Query the maximum sub-group size for a 1D launch with that local size.
+    size_t param_value_size = 0;
+    err = clGetKernelSubGroupInfo(
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+        sizeof(size_t), static_cast<void*>(&wg_size),
+        sizeof(size_t), static_cast<void*>(&sg_max_size),
+        &param_value_size
+    );
+    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+
+    // Verify size of returned param
+    if(param_value_size != sizeof(size_t))
+    {
+        // %lu expects unsigned long; cast because size_t may be wider.
+        RETURN_ON_ERROR_MSG(-1,
+            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
+            static_cast<unsigned long>(sizeof(size_t)),
+            static_cast<unsigned long>(param_value_size)
+        )
+    }
+
+    // Calculate global work size: round count up to a whole number of
+    // work-groups with integer arithmetic (no round trip through double).
+    const size_t wg_number = count / wg_size + ((count % wg_size) != 0 ? 1 : 0);
+    const size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    // One extra input element because the kernel reads input[tid + 1].
+    std::vector<cl_uint> input = generate_input_sg_all(flat_work_size + 1, wg_size);
+    std::vector<cl_uint> output = generate_output_sg_all(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read: the results are on the host once this call returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    const int verify_result = verify_sg_all(input, output, flat_work_size, wg_size, sg_max_size);
+
+    // Release the OpenCL objects before acting on the verification result so
+    // that a failed verification does not leak them.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+
+    if (verify_result != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "sub_group_all failed");
+    }
+    log_info("sub_group_all passed\n");
+    return err;
+}
+
+// Test-case wrapper that runs sub_group_all() for n_elems elements and passes
+// its status through CHECK_ERROR before returning it.
+AUTO_TEST_CASE(test_sub_group_all)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    const size_t element_count = static_cast<size_t>(n_elems);
+    int err = sub_group_all(device, context, queue, element_count);
+    CHECK_ERROR(err)
+    return err;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP

diff --git a/test_conformance/clcpp/subgroups/test_sg_any.hpp b/test_conformance/clcpp/subgroups/test_sg_any.hpp
new file mode 100644
index 0000000..769bef0
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/test_sg_any.hpp

@@ -0,0 +1,219 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP
+#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+
+// Returns the OpenCL C++ source of the test_sg_any kernel. Each work-item
+// evaluates input[tid] == input[tid+1], combines it with sub_group_any and
+// stores 1 when the combined result is true, 0 otherwise.
+std::string generate_sg_any_kernel_code()
+{
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_sg_any(global_ptr<uint[]> input, global_ptr<uint[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    bool result = sub_group_any(input[tid] == input[tid+1]);\n";
+    source += "    if(!result) {\n";
+    source += "        output[tid] = 0;\n";
+    source += "        return;\n";
+    source += "    }\n";
+    source += "    output[tid] = 1;\n";
+    source += "}\n";
+    return source;
+}
+
+// Host-side reference check for the test_sg_any kernel.
+//
+//   in      - kernel input; in[idx + 1] is read for every checked idx, so it
+//             must hold one element more than `count`
+//   out     - values the kernel stored (expected: 1 when in[idx] == in[idx+1]
+//             holds for at least one work-item of the sub-group, 0 otherwise)
+//   count   - number of work-items to check
+//   wg_size - work-group size used to partition `count`
+//   sg_size - sub-group size used to partition each work-group
+//
+// Returns CL_SUCCESS when every checked element matches, or -1 after logging
+// the first mismatch.
+int verify_sg_any(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size)
+{
+    for (size_t i = 0; i < count; i += wg_size)
+    {
+        // The last work-group may be partial when count is not a multiple
+        // of wg_size.
+        const size_t wg_items = std::min(wg_size, count - i);
+        for (size_t j = 0; j < wg_items; j += sg_size)
+        {
+            // Bound the sub-group by the work-items that actually exist so a
+            // partial work-group is never read past `count`.
+            const size_t sg_items = std::min(sg_size, wg_items - j);
+
+            // sub-group any
+            bool any = false;
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                if(in[i+j+k] == in[i+j+k+1])
+                {
+                    any = true;
+                    break;
+                }
+            }
+
+            // Convert bool to uint
+            const cl_uint any_uint = any ? 1 : 0;
+            // Check if all work-items in sub-group stored correct value
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                if (any_uint != out[i + j + k])
+                {
+                    // %lu expects unsigned long; size_t is a different width
+                    // on LLP64 targets, hence the explicit casts. The index
+                    // reported is the element that actually mismatched.
+                    log_info(
+                        "sub_group_any %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<cl_uint>().c_str(),
+                        static_cast<unsigned long>(i + j + k),
+                        static_cast<unsigned long>(any_uint),
+                        static_cast<unsigned long>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Builds the input for test_sg_any: element idx holds idx, except that once
+// per wg_size-long run, in the part of the data past count / 2, one element
+// repeats its predecessor. At that spot the kernel predicate
+// input[tid] == input[tid+1] is true, so sub_group_any must report true for
+// the sub-group containing it; everywhere else the values are distinct.
+std::vector<cl_uint> generate_input_sg_any(size_t count, size_t wg_size)
+{
+    std::vector<cl_uint> values(count, cl_uint(0));
+    const size_t half_wg = wg_size / 2;
+    const size_t half_count = count / 2;
+    size_t countdown = wg_size;
+    for (size_t idx = 0; idx < count; ++idx)
+    {
+        const bool repeat_previous = (countdown == half_wg) && (idx > half_count);
+        // idx > half_count guarantees idx >= 1, so idx - 1 is a valid index.
+        values[idx] = repeat_previous ? values[idx - 1] : static_cast<cl_uint>(idx);
+
+        // Count down through the run and restart when it is exhausted.
+        --countdown;
+        if (countdown == 0)
+        {
+            countdown = wg_size;
+        }
+    }
+    return values;
+}
+
+// Returns the initial host-side output vector for test_sg_any: `count`
+// elements, all set to 1. `wg_size` is accepted but not used.
+std::vector<cl_uint> generate_output_sg_any(size_t count, size_t wg_size)
+{
+    static_cast<void>(wg_size);
+    std::vector<cl_uint> initial_output(count, cl_uint(1));
+    return initial_output;
+}
+
+// Builds the test_sg_any kernel, runs it over at least `count` work-items
+// (rounded up to a whole number of work-groups) and checks the result with
+// verify_sg_any().
+//
+// Returns CL_SUCCESS on success; on failure the RETURN_ON_* macros
+// (defined elsewhere, presumably returning from this function) report the
+// error.
+// NOTE(review): the early error returns taken before the final check still do
+// not release the OpenCL objects created so far.
+int sub_group_any(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t sg_max_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_sg_any_kernel_code();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_any");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+    return CL_SUCCESS;
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_any");
+    RETURN_ON_ERROR(err)
+#endif
+
+    // The kernel's maximum work-group size is used as the local size.
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Query the maximum sub-group size for a 1D launch with that local size.
+    size_t param_value_size = 0;
+    err = clGetKernelSubGroupInfo(
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+        sizeof(size_t), static_cast<void*>(&wg_size),
+        sizeof(size_t), static_cast<void*>(&sg_max_size),
+        &param_value_size
+    );
+    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+
+    // Verify size of returned param
+    if(param_value_size != sizeof(size_t))
+    {
+        // %lu expects unsigned long; cast because size_t may be wider.
+        RETURN_ON_ERROR_MSG(-1,
+            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
+            static_cast<unsigned long>(sizeof(size_t)),
+            static_cast<unsigned long>(param_value_size)
+        )
+    }
+
+    // Calculate global work size: round count up to a whole number of
+    // work-groups with integer arithmetic (no round trip through double).
+    const size_t wg_number = count / wg_size + ((count % wg_size) != 0 ? 1 : 0);
+    const size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    // One extra input element because the kernel reads input[tid + 1].
+    std::vector<cl_uint> input = generate_input_sg_any(flat_work_size + 1, wg_size);
+    std::vector<cl_uint> output = generate_output_sg_any(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read: the results are on the host once this call returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    const int verify_result = verify_sg_any(input, output, flat_work_size, wg_size, sg_max_size);
+
+    // Release the OpenCL objects before acting on the verification result so
+    // that a failed verification does not leak them.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+
+    if (verify_result != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "sub_group_any failed");
+    }
+    log_info("sub_group_any passed\n");
+    return err;
+}
+
+// Test-case wrapper that runs sub_group_any() for n_elems elements and passes
+// its status through CHECK_ERROR before returning it.
+AUTO_TEST_CASE(test_sub_group_any)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    const size_t element_count = static_cast<size_t>(n_elems);
+    int err = sub_group_any(device, context, queue, element_count);
+    CHECK_ERROR(err)
+    return err;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP

diff --git a/test_conformance/clcpp/subgroups/test_sg_broadcast.hpp b/test_conformance/clcpp/subgroups/test_sg_broadcast.hpp
new file mode 100644
index 0000000..39e420a
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/test_sg_broadcast.hpp

@@ -0,0 +1,204 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP
+#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+
+// Returns the OpenCL C++ source of the test_sg_broadcast kernel. Each
+// work-item stores the result of sub_group_broadcast(input[tid], 0) at
+// output[tid].
+std::string generate_sg_broadcast_kernel_code()
+{
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_sg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    uint result = sub_group_broadcast(input[tid], 0);\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+}
+
+// Host-side reference check for the test_sg_broadcast kernel.
+//
+//   in      - kernel input
+//   out     - values the kernel stored; every work-item of a sub-group is
+//             expected to hold the input of the sub-group's first work-item
+//   count   - number of work-items to check
+//   wg_size - work-group size used to partition `count`
+//   sg_size - sub-group size used to partition each work-group
+//
+// Returns CL_SUCCESS when every checked element matches, or -1 after logging
+// the first mismatch.
+int
+verify_sg_broadcast(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size)
+{
+    for (size_t i = 0; i < count; i += wg_size)
+    {
+        // The last work-group may be partial when count is not a multiple
+        // of wg_size.
+        const size_t wg_items = std::min(wg_size, count - i);
+        for (size_t j = 0; j < wg_items; j += sg_size)
+        {
+            // Bound the sub-group by the work-items that actually exist so a
+            // partial work-group is never read past `count`.
+            const size_t sg_items = std::min(sg_size, wg_items - j);
+
+            // sub-group broadcast
+            const cl_uint broadcast_result = in[i+j];
+
+            // Check if all work-items in sub-group stored correct value
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                if (broadcast_result != out[i + j + k])
+                {
+                    // The message names this test (it used to say
+                    // "sub_group_any"). %lu expects unsigned long, hence the
+                    // casts; the index is the element that mismatched.
+                    log_info(
+                        "sub_group_broadcast %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<cl_uint>().c_str(),
+                        static_cast<unsigned long>(i + j + k),
+                        static_cast<unsigned long>(broadcast_result),
+                        static_cast<unsigned long>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Builds the input for test_sg_broadcast: the values count down
+// wg_size, wg_size - 1, ..., 1 and then restart from wg_size.
+std::vector<cl_uint> generate_input_sg_broadcast(size_t count, size_t wg_size)
+{
+    std::vector<cl_uint> values(count, cl_uint(0));
+    size_t countdown = wg_size;
+    for (size_t idx = 0; idx < count; ++idx)
+    {
+        values[idx] = static_cast<cl_uint>(countdown);
+        // Step down; restart from wg_size once the counter reaches zero.
+        countdown = (countdown - 1 == 0) ? wg_size : countdown - 1;
+    }
+    return values;
+}
+
+// Returns the initial host-side output vector for test_sg_broadcast: `count`
+// elements, all set to 1. `wg_size` is accepted but not used.
+std::vector<cl_uint> generate_output_sg_broadcast(size_t count, size_t wg_size)
+{
+    static_cast<void>(wg_size);
+    std::vector<cl_uint> initial_output(count, cl_uint(1));
+    return initial_output;
+}
+
+// Builds the test_sg_broadcast kernel, runs it over at least `count`
+// work-items (rounded up to a whole number of work-groups) and checks the
+// result with verify_sg_broadcast().
+//
+// Returns CL_SUCCESS on success; on failure the RETURN_ON_* macros
+// (defined elsewhere, presumably returning from this function) report the
+// error.
+// NOTE(review): the early error returns taken before the final check still do
+// not release the OpenCL objects created so far.
+int sub_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t sg_max_size;
+    size_t work_size[] = { 1 };
+    int err;
+
+    // Get kernel source code
+    std::string code_str = generate_sg_broadcast_kernel_code();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+    return CL_SUCCESS;
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast");
+    RETURN_ON_ERROR(err)
+#endif
+
+    // Get max flat workgroup size
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Query the maximum sub-group size for a 1D launch with that local size.
+    size_t param_value_size = 0;
+    err = clGetKernelSubGroupInfo(
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+        sizeof(size_t), static_cast<void*>(&wg_size),
+        sizeof(size_t), static_cast<void*>(&sg_max_size),
+        &param_value_size
+    );
+    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+
+    // Verify size of returned param
+    if(param_value_size != sizeof(size_t))
+    {
+        // %lu expects unsigned long; cast because size_t may be wider.
+        RETURN_ON_ERROR_MSG(-1,
+            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
+            static_cast<unsigned long>(sizeof(size_t)),
+            static_cast<unsigned long>(param_value_size)
+        )
+    }
+
+    // Calculate global work size: round count up to a whole number of
+    // work-groups with integer arithmetic (no round trip through double).
+    const size_t wg_number = count / wg_size + ((count % wg_size) != 0 ? 1 : 0);
+    const size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    std::vector<cl_uint> input = generate_input_sg_broadcast(flat_work_size, wg_size);
+    std::vector<cl_uint> output = generate_output_sg_broadcast(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL,&err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read: the results are on the host once this call returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    int result = verify_sg_broadcast( input, output, work_size[0], wg_size, sg_max_size);
+
+    // Release the OpenCL objects before acting on the verification result so
+    // that a failed verification does not leak them.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+
+    RETURN_ON_ERROR_MSG(result, "sub_group_broadcast failed")
+    log_info("sub_group_broadcast passed\n");
+    return err;
+}
+
+// Test-case wrapper that runs sub_group_broadcast() for n_elems elements and
+// passes its status through CHECK_ERROR before returning it.
+AUTO_TEST_CASE(test_sub_group_broadcast)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    const size_t element_count = static_cast<size_t>(n_elems);
+    int err = sub_group_broadcast(device, context, queue, element_count);
+    CHECK_ERROR(err)
+    return err;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP

diff --git a/test_conformance/clcpp/subgroups/test_sg_reduce.hpp b/test_conformance/clcpp/subgroups/test_sg_reduce.hpp
new file mode 100644
index 0000000..6b20d50
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/test_sg_reduce.hpp

@@ -0,0 +1,345 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP
+#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+
+// Builds the OpenCL C++ source of the test_sg_reduce kernel. Each work-item
+// reads input[tid], passes it to sub_group_reduce<work_group_op::OP> and
+// stores the returned value in output[tid]. The element type and the
+// operation name are taken from the template arguments.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_sg_reduce_kernel_code()
+{
+    const std::string elem_type = type_name<CL_INT_TYPE>();
+    const std::string op_name = to_string(op);
+
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_sg_reduce(global_ptr<" + elem_type + "[]> input, ";
+    source += "global_ptr<" + elem_type + "[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    " + elem_type + " result = sub_group_reduce<work_group_op::" + op_name + ">(input[tid]);\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+}
+
+// Host-side reference check for sub_group_reduce<work_group_op::add>.
+//
+// `in` is walked work-group by work-group (wg_size elements each) and, inside
+// a work-group, in consecutive chunks of sg_size elements (the last chunk of a
+// work-group may be shorter). Every element of `out` in a chunk must equal the
+// sum of the corresponding elements of `in`.
+//
+// Returns CL_SUCCESS if every element matches, -1 on the first mismatch.
+template <class CL_INT_TYPE>
+int verify_sg_reduce_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+{
+    size_t i, j, k;
+    for (i = 0; i < in.size(); i += wg_size)
+    {
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
+        {
+            // Number of elements in this chunk (the tail chunk may be partial)
+            const size_t sg_items = (wg_size - j) > sg_size ? sg_size : (wg_size - j);
+
+            CL_INT_TYPE sum = 0;
+            for (k = 0; k < sg_items; k++)
+            {
+                sum += in[i + j + k];
+            }
+
+            // Check if all work-items in sub-group stored correct value
+            for (k = 0; k < sg_items; k++)
+            {
+                if (sum != out[i + j + k])
+                {
+                    // Report the element type actually under test; the
+                    // position printed is the start of the failing chunk.
+                    log_info(
+                        "sub_group_reduce_add %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(sum),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side reference check for sub_group_reduce<work_group_op::min>.
+//
+// `in` is walked work-group by work-group (wg_size elements each) and, inside
+// a work-group, in consecutive chunks of sg_size elements (the last chunk of a
+// work-group may be shorter). Every element of `out` in a chunk must equal the
+// minimum of the corresponding elements of `in`.
+//
+// Returns CL_SUCCESS if every element matches, -1 on the first mismatch.
+template <class CL_INT_TYPE>
+int verify_sg_reduce_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+{
+    size_t i, j, k;
+    for (i = 0; i < in.size(); i += wg_size)
+    {
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
+        {
+            // Number of elements in this chunk (the tail chunk may be partial)
+            const size_t sg_items = (wg_size - j) > sg_size ? sg_size : (wg_size - j);
+
+            // Start from the largest representable value so any input lowers it
+            CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
+            for (k = 0; k < sg_items; k++)
+            {
+                min = std::min<CL_INT_TYPE>(min, in[i + j + k]);
+            }
+
+            // Check if all work-items in sub-group stored correct value
+            for (k = 0; k < sg_items; k++)
+            {
+                if (min != out[i + j + k])
+                {
+                    // Report the element type actually under test; the
+                    // position printed is the start of the failing chunk.
+                    log_info(
+                        "sub_group_reduce_min %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(min),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side reference check for sub_group_reduce<work_group_op::max>.
+//
+// `in` is walked work-group by work-group (wg_size elements each) and, inside
+// a work-group, in consecutive chunks of sg_size elements (the last chunk of a
+// work-group may be shorter). Every element of `out` in a chunk must equal the
+// maximum of the corresponding elements of `in`.
+//
+// Returns CL_SUCCESS if every element matches, -1 on the first mismatch.
+template <class CL_INT_TYPE>
+int verify_sg_reduce_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+{
+    size_t i, j, k;
+    for (i = 0; i < in.size(); i += wg_size)
+    {
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
+        {
+            // Number of elements in this chunk (the tail chunk may be partial)
+            const size_t sg_items = (wg_size - j) > sg_size ? sg_size : (wg_size - j);
+
+            // Start from the smallest representable value so any input raises it
+            CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
+            for (k = 0; k < sg_items; k++)
+            {
+                max = std::max<CL_INT_TYPE>(max, in[i + j + k]);
+            }
+
+            // Check if all work-items in sub-group stored correct value
+            for (k = 0; k < sg_items; k++)
+            {
+                if (max != out[i + j + k])
+                {
+                    // Report the element type actually under test; the
+                    // position printed is the start of the failing chunk.
+                    log_info(
+                        "sub_group_reduce_max %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(max),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Forwards to the host-side verifier that matches `op` (add, min or max) and
+// returns its result. Any other operation value yields -1.
+template <class CL_INT_TYPE, work_group_op op>
+int verify_sg_reduce(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+{
+    if (op == work_group_op::add)
+    {
+        return verify_sg_reduce_add(in, out, wg_size, sg_size);
+    }
+    if (op == work_group_op::min)
+    {
+        return verify_sg_reduce_min(in, out, wg_size, sg_size);
+    }
+    if (op == work_group_op::max)
+    {
+        return verify_sg_reduce_max(in, out, wg_size, sg_size);
+    }
+    return -1;
+}
+
+// Runs the test_sg_reduce kernel for element type CL_INT_TYPE and operation
+// `op`, then checks the device output with verify_sg_reduce.
+//
+// `count` is rounded up to a whole number of work-groups; the work-group size
+// is the value reported for CL_KERNEL_WORK_GROUP_SIZE and the sub-group size
+// used for verification is the one reported for
+// CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE with that work-group size.
+//
+// Returns CL_SUCCESS without running anything when
+// type_supported<CL_INT_TYPE>(device) is false, -1 when verification fails,
+// and otherwise the last OpenCL status. The RETURN_ON_* macros are expected to
+// return early when a step fails.
+template <class CL_INT_TYPE, work_group_op op>
+int sub_group_reduce(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t sg_max_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_sg_reduce_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_reduce");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+    return CL_SUCCESS;
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_reduce");
+    RETURN_ON_ERROR(err)
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Query the maximum sub-group size for a 1D NDRange of wg_size work-items
+    size_t param_value_size = 0;
+    err = clGetKernelSubGroupInfo(
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+        sizeof(size_t), static_cast<void*>(&wg_size),
+        sizeof(size_t), static_cast<void*>(&sg_max_size),
+        &param_value_size
+    );
+    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+
+    // Verify size of returned param
+    if(param_value_size != sizeof(size_t))
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
+            sizeof(size_t),
+            param_value_size
+        )
+    }
+
+    // Calculate global work size: round count up to a whole number of
+    // work-groups. Integer ceil-division avoids the round trip through double.
+    size_t wg_number = (count + wg_size - 1) / wg_size;
+    size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read: output is complete once this call returns
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    int verify_result = verify_sg_reduce<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size);
+
+    // Release the OpenCL objects before reporting the result so that a
+    // verification failure does not leak them.
+    // NOTE(review): the earlier RETURN_ON_* error paths still return without
+    // releasing what has been created so far.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+
+    if (verify_result != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "sub_group_reduce_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("sub_group_reduce_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+
+    return err;
+}
+
+// Runs sub_group_reduce<work_group_op::add> for cl_int, cl_uint, cl_long and
+// cl_ulong. Each status is passed to CHECK_ERROR; the statuses are OR-ed
+// together and -1 is returned when the combined value is not CL_SUCCESS.
+AUTO_TEST_CASE(test_sub_group_reduce_add)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int status_int = sub_group_reduce<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status_int)
+
+    int status_uint = sub_group_reduce<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status_uint)
+
+    int status_long = sub_group_reduce<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status_long)
+
+    int status_ulong = sub_group_reduce<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status_ulong)
+
+    int combined = CL_SUCCESS | status_int | status_uint | status_long | status_ulong;
+    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+// Runs sub_group_reduce<work_group_op::min> for cl_int, cl_uint, cl_long and
+// cl_ulong. Each status is passed to CHECK_ERROR; the statuses are OR-ed
+// together and -1 is returned when the combined value is not CL_SUCCESS.
+AUTO_TEST_CASE(test_sub_group_reduce_min)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int status_int = sub_group_reduce<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(status_int)
+
+    int status_uint = sub_group_reduce<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(status_uint)
+
+    int status_long = sub_group_reduce<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(status_long)
+
+    int status_ulong = sub_group_reduce<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(status_ulong)
+
+    int combined = CL_SUCCESS | status_int | status_uint | status_long | status_ulong;
+    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+// Runs sub_group_reduce<work_group_op::max> for cl_int, cl_uint, cl_long and
+// cl_ulong. Each status is passed to CHECK_ERROR; the statuses are OR-ed
+// together and -1 is returned when the combined value is not CL_SUCCESS.
+AUTO_TEST_CASE(test_sub_group_reduce_max)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int status_int = sub_group_reduce<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status_int)
+
+    int status_uint = sub_group_reduce<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status_uint)
+
+    int status_long = sub_group_reduce<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status_long)
+
+    int status_ulong = sub_group_reduce<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status_ulong)
+
+    int combined = CL_SUCCESS | status_int | status_uint | status_long | status_ulong;
+    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP

diff --git a/test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp b/test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp
new file mode 100644
index 0000000..7bb2b18
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp

@@ -0,0 +1,325 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP
+#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP
+
+#include <vector>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+
+// Builds the OpenCL C++ source of the test_sg_scan_exclusive kernel. Each
+// work-item reads input[tid], passes it to
+// sub_group_scan_exclusive<work_group_op::OP> and stores the returned value in
+// output[tid]. The element type and the operation name are taken from the
+// template arguments.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_sg_scan_exclusive_kernel_code()
+{
+    const std::string elem_type = type_name<CL_INT_TYPE>();
+    const std::string op_name = to_string(op);
+
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_sg_scan_exclusive(global_ptr<" + elem_type + "[]> input, ";
+    source += "global_ptr<" + elem_type + "[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    " + elem_type + " result = sub_group_scan_exclusive<work_group_op::" + op_name + ">(input[tid]);\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+}
+
+// Host-side reference check for sub_group_scan_exclusive<work_group_op::add>.
+//
+// `in` is walked work-group by work-group (wg_size elements each) and, inside
+// a work-group, in consecutive chunks of sg_size elements (the last chunk of a
+// work-group may be shorter). Within a chunk, out[n] must equal the sum of the
+// chunk's inputs that precede position n; the first element must be 0.
+//
+// Returns CL_SUCCESS if every element matches, -1 on the first mismatch.
+template <class CL_INT_TYPE>
+int verify_sg_scan_exclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+{
+    size_t i, j, k;
+    for (i = 0; i < in.size(); i += wg_size)
+    {
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
+        {
+            // Number of elements in this chunk (the tail chunk may be partial)
+            const size_t sg_items = (wg_size - j) > sg_size ? sg_size : (wg_size - j);
+
+            CL_INT_TYPE sum = 0;
+            // Check if all work-items in sub-group stored correct value
+            for (k = 0; k < sg_items; k++)
+            {
+                if (sum != out[i + j + k])
+                {
+                    // Report the element type actually under test; the
+                    // position printed is the start of the failing chunk.
+                    log_info(
+                        "sub_group_scan_exclusive_add %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(sum),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+                // Exclusive scan: the current input only affects later elements
+                sum += in[i + j + k];
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side reference check for sub_group_scan_exclusive<work_group_op::min>.
+//
+// `in` is walked work-group by work-group (wg_size elements each) and, inside
+// a work-group, in consecutive chunks of sg_size elements (the last chunk of a
+// work-group may be shorter). Within a chunk, out[n] must equal the minimum of
+// the chunk's inputs that precede position n; the first element must be the
+// largest value representable in CL_INT_TYPE.
+//
+// Returns CL_SUCCESS if every element matches, -1 on the first mismatch.
+template <class CL_INT_TYPE>
+int verify_sg_scan_exclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+{
+    size_t i, j, k;
+    for (i = 0; i < in.size(); i += wg_size)
+    {
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
+        {
+            // Number of elements in this chunk (the tail chunk may be partial)
+            const size_t sg_items = (wg_size - j) > sg_size ? sg_size : (wg_size - j);
+
+            CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
+            // Check if all work-items in sub-group stored correct value
+            for (k = 0; k < sg_items; k++)
+            {
+                if (min != out[i + j + k])
+                {
+                    // Report the element type actually under test; the
+                    // position printed is the start of the failing chunk.
+                    log_info(
+                        "sub_group_scan_exclusive_min %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(min),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+                // Exclusive scan: the current input only affects later elements
+                min = std::min<CL_INT_TYPE>(min, in[i + j + k]);
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side reference check for sub_group_scan_exclusive<work_group_op::max>.
+//
+// `in` is walked work-group by work-group (wg_size elements each) and, inside
+// a work-group, in consecutive chunks of sg_size elements (the last chunk of a
+// work-group may be shorter). Within a chunk, out[n] must equal the maximum of
+// the chunk's inputs that precede position n; the first element must be the
+// smallest value representable in CL_INT_TYPE.
+//
+// Returns CL_SUCCESS if every element matches, -1 on the first mismatch.
+template <class CL_INT_TYPE>
+int verify_sg_scan_exclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+{
+    size_t i, j, k;
+    for (i = 0; i < in.size(); i += wg_size)
+    {
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
+        {
+            // Number of elements in this chunk (the tail chunk may be partial)
+            const size_t sg_items = (wg_size - j) > sg_size ? sg_size : (wg_size - j);
+
+            CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
+            // Check if all work-items in sub-group stored correct value
+            for (k = 0; k < sg_items; k++)
+            {
+                if (max != out[i + j + k])
+                {
+                    // Report the element type actually under test; the
+                    // position printed is the start of the failing chunk.
+                    log_info(
+                        "sub_group_scan_exclusive_max %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(max),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+                // Exclusive scan: the current input only affects later elements
+                max = std::max<CL_INT_TYPE>(max, in[i + j + k]);
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Forwards to the host-side exclusive-scan verifier that matches `op` (add,
+// min or max) and returns its result. Any other operation value yields -1.
+template <class CL_INT_TYPE, work_group_op op>
+int verify_sg_scan_exclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+{
+    if (op == work_group_op::add)
+    {
+        return verify_sg_scan_exclusive_add(in, out, wg_size, sg_size);
+    }
+    if (op == work_group_op::min)
+    {
+        return verify_sg_scan_exclusive_min(in, out, wg_size, sg_size);
+    }
+    if (op == work_group_op::max)
+    {
+        return verify_sg_scan_exclusive_max(in, out, wg_size, sg_size);
+    }
+    return -1;
+}
+
+// Runs the test_sg_scan_exclusive kernel for element type CL_INT_TYPE and
+// operation `op`, then checks the device output with verify_sg_scan_exclusive.
+//
+// `count` is rounded up to a whole number of work-groups; the work-group size
+// is the value reported for CL_KERNEL_WORK_GROUP_SIZE and the sub-group size
+// used for verification is the one reported for
+// CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE with that work-group size.
+//
+// Returns CL_SUCCESS without running anything when
+// type_supported<CL_INT_TYPE>(device) is false, -1 when verification fails,
+// and otherwise the last OpenCL status. The RETURN_ON_* macros are expected to
+// return early when a step fails.
+template <class CL_INT_TYPE, work_group_op op>
+int sub_group_scan_exclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t sg_max_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_sg_scan_exclusive_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_exclusive");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+    return CL_SUCCESS;
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_exclusive");
+    RETURN_ON_ERROR(err)
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Query the maximum sub-group size for a 1D NDRange of wg_size work-items
+    size_t param_value_size = 0;
+    err = clGetKernelSubGroupInfo(
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+        sizeof(size_t), static_cast<void*>(&wg_size),
+        sizeof(size_t), static_cast<void*>(&sg_max_size),
+        &param_value_size
+    );
+    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+
+    // Verify size of returned param
+    if(param_value_size != sizeof(size_t))
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
+            sizeof(size_t),
+            param_value_size
+        )
+    }
+
+    // Calculate global work size: round count up to a whole number of
+    // work-groups. Integer ceil-division avoids the round trip through double.
+    size_t wg_number = (count + wg_size - 1) / wg_size;
+    size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read: output is complete once this call returns
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    int verify_result = verify_sg_scan_exclusive<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size);
+
+    // Release the OpenCL objects before reporting the result so that a
+    // verification failure does not leak them.
+    // NOTE(review): the earlier RETURN_ON_* error paths still return without
+    // releasing what has been created so far.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+
+    if (verify_result != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "sub_group_scan_exclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("sub_group_scan_exclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+
+    return err;
+}
+
+// Runs sub_group_scan_exclusive<work_group_op::add> for cl_int, cl_uint,
+// cl_long and cl_ulong. Each status is passed to CHECK_ERROR; the statuses are
+// OR-ed together and -1 is returned when the combined value is not CL_SUCCESS.
+AUTO_TEST_CASE(test_sub_group_scan_exclusive_add)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int status_int = sub_group_scan_exclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status_int)
+
+    int status_uint = sub_group_scan_exclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status_uint)
+
+    int status_long = sub_group_scan_exclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status_long)
+
+    int status_ulong = sub_group_scan_exclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status_ulong)
+
+    int combined = CL_SUCCESS | status_int | status_uint | status_long | status_ulong;
+    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+// Runs sub_group_scan_exclusive<work_group_op::min> for cl_int, cl_uint,
+// cl_long and cl_ulong. Each status is passed to CHECK_ERROR and OR-ed into
+// `error`; -1 is returned when the combined value is not CL_SUCCESS.
+AUTO_TEST_CASE(test_sub_group_scan_exclusive_min)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    // Each status must be folded into `error`; without the accumulation the
+    // final check below could never see a failure.
+    local_error = sub_group_scan_exclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = sub_group_scan_exclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = sub_group_scan_exclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = sub_group_scan_exclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
+}
+
+// Runs sub_group_scan_exclusive<work_group_op::max> for cl_int, cl_uint,
+// cl_long and cl_ulong. Each status is passed to CHECK_ERROR; the statuses are
+// OR-ed together and -1 is returned when the combined value is not CL_SUCCESS.
+AUTO_TEST_CASE(test_sub_group_scan_exclusive_max)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int status_int = sub_group_scan_exclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status_int)
+
+    int status_uint = sub_group_scan_exclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status_uint)
+
+    int status_long = sub_group_scan_exclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status_long)
+
+    int status_ulong = sub_group_scan_exclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status_ulong)
+
+    int combined = CL_SUCCESS | status_int | status_uint | status_long | status_ulong;
+    return (combined != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP

diff --git a/test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp b/test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp
new file mode 100644
index 0000000..803daa0
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp

@@ -0,0 +1,332 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
+#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
+
+#include <vector>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+
+// Builds the OpenCL C++ source of the `test_sg_scan_inclusive` kernel.
+// The kernel reads input[global id], applies
+// sub_group_scan_inclusive<work_group_op::op> to it and stores the result in
+// output[global id]. CL_INT_TYPE selects the element type name spliced into
+// the source; `op` selects the scan operation.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_sg_scan_inclusive_kernel_code()
+{
+    // Device-side spelling of the element type, reused in three places.
+    const std::string elem_type = type_name<CL_INT_TYPE>();
+
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_sg_scan_inclusive(global_ptr<" + elem_type + "[]> input, ";
+    source += "global_ptr<" + elem_type + "[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    " + elem_type + " result = sub_group_scan_inclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+}
+
+// Host-side reference check for sub_group_scan_inclusive<work_group_op::add>.
+//
+// `in` and `out` are walked work-group by work-group (wg_size elements) and,
+// inside each work-group, sub-group by sub-group (sg_size elements). Within a
+// sub-group, the running sum of the inputs up to and including position k
+// must equal the output stored at that same position.
+//
+// Returns CL_SUCCESS when every element matches; otherwise logs the first
+// mismatch (tested type, flat element index, expected and actual value) and
+// returns -1.
+template <class CL_INT_TYPE>
+int verify_sg_scan_inclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+{
+    size_t i, j, k;
+    for (i = 0; i < in.size(); i += wg_size)
+    {
+        // The last work-group may be shorter than wg_size.
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
+        {
+            // The scan restarts at every sub-group boundary.
+            CL_INT_TYPE sum = 0;
+            // Check if all work-items in sub-group stored correct value
+            // (the last sub-group of a work-group may be shorter than sg_size)
+            for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
+            {
+                sum += in[i + j + k];
+                if (sum != out[i + j + k])
+                {
+                    // Report the tested type and the exact failing element.
+                    log_info(
+                        "sub_group_scan_inclusive_add %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j + k,
+                        static_cast<size_t>(sum),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side reference check for sub_group_scan_inclusive<work_group_op::min>.
+//
+// `in` and `out` are walked work-group by work-group (wg_size elements) and,
+// inside each work-group, sub-group by sub-group (sg_size elements). Within a
+// sub-group, the running minimum of the inputs up to and including position k
+// must equal the output stored at that same position.
+//
+// Returns CL_SUCCESS when every element matches; otherwise logs the first
+// mismatch (tested type, flat element index, expected and actual value) and
+// returns -1.
+template <class CL_INT_TYPE>
+int verify_sg_scan_inclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+{
+    size_t i, j, k;
+    for (i = 0; i < in.size(); i += wg_size)
+    {
+        // The last work-group may be shorter than wg_size.
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
+        {
+            // Identity of min: the scan restarts at every sub-group boundary.
+            CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
+            // Check if all work-items in sub-group stored correct value
+            // (the last sub-group of a work-group may be shorter than sg_size)
+            for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
+            {
+                min = std::min<CL_INT_TYPE>(min, in[i + j + k]);
+                if (min != out[i + j + k])
+                {
+                    // Report the tested type and the exact failing element.
+                    log_info(
+                        "sub_group_scan_inclusive_min %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j + k,
+                        static_cast<size_t>(min),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side reference check for sub_group_scan_inclusive<work_group_op::max>.
+//
+// `in` and `out` are walked work-group by work-group (wg_size elements) and,
+// inside each work-group, sub-group by sub-group (sg_size elements). Within a
+// sub-group, the running maximum of the inputs up to and including position k
+// must equal the output stored at that same position.
+//
+// Returns CL_SUCCESS when every element matches; otherwise logs the first
+// mismatch (tested type, flat element index, expected and actual value) and
+// returns -1.
+template <class CL_INT_TYPE>
+int verify_sg_scan_inclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+{
+    size_t i, j, k;
+    for (i = 0; i < in.size(); i += wg_size)
+    {
+        // The last work-group may be shorter than wg_size.
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
+        {
+            // Identity of max: the scan restarts at every sub-group boundary.
+            CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
+            // Check if all work-items in sub-group stored correct value
+            // (the last sub-group of a work-group may be shorter than sg_size)
+            for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
+            {
+                max = std::max<CL_INT_TYPE>(max, in[i + j + k]);
+                if (max != out[i + j + k])
+                {
+                    // Report the tested type and the exact failing element.
+                    log_info(
+                        "sub_group_scan_inclusive_max %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j + k,
+                        static_cast<size_t>(max),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Dispatches to the host-side verifier that matches the scan operation `op`.
+// Returns the verifier's result, or -1 when `op` is none of add/min/max.
+template <class CL_INT_TYPE, work_group_op op>
+int verify_sg_scan_inclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+{
+    if (op == work_group_op::add)
+    {
+        return verify_sg_scan_inclusive_add(in, out, wg_size, sg_size);
+    }
+    if (op == work_group_op::min)
+    {
+        return verify_sg_scan_inclusive_min(in, out, wg_size, sg_size);
+    }
+    if (op == work_group_op::max)
+    {
+        return verify_sg_scan_inclusive_max(in, out, wg_size, sg_size);
+    }
+    // No verifier exists for any other operation.
+    return -1;
+}
+
+// Runs one inclusive sub-group scan test for element type CL_INT_TYPE and
+// scan operation `op`.
+//
+// Steps: build the test kernel, query the kernel work-group size and the
+// maximum sub-group size for that work-group size, round `count` up to a
+// whole number of work-groups, upload generated input, run the kernel, read
+// the output back and compare it against the host-side reference.
+//
+// Returns CL_SUCCESS when the type is unsupported on `device` (test skipped)
+// or when verification passes; a non-success code otherwise.
+//
+// The OpenCL objects are released before the verification result is
+// reported, so a failed verification no longer leaks them.
+// NOTE(review): the RETURN_ON_* macros used on the earlier API-error paths
+// presumably return immediately; those paths still skip the release calls.
+template <class CL_INT_TYPE, work_group_op op>
+int sub_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t sg_max_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_sg_scan_inclusive_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+    return CL_SUCCESS;
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive");
+    RETURN_ON_ERROR(err)
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Ask for the maximum sub-group size given a 1D local size of wg_size.
+    size_t param_value_size = 0;
+    err = clGetKernelSubGroupInfo(
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+        sizeof(size_t), static_cast<void*>(&wg_size),
+        sizeof(size_t), static_cast<void*>(&sg_max_size),
+        &param_value_size
+    );
+    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+
+    // Verify size of returned param
+    if(param_value_size != sizeof(size_t))
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
+            sizeof(size_t),
+            param_value_size
+        )
+    }
+
+    // Calculate global work size (count rounded up to whole work-groups)
+    size_t flat_work_size;
+    size_t wg_number = static_cast<size_t>(
+        std::ceil(static_cast<double>(count) / wg_size)
+    );
+    flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    // Blocking write: the input is on the device once this call returns.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read: `output` holds the kernel results once this call returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // Verification only touches the host vectors, so the device objects can
+    // be released before the result is reported.
+    const int verify_status = verify_sg_scan_inclusive<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size);
+
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+
+    if (verify_status != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "sub_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("sub_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+
+    return err;
+}
+
+// Runs the inclusive sub-group scan test with work_group_op::add once per
+// tested integer type (cl_int, cl_uint, cl_long, cl_ulong).
+// Returns CL_SUCCESS when every variant returned CL_SUCCESS, -1 otherwise.
+AUTO_TEST_CASE(test_sub_group_scan_inclusive_add)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // OR-accumulated status of all variants run below.
+    int error = CL_SUCCESS;
+
+    int local_error = sub_group_scan_inclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = sub_group_scan_inclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = sub_group_scan_inclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = sub_group_scan_inclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    // Collapse whatever error bits were collected into the harness convention.
+    return (error != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+// Runs the inclusive sub-group scan test with work_group_op::min once per
+// tested integer type (cl_int, cl_uint, cl_long, cl_ulong).
+// Returns CL_SUCCESS when every variant returned CL_SUCCESS, -1 otherwise.
+AUTO_TEST_CASE(test_sub_group_scan_inclusive_min)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // OR-accumulated status of all variants run below.
+    int error = CL_SUCCESS;
+
+    int local_error = sub_group_scan_inclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = sub_group_scan_inclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = sub_group_scan_inclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = sub_group_scan_inclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    // Collapse whatever error bits were collected into the harness convention.
+    return (error != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+// Runs the inclusive sub-group scan test with work_group_op::max once per
+// tested integer type (cl_int, cl_uint, cl_long, cl_ulong).
+// Returns CL_SUCCESS when every variant returned CL_SUCCESS, -1 otherwise.
+AUTO_TEST_CASE(test_sub_group_scan_inclusive_max)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // OR-accumulated status of all variants run below.
+    int error = CL_SUCCESS;
+
+    int local_error = sub_group_scan_inclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = sub_group_scan_inclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = sub_group_scan_inclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = sub_group_scan_inclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    // Collapse whatever error bits were collected into the harness convention.
+    return (error != CL_SUCCESS) ? -1 : CL_SUCCESS;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP

diff --git a/test_conformance/clcpp/synchronization/CMakeLists.txt b/test_conformance/clcpp/synchronization/CMakeLists.txt
new file mode 100644
index 0000000..70d3637
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Name of this test module; also used as the prefix of the sources variable.
+set(MODULE_NAME CPP_SYNCHRONIZATION)
+
+# Source files of the module, stored in CPP_SYNCHRONIZATION_SOURCES.
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+)
+
+# Pull in the shared CMakeCommon.txt located two directories up.
+# NOTE(review): presumably it consumes MODULE_NAME and ${MODULE_NAME}_SOURCES
+# to define the test target -- confirm against CMakeCommon.txt.
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/synchronization/main.cpp b/test_conformance/clcpp/synchronization/main.cpp
new file mode 100644
index 0000000..b337238
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/main.cpp

@@ -0,0 +1,27 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_work_group_barrier.hpp"
+#include "test_sub_group_barrier.hpp"
+#include "named_barrier/test_spec_example.hpp"
+#include "named_barrier/test_named_barrier.hpp"
+
+// Entry point of the synchronization test binary: hands every test
+// definition held by the global autotest suite to the test harness and
+// returns the harness result as the process exit code.
+int main(int argc, const char *argv[])
+{
+    auto& registered_tests = autotest::test_suite::global_test_suite().test_defs;
+    const auto test_count = registered_tests.size();
+    return runTestHarness(argc, argv, test_count, registered_tests.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/synchronization/named_barrier/common.hpp b/test_conformance/clcpp/synchronization/named_barrier/common.hpp
new file mode 100644
index 0000000..da34dce
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/named_barrier/common.hpp

@@ -0,0 +1,171 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_COMMON_HPP
+#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_COMMON_HPP
+
+#include <vector>
+
+// Common for all OpenCL C++ tests
+#include "../../common.hpp"
+#include "../../funcs_test_utils.hpp"
+
+// Runs one named-barrier test object (TEST_CLASS) through
+// run_work_group_named_barrier_barrier_test and folds its status into the
+// caller's accumulated error.
+//
+// The expansion is a plain statement sequence that relies on names from the
+// enclosing scope: `device`, `context`, `queue`, `num_elements`, and the
+// writable variables `last_error` and `error`.
+// Because the expansion is not wrapped in a single statement, it must not be
+// used as the unbraced body of an if/else/loop.
+#define RUN_WG_NAMED_BARRIER_TEST_MACRO(TEST_CLASS) \
+    last_error = run_work_group_named_barrier_barrier_test(  \
+        device, context, queue, num_elements, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+
+namespace named_barrier {
+
+// Common interface of the work-group named-barrier tests consumed by
+// run_work_group_named_barrier_barrier_test. Concrete tests supply a name,
+// the program source and the expected output values; kernel launch and
+// work-group size selection have default implementations that may be
+// replaced.
+struct work_group_named_barrier_test_base : public detail::base_func_type<cl_uint>
+{
+    // Test name, used in the pass/fail log messages.
+    virtual std::string str() = 0;
+
+    // OpenCL program source. The program is assumed to contain exactly one
+    // kernel.
+    virtual std::string generate_program() = 0;
+
+    // Value expected in output_buffer[i].
+    virtual cl_uint operator()(size_t i, size_t work_group_size, size_t mas_sub_group_size) = 0;
+
+    // Default kernel execution for the typical case: the kernel has a single
+    // argument (the output buffer) and is enqueued once as a 1D range.
+    // Returns the status of the first failing call, or of the enqueue.
+    virtual cl_int execute(const cl_kernel kernel,
+                           const cl_mem output_buffer,
+                           const cl_command_queue& queue,
+                           const size_t work_size,
+                           const size_t work_group_size)
+    {
+        cl_int err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+
+        err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &work_size, &work_group_size, 0, NULL, NULL);
+        RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
+        return err;
+    }
+
+    // Largest usable one-dimensional work-group size: the smaller of the
+    // requested default (`work_group_size`) and the kernel's
+    // CL_KERNEL_WORK_GROUP_SIZE on `device`. The query status is stored in
+    // `error`.
+    virtual size_t get_max_local_size(const cl_kernel kernel,
+                                      const cl_device_id device,
+                                      const size_t work_group_size, // default work-group size
+                                      cl_int& error)
+    {
+        size_t kernel_limit;
+        error = clGetKernelWorkGroupInfo(
+            kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernel_limit, NULL
+        );
+        RETURN_ON_ERROR(error)
+        return (kernel_limit < work_group_size) ? kernel_limit : work_group_size;
+    }
+
+    // Whether the global size must be rounded up to whole work-groups.
+    virtual bool enforce_uniform()
+    {
+        return false;
+    }
+};
+
+// Builds and runs one named-barrier test and checks its output.
+//
+// `test` supplies the program source, the kernel name, the work-group size
+// selection, the kernel launch and the expected value of every output
+// element. `count` is the requested global size; it is rounded up to whole
+// work-groups when test.enforce_uniform() is true.
+//
+// Returns CL_SUCCESS when the test passes or is skipped (no usable
+// work-group size), a non-success code otherwise.
+//
+// The kernel, program and output buffer are released before a mismatch is
+// reported and on the skip path, so neither of those paths leaks them.
+// NOTE(review): the RETURN_ON_* macros on the API-error paths presumably
+// return immediately; those paths still skip the release calls.
+template <class work_group_named_barrier_test>
+int run_work_group_named_barrier_barrier_test(cl_device_id device, cl_context context, cl_command_queue queue,
+                                              size_t count, work_group_named_barrier_test test)
+{
+    cl_mem buffers[1];
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_group_size;
+    size_t work_size[1];
+    cl_int err;
+
+    std::string code_str = test.generate_program();
+    std::string kernel_name = test.get_kernel_name();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+#endif
+
+    // Find the max possible wg size for among all the kernels
+    work_group_size = test.get_max_local_size(kernel, device, 256, err);
+    RETURN_ON_ERROR(err);
+    if(work_group_size == 0)
+    {
+        log_info("SKIPPED: Can't produce local size with enough sub-groups. Skipping tests.\n");
+        // The kernel and program were created successfully above; release
+        // them before leaving.
+        clReleaseKernel(kernel);
+        clReleaseProgram(program);
+        return CL_SUCCESS;
+    }
+
+    work_size[0] = count;
+    // uniform work-group
+    if(test.enforce_uniform())
+    {
+        size_t wg_number = static_cast<size_t>(
+            std::ceil(static_cast<double>(work_size[0]) / work_group_size)
+        );
+        work_size[0] = wg_number * work_group_size;
+    }
+
+    // host output vector
+    std::vector<cl_uint> output = generate_output<cl_uint>(work_size[0], 9999);
+
+    // device output buffer
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    // Execute test kernels
+    err = test.execute(kernel, buffers[0], queue, work_size[0], work_group_size);
+    RETURN_ON_ERROR(err)
+
+    // Blocking read: `output` holds the kernel results once this returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer")
+
+    // Check output values. Stop at the first mismatch, but defer reporting
+    // until the OpenCL objects have been released.
+    bool mismatch = false;
+    cl_uint expected = 0;
+    cl_uint actual = 0;
+    for(size_t i = 0; i < output.size(); i++)
+    {
+        // NOTE(review): the third parameter of operator() is named as a max
+        // sub-group size, yet the element index is passed here -- confirm.
+        cl_uint v = test(i, work_group_size, i);
+        if(!(are_equal(v, output[i], ::detail::make_value<cl_uint>(0), test)))
+        {
+            mismatch = true;
+            expected = v;
+            actual = output[i];
+            break;
+        }
+    }
+
+    clReleaseMemObject(buffers[0]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+
+    if(mismatch)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "test_%s(%s) failed. Expected: %s, got: %s", test.str().c_str(), type_name<cl_uint>().c_str(),
+            format_value(expected).c_str(), format_value(actual).c_str()
+        );
+    }
+    log_info("test_%s(%s) passed\n", test.str().c_str(), type_name<cl_uint>().c_str());
+
+    return err;
+}
+
+} // namespace named_barrier
+
+#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_COMMON_HPP

diff --git a/test_conformance/clcpp/synchronization/named_barrier/test_named_barrier.hpp b/test_conformance/clcpp/synchronization/named_barrier/test_named_barrier.hpp
new file mode 100644
index 0000000..a4f9a04
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/named_barrier/test_named_barrier.hpp

@@ -0,0 +1,490 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_NAMED_BARRIER_HPP
+#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_NAMED_BARRIER_HPP
+
+#include "common.hpp"
+
+namespace named_barrier {
+
+// Named-barrier test using mem_fence::local.
+//
+// Every work-item writes a value into another work-item's slot of a local
+// memory array, waits on a work_group_named_barrier, then reads its own slot
+// and stores it in the output buffer. The expected value of output[i] is i.
+//
+// The kernel takes two arguments: the output buffer (index 0) and a local
+// memory array with one uint per work-item of the work-group (index 1).
+struct local_fence_named_barrier_test : public work_group_named_barrier_test_base
+{
+    // Test name, used in log messages.
+    std::string str()
+    {
+        return "local_fence";
+    }
+
+    // Return value that is expected to be in output_buffer[i]
+    cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size)
+    {
+        return static_cast<cl_uint>(i);
+    }
+
+    // At the end every work-item writes its global id to output[work-item-global-id].
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + this->get_kernel_name() + "(global uint *output, "
+                                                              "local uint * lmem)\n"
+                "{\n"
+                "  size_t gid = get_global_id(0);\n"
+                "  output[gid] = gid;\n"
+                "}\n";
+
+        #else
+            return
+                "#define cl_khr_subgroup_named_barrier\n"
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_synchronization>\n"
+                "using namespace cl;\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output, "
+                                                              "local_ptr<uint[]> lmem)\n"
+                "{\n\n"
+                "  local<work_group_named_barrier> a(1);\n"
+                "  local<work_group_named_barrier> b(2);\n"
+                "  size_t gid = get_global_id(0);\n"
+                "  size_t lid = get_local_id(0);\n"
+                "  size_t value;\n"
+                "  if(get_num_sub_groups() == 1)\n"
+                "  {\n"
+                "    size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n"
+                "    size_t other_gid = (gid - lid) + other_lid;\n"
+                "    lmem[other_lid] = other_gid;\n"
+                "    a.wait(mem_fence::local);\n"
+                "    value = lmem[lid];" // lmem[lid] should be equal to gid
+                "  }\n"
+                "  else if(get_num_sub_groups() == 2)\n"
+                "  {\n"
+                "    size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n"
+                "    size_t other_gid = (gid - lid) + other_lid;\n"
+                "    lmem[other_lid] = other_gid;\n"
+                "    b.wait(mem_fence::local);\n"
+                "    value = lmem[lid];" // lmem[lid] should be equal to gid
+                "  }\n"
+                "  else if(get_num_sub_groups() > 2)\n"
+                "  {\n"
+                "    if(get_sub_group_id() < 2)\n"
+                "    {\n"
+                "      const size_t two_first_subgroups = 2 * get_max_sub_group_size();"
+                       // local and global id of some work-item outside of work-item subgroup,
+                       // but within subgroups 0 and 1.
+                "      size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n"
+                "      size_t other_gid = (gid - lid) + other_lid;\n"
+                "      lmem[other_lid] = other_gid;\n"
+                "      b.wait(mem_fence::local);\n" // subgroup 0 and 1 are sync (local)
+                "      value = lmem[lid];" // lmem[lid] should be equal to gid
+                "    }\n"
+                "    else\n"
+                "    {\n"
+                "      value = gid;\n"
+                "    }\n"
+                "  }\n"
+                "  output[gid] = value;\n"
+                "}\n";
+        #endif
+    }
+
+    // Picks the work-group size for this kernel.
+    // Starting from the default `work_group_size` and halving while the
+    // candidate is greater than 1, sets the local-memory argument to
+    // `candidate * sizeof(cl_uint)` bytes and returns the first candidate
+    // that does not exceed the kernel's CL_KERNEL_WORK_GROUP_SIZE.
+    // The status of the last OpenCL call is stored in `error`.
+    size_t get_max_local_size(const cl_kernel kernel,
+                              const cl_device_id device,
+                              const size_t work_group_size, // default work-group size
+                              cl_int& error)
+    {
+        // Set size of the local memory, we need to do this to correctly calculate
+        // max possible work-group size.
+        size_t wg_size;
+        for(wg_size = work_group_size; wg_size > 1; wg_size /= 2)
+        {
+            error = clSetKernelArg(kernel, 1, wg_size * sizeof(cl_uint), NULL);
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+            size_t max_wg_size;
+            error = clGetKernelWorkGroupInfo(
+                kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL
+            );
+            RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+            if(max_wg_size >= wg_size) break;
+        }
+        return wg_size;
+    }
+
+    // Sets both kernel arguments (output buffer and a local array of
+    // `work_group_size` uints), enqueues the kernel as a 1D range and waits
+    // for the queue to finish. Returns the status of the first failing call,
+    // or of clFinish.
+    //
+    // The queue is taken by const reference so that the signature matches the
+    // virtual execute() declared in work_group_named_barrier_test_base and
+    // this function overrides it instead of merely hiding it.
+    cl_int execute(const cl_kernel kernel,
+                   const cl_mem output_buffer,
+                   const cl_command_queue& queue,
+                   const size_t work_size,
+                   const size_t work_group_size)
+    {
+        cl_int err;
+
+        err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+        err |= clSetKernelArg(kernel, 1, work_group_size * sizeof(cl_uint), NULL);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+
+        err = clEnqueueNDRangeKernel(
+            queue, kernel, 1,
+            NULL, &work_size, &work_group_size,
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
+
+        err = clFinish(queue);
+        return err;
+    }
+};
+
+struct global_fence_named_barrier_test : public work_group_named_barrier_test_base
+{
+    std::string str()
+    {
+        return "global_fence";
+    }
+
+    // Return value that is expected to be in output_buffer[i]
+    cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size)
+    {
+        return static_cast<cl_uint>(i % work_group_size);
+    }
+
+    // At the end every work-item writes its local id to ouput[work-item-global-id].
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + this->get_kernel_name() + "(global uint * output, "
+                                                              "global uint * temp)\n"
+                "{\n"
+                "size_t gid = get_global_id(0);\n"
+                "output[gid] = get_local_id(0);\n"
+                "}\n";
+
+        #else
+            return
+                "#define cl_khr_subgroup_named_barrier\n"
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_synchronization>\n"
+                "using namespace cl;\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output, "
+                                                              "global_ptr<uint[]> temp)\n"
+                "{\n\n"
+                "  local<work_group_named_barrier> a(1);\n"
+                "  local<work_group_named_barrier> b(2);\n"
+                "  size_t gid = get_global_id(0);\n"
+                "  size_t lid = get_local_id(0);\n"
+                "  size_t value;\n"
+                "  if(get_num_sub_groups() == 1)\n"
+                "  {\n"
+                "    size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n"
+                "    size_t other_gid = (gid - lid) + other_lid;\n"
+                "    temp[other_gid] = other_lid + 1;\n"
+                "    a.wait(mem_fence::global);\n"
+                "    size_t other_lid_same_subgroup = (lid + 2) % get_sub_group_size();\n"
+                "    size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n"
+                "    temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n"
+                "    a.wait(mem_fence::global, memory_scope_sub_group);\n"
+                "    value = temp[gid];" // temp[gid] shoule be equal to lid
+                "  }\n"
+                "  else if(get_num_sub_groups() == 2)\n"
+                "  {\n"
+                "    size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n"
+                "    size_t other_gid = (gid - lid) + other_lid;\n"
+                "    temp[other_gid] = other_lid + 1;\n"
+                "    b.wait(mem_fence::global);\n" // both subgroups wait, both are sync
+                "    size_t other_lid_same_subgroup = "
+                       "((lid + 1) % get_sub_group_size()) + (get_sub_group_id() * get_sub_group_size());\n"
+                "    size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n"
+                "    temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n"
+                "    b.wait(mem_fence::global, memory_scope_sub_group);\n"  // both subgroups wait, sync only within subgroup
+                "    value = temp[gid];" // temp[gid] shoule be equal to lid
+                "  }\n"
+                "  else if(get_num_sub_groups() > 2)\n"
+                "  {\n"
+                "    if(get_sub_group_id() < 2)\n"
+                "    {\n"
+                "      const size_t two_first_subgroups = 2 * get_max_sub_group_size();"
+                       // local and global id of some work-item outside of work-item subgroup,
+                       // but within subgroups 0 and 1.
+                "      size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n"
+                "      size_t other_gid = (gid - lid) + other_lid;\n"
+                "      temp[other_gid] = other_lid + 1;\n"
+                "      b.wait(mem_fence::global);\n" // both subgroups wait, both are sync
+                       // local and global id of some other work-item within work-item subgroup
+                "      size_t other_lid_same_subgroup = "
+                         "((lid + 1) % get_sub_group_size()) + (get_sub_group_id() * get_sub_group_size());\n"
+                "      size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n"
+                "      temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n"
+                "      b.wait(mem_fence::global, memory_scope_sub_group);\n" // both subgroups wait, sync only within subgroup
+                "      value = temp[gid];" // temp[gid] shoule be equal to lid
+                "    }\n"
+                "    else\n"
+                "    {\n"
+                "      value = lid;\n"
+                "    }\n"
+                "  }\n"
+                "  output[gid] = value;\n"
+                "}\n";
+        #endif
+    }
+
+    // Picks the local size for this test's kernel: the requested default
+    // work_group_size, capped by the CL_KERNEL_WORK_GROUP_SIZE value that
+    // clGetKernelWorkGroupInfo reports for (kernel, device). The status of
+    // that query is stored in `error`.
+    size_t get_max_local_size(const cl_kernel kernel,
+                              const cl_device_id device,
+                              const size_t work_group_size, // default work-group size
+                              cl_int& error)
+    {
+        size_t kernel_wg_limit;
+        error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
+                                         sizeof(kernel_wg_limit), &kernel_wg_limit, NULL);
+        RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+
+        // Smaller of the two values, spelled out instead of (std::min)
+        if(work_group_size < kernel_wg_limit)
+        {
+            return work_group_size;
+        }
+        return kernel_wg_limit;
+    }
+
+    // Runs the kernel once over work_size work-items using work_group_size as
+    // the local size. A scratch buffer (one cl_uint per work-item) is created in
+    // the queue's context and bound as kernel argument 1; output_buffer is bound
+    // as argument 0. Returns the first failing OpenCL status, or the combined
+    // status of clFinish and clReleaseMemObject. The scratch buffer is released
+    // on every path taken after it has been created.
+    cl_int execute(const cl_kernel kernel,
+                   const cl_mem output_buffer,
+                   const cl_command_queue queue,
+                   const size_t work_size,
+                   const size_t work_group_size)
+    {
+        cl_int err;
+        // Get context from queue
+        cl_context context;
+        err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
+        RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo")
+
+        // create temp buffer
+        auto temp_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * work_size, NULL, &err);
+        RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+        err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+        err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer);
+        // RETURN_ON_CL_ERROR is expected to leave the function on failure, so
+        // the scratch buffer has to be released before the macro is reached.
+        if(err != CL_SUCCESS) clReleaseMemObject(temp_buffer);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+
+        err = clEnqueueNDRangeKernel(
+            queue, kernel, 1,
+            NULL, &work_size, &work_group_size,
+            0, NULL, NULL
+        );
+        // Same as above: do not leak the scratch buffer when the enqueue fails.
+        if(err != CL_SUCCESS) clReleaseMemObject(temp_buffer);
+        RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
+
+        err = clFinish(queue);
+        err |= clReleaseMemObject(temp_buffer);
+
+        return err;
+    }
+};
+
+// Named-barrier test case for wait(mem_fence::local | mem_fence::global).
+// Every participating work-item writes a value into local memory (lmem) and
+// into global memory (temp) on behalf of another work-item, waits on a named
+// barrier, and then reads back the values that were written for itself.
+struct global_local_fence_named_barrier_test : public work_group_named_barrier_test_base
+{
+    // Returns the string "global_local_fence" that names this test case.
+    std::string str()
+    {
+        return "global_local_fence";
+    }
+
+    // Return value that is expected to be in output_buffer[i]
+    cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size)
+    {
+        return static_cast<cl_uint>(i % work_group_size);
+    }
+
+    // Builds the kernel source for this test case.
+    // At the end every work-item writes its local id to output[work-item-global-id].
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + this->get_kernel_name() + "(global uint * output, "
+                                                              "global uint * temp,"
+                                                              "local uint * lmem)\n"
+                "{\n"
+                "size_t gid = get_global_id(0);\n"
+                "output[gid] = get_local_id(0);\n"
+                "}\n";
+
+        #else
+            return
+                "#define cl_khr_subgroup_named_barrier\n"
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_synchronization>\n"
+                "using namespace cl;\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output, "
+                                                              "global_ptr<uint[]> temp,"
+                                                              "local_ptr<uint[]> lmem)\n"
+                "{\n\n"
+                "  local<work_group_named_barrier> a(1);\n"
+                "  local<work_group_named_barrier> b(2);\n"
+                "  size_t gid = get_global_id(0);\n"
+                "  size_t lid = get_local_id(0);\n"
+                "  size_t value = 0;\n"
+                // One subgroup: pair each work-item with its neighbour (lid + 1).
+                "  if(get_num_sub_groups() == 1)\n"
+                "  {\n"
+                "    size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n"
+                "    size_t other_gid = (gid - lid) + other_lid;\n"
+                "    lmem[other_lid] = other_gid;\n"
+                "    temp[other_gid] = other_lid;\n"
+                "    a.wait(mem_fence::local | mem_fence::global);\n"
+                "    if(lmem[lid] == gid) value = temp[gid];\n"
+                "  }\n"
+                // Two subgroups: pair each work-item with one that is
+                // get_max_sub_group_size() local ids further on.
+                "  else if(get_num_sub_groups() == 2)\n"
+                "  {\n"
+                "    size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n"
+                "    size_t other_gid = (gid - lid) + other_lid;\n"
+                "    lmem[other_lid] = other_gid;\n"
+                "    temp[other_gid] = other_lid;\n"
+                "    b.wait(mem_fence::local | mem_fence::global);\n"
+                "    if(lmem[lid] == gid) value = temp[gid];\n"
+                "  }\n"
+                // More than two subgroups: only subgroups 0 and 1 use the
+                // barrier, the remaining ones just report their local id.
+                "  else if(get_num_sub_groups() > 2)\n"
+                "  {\n"
+                "    if(get_sub_group_id() < 2)\n"
+                "    {\n"
+                "      const size_t two_first_subgroups = 2 * get_max_sub_group_size();"
+                       // local and global id of some work-item outside of work-item subgroup,
+                       // but within subgroups 0 and 1.
+                "      size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n"
+                "      size_t other_gid = (gid - lid) + other_lid;\n"
+                "      lmem[other_lid] = other_gid;\n"
+                "      temp[other_gid] = other_lid;\n"
+                "      b.wait(mem_fence::local | mem_fence::global);\n"
+                "      if(lmem[lid] == gid) value = temp[gid];\n"
+                "    }\n"
+                "    else\n"
+                "    {\n"
+                "      value = lid;\n"
+                "    }\n"
+                "  }\n"
+                "  output[gid] = value;\n"
+                "}\n";
+        #endif
+    }
+
+    // Finds a local size, starting at work_group_size and halving, for which the
+    // kernel's CL_KERNEL_WORK_GROUP_SIZE is still large enough once the local
+    // memory argument (index 2, one cl_uint per work-item) has been set.
+    // The status of the last OpenCL call is stored in `error`.
+    size_t get_max_local_size(const cl_kernel kernel,
+                              const cl_device_id device,
+                              const size_t work_group_size, // default work-group size
+                              cl_int& error)
+    {
+        // Set size of the local memory, we need to do this to correctly calculate
+        // max possible work-group size.
+        size_t wg_size;
+        for(wg_size = work_group_size; wg_size > 1; wg_size /= 2)
+        {
+            error = clSetKernelArg(kernel, 2, wg_size * sizeof(cl_uint), NULL);
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+            size_t max_wg_size;
+            error = clGetKernelWorkGroupInfo(
+                kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL
+            );
+            RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+            if(max_wg_size >= wg_size) break;
+        }
+        return wg_size;
+    }
+
+    // Runs the kernel once over work_size work-items using work_group_size as
+    // the local size. Binds output_buffer (arg 0), a freshly created scratch
+    // buffer with one cl_uint per work-item (arg 1) and work_group_size cl_uint
+    // of local memory (arg 2). Returns the first failing OpenCL status, or the
+    // combined status of clFinish and clReleaseMemObject. The scratch buffer is
+    // released on every path taken after it has been created.
+    cl_int execute(const cl_kernel kernel,
+                   const cl_mem output_buffer,
+                   const cl_command_queue queue,
+                   const size_t work_size,
+                   const size_t work_group_size)
+    {
+        cl_int err;
+        // Get context from queue
+        cl_context context;
+        err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
+        RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo")
+
+        // create temp buffer
+        auto temp_buffer = clCreateBuffer(
+            context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+            sizeof(cl_uint) * work_size, NULL, &err
+        );
+        RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+        err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+        err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer);
+        err |= clSetKernelArg(kernel, 2, work_group_size * sizeof(cl_uint), NULL);
+        // RETURN_ON_CL_ERROR is expected to leave the function on failure, so
+        // the scratch buffer has to be released before the macro is reached.
+        if(err != CL_SUCCESS) clReleaseMemObject(temp_buffer);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+
+        err = clEnqueueNDRangeKernel(
+            queue, kernel, 1,
+            NULL, &work_size, &work_group_size,
+            0, NULL, NULL
+        );
+        // Same as above: do not leak the scratch buffer when the enqueue fails.
+        if(err != CL_SUCCESS) clReleaseMemObject(temp_buffer);
+        RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
+
+        err = clFinish(queue);
+        err |= clReleaseMemObject(temp_buffer);
+
+        return err;
+    }
+};
+
+// ------------------------------------------------------------------------------
+// -------------------------- RUN TESTS -----------------------------------------
+// ------------------------------------------------------------------------------
+// Entry point for the work-group named barrier tests: runs the local-fence,
+// global-fence and combined local|global-fence cases one after another.
+// Outside of the development-only build configurations the test is skipped
+// when cl_khr_subgroup_named_barrier is not available, and it fails when the
+// device reports fewer than 8 named barriers.
+AUTO_TEST_CASE(test_work_group_named_barrier)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+#if !defined(DEVELOPMENT) || !(defined(USE_OPENCLC_KERNELS) || defined(ONLY_SPIRV_COMPILATION))
+    const bool has_named_barriers = is_extension_available(device, "cl_khr_subgroup_named_barrier");
+    if(!has_named_barriers)
+    {
+        log_info("SKIPPED: Extension `cl_khr_subgroup_named_barrier` is not supported. Skipping tests.\n");
+        return CL_SUCCESS;
+    }
+
+    // The OpenCL 2.2 Extension Specification requires support for at least 8
+    // named barriers per work-group; the device's actual limit is read through
+    // clGetDeviceInfo(CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR).
+    const cl_uint min_named_barriers = 8;
+    cl_uint max_named_barriers;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR,
+                            sizeof(max_named_barriers), &max_named_barriers, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+
+    if(max_named_barriers < min_named_barriers)
+    {
+        RETURN_ON_ERROR_MSG(-1, "Maximum number of named barriers must be at least 8.");
+    }
+#endif
+
+    RUN_WG_NAMED_BARRIER_TEST_MACRO(local_fence_named_barrier_test())
+    RUN_WG_NAMED_BARRIER_TEST_MACRO(global_fence_named_barrier_test())
+    RUN_WG_NAMED_BARRIER_TEST_MACRO(global_local_fence_named_barrier_test())
+
+    // Any failure recorded in `error` is reported as -1.
+    return (error == CL_SUCCESS) ? CL_SUCCESS : -1;
+}
+
+} // namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_NAMED_BARRIER_HPP

diff --git a/test_conformance/clcpp/synchronization/named_barrier/test_spec_example.hpp b/test_conformance/clcpp/synchronization/named_barrier/test_spec_example.hpp
new file mode 100644
index 0000000..c80ab71
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/named_barrier/test_spec_example.hpp

@@ -0,0 +1,323 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_SPEC_EXAMPLE_HPP
+#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_SPEC_EXAMPLE_HPP
+
+#include "common.hpp"
+
+namespace named_barrier {
+
+// ------------------------------------------------------------------------------
+// ----------------------- SPECIFICATION EXAMPLE TEST----------------------------
+// ------------------------------------------------------------------------------
+// This test is based on the example in OpenCL C++ 1.0 specification (OpenCL C++
+// Standard Library > Synchronization Functions > Named barriers > wait).
+// Test case built around the named-barrier example from the OpenCL C++
+// specification. Three named barriers (a: 4 subgroups, b and c: 2 subgroups
+// each) are used by the first four subgroups of a work-group to exchange
+// values through local and global memory.
+struct spec_example_work_group_named_barrier_test : public work_group_named_barrier_test_base
+{
+    // Returns the string "spec_example" that names this test case.
+    std::string str()
+    {
+        return "spec_example";
+    }
+
+    // Return value that is expected to be in output_buffer[i]
+    cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size)
+    {
+        return static_cast<cl_uint>(i);
+    }
+
+    // Builds the kernel source for this test case.
+    // At the end every work-item writes its global id to output[work-item-global-id].
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                // In OpenCL C kernel we imitate subgroups by partitioning work-group (based on
+                // local ids of work-items), work_group_named_barrier.wait(..) calls are replaced
+                // with work_group_barriers.
+                "__kernel void " + this->get_kernel_name() + "(global uint *output, "
+                                                              "global uint * temp, "
+                                                              "local uint * lmem)\n"
+                "{\n"
+                "size_t gid = get_global_id(0);\n"
+                "size_t lid = get_local_id(0);\n"
+
+                // We divide work-group into ranges:
+                // [0; e_wg)[e_wg; q_wg)[q_wg; 3 * e_wg)[3 * e_wg; h_wg)[h_wg; get_local_size(0) - 1]
+                // to simulate 8 subgroups
+                "size_t h_wg = get_local_size(0) / 2;\n" // half of work-group
+                "size_t q_wg = get_local_size(0) / 4;\n" // quarter
+                "size_t e_wg = get_local_size(0) / 8;\n" // one-eighth
+
+                "if(lid < h_wg) lmem[lid] = gid;\n" // [0; h_wg)
+                "else           temp[gid] = gid;\n" // [h_wg; get_local_size(0))
+                "work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
+
+                "size_t other_lid = (lid + q_wg) % h_wg;\n"
+                "size_t value = 0;\n"
+                "if(lmem[other_lid] == ((gid - lid) + other_lid)){\n"
+                "     value = gid;\n"
+                "}\n"
+                "work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
+
+                "if(lid < q_wg){\n" // [0; q_wg)
+                "    if(lid < e_wg) lmem[lid + e_wg] = gid;\n" // [0; e_wg)
+                "    else           lmem[lid - e_wg] = gid;\n" // [e_wg; q_wg)
+                "}\n"
+                "else if(lid < h_wg) {\n" // [q_wg; h_wg)
+                "    if(lid < (3 * e_wg)) lmem[lid + e_wg] = gid;\n" // [q_wg; q_wg + e_wg)
+                "    else                 lmem[lid - e_wg] = gid;\n" // [q_wg + e_wg; h_wg)
+                "}\n"
+                "work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
+
+                "if(lid < q_wg){\n" // [0; q_wg)
+                "    output[gid + q_wg] = lmem[lid];\n"
+                "}\n"
+                "else if(lid < h_wg) {\n" // [q_wg; h_wg)
+                "    output[gid - q_wg] = lmem[lid];\n"
+                "}\n"
+                "work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n"
+
+                "if(lid < q_wg){\n" // [0; q_wg)
+                "    if(lid < e_wg) temp[gid] = output[gid + (3 * e_wg)];\n" // [0; e_wg)
+                "    else           temp[gid] = output[gid + e_wg];\n" // [e_wg; q_wg)
+                "}\n"
+                "else if(lid < h_wg) {\n" // [q_wg; h_wg)
+                "    if(lid < (3 * e_wg)) temp[gid] = output[gid - e_wg];\n"  // [q_wg; q_wg + e_wg)
+                "    else                 temp[gid] = output[gid - (3 * e_wg)];\n"  // [q_wg + e_wg; h_wg)
+                "}\n"
+                "work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n"
+
+                "output[gid] = temp[gid];\n"
+                "}\n";
+
+        #else
+            return
+                "#define cl_khr_subgroup_named_barrier\n"
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_synchronization>\n"
+                "using namespace cl;\n"
+
+                "void b_function(work_group_named_barrier &b, size_t value, local_ptr<uint[]> lmem)\n"
+                "{\n\n"
+                "size_t lid = get_local_id(0);\n"
+                // Work-items from the 1st subgroup write to local memory that will be
+                // later read by the 0th subgroup, and the other way around - 0th subgroup
+                // writes what 1st subgroup will later read.
+                // b.wait(mem_fence::local) should provide sync between those two subgroups.
+                "if(get_sub_group_id() < 1) lmem[lid + get_max_sub_group_size()] = value;\n"
+                "else                       lmem[lid - get_max_sub_group_size()] = value;\n"
+                "b.wait(mem_fence::local);\n\n" // sync writes to lmem for 2 subgroups (ids: 0, 1)
+                "}\n"
+
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output, "
+                                                              "global_ptr<uint[]> temp, "
+                                                              "local_ptr<uint[]> lmem)\n"
+                "{\n\n"
+                "local<work_group_named_barrier> a(4);\n"
+                "local<work_group_named_barrier> b(2);\n"
+                "local<work_group_named_barrier> c(2);\n"
+
+                "size_t gid = get_global_id(0);\n"
+                "size_t lid = get_local_id(0);\n"
+                "if(get_sub_group_id() < 4)"
+                "{\n"
+                "    lmem[lid] = gid;\n"
+                "    a.wait(mem_fence::local);\n" // sync writes to lmem for 4 subgroups (ids: 0, 1, 2, 3)
+                     // Now all four subgroups should see changes in lmem.
+                "    size_t other_lid = (lid + (2 * get_max_sub_group_size())) % (4 * get_max_sub_group_size());\n"
+                "    size_t value = 0;\n"
+                "    if(lmem[other_lid] == ((gid - lid) + other_lid)){\n"
+                "        value = gid;\n"
+                "    }\n"
+                "    a.wait(mem_fence::local);\n" // sync reads from lmem for 4 subgroups (ids: 0, 1, 2, 3)
+
+                "    if(get_sub_group_id() < 2)" // ids: 0, 1
+                "    {\n"
+                "        b_function(b, value, lmem);\n"
+                "    }\n"
+                "    else" // ids: 2, 3
+                "    {\n"
+                         // Work-items from the 2nd subgroup write to local memory that will be
+                         // later read by the 3rd subgroup, and the other way around - 3rd subgroup
+                         // writes what 2nd subgroup will later read.
+                         // c.wait(mem_fence::local) should provide sync between those two subgroups.
+                "        if(get_sub_group_id() < 3) lmem[lid + get_max_sub_group_size()] = value ;\n"
+                "        else                       lmem[lid - get_max_sub_group_size()] = value;\n"
+                "        c.wait(mem_fence::local);\n" // sync writes to lmem for 2 subgroups (ids: 2, 3)
+                "    }\n"
+
+                     // Now (0, 1) are in sync (local mem), and (2, 3) are in sync (local mem).
+                     // However, subgroups (0, 1) are not in sync with (2, 3).
+                "    if(get_sub_group_id() < 4) {\n" // ids: 0, 1, 2, 3
+                "        if(get_sub_group_id() < 2) output[gid + (2 * get_max_sub_group_size())] = lmem[lid];\n"
+                "        else                       output[gid - (2 * get_max_sub_group_size())] = lmem[lid];\n"
+                "        a.wait(mem_fence::global);\n" // sync writes to global memory (output)
+                                                       // for 4 subgroups (0, 1, 2, 3)
+                "    }\n"
+                "}\n"
+                "else {\n" // subgroups with id >= 4
+                "    temp[gid] = gid;\n"
+                "}\n"
+
+                // Now (0, 1, 2, 3) are in sync (global mem)
+                "if(get_sub_group_id() < 2) {\n"
+                "    if(get_sub_group_id() < 1) temp[gid] = output[gid + (3 * get_max_sub_group_size())];\n"
+                "    else                       temp[gid] = output[gid + (get_max_sub_group_size())];\n"
+                "}\n"
+                "else if(get_sub_group_id() < 4) {\n"
+                "    if(get_sub_group_id() < 3) temp[gid] = output[gid - (get_max_sub_group_size())];\n"
+                "    else                       temp[gid] = output[gid - (3 * get_max_sub_group_size())];\n"
+                "}\n"
+
+                // Synchronize the entire work-group (in terms of accesses to global memory)
+                "work_group_barrier(mem_fence::global);\n"
+                "output[gid] = temp[gid];\n\n"
+                "}\n";
+        #endif
+    }
+
+    // Chooses the local size for this test's kernel.
+    // First the default work_group_size is halved until the kernel's
+    // CL_KERNEL_WORK_GROUP_SIZE accepts it with the local memory argument set.
+    // In the development OpenCL C build the result is rounded down to a multiple
+    // of 8. Otherwise the size is further limited to the local size reported for
+    // 8 subgroups, and 0 is returned when no one-dimensional local size with 8
+    // subgroups is reported. The status of the last OpenCL call is stored in
+    // `error`.
+    size_t get_max_local_size(const cl_kernel kernel,
+                              const cl_device_id device,
+                              const size_t work_group_size, // default work-group size
+                              cl_int& error)
+    {
+        // Set size of the local memory, we need to do this to correctly calculate
+        // max possible work-group size.
+        size_t wg_size;
+        for(wg_size = work_group_size; wg_size > 1; wg_size /= 2)
+        {
+            error = clSetKernelArg(kernel, 2, ((wg_size / 2) + 1) * sizeof(cl_uint), NULL);
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+            size_t max_wg_size;
+            error = clGetKernelWorkGroupInfo(
+                kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL
+            );
+            // Report the failing call by name, like the other OpenCL calls here.
+            RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+            if(max_wg_size >= wg_size) break;
+        }
+
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            // make sure wg_size is a multiple of 8
+            if(wg_size % 8 > 0) wg_size -= (wg_size % 8);
+            return wg_size;
+        #else
+            // make sure that wg_size will produce at least min_num_sub_groups
+            // subgroups in each work-group
+            size_t local_size[3] = { 1, 1, 1 };
+            const size_t min_num_sub_groups = 8;
+            error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
+                                            sizeof(size_t), &min_num_sub_groups,
+                                            sizeof(size_t) * 3, &local_size, NULL);
+            RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+            // No usable one-dimensional local size for 8 subgroups was reported.
+            if (local_size[0] == 0 || local_size[1] != 1 || local_size[2] != 1)
+            {
+                return 0;
+            }
+            local_size[0] = (std::min)(wg_size, local_size[0]);
+
+            // double-check
+            size_t sub_group_count_for_ndrange;
+            error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
+                                            sizeof(size_t) * 3, local_size,
+                                            sizeof(size_t), &sub_group_count_for_ndrange, NULL);
+            RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+            if (sub_group_count_for_ndrange < min_num_sub_groups)
+            {
+                // %zu is the printf length modifier that matches size_t
+                RETURN_ON_ERROR_MSG(-1,
+                    "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE did not return correct value (expected >=%zu, got %zu)",
+                    min_num_sub_groups, sub_group_count_for_ndrange
+                )
+            }
+
+            return local_size[0];
+        #endif
+    }
+
+    // Runs the kernel once over work_size work-items using work_group_size as
+    // the local size. Binds output_buffer (arg 0), a freshly created scratch
+    // buffer with one cl_uint per work-item (arg 1) and work_group_size cl_uint
+    // of local memory (arg 2). Returns the first failing OpenCL status, or the
+    // combined status of clFinish and clReleaseMemObject. The scratch buffer is
+    // released on every path taken after it has been created.
+    cl_int execute(const cl_kernel kernel,
+                   const cl_mem output_buffer,
+                   const cl_command_queue queue,
+                   const size_t work_size,
+                   const size_t work_group_size)
+    {
+        cl_int err;
+        // Get context from queue
+        cl_context context;
+        err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
+        RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo")
+
+        // create temp buffer
+        auto temp_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * work_size, NULL, &err);
+        RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+        err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+        err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer);
+        // NOTE(review): get_max_local_size() probes the kernel with
+        // ((wg_size / 2) + 1) cl_uint of local memory, while work_group_size
+        // cl_uint are requested here - confirm the larger request cannot lower
+        // the kernel's maximum work-group size.
+        err |= clSetKernelArg(kernel, 2, work_group_size * sizeof(cl_uint), NULL);
+        // RETURN_ON_CL_ERROR is expected to leave the function on failure, so
+        // the scratch buffer has to be released before the macro is reached.
+        if(err != CL_SUCCESS) clReleaseMemObject(temp_buffer);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+
+        err = clEnqueueNDRangeKernel(
+            queue, kernel, 1,
+            NULL, &work_size, &work_group_size,
+            0, NULL, NULL
+        );
+        // Same as above: do not leak the scratch buffer when the enqueue fails.
+        if(err != CL_SUCCESS) clReleaseMemObject(temp_buffer);
+        RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
+
+        err = clFinish(queue);
+        err |= clReleaseMemObject(temp_buffer);
+
+        return err;
+    }
+};
+
+// ------------------------------------------------------------------------------
+// -------------------------- RUN TESTS -----------------------------------------
+// ------------------------------------------------------------------------------
+// Entry point for the specification-example named barrier test. Outside of
+// the development-only build configurations the test is skipped when
+// cl_khr_subgroup_named_barrier is not available.
+AUTO_TEST_CASE(test_work_group_named_barrier_spec_example)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+#if !defined(DEVELOPMENT) || !(defined(USE_OPENCLC_KERNELS) || defined(ONLY_SPIRV_COMPILATION))
+    const bool has_named_barriers = is_extension_available(device, "cl_khr_subgroup_named_barrier");
+    if(!has_named_barriers)
+    {
+        log_info("SKIPPED: Extension `cl_khr_subgroup_named_barrier` is not supported. Skipping tests.\n");
+        return CL_SUCCESS;
+    }
+#endif
+
+    RUN_WG_NAMED_BARRIER_TEST_MACRO(spec_example_work_group_named_barrier_test())
+
+    // Any failure recorded in `error` is reported as -1.
+    return (error == CL_SUCCESS) ? CL_SUCCESS : -1;
+}
+
+} // namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_SPEC_EXAMPLE_HPP

diff --git a/test_conformance/clcpp/synchronization/test_sub_group_barrier.hpp b/test_conformance/clcpp/synchronization/test_sub_group_barrier.hpp
new file mode 100644
index 0000000..c7074ed
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/test_sub_group_barrier.hpp

@@ -0,0 +1,342 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP
+#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP
+
+#include <sstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <random>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+
+
+namespace test_sub_group_barrier {
+
+// Selects which kernel variant generate_source() emits: `local` accumulates in
+// local memory behind a local-memory fence, `global` accumulates directly in
+// the output buffer behind a global-memory fence.
+enum class barrier_type
+{
+    local,
+    global
+};
+
+// Parameters of one test() run.
+struct test_options
+{
+    // Kernel variant to generate and launch.
+    barrier_type barrier;
+    // Inclusive upper bound of the randomly drawn global work size.
+    size_t max_count;
+    // Number of randomized kernel launches to perform.
+    size_t num_tests;
+};
+
+// Kernel-side helper functions streamed into every generated program, in both
+// the OpenCL C and the OpenCL C++ variants of generate_source().
+// The text is compiled on the device, so it must not be edited casually.
+const std::string source_common = R"(
+    // Circular shift of sub-group local ids
+    size_t get_shifted_local_id(int sub_group_local_id_delta)
+    {
+        const int sub_group_size = (int)get_sub_group_size();
+        return (get_local_id(0) - get_sub_group_local_id()) +
+            (((int)get_sub_group_local_id() + sub_group_local_id_delta) % sub_group_size + sub_group_size) % sub_group_size;
+    }
+
+    // Get global ids from shifted local ids
+    size_t get_shifted_global_id(int sub_group_local_id_delta)
+    {
+        return get_group_id(0) * get_enqueued_local_size(0) + get_shifted_local_id(sub_group_local_id_delta);
+    }
+)";
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Assembles the OpenCL C program for this test: the cl_khr_subgroups pragma,
+// the shared id helpers, and the `test` kernel that matches options.barrier.
+// A barrier value other than global/local yields the preamble with no kernel.
+std::string generate_source(test_options options)
+{
+    std::stringstream program_text;
+    program_text << R"(
+    #pragma OPENCL EXTENSION cl_khr_subgroups : enable
+    )" << source_common;
+
+    switch (options.barrier)
+    {
+    case barrier_type::global:
+        // Accumulates directly in the output buffer.
+        program_text << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global long *output)
+    {
+        const size_t gid = get_shifted_global_id(0);
+
+        output[gid] = gid;
+        sub_group_barrier(CLK_GLOBAL_MEM_FENCE);
+
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_gid = get_shifted_global_id(i);
+
+            output[other_gid] += other_gid;
+            sub_group_barrier(CLK_GLOBAL_MEM_FENCE);
+
+            output[gid] += gid;
+            sub_group_barrier(CLK_GLOBAL_MEM_FENCE);
+        }
+    }
+    )";
+        break;
+
+    case barrier_type::local:
+        // Accumulates in local memory, then copies the result out.
+        program_text << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global long *output, local long *values)
+    {
+        const size_t gid = get_shifted_global_id(0);
+        const size_t lid = get_shifted_local_id(0);
+
+        values[lid] = gid;
+        sub_group_barrier(CLK_LOCAL_MEM_FENCE);
+
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_lid = get_shifted_local_id(i);
+            const size_t other_gid = get_shifted_global_id(i);
+
+            values[other_lid] += other_gid;
+            sub_group_barrier(CLK_LOCAL_MEM_FENCE);
+
+            values[lid] += gid;
+            sub_group_barrier(CLK_LOCAL_MEM_FENCE);
+        }
+
+        output[gid] = values[lid];
+    }
+    )";
+        break;
+
+    default:
+        break;
+    }
+
+    return program_text.str();
+}
+#else
+// Assembles the OpenCL C++ program for this test: the library includes, the
+// shared id helpers, and the `test` kernel that matches options.barrier.
+// A barrier value other than global/local yields the preamble with no kernel.
+std::string generate_source(test_options options)
+{
+    std::stringstream program_text;
+    program_text << R"(
+    #include <opencl_memory>
+    #include <opencl_work_item>
+    #include <opencl_synchronization>
+
+    using namespace cl;
+
+    )" << source_common;
+
+    switch (options.barrier)
+    {
+    case barrier_type::global:
+        // Accumulates directly in the output buffer.
+        program_text << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global_ptr<long[]> output)
+    {
+        const size_t gid = get_shifted_global_id(0);
+
+        output[gid] = gid;
+        sub_group_barrier(mem_fence::global);
+
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_gid = get_shifted_global_id(i);
+
+            output[other_gid] += other_gid;
+            sub_group_barrier(mem_fence::global);
+
+            output[gid] += gid;
+            sub_group_barrier(mem_fence::global);
+        }
+    }
+    )";
+        break;
+
+    case barrier_type::local:
+        // Accumulates in local memory, then copies the result out.
+        program_text << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global_ptr<long[]> output, local_ptr<long[]> values)
+    {
+        const size_t gid = get_shifted_global_id(0);
+        const size_t lid = get_shifted_local_id(0);
+
+        values[lid] = gid;
+        sub_group_barrier(mem_fence::local);
+
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_lid = get_shifted_local_id(i);
+            const size_t other_gid = get_shifted_global_id(i);
+
+            values[other_lid] += other_gid;
+            sub_group_barrier(mem_fence::local);
+
+            values[lid] += gid;
+            sub_group_barrier(mem_fence::local);
+        }
+
+        output[gid] = values[lid];
+    }
+    )";
+        break;
+
+    default:
+        break;
+    }
+
+    return program_text.str();
+}
+#endif
+
+// Builds the sub-group barrier kernel selected by `options.barrier`, then runs
+// `options.num_tests` randomly sized launches and validates every output element.
+//
+// The kernel stores `gid` once up front and adds it twice per loop iteration
+// (once through a shifted neighbour, once directly), so the host expects
+// output[gid] == gid + 2 * gid * (iter_hi - iter_lo).
+//
+// Parameters:
+//   device, context, queue - OpenCL objects the test runs on.
+//   options - barrier kind, number of launches and maximum global size.
+// Returns CL_SUCCESS on success (or when the test is skipped), the failing
+// OpenCL error code, or -1 on a value mismatch or an unusable size bound.
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+{
+    int error = CL_SUCCESS;
+
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    if (!is_extension_available(device, "cl_khr_subgroups"))
+    {
+        log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n");
+        return CL_SUCCESS;
+    }
+#endif
+
+    cl_program program;
+    cl_kernel kernel;
+
+    std::string kernel_name = "test";
+    std::string source = generate_source(options);
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "-cl-std=CL2.0", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    // The kernel and program were created successfully at this point. Release
+    // them on every exit path below, including the early returns hidden inside
+    // the RETURN_ON_* macros.
+    struct kernel_guard
+    {
+        cl_program program;
+        cl_kernel kernel;
+        ~kernel_guard()
+        {
+            clReleaseKernel(kernel);
+            clReleaseProgram(program);
+        }
+    } release_kernel_and_program = { program, kernel };
+
+    size_t max_work_group_size;
+    error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+
+    if (options.barrier == barrier_type::local)
+    {
+        cl_ulong kernel_local_mem_size;
+        error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL);
+        RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+
+        cl_ulong device_local_mem_size;
+        error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(device_local_mem_size), &device_local_mem_size, NULL);
+        RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+
+        // The local variant needs one cl_long of local memory per work-item,
+        // so cap the work-group size by what is left after the kernel's own usage.
+        max_work_group_size = (std::min<cl_ulong>)(max_work_group_size, (device_local_mem_size - kernel_local_mem_size) / sizeof(cl_long));
+    }
+
+    // std::uniform_int_distribution(a, b) requires a <= b; a zero upper bound
+    // would make the distributions below undefined.
+    if (options.max_count == 0 || max_work_group_size == 0)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "Invalid test sizes: max_count = %llu, max_work_group_size = %llu",
+            static_cast<unsigned long long>(options.max_count),
+            static_cast<unsigned long long>(max_work_group_size)
+        );
+    }
+
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<size_t> global_size_dis(1, options.max_count);
+    std::uniform_int_distribution<size_t> local_size_dis(1, max_work_group_size);
+    std::uniform_int_distribution<int> iter_dis(0, 20);
+
+    for (size_t run = 0; run < options.num_tests; run++)
+    {
+        const size_t global_size = global_size_dis(gen);
+        const size_t local_size = local_size_dis(gen);
+        const size_t count = global_size;
+
+        // The kernel loops over [iter_lo, iter_hi); iter_lo <= 0 <= iter_hi.
+        const int iter_lo = -iter_dis(gen);
+        const int iter_hi = +iter_dis(gen);
+
+        cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long) * count, NULL, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+        // Releases the buffer if any macro below returns before the explicit release.
+        struct buffer_guard
+        {
+            cl_mem buffer;
+            ~buffer_guard()
+            {
+                if (buffer != NULL) clReleaseMemObject(buffer);
+            }
+        } release_output = { output_buffer };
+
+        error = clSetKernelArg(kernel, 0, sizeof(iter_lo), &iter_lo);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 1, sizeof(iter_hi), &iter_hi);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        if (options.barrier == barrier_type::local)
+        {
+            error = clSetKernelArg(kernel, 3, sizeof(cl_long) * local_size, NULL);
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        }
+
+        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
+        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+        // The blocking read also waits for the kernel enqueued above.
+        std::vector<cl_long> output(count);
+        error = clEnqueueReadBuffer(
+            queue, output_buffer, CL_TRUE,
+            0, sizeof(cl_long) * count,
+            static_cast<void *>(output.data()),
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+        // Disarm the guard so the release result can be checked explicitly.
+        release_output.buffer = NULL;
+        error = clReleaseMemObject(output_buffer);
+        RETURN_ON_CL_ERROR(error, "clReleaseMemObject")
+
+        // Compare in cl_long: host `long` is only 32 bits wide on LLP64 platforms.
+        const cl_long iterations = static_cast<cl_long>(iter_hi) - iter_lo;
+        for (size_t gid = 0; gid < count; gid++)
+        {
+            const cl_long value = output[gid];
+            const cl_long expected = static_cast<cl_long>(gid) + 2 * static_cast<cl_long>(gid) * iterations;
+
+            if (value != expected)
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "Element %llu has incorrect value. Expected: %lld, got: %lld",
+                    static_cast<unsigned long long>(gid),
+                    static_cast<long long>(expected),
+                    static_cast<long long>(value)
+                );
+            }
+        }
+    }
+
+    // The kernel and program are released by release_kernel_and_program.
+    return error;
+}
+
+// Runs 1000 randomized launches of the global-memory sub-group barrier kernel,
+// with global work sizes of up to `num_elements`.
+AUTO_TEST_CASE(test_sub_group_barrier_global)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    test_options global_run;
+    global_run.max_count = static_cast<size_t>(num_elements);
+    global_run.num_tests = 1000;
+    global_run.barrier = barrier_type::global;
+
+    const int status = test(device, context, queue, global_run);
+    return status;
+}
+
+// Runs 1000 randomized launches of the local-memory sub-group barrier kernel,
+// with global work sizes of up to `num_elements`.
+AUTO_TEST_CASE(test_sub_group_barrier_local)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    test_options local_run;
+    local_run.max_count = static_cast<size_t>(num_elements);
+    local_run.num_tests = 1000;
+    local_run.barrier = barrier_type::local;
+
+    const int status = test(device, context, queue, local_run);
+    return status;
+}
+
+} // namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP

diff --git a/test_conformance/clcpp/synchronization/test_work_group_barrier.hpp b/test_conformance/clcpp/synchronization/test_work_group_barrier.hpp
new file mode 100644
index 0000000..aa7fbd2
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/test_work_group_barrier.hpp

@@ -0,0 +1,330 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_WORK_GROUP_BARRIER_HPP
+#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_WORK_GROUP_BARRIER_HPP
+
+#include <sstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <random>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+
+
+namespace test_work_group_barrier {
+
+// Selects which kernel variant generate_source() emits: `local` accumulates in
+// local memory behind a local-memory fence, `global` accumulates directly in
+// the output buffer behind a global-memory fence.
+enum class barrier_type
+{
+    local,
+    global
+};
+
+// Parameters of one test() run.
+struct test_options
+{
+    // Kernel variant to generate and launch.
+    barrier_type barrier;
+    // Inclusive upper bound of the randomly drawn global work size.
+    size_t max_count;
+    // Number of randomized kernel launches to perform.
+    size_t num_tests;
+};
+
+// Kernel-side helper functions streamed into every generated program, in both
+// the OpenCL C and the OpenCL C++ variants of generate_source().
+// The text is compiled on the device, so it must not be edited casually.
+const std::string source_common = R"(
+    // Circular shift of local ids
+    size_t get_shifted_local_id(int local_id_delta)
+    {
+        const int local_size = (int)get_local_size(0);
+        return (((int)get_local_id(0) + local_id_delta) % local_size + local_size) % local_size;
+    }
+
+    // Get global ids from shifted local ids
+    size_t get_shifted_global_id(int local_id_delta)
+    {
+        return get_group_id(0) * get_enqueued_local_size(0) + get_shifted_local_id(local_id_delta);
+    }
+)";
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Assembles the OpenCL C program for this test: the shared id helpers followed
+// by the `test` kernel that matches options.barrier.
+// A barrier value other than global/local yields the helpers with no kernel.
+std::string generate_source(test_options options)
+{
+    std::stringstream program_text;
+    program_text << source_common;
+
+    switch (options.barrier)
+    {
+    case barrier_type::global:
+        // Accumulates directly in the output buffer.
+        program_text << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global long *output)
+    {
+        const size_t gid = get_shifted_global_id(0);
+
+        output[gid] = gid;
+        work_group_barrier(CLK_GLOBAL_MEM_FENCE);
+
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_gid = get_shifted_global_id(i);
+
+            output[other_gid] += other_gid;
+            work_group_barrier(CLK_GLOBAL_MEM_FENCE);
+
+            output[gid] += gid;
+            work_group_barrier(CLK_GLOBAL_MEM_FENCE);
+        }
+    }
+    )";
+        break;
+
+    case barrier_type::local:
+        // Accumulates in local memory, then copies the result out.
+        program_text << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global long *output, local long *values)
+    {
+        const size_t gid = get_shifted_global_id(0);
+        const size_t lid = get_shifted_local_id(0);
+
+        values[lid] = gid;
+        work_group_barrier(CLK_LOCAL_MEM_FENCE);
+
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_lid = get_shifted_local_id(i);
+            const size_t other_gid = get_shifted_global_id(i);
+
+            values[other_lid] += other_gid;
+            work_group_barrier(CLK_LOCAL_MEM_FENCE);
+
+            values[lid] += gid;
+            work_group_barrier(CLK_LOCAL_MEM_FENCE);
+        }
+
+        output[gid] = values[lid];
+    }
+    )";
+        break;
+
+    default:
+        break;
+    }
+
+    return program_text.str();
+}
+#else
+// Assembles the OpenCL C++ program for this test: the library includes, the
+// shared id helpers, and the `test` kernel that matches options.barrier.
+// A barrier value other than global/local yields the preamble with no kernel.
+std::string generate_source(test_options options)
+{
+    std::stringstream program_text;
+    program_text << R"(
+    #include <opencl_memory>
+    #include <opencl_work_item>
+    #include <opencl_synchronization>
+
+    using namespace cl;
+
+    )" << source_common;
+
+    switch (options.barrier)
+    {
+    case barrier_type::global:
+        // Accumulates directly in the output buffer.
+        program_text << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global_ptr<long[]> output)
+    {
+        const size_t gid = get_shifted_global_id(0);
+
+        output[gid] = gid;
+        work_group_barrier(mem_fence::global);
+
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_gid = get_shifted_global_id(i);
+
+            output[other_gid] += other_gid;
+            work_group_barrier(mem_fence::global);
+
+            output[gid] += gid;
+            work_group_barrier(mem_fence::global);
+        }
+    }
+    )";
+        break;
+
+    case barrier_type::local:
+        // Accumulates in local memory, then copies the result out.
+        program_text << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global_ptr<long[]> output, local_ptr<long[]> values)
+    {
+        const size_t gid = get_shifted_global_id(0);
+        const size_t lid = get_shifted_local_id(0);
+
+        values[lid] = gid;
+        work_group_barrier(mem_fence::local);
+
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_lid = get_shifted_local_id(i);
+            const size_t other_gid = get_shifted_global_id(i);
+
+            values[other_lid] += other_gid;
+            work_group_barrier(mem_fence::local);
+
+            values[lid] += gid;
+            work_group_barrier(mem_fence::local);
+        }
+
+        output[gid] = values[lid];
+    }
+    )";
+        break;
+
+    default:
+        break;
+    }
+
+    return program_text.str();
+}
+#endif
+
+// Builds the work-group barrier kernel selected by `options.barrier`, then runs
+// `options.num_tests` randomly sized launches and validates every output element.
+//
+// The kernel stores `gid` once up front and adds it twice per loop iteration
+// (once through a shifted neighbour, once directly), so the host expects
+// output[gid] == gid + 2 * gid * (iter_hi - iter_lo).
+//
+// Parameters:
+//   device, context, queue - OpenCL objects the test runs on.
+//   options - barrier kind, number of launches and maximum global size.
+// Returns CL_SUCCESS on success, the failing OpenCL error code, or -1 on a
+// value mismatch or an unusable size bound.
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+{
+    int error = CL_SUCCESS;
+
+    cl_program program;
+    cl_kernel kernel;
+
+    std::string kernel_name = "test";
+    std::string source = generate_source(options);
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "-cl-std=CL2.0", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    // The kernel and program were created successfully at this point. Release
+    // them on every exit path below, including the early returns hidden inside
+    // the RETURN_ON_* macros.
+    struct kernel_guard
+    {
+        cl_program program;
+        cl_kernel kernel;
+        ~kernel_guard()
+        {
+            clReleaseKernel(kernel);
+            clReleaseProgram(program);
+        }
+    } release_kernel_and_program = { program, kernel };
+
+    size_t max_work_group_size;
+    error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+
+    if (options.barrier == barrier_type::local)
+    {
+        cl_ulong kernel_local_mem_size;
+        error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL);
+        RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+
+        cl_ulong device_local_mem_size;
+        error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(device_local_mem_size), &device_local_mem_size, NULL);
+        RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+
+        // The local variant needs one cl_long of local memory per work-item,
+        // so cap the work-group size by what is left after the kernel's own usage.
+        max_work_group_size = (std::min<cl_ulong>)(max_work_group_size, (device_local_mem_size - kernel_local_mem_size) / sizeof(cl_long));
+    }
+
+    // std::uniform_int_distribution(a, b) requires a <= b; a zero upper bound
+    // would make the distributions below undefined.
+    if (options.max_count == 0 || max_work_group_size == 0)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "Invalid test sizes: max_count = %llu, max_work_group_size = %llu",
+            static_cast<unsigned long long>(options.max_count),
+            static_cast<unsigned long long>(max_work_group_size)
+        );
+    }
+
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<size_t> global_size_dis(1, options.max_count);
+    std::uniform_int_distribution<size_t> local_size_dis(1, max_work_group_size);
+    std::uniform_int_distribution<int> iter_dis(0, 20);
+
+    for (size_t run = 0; run < options.num_tests; run++)
+    {
+        const size_t global_size = global_size_dis(gen);
+        const size_t local_size = local_size_dis(gen);
+        const size_t count = global_size;
+
+        // The kernel loops over [iter_lo, iter_hi); iter_lo <= 0 <= iter_hi.
+        const int iter_lo = -iter_dis(gen);
+        const int iter_hi = +iter_dis(gen);
+
+        cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long) * count, NULL, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+
+        // Releases the buffer if any macro below returns before the explicit release.
+        struct buffer_guard
+        {
+            cl_mem buffer;
+            ~buffer_guard()
+            {
+                if (buffer != NULL) clReleaseMemObject(buffer);
+            }
+        } release_output = { output_buffer };
+
+        error = clSetKernelArg(kernel, 0, sizeof(iter_lo), &iter_lo);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 1, sizeof(iter_hi), &iter_hi);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        if (options.barrier == barrier_type::local)
+        {
+            error = clSetKernelArg(kernel, 3, sizeof(cl_long) * local_size, NULL);
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        }
+
+        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
+        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+        // The blocking read also waits for the kernel enqueued above.
+        std::vector<cl_long> output(count);
+        error = clEnqueueReadBuffer(
+            queue, output_buffer, CL_TRUE,
+            0, sizeof(cl_long) * count,
+            static_cast<void *>(output.data()),
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+        // Disarm the guard so the release result can be checked explicitly.
+        release_output.buffer = NULL;
+        error = clReleaseMemObject(output_buffer);
+        RETURN_ON_CL_ERROR(error, "clReleaseMemObject")
+
+        // Compare in cl_long: host `long` is only 32 bits wide on LLP64 platforms.
+        const cl_long iterations = static_cast<cl_long>(iter_hi) - iter_lo;
+        for (size_t gid = 0; gid < count; gid++)
+        {
+            const cl_long value = output[gid];
+            const cl_long expected = static_cast<cl_long>(gid) + 2 * static_cast<cl_long>(gid) * iterations;
+
+            if (value != expected)
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "Element %llu has incorrect value. Expected: %lld, got: %lld",
+                    static_cast<unsigned long long>(gid),
+                    static_cast<long long>(expected),
+                    static_cast<long long>(value)
+                );
+            }
+        }
+    }
+
+    // The kernel and program are released by release_kernel_and_program.
+    return error;
+}
+
+// Runs 1000 randomized launches of the global-memory work-group barrier kernel,
+// with global work sizes of up to `num_elements`.
+AUTO_TEST_CASE(test_work_group_barrier_global)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    test_options global_run;
+    global_run.max_count = static_cast<size_t>(num_elements);
+    global_run.num_tests = 1000;
+    global_run.barrier = barrier_type::global;
+
+    const int status = test(device, context, queue, global_run);
+    return status;
+}
+
+// Runs 1000 randomized launches of the local-memory work-group barrier kernel,
+// with global work sizes of up to `num_elements`.
+AUTO_TEST_CASE(test_work_group_barrier_local)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    test_options local_run;
+    local_run.max_count = static_cast<size_t>(num_elements);
+    local_run.num_tests = 1000;
+    local_run.barrier = barrier_type::local;
+
+    const int status = test(device, context, queue, local_run);
+    return status;
+}
+
+} // namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_WORK_GROUP_BARRIER_HPP

diff --git a/test_conformance/clcpp/utils_common/errors.hpp b/test_conformance/clcpp/utils_common/errors.hpp
new file mode 100644
index 0000000..c169462
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/errors.hpp

@@ -0,0 +1,134 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_ERRORS_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_ERRORS_HPP
+
+#include <string>
+
+#include "../harness/errorHelpers.h"
+
+// ------------- Check OpenCL error helpers (macros) -----------------
+
+std::string get_cl_error_string(cl_int error)
+{
+#define CASE_CL_ERROR(x) case x: return #x;
+    switch (error)
+    {
+        CASE_CL_ERROR(CL_SUCCESS)
+        CASE_CL_ERROR(CL_DEVICE_NOT_FOUND)
+        CASE_CL_ERROR(CL_DEVICE_NOT_AVAILABLE)
+        CASE_CL_ERROR(CL_COMPILER_NOT_AVAILABLE)
+        CASE_CL_ERROR(CL_MEM_OBJECT_ALLOCATION_FAILURE)
+        CASE_CL_ERROR(CL_OUT_OF_RESOURCES)
+        CASE_CL_ERROR(CL_OUT_OF_HOST_MEMORY)
+        CASE_CL_ERROR(CL_PROFILING_INFO_NOT_AVAILABLE)
+        CASE_CL_ERROR(CL_MEM_COPY_OVERLAP)
+        CASE_CL_ERROR(CL_IMAGE_FORMAT_MISMATCH)
+        CASE_CL_ERROR(CL_IMAGE_FORMAT_NOT_SUPPORTED)
+        CASE_CL_ERROR(CL_BUILD_PROGRAM_FAILURE)
+        CASE_CL_ERROR(CL_MAP_FAILURE)
+        CASE_CL_ERROR(CL_MISALIGNED_SUB_BUFFER_OFFSET)
+        CASE_CL_ERROR(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
+        CASE_CL_ERROR(CL_COMPILE_PROGRAM_FAILURE)
+        CASE_CL_ERROR(CL_LINKER_NOT_AVAILABLE)
+        CASE_CL_ERROR(CL_LINK_PROGRAM_FAILURE)
+        CASE_CL_ERROR(CL_DEVICE_PARTITION_FAILED)
+        CASE_CL_ERROR(CL_KERNEL_ARG_INFO_NOT_AVAILABLE)
+
+        CASE_CL_ERROR(CL_INVALID_VALUE)
+        CASE_CL_ERROR(CL_INVALID_DEVICE_TYPE)
+        CASE_CL_ERROR(CL_INVALID_PLATFORM)
+        CASE_CL_ERROR(CL_INVALID_DEVICE)
+        CASE_CL_ERROR(CL_INVALID_CONTEXT)
+        CASE_CL_ERROR(CL_INVALID_QUEUE_PROPERTIES)
+        CASE_CL_ERROR(CL_INVALID_COMMAND_QUEUE)
+        CASE_CL_ERROR(CL_INVALID_HOST_PTR)
+        CASE_CL_ERROR(CL_INVALID_MEM_OBJECT)
+        CASE_CL_ERROR(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
+        CASE_CL_ERROR(CL_INVALID_IMAGE_SIZE)
+        CASE_CL_ERROR(CL_INVALID_SAMPLER)
+        CASE_CL_ERROR(CL_INVALID_BINARY)
+        CASE_CL_ERROR(CL_INVALID_BUILD_OPTIONS)
+        CASE_CL_ERROR(CL_INVALID_PROGRAM)
+        CASE_CL_ERROR(CL_INVALID_PROGRAM_EXECUTABLE)
+        CASE_CL_ERROR(CL_INVALID_KERNEL_NAME)
+        CASE_CL_ERROR(CL_INVALID_KERNEL_DEFINITION)
+        CASE_CL_ERROR(CL_INVALID_KERNEL)
+        CASE_CL_ERROR(CL_INVALID_ARG_INDEX)
+        CASE_CL_ERROR(CL_INVALID_ARG_VALUE)
+        CASE_CL_ERROR(CL_INVALID_ARG_SIZE)
+        CASE_CL_ERROR(CL_INVALID_KERNEL_ARGS)
+        CASE_CL_ERROR(CL_INVALID_WORK_DIMENSION)
+        CASE_CL_ERROR(CL_INVALID_WORK_GROUP_SIZE)
+        CASE_CL_ERROR(CL_INVALID_WORK_ITEM_SIZE)
+        CASE_CL_ERROR(CL_INVALID_GLOBAL_OFFSET)
+        CASE_CL_ERROR(CL_INVALID_EVENT_WAIT_LIST)
+        CASE_CL_ERROR(CL_INVALID_EVENT)
+        CASE_CL_ERROR(CL_INVALID_OPERATION)
+        CASE_CL_ERROR(CL_INVALID_GL_OBJECT)
+        CASE_CL_ERROR(CL_INVALID_BUFFER_SIZE)
+        CASE_CL_ERROR(CL_INVALID_MIP_LEVEL)
+        CASE_CL_ERROR(CL_INVALID_GLOBAL_WORK_SIZE)
+        CASE_CL_ERROR(CL_INVALID_PROPERTY)
+        CASE_CL_ERROR(CL_INVALID_IMAGE_DESCRIPTOR)
+        CASE_CL_ERROR(CL_INVALID_COMPILER_OPTIONS)
+        CASE_CL_ERROR(CL_INVALID_LINKER_OPTIONS)
+        CASE_CL_ERROR(CL_INVALID_DEVICE_PARTITION_COUNT)
+        CASE_CL_ERROR(CL_INVALID_PIPE_SIZE)
+        CASE_CL_ERROR(CL_INVALID_DEVICE_QUEUE)
+        CASE_CL_ERROR(CL_INVALID_SPEC_ID)
+        CASE_CL_ERROR(CL_MAX_SIZE_RESTRICTION_EXCEEDED)
+        default: return "(unknown error code)";
+    }
+#undef CASE_CL_ERROR
+}
+
+#define CHECK_ERROR(x) \
+    if(x != CL_SUCCESS) \
+    { \
+        log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\
+    }
+#define CHECK_ERROR_MSG(x, ...) \
+    if(x != CL_SUCCESS) \
+    { \
+        log_error("ERROR: " __VA_ARGS__);\
+        log_error("\n");\
+        log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\
+    }
+#define RETURN_ON_ERROR(x) \
+    if(x != CL_SUCCESS) \
+    { \
+        log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\
+        return x;\
+    }
+#define RETURN_ON_ERROR_MSG(x, ...) \
+    if(x != CL_SUCCESS) \
+    { \
+        log_error("ERROR: " __VA_ARGS__);\
+        log_error("\n");\
+        log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\
+        return x;\
+    }
+
+#define RETURN_ON_CL_ERROR(x, cl_func_name) \
+    if(x != CL_SUCCESS) \
+    { \
+        log_error("ERROR: %s failed: %s (%d)\n", cl_func_name, get_cl_error_string(x).c_str(), x);\
+        log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\
+        return x;\
+    }
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_ERRORS_HPP

diff --git a/test_conformance/clcpp/utils_common/is_vector_type.hpp b/test_conformance/clcpp/utils_common/is_vector_type.hpp
new file mode 100644
index 0000000..0232e51
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/is_vector_type.hpp

@@ -0,0 +1,60 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_IS_VECTOR_TYPE_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_IS_VECTOR_TYPE_HPP
+
+#include "../common.hpp"
+
+// Trait: is_vector_type<Type>::value is true when Type is one of the OpenCL
+// host vector types specialized below, and false for every other type.
+//
+// Examples:
+// * is_vector_type<cl_float>::value == false
+// * is_vector_type<cl_float4>::value == true
+//
+// NOTE(review): there is no 3-component entry; presumably the CL headers alias
+// the 3-wide types to the 4-wide ones, which would make it a duplicate - confirm.
+template<class Type>
+struct is_vector_type
+{
+    static const bool value = false;
+};
+
+// Specializes the trait for the vector type named ScalarType followed by Width.
+#define ADD_VECTOR_TYPE(ScalarType, Width) \
+    template<> \
+    struct is_vector_type<ScalarType ## Width> \
+    { \
+        static const bool value = true; \
+    };
+
+// Specializes the trait for the 2, 4, 8 and 16 wide vectors of ScalarType.
+#define ADD_VECTOR_TYPES(ScalarType) \
+    ADD_VECTOR_TYPE(ScalarType, 2) \
+    ADD_VECTOR_TYPE(ScalarType, 4) \
+    ADD_VECTOR_TYPE(ScalarType, 8) \
+    ADD_VECTOR_TYPE(ScalarType, 16)
+
+// Signed and unsigned integer element types.
+ADD_VECTOR_TYPES(cl_char)
+ADD_VECTOR_TYPES(cl_uchar)
+ADD_VECTOR_TYPES(cl_short)
+ADD_VECTOR_TYPES(cl_ushort)
+ADD_VECTOR_TYPES(cl_int)
+ADD_VECTOR_TYPES(cl_uint)
+ADD_VECTOR_TYPES(cl_long)
+ADD_VECTOR_TYPES(cl_ulong)
+// Floating-point element types.
+ADD_VECTOR_TYPES(cl_float)
+ADD_VECTOR_TYPES(cl_double)
+
+// The helper macros are private to this header.
+#undef ADD_VECTOR_TYPES
+#undef ADD_VECTOR_TYPE
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_IS_VECTOR_TYPE_HPP

diff --git a/test_conformance/clcpp/utils_common/kernel_helpers.hpp b/test_conformance/clcpp/utils_common/kernel_helpers.hpp
new file mode 100644
index 0000000..189b823
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/kernel_helpers.hpp

@@ -0,0 +1,50 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_KERNEL_HELPERS_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_KERNEL_HELPERS_HPP
+
+#include "../common.hpp"
+
+// Creates an OpenCL C++ or OpenCL C program and one kernel from it.
+//
+// Forwards to create_single_kernel_helper() with a single source string.
+//
+// Parameters:
+//   context       - context the program is created in.
+//   out_program   - receives the created program.
+//   out_kernel    - receives the created kernel.
+//   source        - null-terminated program source.
+//   kernel_name   - name of the kernel to create.
+//   build_options - options string forwarded to the helper (default: none).
+//   openclCXX     - forwarded to the helper; callers in this suite pass false
+//                   when the source is OpenCL C rather than OpenCL C++.
+// Returns the value returned by create_single_kernel_helper().
+//
+// Declared inline because it is defined in a header; otherwise including this
+// file from several translation units would produce multiple definitions.
+inline int create_opencl_kernel(cl_context context,
+                                cl_program *out_program,
+                                cl_kernel *out_kernel,
+                                const char *source,
+                                const std::string& kernel_name,
+                                const std::string& build_options = "",
+                                const bool openclCXX = true)
+{
+    return create_single_kernel_helper(
+        context, out_program, out_kernel, 1, &source,
+        kernel_name.c_str(), build_options.c_str(), openclCXX
+    );
+}
+
+// Overload of create_opencl_kernel() that takes the program source as a
+// std::string and forwards to the `const char *` overload; all other
+// parameters and the return value are passed through unchanged.
+//
+// Declared inline because it is defined in a header; otherwise including this
+// file from several translation units would produce multiple definitions.
+inline int create_opencl_kernel(cl_context context,
+                                cl_program *out_program,
+                                cl_kernel *out_kernel,
+                                const std::string& source,
+                                const std::string& kernel_name,
+                                const std::string& build_options = "",
+                                const bool openclCXX = true)
+{
+    return create_opencl_kernel(
+        context, out_program, out_kernel,
+        source.c_str(), kernel_name, build_options, openclCXX
+    );
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_KERNEL_HELPERS_HPP

diff --git a/test_conformance/clcpp/utils_common/make_vector_type.hpp b/test_conformance/clcpp/utils_common/make_vector_type.hpp
new file mode 100644
index 0000000..11b1185
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/make_vector_type.hpp

@@ -0,0 +1,65 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_MAKE_VECTOR_TYPE_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_MAKE_VECTOR_TYPE_HPP
+
+#include "../common.hpp"
+
+// Maps a scalar type and a component count to the matching vector type:
+// make_vector_type<Scalar, Count>::type names the type "Scalar" + "Count".
+//
+// Examples:
+// * make_vector_type<cl_uint, 8>::type is cl_uint8
+// * make_vector_type<cl_uint, 1>::type is cl_uint
+//
+// This primary template is the fallback used when no explicit specialization
+// exists for the requested combination; its ::type is void.
+template<class Scalar, size_t Count>
+struct make_vector_type
+{
+    using type = void;
+};
+
+// Specializes make_vector_type<Type, n> so that ::type is the token Type
+// with n pasted onto it (for example cl_int and 4 give cl_int4).
+#define ADD_MAKE_VECTOR_TYPE(Type, n) \
+    template<> struct make_vector_type<Type, n> { using type = Type ## n; };
+
+// Specializes make_vector_type for one scalar Type: a count of 1 maps to
+// Type itself, counts 2, 3, 4, 8 and 16 map to the pasted vector names.
+#define ADD_MAKE_VECTOR_TYPES(Type) \
+    template<> struct make_vector_type<Type, 1> { using type = Type; }; \
+    ADD_MAKE_VECTOR_TYPE(Type, 2) \
+    ADD_MAKE_VECTOR_TYPE(Type, 3) \
+    ADD_MAKE_VECTOR_TYPE(Type, 4) \
+    ADD_MAKE_VECTOR_TYPE(Type, 8) \
+    ADD_MAKE_VECTOR_TYPE(Type, 16)
+
+// Integer types.
+ADD_MAKE_VECTOR_TYPES(cl_char)
+ADD_MAKE_VECTOR_TYPES(cl_uchar)
+ADD_MAKE_VECTOR_TYPES(cl_short)
+ADD_MAKE_VECTOR_TYPES(cl_ushort)
+ADD_MAKE_VECTOR_TYPES(cl_int)
+ADD_MAKE_VECTOR_TYPES(cl_uint)
+ADD_MAKE_VECTOR_TYPES(cl_long)
+ADD_MAKE_VECTOR_TYPES(cl_ulong)
+// Floating-point types.
+ADD_MAKE_VECTOR_TYPES(cl_float)
+ADD_MAKE_VECTOR_TYPES(cl_double)
+
+// The generator macros are private to this header.
+#undef ADD_MAKE_VECTOR_TYPES
+#undef ADD_MAKE_VECTOR_TYPE
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_MAKE_VECTOR_TYPE_HPP

diff --git a/test_conformance/clcpp/utils_common/scalar_type.hpp b/test_conformance/clcpp/utils_common/scalar_type.hpp
new file mode 100644
index 0000000..4c939bb
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/scalar_type.hpp

@@ -0,0 +1,64 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_SCALAR_TYPE_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_SCALAR_TYPE_HPP
+
+#include "../common.hpp"
+
+// scalar_type<Type>::type names the scalar (component) type of Type.
+//
+// Examples:
+// * scalar_type<cl_float>::type is cl_float
+// * scalar_type<cl_float4>::type is cl_float
+//
+// This primary template is the fallback used when no explicit specialization
+// exists for Type; its ::type is void.
+template<class Type>
+struct scalar_type
+{
+    using type = void;
+};
+
+// Specializes scalar_type for the vector type named Type with n pasted onto
+// it, so that ::type is Type.
+#define ADD_VECTOR_TYPE(Type, n) \
+    template<> struct scalar_type<Type ## n> { using type = Type; };
+
+// Specializes scalar_type for the scalar Type itself and for its 2-, 4-, 8-
+// and 16-component vector types; all of them report Type.
+#define ADD_VECTOR_TYPES(Type) \
+    template<> struct scalar_type<Type> { using type = Type; }; \
+    ADD_VECTOR_TYPE(Type, 2) \
+    ADD_VECTOR_TYPE(Type, 4) \
+    ADD_VECTOR_TYPE(Type, 8) \
+    ADD_VECTOR_TYPE(Type, 16)
+
+// Integer types.
+ADD_VECTOR_TYPES(cl_char)
+ADD_VECTOR_TYPES(cl_uchar)
+ADD_VECTOR_TYPES(cl_short)
+ADD_VECTOR_TYPES(cl_ushort)
+ADD_VECTOR_TYPES(cl_int)
+ADD_VECTOR_TYPES(cl_uint)
+ADD_VECTOR_TYPES(cl_long)
+ADD_VECTOR_TYPES(cl_ulong)
+// Floating-point types.
+ADD_VECTOR_TYPES(cl_float)
+ADD_VECTOR_TYPES(cl_double)
+
+// The generator macros are private to this header.
+#undef ADD_VECTOR_TYPES
+#undef ADD_VECTOR_TYPE
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_SCALAR_TYPE_HPP

diff --git a/test_conformance/clcpp/utils_common/string.hpp b/test_conformance/clcpp/utils_common/string.hpp
new file mode 100644
index 0000000..ad5ac9f
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/string.hpp

@@ -0,0 +1,70 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_STRING_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_STRING_HPP
+
+
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <type_traits>
+
+#include "is_vector_type.hpp"
+#include "scalar_type.hpp"
+#include "type_name.hpp"
+
+#include "../common.hpp"
+
+
+// Formats a vector value as "<type_name>{ c0, c1, ... }" for log messages.
+//
+// Selected only when is_vector_type<type>::value is true. The components are
+// read from value.s[] and streamed in scientific notation with precision 6.
+// Each component goes through unary plus first, so 8-bit components are
+// printed as numbers instead of raw characters (a raw NUL or control byte
+// would otherwise end up inside the message text).
+template<class type>
+std::string format_value(const type& value,
+                         typename std::enable_if<is_vector_type<type>::value>::type* = 0)
+{
+    std::stringstream s;
+    s << type_name<type>() << "{ ";
+    s << std::scientific << std::setprecision(6);
+    for (size_t j = 0; j < vector_size<type>::value; j++)
+    {
+        if (j > 0)
+            s << ", ";
+        // Unary plus promotes char-sized integers to int; wider arithmetic
+        // types print exactly as they did without it.
+        s << +value.s[j];
+    }
+    s << " }";
+    return s.str();
+}
+
+// Formats a scalar value as "<type_name>{ v }" for log messages.
+//
+// Selected only when is_vector_type<type>::value is false. The value is
+// streamed in scientific notation with precision 6. It goes through unary
+// plus first, so 8-bit integers are printed as numbers instead of raw
+// characters (a raw NUL or control byte would otherwise end up inside the
+// message text).
+template<class type>
+std::string format_value(const type& value,
+                         typename std::enable_if<!is_vector_type<type>::value>::type* = 0)
+{
+    std::stringstream s;
+    s << type_name<type>() << "{ ";
+    s << std::scientific << std::setprecision(6);
+    // Unary plus promotes char-sized integers to int; wider arithmetic types
+    // print exactly as they did without it.
+    s << +value;
+    s << " }";
+    return s.str();
+}
+
+// Replaces every non-overlapping occurrence of from in str with to, in place.
+//
+// The search resumes right after the inserted text, so occurrences of from
+// that appear inside to are not replaced again. An empty from matches at
+// every position and the loop would never finish, so that case leaves str
+// untouched.
+//
+// Declared inline because this header contains the definition: a non-inline
+// definition would be emitted by every translation unit that includes the
+// header and clash at link time.
+inline void replace_all(std::string& str, const std::string& from, const std::string& to)
+{
+    if (from.empty())
+        return;
+
+    size_t start_pos = 0;
+    while((start_pos = str.find(from, start_pos)) != std::string::npos) {
+        str.replace(start_pos, from.length(), to);
+        start_pos += to.length();
+    }
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_STRING_HPP

diff --git a/test_conformance/clcpp/utils_common/type_name.hpp b/test_conformance/clcpp/utils_common/type_name.hpp
new file mode 100644
index 0000000..c66f6e4
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/type_name.hpp

@@ -0,0 +1,65 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_NAME_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_NAME_HPP
+
+#include "../common.hpp"
+
+// Returns the name of Type as it is spelled in OpenCL device code, e.g.
+// cl_uint -> "uint", cl_float2 -> "float2".
+//
+// This primary template is the fallback used when no explicit specialization
+// exists for Type; it returns "unknown".
+template<class Type>
+std::string type_name()
+{
+    const std::string fallback("unknown");
+    return fallback;
+}
+
+// Defines the type_name<Type>() specialization that returns the token str as
+// text. The specialization is declared inline: an explicit specialization of
+// a function template is an ordinary function definition, so without inline
+// every translation unit including this header would emit its own copy and
+// the link would fail on duplicate symbols.
+#define ADD_TYPE_NAME(Type, str) \
+    template<> \
+    inline std::string type_name<Type>() \
+    { \
+        return #str; \
+    }
+
+// Registers host type cl_<Type> under the device name <Type>.
+#define ADD_TYPE_NAME2(Type) \
+    ADD_TYPE_NAME(cl_ ## Type, Type)
+
+// Registers the vector type whose name is Type with x pasted onto it.
+#define ADD_TYPE_NAME3(Type, x) \
+    ADD_TYPE_NAME2(Type ## x)
+
+// Registers the scalar Type and its 2-, 4-, 8- and 16-component vectors.
+#define ADD_TYPE_NAMES(Type) \
+    ADD_TYPE_NAME2(Type) \
+    ADD_TYPE_NAME3(Type, 2) \
+    ADD_TYPE_NAME3(Type, 4) \
+    ADD_TYPE_NAME3(Type, 8) \
+    ADD_TYPE_NAME3(Type, 16)
+
+ADD_TYPE_NAMES(char)
+ADD_TYPE_NAMES(uchar)
+ADD_TYPE_NAMES(short)
+ADD_TYPE_NAMES(ushort)
+ADD_TYPE_NAMES(int)
+ADD_TYPE_NAMES(uint)
+ADD_TYPE_NAMES(long)
+ADD_TYPE_NAMES(ulong)
+ADD_TYPE_NAMES(float)
+ADD_TYPE_NAMES(double)
+
+// The generator macros are private to this header.
+#undef ADD_TYPE_NAMES
+#undef ADD_TYPE_NAME3
+#undef ADD_TYPE_NAME2
+#undef ADD_TYPE_NAME
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_NAME_HPP

diff --git a/test_conformance/clcpp/utils_common/type_supported.hpp b/test_conformance/clcpp/utils_common/type_supported.hpp
new file mode 100644
index 0000000..8d4f721
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/type_supported.hpp

@@ -0,0 +1,106 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_SUPPORTED_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_SUPPORTED_HPP
+
+#include "../common.hpp"
+
+// Returns true if Type is supported by device; otherwise false.
+//
+// This primary template is the fallback used when no explicit specialization
+// exists for Type; it ignores device and reports the type as unsupported.
+template<class Type>
+bool type_supported(cl_device_id device)
+{
+    static_cast<void>(device); // parameter intentionally unused
+    const bool supported = false;
+    return supported;
+}
+
+// Defines a type_supported<Type> specialization that ignores device and
+// always returns true. The specialization is declared inline: an explicit
+// specialization of a function template is an ordinary function definition,
+// so without inline every translation unit including this header would emit
+// its own copy and the link would fail on duplicate symbols.
+#define ADD_SUPPORTED_TYPE(Type) \
+    template<> \
+    inline bool type_supported<Type>(cl_device_id device) \
+    { \
+        (void) device; \
+        return true; \
+    }
+
+// Integer types of up to 32 bits are reported as supported unconditionally.
+ADD_SUPPORTED_TYPE(cl_char)
+ADD_SUPPORTED_TYPE(cl_uchar)
+ADD_SUPPORTED_TYPE(cl_short)
+ADD_SUPPORTED_TYPE(cl_ushort)
+ADD_SUPPORTED_TYPE(cl_int)
+ADD_SUPPORTED_TYPE(cl_uint)
+
+// ulong
+//
+// Reports whether cl_ulong is usable on device.
+// long types do not have to be supported in EMBEDDED_PROFILE: the device
+// profile string is queried and, when it equals "EMBEDDED_PROFILE", the
+// result is is_extension_available(device, "cles_khr_int64"). Any other
+// profile yields true. A failing clGetDeviceInfo is logged and yields false.
+//
+// Declared inline because this header contains the definition: a non-inline
+// explicit specialization would be emitted by every translation unit that
+// includes the header and clash at link time.
+template<>
+inline bool type_supported<cl_ulong>(cl_device_id device)
+{
+    char profile[128];
+    cl_int error;
+
+    // The array decays to the buffer pointer; sizeof still yields 128.
+    error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
+    if (error != CL_SUCCESS)
+    {
+        log_error("ERROR: clGetDeviceInfo failed with CL_DEVICE_PROFILE\n");
+        return false;
+    }
+
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        return is_extension_available(device, "cles_khr_int64");
+
+    return true;
+}
+// long
+//
+// cl_long is supported exactly when cl_ulong is: the decision is delegated
+// to type_supported<cl_ulong>(device).
+//
+// Declared inline because this header contains the definition: a non-inline
+// explicit specialization would be emitted by every translation unit that
+// includes the header and clash at link time.
+template<>
+inline bool type_supported<cl_long>(cl_device_id device)
+{
+    return type_supported<cl_ulong>(device);
+}
+ADD_SUPPORTED_TYPE(cl_float)
+// double
+//
+// cl_double is reported as supported exactly when
+// is_extension_available(device, "cl_khr_fp64") returns true.
+//
+// Declared inline because this header contains the definition: a non-inline
+// explicit specialization would be emitted by every translation unit that
+// includes the header and clash at link time.
+template<>
+inline bool type_supported<cl_double>(cl_device_id device)
+{
+    return is_extension_available(device, "cl_khr_fp64");
+}
+
+// Defines type_supported for the vector type named Type with n pasted onto
+// it; the answer is whatever type_supported<Type> returns for the scalar.
+// The specialization is declared inline: an explicit specialization of a
+// function template is an ordinary function definition, so without inline
+// every translation unit including this header would emit its own copy and
+// the link would fail on duplicate symbols.
+#define ADD_SUPPORTED_VEC_TYPE1(Type, n) \
+    template<> \
+    inline bool type_supported<Type ## n>(cl_device_id device) \
+    { \
+        return type_supported<Type>(device); \
+    }
+
+// Covers the 2-, 4-, 8- and 16-component vectors of Type.
+#define ADD_SUPPORTED_VEC_TYPE2(Type) \
+    ADD_SUPPORTED_VEC_TYPE1(Type, 2) \
+    ADD_SUPPORTED_VEC_TYPE1(Type, 4) \
+    ADD_SUPPORTED_VEC_TYPE1(Type, 8) \
+    ADD_SUPPORTED_VEC_TYPE1(Type, 16)
+
+ADD_SUPPORTED_VEC_TYPE2(cl_char)
+ADD_SUPPORTED_VEC_TYPE2(cl_uchar)
+ADD_SUPPORTED_VEC_TYPE2(cl_short)
+ADD_SUPPORTED_VEC_TYPE2(cl_ushort)
+ADD_SUPPORTED_VEC_TYPE2(cl_int)
+ADD_SUPPORTED_VEC_TYPE2(cl_uint)
+ADD_SUPPORTED_VEC_TYPE2(cl_long)
+ADD_SUPPORTED_VEC_TYPE2(cl_ulong)
+ADD_SUPPORTED_VEC_TYPE2(cl_float)
+// cl_double vectors get no specialization here, so they use the primary
+// template of type_supported.
+// NOTE(review): confirm that leaving them on the fallback is intentional.
+// ADD_SUPPORTED_VEC_TYPE2(cl_double)
+
+// The generator macros are private to this header.
+#undef ADD_SUPPORTED_VEC_TYPE2
+#undef ADD_SUPPORTED_VEC_TYPE1
+#undef ADD_SUPPORTED_TYPE
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_SUPPORTED_HPP

diff --git a/test_conformance/clcpp/utils_common/vector_size.hpp b/test_conformance/clcpp/utils_common/vector_size.hpp
new file mode 100644
index 0000000..4817506
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/vector_size.hpp

@@ -0,0 +1,61 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_VECTOR_SIZE_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_VECTOR_SIZE_HPP
+
+#include "../common.hpp"
+
+// vector_size<Type>::value is 1 if Type is a scalar type; if Type is a
+// vector type it is the number of components in that type.
+//
+// This primary template is the fallback used when no explicit specialization
+// exists for Type; it reports 1.
+template<class Type>
+struct vector_size
+{
+    static const size_t value = 1;
+};
+
+// Specializes vector_size for the vector type named Type with n pasted onto
+// it, so that ::value is n.
+#define ADD_VECTOR_SIZE_TYPE(Type, n) \
+    template<> struct vector_size<Type ## n> { static const size_t value = n; };
+
+// Specializes vector_size for the scalar Type (value 1) and for its 2-, 4-,
+// 8- and 16-component vector types.
+#define ADD_VECTOR_SIZE_TYPES(Type) \
+    template<> struct vector_size<Type> { static const size_t value = 1; }; \
+    ADD_VECTOR_SIZE_TYPE(Type, 2) \
+    ADD_VECTOR_SIZE_TYPE(Type, 4) \
+    ADD_VECTOR_SIZE_TYPE(Type, 8) \
+    ADD_VECTOR_SIZE_TYPE(Type, 16)
+
+// Integer types.
+ADD_VECTOR_SIZE_TYPES(cl_char)
+ADD_VECTOR_SIZE_TYPES(cl_uchar)
+ADD_VECTOR_SIZE_TYPES(cl_short)
+ADD_VECTOR_SIZE_TYPES(cl_ushort)
+ADD_VECTOR_SIZE_TYPES(cl_int)
+ADD_VECTOR_SIZE_TYPES(cl_uint)
+ADD_VECTOR_SIZE_TYPES(cl_long)
+ADD_VECTOR_SIZE_TYPES(cl_ulong)
+// Floating-point types.
+ADD_VECTOR_SIZE_TYPES(cl_float)
+ADD_VECTOR_SIZE_TYPES(cl_double)
+
+// The generator macros are private to this header.
+#undef ADD_VECTOR_SIZE_TYPES
+#undef ADD_VECTOR_SIZE_TYPE
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_VECTOR_SIZE_HPP

diff --git a/test_conformance/clcpp/utils_test/binary.hpp b/test_conformance/clcpp/utils_test/binary.hpp
new file mode 100644
index 0000000..5ff35c9
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/binary.hpp

@@ -0,0 +1,308 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_BINARY_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_BINARY_HPP
+
+#include <type_traits>
+#include <algorithm>
+#include <string>
+#include <cmath>
+
+#include "../common.hpp"
+
+#include "detail/base_func_type.hpp"
+#include "generate_inputs.hpp"
+#include "compare.hpp"
+
+// Description of a two-argument function under test, with input types IN1
+// and IN2 and result type OUT1. Concrete function types supply str(); the
+// remaining members provide the defaults defined below.
+template<class IN1, class IN2, class OUT1>
+struct binary_func : public detail::base_func_type<OUT1>
+{
+    using in1_type = IN1;
+    using in2_type = IN2;
+    using out_type = OUT1;
+
+    virtual ~binary_func() {}
+
+    // Name of the tested function.
+    virtual std::string str() = 0;
+
+    // Signature text "<out>(<in1>, <in2>)" built from type_name<>().
+    std::string decl_str()
+    {
+        std::string signature = type_name<OUT1>();
+        signature += "(";
+        signature += type_name<IN1>();
+        signature += ", ";
+        signature += type_name<IN2>();
+        signature += ")";
+        return signature;
+    }
+
+    // Default: the first input is not treated as bool.
+    bool is_in1_bool()
+    {
+        return false;
+    }
+
+    // Default: the second input is not treated as bool.
+    bool is_in2_bool()
+    {
+        return false;
+    }
+
+    // Default range of the first input: detail::get_min / detail::get_max.
+    IN1 min1()
+    {
+        return detail::get_min<IN1>();
+    }
+
+    IN1 max1()
+    {
+        return detail::get_max<IN1>();
+    }
+
+    // Default range of the second input: detail::get_min / detail::get_max.
+    IN2 min2()
+    {
+        return detail::get_min<IN2>();
+    }
+
+    IN2 max2()
+    {
+        return detail::get_max<IN2>();
+    }
+
+    // Default: no special-case values for the first input.
+    std::vector<IN1> in1_special_cases()
+    {
+        return std::vector<IN1>();
+    }
+
+    // Default: no special-case values for the second input.
+    std::vector<IN2> in2_special_cases()
+    {
+        return std::vector<IN2>();
+    }
+
+    // Default comparison tolerance: expected scaled by 1e-3, computed in a
+    // cl_double type with as many components as T. The inputs are not used.
+    template<class T>
+    typename make_vector_type<cl_double, vector_size<T>::value>::type
+    delta(const IN1& in1, const IN2& in2, const T& expected)
+    {
+        using delta_vector_type =
+            typename make_vector_type<cl_double, vector_size<T>::value>::type;
+        // Silence unused-parameter warnings.
+        static_cast<void>(in1);
+        static_cast<void>(in2);
+        auto scale = detail::make_value<delta_vector_type>(1e-3);
+        return detail::multiply<delta_vector_type>(scale, expected);
+    }
+};
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the OpenCL C source of the test kernel for a binary function.
+//
+// The kernel reads input1[gid] and input2[gid], calls func.str() on them and
+// stores the result in output[gid]. Inputs flagged as bool by func are passed
+// as a comparison against a zero int (or intN) value; a bool result is
+// wrapped in convert_int (or convert_intN).
+template <class func_type, class in1_type, class in2_type, class out_type>
+std::string generate_kernel_binary(func_type func)
+{
+    // First argument expression.
+    std::string in1_value("input1[gid]");
+    if(func.is_in1_bool())
+    {
+        // Vector width suffix; empty for scalars.
+        std::string width;
+        if(vector_size<in1_type>::value != 1)
+        {
+            width = std::to_string(vector_size<in1_type>::value);
+        }
+        in1_value = "(input1[gid] != (int" + width + ")(0))";
+    }
+
+    // Second argument expression.
+    std::string in2_value("input2[gid]");
+    if(func.is_in2_bool())
+    {
+        std::string width;
+        if(vector_size<in2_type>::value != 1)
+        {
+            width = std::to_string(vector_size<in2_type>::value);
+        }
+        in2_value = "(input2[gid] != (int" + width + ")(0))";
+    }
+
+    // Call expression, converted to int when the result is bool.
+    std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ")";
+    if(func.is_out_bool())
+    {
+        std::string width;
+        if(vector_size<out_type>::value != 1)
+        {
+            width = std::to_string(vector_size<out_type>::value);
+        }
+        function_call = "convert_int" + width + "(" + function_call + ")";
+    }
+
+    std::string kernel_source;
+    kernel_source += "__kernel void " + func.get_kernel_name() + "(global " + type_name<in1_type>() + " *input1,\n";
+    kernel_source += "                                      global " + type_name<in2_type>() + " *input2,\n";
+    kernel_source += "                                      global " + type_name<out_type>() + " *output)\n";
+    kernel_source += "{\n";
+    kernel_source += "    size_t gid = get_global_id(0);\n";
+    kernel_source += "    output[gid] = " + function_call + ";\n";
+    kernel_source += "}\n";
+    return kernel_source;
+}
+#else
+// Builds the OpenCL C++ source of the test kernel for a binary function.
+//
+// The source starts with func.defs() and func.headers(), followed by the
+// opencl_memory and opencl_work_item includes. The kernel reads input1[gid]
+// and input2[gid], calls func.str() on them and stores the result in
+// output[gid]. Inputs flagged as bool by func are passed as a comparison
+// against a zero int (or intN) value; a bool result is wrapped in
+// convert_cast<int> (or convert_cast<intN>). Whenever any bool handling is
+// involved, <opencl_convert> is added to the headers unless already present.
+template <class func_type, class in1_type, class in2_type, class out_type>
+std::string generate_kernel_binary(func_type func)
+{
+    std::string headers = func.headers();
+
+    // First argument expression.
+    std::string in1_value("input1[gid]");
+    if(func.is_in1_bool())
+    {
+        // Vector width suffix; empty for scalars.
+        std::string width;
+        if(vector_size<in1_type>::value != 1)
+        {
+            width = std::to_string(vector_size<in1_type>::value);
+        }
+        in1_value = "(input1[gid] != (int" + width + ")(0))";
+    }
+
+    // Second argument expression.
+    std::string in2_value("input2[gid]");
+    if(func.is_in2_bool())
+    {
+        std::string width;
+        if(vector_size<in2_type>::value != 1)
+        {
+            width = std::to_string(vector_size<in2_type>::value);
+        }
+        in2_value = "(input2[gid] != (int" + width + ")(0))";
+    }
+
+    // Call expression, converted to int when the result is bool.
+    std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ")";
+    if(func.is_out_bool())
+    {
+        std::string width;
+        if(vector_size<out_type>::value != 1)
+        {
+            width = std::to_string(vector_size<out_type>::value);
+        }
+        function_call = "convert_cast<int" + width + ">(" + function_call + ")";
+    }
+
+    // Any bool handling above requires the conversion header.
+    const bool uses_bool = func.is_out_bool() || func.is_in1_bool() || func.is_in2_bool();
+    if(uses_bool && headers.find("#include <opencl_convert>") == std::string::npos)
+    {
+        headers += "#include <opencl_convert>\n";
+    }
+
+    std::string kernel_source = func.defs();
+    kernel_source += headers;
+    kernel_source += "#include <opencl_memory>\n";
+    kernel_source += "#include <opencl_work_item>\n";
+    kernel_source += "using namespace cl;\n";
+    kernel_source += "__kernel void " + func.get_kernel_name() + "(global_ptr<" + type_name<in1_type>() + "[]> input1,\n";
+    kernel_source += "                                      global_ptr<" + type_name<in2_type>() + "[]> input2,\n";
+    kernel_source += "                                      global_ptr<" + type_name<out_type>() + "[]> output)\n";
+    kernel_source += "{\n";
+    kernel_source += "    size_t gid = get_global_id(0);\n";
+    kernel_source += "    output[gid] = " + function_call + ";\n";
+    kernel_source += "}\n";
+    return kernel_source;
+}
+#endif
+
+// Checks device results against host-computed expectations.
+//
+// For every index of in1, the expected value op(in1[i], in2[i]) is compared
+// with out[i] through are_equal, using op.delta(...) as tolerance. The first
+// mismatch is reported with print_error_msg and ends the check with false;
+// true is returned when every element matches.
+template<class INPUT1, class INPUT2, class OUTPUT, class binary_op>
+bool verify_binary(const std::vector<INPUT1> &in1,
+                   const std::vector<INPUT2> &in2,
+                   const std::vector<OUTPUT> &out,
+                   binary_op op)
+{
+    size_t index = 0;
+    while(index < in1.size())
+    {
+        auto expected = op(in1[index], in2[index]);
+        const bool matches = are_equal(
+            expected, out[index], op.delta(in1[index], in2[index], expected), op
+        );
+        if(!matches)
+        {
+            print_error_msg(expected, out[index], index, op);
+            return false;
+        }
+        ++index;
+    }
+    return true;
+}
+
+// Runs one binary-function test case end to end.
+//
+// Steps, as implemented below: skip unsupported types, generate and build the
+// kernel source for op, generate the inputs, upload them, run the kernel over
+// count work-items, read the output back and check it with verify_binary.
+//
+// Parameters:
+//   device, context, queue - OpenCL objects used for the calls made here.
+//   count - element count passed to generate_input/generate_output and the
+//           global work size of the kernel launch.
+//   op    - functor describing the tested function; provides in1_type,
+//           in2_type and out_type plus the hooks called below.
+//
+// Returns CL_SUCCESS when the test is skipped because of unsupported types;
+// otherwise the value held by err when the end of the function is reached.
+// NOTE(review): the RETURN_ON_* macros are defined elsewhere; if they return
+// early on failure, as their names suggest, the clRelease* calls at the end
+// are skipped on those paths - confirm whether that is acceptable.
+template <class binary_op>
+int test_binary_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, binary_op op)
+{
+    // buffers[0] and buffers[1] hold the inputs, buffers[2] the output.
+    cl_mem buffers[3];
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+
+    typedef typename binary_op::in1_type INPUT1;
+    typedef typename binary_op::in2_type INPUT2;
+    typedef typename binary_op::out_type OUTPUT;
+
+    // Don't run test for unsupported types
+    if(!(type_supported<INPUT1>(device)
+         && type_supported<INPUT2>(device)
+         && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS;
+    }
+
+    // Kernel source and kernel name both come from op.
+    std::string code_str = generate_kernel_binary<binary_op, INPUT1, INPUT2, OUTPUT>(op);
+    std::string kernel_name = op.get_kernel_name();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    // In this configuration the function returns right after the build step;
+    // nothing below this preprocessor block is executed.
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+#endif
+
+    // Host-side data: special cases supplied by op are handed to
+    // prepare_special_cases and then to generate_input together with the
+    // value range reported by op.
+    std::vector<INPUT1> in1_spec_cases = op.in1_special_cases();
+    std::vector<INPUT2> in2_spec_cases = op.in2_special_cases();
+    prepare_special_cases(in1_spec_cases, in2_spec_cases);
+    std::vector<INPUT1> input1 = generate_input<INPUT1>(count, op.min1(), op.max1(), in1_spec_cases);
+    std::vector<INPUT2> input2 = generate_input<INPUT2>(count, op.min2(), op.max2(), in2_spec_cases);
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
+
+    // Device buffers sized after the host vectors.
+    buffers[0] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT1) * input1.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    buffers[1] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT2) * input2.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    buffers[2] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    // Upload both inputs; CL_TRUE requests blocking writes.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT1) * input1.size(),
+        static_cast<void *>(input1.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer")
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(INPUT2) * input2.size(),
+        static_cast<void *>(input2.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer")
+
+    // Error codes of the three calls are OR-ed together, so err is only
+    // meaningful as "zero or non-zero" after this group.
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    err |= clSetKernelArg(kernel, 2, sizeof(buffers[2]), &buffers[2]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    // One-dimensional launch with count work-items; the local size is left
+    // to the implementation (NULL).
+    work_size[0] = count;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read of the results into the host vector.
+    err = clEnqueueReadBuffer(
+        queue, buffers[2], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // Compare device output with the host reference computed by op.
+    if (!verify_binary(input1, input2, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "test_%s %s(%s, %s) failed", op.str().c_str(),
+            type_name<OUTPUT>().c_str(), type_name<INPUT1>().c_str(), type_name<INPUT2>().c_str()
+        );
+    }
+    log_info(
+        "test_%s %s(%s, %s) passed\n", op.str().c_str(),
+        type_name<OUTPUT>().c_str(), type_name<INPUT1>().c_str(), type_name<INPUT2>().c_str()
+    );
+
+    // Release every OpenCL object created above.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseMemObject(buffers[2]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_BINARY_HPP

diff --git a/test_conformance/clcpp/utils_test/compare.hpp b/test_conformance/clcpp/utils_test/compare.hpp
new file mode 100644
index 0000000..a22b88f
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/compare.hpp

@@ -0,0 +1,161 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_COMPARE_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_COMPARE_HPP
+
+#include <random>
+#include <limits>
+#include <type_traits>
+#include <algorithm>
+
+#include <cmath>
+
+#include "../common.hpp"
+
+// Checks if x is equal to y (integral vector overload).
+//
+// Selected when type is a vector type whose scalar type is integral. The
+// components x.s[i] and y.s[i] are compared one by one: when
+// op.is_out_bool() is true only their zero / non-zero state has to agree,
+// otherwise they must be equal. delta is not used.
+template<class type, class delta_type, class op_type>
+inline bool are_equal(const type& x,
+                      const type& y,
+                      const delta_type& delta,
+                      op_type op,
+                      typename std::enable_if<
+                        is_vector_type<type>::value
+                        && std::is_integral<typename scalar_type<type>::type>::value
+                      >::type* = 0)
+{
+    static_cast<void>(delta);
+    for(size_t lane = 0; lane < vector_size<type>::value; ++lane)
+    {
+        const bool lane_matches = op.is_out_bool()
+            ? ((x.s[lane] != 0) == (y.s[lane] != 0))
+            : (x.s[lane] == y.s[lane]);
+        if(!lane_matches)
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Checks if x is equal to y (integral scalar overload).
+//
+// Selected when type is a non-vector integral type. When op.is_out_bool() is
+// true the values stand for booleans, so only their zero / non-zero state has
+// to agree (1 and -1 both mean "true"); this matches the integral vector
+// overload. Otherwise the values must be equal. delta is not used.
+template<class type, class delta_type, class op_type>
+inline bool are_equal(const type& x,
+                      const type& y,
+                      const delta_type& delta,
+                      op_type op,
+                      typename std::enable_if<
+                        !is_vector_type<type>::value
+                        && std::is_integral<type>::value
+                      >::type* = 0)
+{
+    (void) delta;
+    if(op.is_out_bool())
+    {
+        // Return here: falling through to the exact comparison below would
+        // reject two different non-zero values that are both "true".
+        return (x != 0) == (y != 0);
+    }
+    return x == y;
+}
+
+// Checks if x is close enough to y (floating-point scalar overload).
+//
+// x is the expected value, y the result; x is converted to type1 (the type
+// of y) before being compared. Decision order:
+//   1. either value subnormal        -> equal (subnormals are not checked)
+//   2. both NaN -> equal; exactly one NaN -> not equal
+//   3. exact match (covers inf, -inf) -> equal
+//   4. absolute difference above max(epsilon of type2, |delta|) -> not equal
+//   5. if op.use_ulp(): equal unless |Ulp_Error(x, y)| exceeds op.ulp()
+//   6. otherwise -> equal
+template<class type, class type1, class type2, class op_type>
+inline bool are_equal(const type& x,
+                      const type1& y,
+                      const type2& delta,
+                      op_type op,
+                      typename std::enable_if<
+                        !is_vector_type<type>::value
+                        && std::is_floating_point<type>::value
+                      >::type* = 0)
+{
+    const type1 expected = static_cast<type1>(x);
+
+    // INFO:
+    // We don't care about subnormal values in OpenCL C++ tests
+    const bool any_subnormal =
+        std::fpclassify(expected) == FP_SUBNORMAL || std::fpclassify(y) == FP_SUBNORMAL;
+    if(any_subnormal)
+    {
+        return true;
+    }
+
+    // NaN only matches NaN.
+    const bool expected_is_nan = (std::isnan)(expected);
+    const bool result_is_nan = (std::isnan)(y);
+    if(expected_is_nan || result_is_nan)
+    {
+        return expected_is_nan && result_is_nan;
+    }
+
+    // Perfect match, it also covers inf, -inf
+    if(expected == y)
+    {
+        return true;
+    }
+
+    // Values differ: they have to be close ...
+    const type2 tolerance = (std::max)(std::numeric_limits<type2>::epsilon(), std::abs(delta));
+    if(std::abs(expected - y) > tolerance)
+    {
+        return false;
+    }
+
+    // ... and, when requested, within the ULP limit.
+    if(op.use_ulp())
+    {
+        return !(std::abs(Ulp_Error(x, y)) > op.ulp());
+    }
+    return true;
+}
+
+// Checks if x is close enough to y (floating-point vector overload).
+//
+// x is the expected value, y the result. Selected when type is a vector type
+// whose scalar type is floating point. Every component triple
+// (x.s[i], y.s[i], delta.s[i]) is handed to are_equal; the vectors are equal
+// only if all components are.
+template<class type, class type1, class type2, class op_type>
+inline bool are_equal(const type& x,
+                      const type1& y,
+                      const type2& delta,
+                      op_type op,
+                      typename std::enable_if<
+                        is_vector_type<type>::value
+                        && std::is_floating_point<typename scalar_type<type>::type>::value
+                      >::type* = 0)
+{
+    size_t lane = 0;
+    while(lane < vector_size<type>::value)
+    {
+        const bool lane_matches = are_equal(x.s[lane], y.s[lane], delta.s[lane], op);
+        if(!lane_matches)
+        {
+            return false;
+        }
+        ++lane;
+    }
+    return true;
+}
+
+// Logs a verification failure through log_error.
+//
+// The message names the tested function (op.str()), its signature
+// (op.decl_str()), the index i of the failing element and the expected and
+// actual values as rendered by format_value.
+template<class type, class type1, class func>
+inline void print_error_msg(const type& expected, const type1& result, size_t i, func op)
+{
+    log_error(
+        "ERROR: test_%s %s failed. Error at %lu: Expected: %s, got: %s\n",
+        op.str().c_str(),
+        op.decl_str().c_str(),
+        // %lu expects unsigned long; size_t is a different type on some
+        // targets (e.g. 64-bit Windows), so convert explicitly.
+        static_cast<unsigned long>(i),
+        format_value(expected).c_str(),
+        format_value(result).c_str()
+    );
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_COMPARE_HPP

diff --git a/test_conformance/clcpp/utils_test/detail/base_func_type.hpp b/test_conformance/clcpp/utils_test/detail/base_func_type.hpp
new file mode 100644
index 0000000..92e375d
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/detail/base_func_type.hpp

@@ -0,0 +1,112 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_BASE_FUNC_TYPE_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_BASE_FUNC_TYPE_HPP
+
+#include <random>
+#include <limits>
+#include <type_traits>
+#include <algorithm>
+
+#include <cmath>
+
+#include "../../common.hpp"
+
+#include "vec_helpers.hpp"
+
+namespace detail
+{
+
+// Base of the function descriptors used by the test utilities. It supplies
+// default answers for the kernel name, extra source text, and the
+// tolerance settings consulted during verification.
+template<class OUT1>
+struct base_func_type
+{
+    virtual ~base_func_type() {}
+
+    // Name of the function under test.
+    virtual std::string str() = 0;
+
+    // Name of the test kernel for that function: "test_" followed by
+    // str() after it has been passed through replace_all(.., "::", "_").
+    virtual std::string get_kernel_name()
+    {
+        std::string flattened = this->str();
+        replace_all(flattened, "::", "_");
+        return "test_" + flattened;
+    }
+
+    // Defines and pragmas the kernel needs; none by default.
+    virtual std::string defs()
+    {
+        return std::string();
+    }
+
+    // OpenCL C++ headers the kernel needs; none by default.
+    virtual std::string headers()
+    {
+        return std::string();
+    }
+
+    // True if the OUT1 type should be treated as bool in the OpenCL
+    // kernel; false (the default) otherwise.
+    bool is_out_bool()
+    {
+        return false;
+    }
+
+    // Maximum ULP error: an error should be raised when
+    // Ulp_Error(result, expected) > ulp().
+    float ulp()
+    {
+        return 0.0f;
+    }
+
+    // Whether the ULP error is checked when verifying that the result is
+    // correct.
+    //
+    // (This affects how the are_equal() function works; it may have no
+    // effect if the verify() method of a derived class does not use
+    // are_equal().)
+    //
+    // Only for FP numbers/vectors.
+    bool use_ulp()
+    {
+        return true;
+    }
+
+    // Maximum absolute error: an error should be raised when
+    // abs(result - expected) > delta(.., expected).
+    //
+    // Default value: 0.001 * expected, computed per component in cl_double.
+    //
+    // (This affects how the are_equal() function works; it may have no
+    // effect if the verify() method of a derived class does not use
+    // are_equal().)
+    //
+    // Only for FP numbers/vectors.
+    template<class T>
+    typename make_vector_type<cl_double, vector_size<T>::value>::type
+    delta(const T& expected)
+    {
+        using delta_vector_type =
+            typename make_vector_type<cl_double, vector_size<T>::value>::type;
+        const auto relative_tolerance = detail::make_value<delta_vector_type>(1e-3);
+        return detail::multiply<delta_vector_type>(relative_tolerance, expected);
+    }
+};
+
+} // detail namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_BASE_FUNC_TYPE_HPP

diff --git a/test_conformance/clcpp/utils_test/detail/vec_helpers.hpp b/test_conformance/clcpp/utils_test/detail/vec_helpers.hpp
new file mode 100644
index 0000000..05df42a
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/detail/vec_helpers.hpp

@@ -0,0 +1,104 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_VEC_HELPERS_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_VEC_HELPERS_HPP
+
+#include <random>
+#include <limits>
+#include <type_traits>
+#include <algorithm>
+
+#include <cmath>
+
+#include "../../common.hpp"
+
+namespace detail
+{
+
+// Vector overload: returns a T with every component set to x.
+template<class T>
+T make_value(typename scalar_type<T>::type x, typename std::enable_if<is_vector_type<T>::value>::type* = 0)
+{
+    T filled;
+    const size_t lanes = vector_size<T>::value;
+    for(size_t lane = 0; lane != lanes; ++lane)
+    {
+        filled.s[lane] = x;
+    }
+    return filled;
+}
+
+// Scalar overload: the value is returned as-is.
+template<class T>
+T make_value(T x, typename std::enable_if<!is_vector_type<T>::value>::type* = 0)
+{
+    return x;
+}
+
+// Vector overload: component-wise product of x and y. Both operands are
+// converted to the scalar type of result_type before multiplying.
+template<class result_type, class IN1, class IN2>
+result_type multiply(const IN1& x, const IN2& y, typename std::enable_if<is_vector_type<result_type>::value>::type* = 0)
+{
+    static_assert(
+        (vector_size<IN1>::value == vector_size<IN2>::value)
+            && (vector_size<IN2>::value == vector_size<result_type>::value),
+        "Vector sizes must be the same."
+    );
+    using lane_type = typename scalar_type<result_type>::type;
+    result_type product;
+    for(size_t lane = 0; lane != vector_size<result_type>::value; ++lane)
+    {
+        const lane_type lhs = static_cast<lane_type>(x.s[lane]);
+        const lane_type rhs = static_cast<lane_type>(y.s[lane]);
+        product.s[lane] = lhs * rhs;
+    }
+    return product;
+}
+
+// Scalar overload: product of x and y, each converted to result_type first.
+template<class result_type, class IN1, class IN2>
+result_type multiply(const IN1& x, const IN2& y, typename std::enable_if<!is_vector_type<result_type>::value>::type* = 0)
+{
+    static_assert(
+        !is_vector_type<IN1>::value && !is_vector_type<IN2>::value,
+        "IN1 and IN2 must be scalar types"
+    );
+    const result_type lhs = static_cast<result_type>(x);
+    const result_type rhs = static_cast<result_type>(y);
+    return lhs * rhs;
+}
+
+// Value of type T filled with std::numeric_limits<scalar>::min().
+// Note that for floating-point scalars min() is the smallest positive
+// normalized value, not the most negative one.
+template<class T>
+T get_min()
+{
+    using lane_type = typename scalar_type<T>::type;
+    const lane_type limit = (std::numeric_limits<lane_type>::min)();
+    return make_value<T>(limit);
+}
+
+// Value of type T filled with std::numeric_limits<scalar>::max().
+template<class T>
+T get_max()
+{
+    using lane_type = typename scalar_type<T>::type;
+    const lane_type limit = (std::numeric_limits<lane_type>::max)();
+    return make_value<T>(limit);
+}
+
+// Value of type T filled with std::numeric_limits<scalar>::max() / x.
+template<class T>
+T get_part_max(typename scalar_type<T>::type x)
+{
+    using lane_type = typename scalar_type<T>::type;
+    const lane_type fraction = (std::numeric_limits<lane_type>::max)() / x;
+    return make_value<T>(fraction);
+}
+
+// Value of type T filled with the caller-supplied limit x.
+template<class T>
+T def_limit(typename scalar_type<T>::type x)
+{
+    const typename scalar_type<T>::type limit = x;
+    return make_value<T>(limit);
+}
+
+} // detail namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_VEC_HELPERS_HPP

diff --git a/test_conformance/clcpp/utils_test/generate_inputs.hpp b/test_conformance/clcpp/utils_test/generate_inputs.hpp
new file mode 100644
index 0000000..bb0d750
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/generate_inputs.hpp

@@ -0,0 +1,331 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_GENERATE_INPUTS_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_GENERATE_INPUTS_HPP
+
+#include <random>
+#include <limits>
+#include <type_traits>
+#include <algorithm>
+
+#include <cmath>
+
+#include "../common.hpp"
+
+// Random input generator for vector types with an integral scalar type
+// other than cl_uchar / cl_char.
+//
+// Produces `count` vectors; component j of each vector is drawn uniformly
+// from [min.s[j], max.s[j]]. The special cases are then inserted at the
+// front and the vector is resized back to `count` elements.
+template <class type>
+std::vector<type> generate_input(size_t count,
+                                 const type& min,
+                                 const type& max,
+                                 const std::vector<type> special_cases,
+                                 typename std::enable_if<
+                                    is_vector_type<type>::value
+                                    && std::is_integral<typename scalar_type<type>::type>::value
+                                    // cl_uchar and cl_char have their own overload that samples
+                                    // through cl_int: std::uniform_int_distribution<> does not work
+                                    // for them in VS2015, which does not treat them as int types.
+                                    && !(std::is_same<typename scalar_type<type>::type, cl_uchar>::value
+                                         || std::is_same<typename scalar_type<type>::type, cl_char>::value)
+                                 >::type* = 0)
+{
+    using lane_type = typename scalar_type<type>::type;
+    using lane_distribution = std::uniform_int_distribution<lane_type>;
+    const size_t lanes = vector_size<type>::value;
+
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+
+    // One distribution per component, each with its own bounds.
+    std::vector<lane_distribution> per_lane;
+    per_lane.reserve(lanes);
+    for(size_t lane = 0; lane < lanes; lane++)
+    {
+        per_lane.emplace_back(min.s[lane], max.s[lane]);
+    }
+
+    std::vector<type> generated(count);
+    for(auto& element : generated)
+    {
+        for(size_t lane = 0; lane < lanes; lane++)
+        {
+            element.s[lane] = per_lane[lane](engine);
+        }
+    }
+
+    // Special cases go first; resizing restores the requested length.
+    generated.insert(generated.begin(), special_cases.begin(), special_cases.end());
+    generated.resize(count);
+    return generated;
+}
+
+// Random input generator for vector types whose scalar type is cl_uchar
+// or cl_char.
+//
+// Values are sampled as cl_int from [min.s[j], max.s[j]] and converted back
+// to the scalar type. The special cases are then inserted at the front and
+// the vector is resized back to `count` elements.
+template <class type>
+std::vector<type> generate_input(size_t count,
+                                 const type& min,
+                                 const type& max,
+                                 const std::vector<type> special_cases,
+                                 typename std::enable_if<
+                                    is_vector_type<type>::value
+                                    && std::is_integral<typename scalar_type<type>::type>::value
+                                    // Only cl_uchar and cl_char land here and are sampled via cl_int:
+                                    // std::uniform_int_distribution<> does not work for them in
+                                    // VS2015, which does not treat them as int types.
+                                    && (std::is_same<typename scalar_type<type>::type, cl_uchar>::value
+                                        || std::is_same<typename scalar_type<type>::type, cl_char>::value)
+                                 >::type* = 0)
+{
+    using lane_type = typename scalar_type<type>::type;
+    using wide_distribution = std::uniform_int_distribution<cl_int>;
+    const size_t lanes = vector_size<type>::value;
+
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+
+    // One cl_int distribution per component, bounded by the widened limits.
+    std::vector<wide_distribution> per_lane;
+    per_lane.reserve(lanes);
+    for(size_t lane = 0; lane < lanes; lane++)
+    {
+        const cl_int low = static_cast<cl_int>(min.s[lane]);
+        const cl_int high = static_cast<cl_int>(max.s[lane]);
+        per_lane.emplace_back(low, high);
+    }
+
+    std::vector<type> generated(count);
+    for(auto& element : generated)
+    {
+        for(size_t lane = 0; lane < lanes; lane++)
+        {
+            const cl_int sample = per_lane[lane](engine);
+            element.s[lane] = static_cast<lane_type>(sample);
+        }
+    }
+
+    // Special cases go first; resizing restores the requested length.
+    generated.insert(generated.begin(), special_cases.begin(), special_cases.end());
+    generated.resize(count);
+    return generated;
+}
+
+
+// Random input generator for scalar integral types other than cl_uchar /
+// cl_char.
+//
+// Produces `count` values drawn uniformly from [min, max]. The special
+// cases are then inserted at the front and the vector is resized back to
+// `count` elements.
+template <class type>
+std::vector<type> generate_input(size_t count,
+                                 const type& min,
+                                 const type& max,
+                                 const std::vector<type> special_cases,
+                                 typename std::enable_if<
+                                    !is_vector_type<type>::value
+                                    && std::is_integral<type>::value
+                                    // cl_uchar and cl_char have their own overload that samples
+                                    // through cl_int: std::uniform_int_distribution<> does not work
+                                    // for them in VS2015, which does not treat them as int types.
+                                    && !(std::is_same<type, cl_uchar>::value || std::is_same<type, cl_char>::value)
+                                 >::type* = 0)
+{
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+    std::uniform_int_distribution<type> distribution(min, max);
+
+    std::vector<type> generated(count);
+    std::generate(
+        generated.begin(), generated.end(),
+        [&distribution, &engine]() { return distribution(engine); }
+    );
+
+    // Special cases go first; resizing restores the requested length.
+    generated.insert(generated.begin(), special_cases.begin(), special_cases.end());
+    generated.resize(count);
+    return generated;
+}
+
+// Random input generator for the scalar types cl_uchar and cl_char.
+//
+// Values are sampled as cl_int from [min, max] and converted back to
+// `type`. The special cases are then inserted at the front and the vector
+// is resized back to `count` elements.
+template <class type>
+std::vector<type> generate_input(size_t count,
+                                 const type& min,
+                                 const type& max,
+                                 const std::vector<type> special_cases,
+                                 typename std::enable_if<
+                                    !is_vector_type<type>::value
+                                    && std::is_integral<type>::value
+                                    // Only cl_uchar and cl_char land here and are sampled via cl_int:
+                                    // std::uniform_int_distribution<> does not work for them in
+                                    // VS2015, which does not treat them as int types.
+                                    && (std::is_same<type, cl_uchar>::value || std::is_same<type, cl_char>::value)
+                                 >::type* = 0)
+{
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+    const cl_int low = static_cast<cl_int>(min);
+    const cl_int high = static_cast<cl_int>(max);
+    std::uniform_int_distribution<cl_int> distribution(low, high);
+
+    std::vector<type> generated(count);
+    std::generate(
+        generated.begin(), generated.end(),
+        [&distribution, &engine]() { return static_cast<type>(distribution(engine)); }
+    );
+
+    // Special cases go first; resizing restores the requested length.
+    generated.insert(generated.begin(), special_cases.begin(), special_cases.end());
+    generated.resize(count);
+    return generated;
+}
+
+// Random input generator for vector types with a floating-point scalar
+// type.
+//
+// Component j of each vector is drawn from a uniform real distribution
+// built from min.s[j] and max.s[j]; subnormal samples are rejected and
+// redrawn. An error is logged (generation still proceeds) if a bound is
+// itself subnormal. The special cases are then inserted at the front and
+// the vector is resized back to `count` elements.
+template <class type>
+std::vector<type> generate_input(size_t count,
+                                 const type& min,
+                                 const type& max,
+                                 const std::vector<type> special_cases,
+                                 typename std::enable_if<
+                                    is_vector_type<type>::value
+                                    && std::is_floating_point<typename scalar_type<type>::type>::value
+                                 >::type* = 0)
+{
+    using lane_type = typename scalar_type<type>::type;
+    using lane_distribution = std::uniform_real_distribution<lane_type>;
+    const size_t lanes = vector_size<type>::value;
+
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+
+    std::vector<lane_distribution> per_lane;
+    per_lane.reserve(lanes);
+    for(size_t lane = 0; lane < lanes; lane++)
+    {
+        // Fatal error
+        const bool max_is_subnormal = std::fpclassify(max.s[lane]) == FP_SUBNORMAL;
+        if(max_is_subnormal || std::fpclassify(min.s[lane]) == FP_SUBNORMAL)
+        {
+            log_error("ERROR: min and max value for input generation CAN NOT BE subnormal\n");
+        }
+        per_lane.emplace_back(min.s[lane], max.s[lane]);
+    }
+
+    std::vector<type> generated(count);
+    for(auto& element : generated)
+    {
+        for(size_t lane = 0; lane < lanes; lane++)
+        {
+            // Redraw until the sample is not subnormal.
+            lane_type sample;
+            do
+            {
+                sample = per_lane[lane](engine);
+            } while(std::fpclassify(sample) == FP_SUBNORMAL);
+            element.s[lane] = sample;
+        }
+    }
+
+    // Special cases go first; resizing restores the requested length.
+    generated.insert(generated.begin(), special_cases.begin(), special_cases.end());
+    generated.resize(count);
+    return generated;
+}
+
+// Random input generator for scalar floating-point types.
+//
+// Values are drawn from a uniform real distribution built from min and
+// max; subnormal samples are rejected and redrawn. An error is logged
+// (generation still proceeds) if a bound is itself subnormal. The special
+// cases are then inserted at the front and the vector is resized back to
+// `count` elements.
+template <class type>
+std::vector<type> generate_input(size_t count,
+                                 const type& min,
+                                 const type& max,
+                                 const std::vector<type> special_cases,
+                                 typename std::enable_if<
+                                    !is_vector_type<type>::value
+                                    && std::is_floating_point<type>::value
+                                 >::type* = 0)
+{
+    // Fatal error
+    const bool max_is_subnormal = std::fpclassify(max) == FP_SUBNORMAL;
+    if(max_is_subnormal || std::fpclassify(min) == FP_SUBNORMAL)
+    {
+        log_error("ERROR: min and max value for input generation CAN NOT BE subnormal\n");
+    }
+
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+    std::uniform_real_distribution<type> distribution(min, max);
+
+    std::vector<type> generated(count);
+    std::generate(
+        generated.begin(), generated.end(),
+        [&distribution, &engine]() {
+            // Redraw until the sample is not subnormal.
+            type sample;
+            do
+            {
+                sample = distribution(engine);
+            } while(std::fpclassify(sample) == FP_SUBNORMAL);
+            return sample;
+        }
+    );
+
+    // Special cases go first; resizing restores the requested length.
+    generated.insert(generated.begin(), special_cases.begin(), special_cases.end());
+    generated.resize(count);
+    return generated;
+}
+
+// Output buffer for vector types: `count` copies of a vector whose
+// components all equal svalue (0 by default).
+template <class type>
+std::vector<type> generate_output(size_t count,
+                                  typename scalar_type<type>::type svalue = typename scalar_type<type>::type(0),
+                                  typename std::enable_if<is_vector_type<type>::value>::type* = 0)
+{
+    const size_t lanes = vector_size<type>::value;
+    type fill_value;
+    for(size_t lane = 0; lane != lanes; ++lane)
+    {
+        fill_value.s[lane] = svalue;
+    }
+    std::vector<type> output(count, fill_value);
+    return output;
+}
+
+// Output buffer for scalar types: `count` copies of svalue (0 by default).
+template <class type>
+std::vector<type> generate_output(size_t count,
+                                  type svalue = type(0),
+                                  typename std::enable_if<!is_vector_type<type>::value>::type* = 0)
+{
+    std::vector<type> output(count, svalue);
+    return output;
+}
+
+// Expands two special-case lists into their Cartesian product, in place.
+//
+// On return both vectors have in1.size() * in2.size() elements (original
+// sizes), and the pair (in1_spec_cases[n], in2_spec_cases[n]) enumerates
+// every combination: the first list varies slowest, the second fastest.
+//
+// If either list is empty, both are left untouched.
+template<class T, class K>
+void prepare_special_cases(std::vector<T>& in1_spec_cases, std::vector<K>& in2_spec_cases)
+{
+    if(in1_spec_cases.empty() || in2_spec_cases.empty())
+    {
+        return;
+    }
+
+    // The sizes are loop invariants; read them once.
+    const size_t size1 = in1_spec_cases.size();
+    const size_t size2 = in2_spec_cases.size();
+
+    std::vector<T> new_in1;
+    std::vector<K> new_in2;
+    new_in1.reserve(size1 * size2);
+    new_in2.reserve(size1 * size2);
+    for(size_t i = 0; i < size1; i++)
+    {
+        for(size_t j = 0; j < size2; j++)
+        {
+            // Appending in (i, j) order puts this pair at index i * size2 + j.
+            new_in1.push_back(in1_spec_cases[i]);
+            new_in2.push_back(in2_spec_cases[j]);
+        }
+    }
+
+    // Swap the expanded lists in instead of deep-copying them.
+    in1_spec_cases.swap(new_in1);
+    in2_spec_cases.swap(new_in2);
+}
+
+// Expands three special-case lists into their Cartesian product, in place.
+//
+// On return all three vectors have in1.size() * in2.size() * in3.size()
+// elements (original sizes), and the triple at index n enumerates every
+// combination: the first list varies slowest, the third fastest.
+//
+// If one of the lists is empty, the other two are expanded with the
+// two-list overload (checked in the order in3, in2, in1) and the empty
+// list is left untouched.
+template<class T, class K, class M>
+void prepare_special_cases(std::vector<T>& in1_spec_cases,
+                           std::vector<K>& in2_spec_cases,
+                           std::vector<M>& in3_spec_cases)
+{
+    if(in3_spec_cases.empty())
+    {
+        prepare_special_cases(in1_spec_cases, in2_spec_cases);
+        return;
+    }
+    if(in2_spec_cases.empty())
+    {
+        prepare_special_cases(in1_spec_cases, in3_spec_cases);
+        return;
+    }
+    if(in1_spec_cases.empty())
+    {
+        prepare_special_cases(in2_spec_cases, in3_spec_cases);
+        return;
+    }
+
+    // The sizes are loop invariants; read them once.
+    const size_t size1 = in1_spec_cases.size();
+    const size_t size2 = in2_spec_cases.size();
+    const size_t size3 = in3_spec_cases.size();
+    const size_t new_size = size1 * size2 * size3;
+
+    std::vector<T> new_in1;
+    std::vector<K> new_in2;
+    std::vector<M> new_in3;
+    new_in1.reserve(new_size);
+    new_in2.reserve(new_size);
+    new_in3.reserve(new_size);
+    for(size_t i = 0; i < size1; i++)
+    {
+        for(size_t j = 0; j < size2; j++)
+        {
+            for(size_t k = 0; k < size3; k++)
+            {
+                // Appending in (i, j, k) order puts this triple at index
+                // (i * size2 * size3) + (j * size3) + k.
+                new_in1.push_back(in1_spec_cases[i]);
+                new_in2.push_back(in2_spec_cases[j]);
+                new_in3.push_back(in3_spec_cases[k]);
+            }
+        }
+    }
+
+    // Swap the expanded lists in instead of deep-copying them.
+    in1_spec_cases.swap(new_in1);
+    in2_spec_cases.swap(new_in2);
+    in3_spec_cases.swap(new_in3);
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_GENERATE_INPUTS_HPP

diff --git a/test_conformance/clcpp/utils_test/ternary.hpp b/test_conformance/clcpp/utils_test/ternary.hpp
new file mode 100644
index 0000000..342681e
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/ternary.hpp

@@ -0,0 +1,368 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_TERNARY_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_TERNARY_HPP
+
+#include <type_traits>
+#include <algorithm>
+#include <string>
+#include <cmath>
+
+#include "../common.hpp"
+
+#include "detail/base_func_type.hpp"
+#include "generate_inputs.hpp"
+#include "compare.hpp"
+
+// Base descriptor for functions of three arguments: OUT1 f(IN1, IN2, IN3).
+//
+// It provides the defaults read by the ternary test utilities: argument
+// types, input ranges, special-case inputs and the comparison tolerance.
+template<class IN1, class IN2, class IN3, class OUT1>
+struct ternary_func : public detail::base_func_type<OUT1>
+{
+    using in1_type = IN1;
+    using in2_type = IN2;
+    using in3_type = IN3;
+    using out_type = OUT1;
+
+    virtual ~ternary_func() {}
+    virtual std::string str() = 0;
+
+    // Function type as text, in the form "OUT1(IN1, IN2, IN3)" built from
+    // type_name<>() of each type.
+    std::string decl_str()
+    {
+        std::string signature = type_name<OUT1>();
+        signature += "(";
+        signature += type_name<IN1>();
+        signature += ", ";
+        signature += type_name<IN2>();
+        signature += ", ";
+        signature += type_name<IN3>();
+        signature += ")";
+        return signature;
+    }
+
+    // Whether the first argument should be treated as bool in the kernel.
+    bool is_in1_bool()
+    {
+        return false;
+    }
+
+    // Whether the second argument should be treated as bool in the kernel.
+    bool is_in2_bool()
+    {
+        return false;
+    }
+
+    // Whether the third argument should be treated as bool in the kernel.
+    bool is_in3_bool()
+    {
+        return false;
+    }
+
+    // Lower bound for generated values of the first argument.
+    IN1 min1()
+    {
+        return detail::get_min<IN1>();
+    }
+
+    // Upper bound for generated values of the first argument.
+    IN1 max1()
+    {
+        return detail::get_max<IN1>();
+    }
+
+    // Lower bound for generated values of the second argument.
+    IN2 min2()
+    {
+        return detail::get_min<IN2>();
+    }
+
+    // Upper bound for generated values of the second argument.
+    IN2 max2()
+    {
+        return detail::get_max<IN2>();
+    }
+
+    // Lower bound for generated values of the third argument.
+    IN3 min3()
+    {
+        return detail::get_min<IN3>();
+    }
+
+    // Upper bound for generated values of the third argument.
+    IN3 max3()
+    {
+        return detail::get_max<IN3>();
+    }
+
+    // Special-case values for the first argument; none by default.
+    std::vector<IN1> in1_special_cases()
+    {
+        return std::vector<IN1>();
+    }
+
+    // Special-case values for the second argument; none by default.
+    std::vector<IN2> in2_special_cases()
+    {
+        return std::vector<IN2>();
+    }
+
+    // Special-case values for the third argument; none by default.
+    std::vector<IN3> in3_special_cases()
+    {
+        return std::vector<IN3>();
+    }
+
+    // Tolerance for a given set of inputs and expected value. The default
+    // ignores the inputs and returns 0.001 * expected, computed per
+    // component in cl_double.
+    template<class T>
+    typename make_vector_type<cl_double, vector_size<T>::value>::type
+    delta(const IN1& in1, const IN2& in2, const IN3& in3, const T& expected)
+    {
+        using delta_vector_type =
+            typename make_vector_type<cl_double, vector_size<T>::value>::type;
+        // The inputs are unused here; silence unused-parameter warnings.
+        (void) in1;
+        (void) in2;
+        (void) in3;
+        const auto relative_tolerance = detail::make_value<delta_vector_type>(1e-3);
+        return detail::multiply<delta_vector_type>(relative_tolerance, expected);
+    }
+};
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the OpenCL C source of the test kernel for a ternary function.
+//
+// The kernel reads input1/input2/input3 at the work-item's global id,
+// calls func.str() on them and stores the result in output. Arguments
+// flagged as bool are passed as "(inputN[gid] != (intK)(0))"; a bool
+// result is wrapped in convert_intK(...), where K is the vector size
+// (omitted for size 1).
+template <class func_type, class in1_type, class in2_type, class in3_type, class out_type>
+std::string generate_kernel_ternary(func_type func)
+{
+    // Vector-size suffix of a type name: empty for size 1.
+    auto width_suffix = [](size_t width) -> std::string {
+        return width == 1 ? std::string() : std::to_string(width);
+    };
+    // Expression that turns an int-typed input into a boolean operand.
+    auto bool_operand = [&width_suffix](const char * buffer, size_t width) -> std::string {
+        return std::string("(") + buffer + "[gid] != (int" + width_suffix(width) + ")(0))";
+    };
+
+    const std::string in1_value = func.is_in1_bool()
+        ? bool_operand("input1", vector_size<in1_type>::value)
+        : std::string("input1[gid]");
+    const std::string in2_value = func.is_in2_bool()
+        ? bool_operand("input2", vector_size<in2_type>::value)
+        : std::string("input2[gid]");
+    const std::string in3_value = func.is_in3_bool()
+        ? bool_operand("input3", vector_size<in3_type>::value)
+        : std::string("input3[gid]");
+
+    std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + ")";
+    if(func.is_out_bool())
+    {
+        function_call = "convert_int" + width_suffix(vector_size<out_type>::value) + "(" + function_call + ")";
+    }
+
+    std::string kernel;
+    kernel += "__kernel void " + func.get_kernel_name() + "(global " + type_name<in1_type>() + " *input1,\n";
+    kernel += "                                      global " + type_name<in2_type>() + " *input2,\n";
+    kernel += "                                      global " + type_name<in3_type>() + " *input3,\n";
+    kernel += "                                      global " + type_name<out_type>() + " *output)\n";
+    kernel += "{\n";
+    kernel += "    size_t gid = get_global_id(0);\n";
+    kernel += "    output[gid] = " + function_call + ";\n";
+    kernel += "}\n";
+    return kernel;
+}
+#else
+// Builds the OpenCL C++ source of the test kernel for a ternary function.
+//
+// The source consists of func.defs(), func.headers(), the memory and
+// work-item headers, and a kernel that reads input1/input2/input3 at the
+// work-item's global id, calls func.str() on them and stores the result in
+// output. Arguments flagged as bool are passed as
+// "(inputN[gid] != (intK)(0))"; a bool result is wrapped in
+// convert_cast<intK>(...), where K is the vector size (omitted for size 1).
+// <opencl_convert> is appended to the headers when any bool handling is
+// requested and the headers do not already include it.
+template <class func_type, class in1_type, class in2_type, class in3_type, class out_type>
+std::string generate_kernel_ternary(func_type func)
+{
+    // Vector-size suffix of a type name: empty for size 1.
+    auto width_suffix = [](size_t width) -> std::string {
+        return width == 1 ? std::string() : std::to_string(width);
+    };
+    // Expression that turns an int-typed input into a boolean operand.
+    auto bool_operand = [&width_suffix](const char * buffer, size_t width) -> std::string {
+        return std::string("(") + buffer + "[gid] != (int" + width_suffix(width) + ")(0))";
+    };
+
+    std::string headers = func.headers();
+
+    const std::string in1_value = func.is_in1_bool()
+        ? bool_operand("input1", vector_size<in1_type>::value)
+        : std::string("input1[gid]");
+    const std::string in2_value = func.is_in2_bool()
+        ? bool_operand("input2", vector_size<in2_type>::value)
+        : std::string("input2[gid]");
+    const std::string in3_value = func.is_in3_bool()
+        ? bool_operand("input3", vector_size<in3_type>::value)
+        : std::string("input3[gid]");
+
+    std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + ")";
+    if(func.is_out_bool())
+    {
+        function_call = "convert_cast<int" + width_suffix(vector_size<out_type>::value) + ">(" + function_call + ")";
+    }
+
+    const bool uses_bool = func.is_out_bool() || func.is_in1_bool() || func.is_in2_bool() || func.is_in3_bool();
+    if(uses_bool && headers.find("#include <opencl_convert>") == std::string::npos)
+    {
+        headers += "#include <opencl_convert>\n";
+    }
+
+    std::string kernel = func.defs();
+    kernel += headers;
+    kernel += "#include <opencl_memory>\n";
+    kernel += "#include <opencl_work_item>\n";
+    kernel += "using namespace cl;\n";
+    kernel += "__kernel void " + func.get_kernel_name() + "(global_ptr<" + type_name<in1_type>() + "[]> input1,\n";
+    kernel += "                                      global_ptr<" + type_name<in2_type>() + "[]> input2,\n";
+    kernel += "                                      global_ptr<" + type_name<in3_type>() + "[]> input3,\n";
+    kernel += "                                      global_ptr<" + type_name<out_type>() + "[]> output)\n";
+    kernel += "{\n";
+    kernel += "    size_t gid = get_global_id(0);\n";
+    kernel += "    output[gid] = " + function_call + ";\n";
+    kernel += "}\n";
+    return kernel;
+}
+#endif
+
+// Checks the device results of a ternary function against the host
+// reference.
+//
+// For every index of in1, op(in1[i], in2[i], in3[i]) is the expected value
+// and out[i] the result; they are compared with are_equal() using the
+// tolerance returned by op.delta(). The first mismatch is reported through
+// print_error_msg() and ends the check with false. Returns true when no
+// mismatch is found.
+template<class INPUT1, class INPUT2, class INPUT3, class OUTPUT, class ternary_op>
+bool verify_ternary(const std::vector<INPUT1> &in1,
+                    const std::vector<INPUT2> &in2,
+                    const std::vector<INPUT3> &in3,
+                    const std::vector<OUTPUT> &out,
+                    ternary_op op)
+{
+    const size_t total = in1.size();
+    for(size_t idx = 0; idx != total; ++idx)
+    {
+        const INPUT1 &arg1 = in1[idx];
+        const INPUT2 &arg2 = in2[idx];
+        const INPUT3 &arg3 = in3[idx];
+
+        auto expected = op(arg1, arg2, arg3);
+        auto tolerance = op.delta(arg1, arg2, arg3, expected);
+        if(are_equal(expected, out[idx], tolerance, op))
+        {
+            continue;
+        }
+        print_error_msg(expected, out[idx], idx, op);
+        return false;
+    }
+    return true;
+}
+
+// Runs one ternary-function test on the given device.
+//
+// Steps: generate and build the kernel for `op`, generate `count` inputs
+// per argument (special cases first), upload them, run the kernel with one
+// work-item per element, read the output back and check it with
+// verify_ternary().
+//
+// Returns CL_SUCCESS without running anything when one of the argument or
+// result types is not supported by the device; otherwise returns the last
+// error code stored in `err`.
+//
+// NOTE(review): the RETURN_ON_* macros presumably return from this function
+// on failure; if so, the buffers, kernel and program created before the
+// failing call are not released on those paths (including the verification
+// failure and the ONLY_SPIRV_COMPILATION early return) - confirm against
+// the macro definitions.
+template <class ternary_op>
+int test_ternary_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, ternary_op op)
+{
+    // buffers[0..2] hold the three inputs, buffers[3] the output.
+    cl_mem buffers[4];
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+
+    typedef typename ternary_op::in1_type INPUT1;
+    typedef typename ternary_op::in2_type INPUT2;
+    typedef typename ternary_op::in3_type INPUT3;
+    typedef typename ternary_op::out_type OUTPUT;
+
+    // Don't run test for unsupported types
+    if(!(type_supported<INPUT1>(device)
+         && type_supported<INPUT2>(device)
+         && type_supported<INPUT3>(device)
+         && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS;
+    }
+
+    std::string code_str = generate_kernel_ternary<ternary_op, INPUT1, INPUT2, INPUT3, OUTPUT>(op);
+    std::string kernel_name = op.get_kernel_name();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    // In this configuration the function stops right after kernel creation.
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+#endif
+
+    // Combine the per-argument special cases, then build the host-side
+    // inputs (special cases first, random values after) and a
+    // default-filled output of `count` elements.
+    std::vector<INPUT1> in1_spec_cases = op.in1_special_cases();
+    std::vector<INPUT2> in2_spec_cases = op.in2_special_cases();
+    std::vector<INPUT3> in3_spec_cases = op.in3_special_cases();
+    prepare_special_cases(in1_spec_cases, in2_spec_cases, in3_spec_cases);
+    std::vector<INPUT1> input1 = generate_input<INPUT1>(count, op.min1(), op.max1(), in1_spec_cases);
+    std::vector<INPUT2> input2 = generate_input<INPUT2>(count, op.min2(), op.max2(), in2_spec_cases);
+    std::vector<INPUT3> input3 = generate_input<INPUT3>(count, op.min3(), op.max3(), in3_spec_cases);
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
+
+    // Device buffers sized to match the host vectors.
+    buffers[0] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT1) * input1.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    buffers[1] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT2) * input2.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    buffers[2] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT3) * input3.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    buffers[3] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    // Blocking writes (CL_TRUE) upload the three inputs.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT1) * input1.size(),
+        static_cast<void *>(input1.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(INPUT2) * input2.size(),
+        static_cast<void *>(input2.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[2], CL_TRUE, 0, sizeof(INPUT3) * input3.size(),
+        static_cast<void *>(input3.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    // Kernel arguments follow the buffer order: input1, input2, input3, output.
+    // The error codes are OR-ed together and checked once.
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    err |= clSetKernelArg(kernel, 2, sizeof(buffers[2]), &buffers[2]);
+    err |= clSetKernelArg(kernel, 3, sizeof(buffers[3]), &buffers[3]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    // One-dimensional launch with `count` work-items; the local size is
+    // left to the implementation (NULL).
+    work_size[0] = count;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read (CL_TRUE) of the results into the host output vector.
+    err = clEnqueueReadBuffer(
+        queue, buffers[3], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // Compare the device results with the host reference computed by op.
+    if (!verify_ternary(input1, input2, input3, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "test_%s %s(%s, %s, %s) failed", op.str().c_str(),
+            type_name<OUTPUT>().c_str(),
+            type_name<INPUT1>().c_str(),
+            type_name<INPUT2>().c_str(),
+            type_name<INPUT3>().c_str()
+        );
+    }
+    log_info(
+        "test_%s %s(%s, %s, %s) passed\n", op.str().c_str(),
+        type_name<OUTPUT>().c_str(),
+        type_name<INPUT1>().c_str(),
+        type_name<INPUT2>().c_str(),
+        type_name<INPUT3>().c_str()
+    );
+
+    // Release the OpenCL objects created above.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseMemObject(buffers[2]);
+    clReleaseMemObject(buffers[3]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_TERNARY_HPP

diff --git a/test_conformance/clcpp/utils_test/unary.hpp b/test_conformance/clcpp/utils_test/unary.hpp
new file mode 100644
index 0000000..2dbc647
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/unary.hpp

@@ -0,0 +1,261 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_UNARY_HPP
+#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_UNARY_HPP
+
+#include <type_traits>
+#include <algorithm>
+#include <string>
+#include <cmath>
+
+#include "../common.hpp"
+
+#include "detail/base_func_type.hpp"
+#include "generate_inputs.hpp"
+#include "compare.hpp"
+
+// Base class for functors that describe a one-argument function under test.
+//
+// IN1 is the host-side argument type, OUT1 the host-side result type.
+// The non-virtual members below are defaults that the test helpers query.
+template<class IN1, class OUT1>
+struct unary_func : public detail::base_func_type<OUT1>
+{
+    using in_type = IN1;
+    using out_type = OUT1;
+
+    virtual ~unary_func() {}
+
+    // Name of the tested function; the kernel generators paste it in front
+    // of the argument list.
+    virtual std::string str() = 0;
+
+    // Function type as text, for example: int(float).
+    std::string decl_str()
+    {
+        std::string signature = type_name<OUT1>();
+        signature += "(";
+        signature += type_name<IN1>();
+        signature += ")";
+        return signature;
+    }
+
+    // Tells the kernel generator whether the IN1 argument has to be
+    // treated as a bool type inside the OpenCL kernel. Default: no.
+    bool is_in1_bool()
+    {
+        return false;
+    }
+
+    // Smallest value that may be generated for the first argument.
+    IN1 min1()
+    {
+        return detail::get_min<IN1>();
+    }
+
+    // Largest value that may be generated for the first argument.
+    IN1 max1()
+    {
+        return detail::get_max<IN1>();
+    }
+
+    // Extra input values that should always be part of the test data.
+    // Default: none.
+    std::vector<IN1> in_special_cases()
+    {
+        return std::vector<IN1>();
+    }
+
+    // Maximum allowed error. A mismatch should be reported when
+    // abs(result - expected) > delta(.., expected)
+    //
+    // Default value: 0.001 * expected
+    //
+    // (This affects how the are_equal() function works; it may have no
+    // effect if the verify() method of a derived class does not use
+    // are_equal().)
+    //
+    // Only for FP numbers/vectors
+    template<class T>
+    typename make_vector_type<cl_double, vector_size<T>::value>::type
+    delta(const IN1& in1, const T& expected)
+    {
+        using delta_vector_type =
+            typename make_vector_type<cl_double, vector_size<T>::value>::type;
+        // The default tolerance does not depend on the input value.
+        static_cast<void>(in1);
+        auto relative_tolerance = detail::make_value<delta_vector_type>(1e-3);
+        return detail::multiply<delta_vector_type>(relative_tolerance, expected);
+    }
+};
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the OpenCL C source of a kernel that applies func element-wise:
+//     output[gid] = func(input[gid]);
+//
+// func      - functor describing the tested function; queried for str(),
+//             get_kernel_name(), is_in1_bool() and is_out_bool().
+// in_type   - host type of one input element.
+// out_type  - host type of one output element.
+//
+// Returns the complete kernel source as a string.
+template <class func_type, class in_type, class out_type>
+std::string generate_kernel_unary(func_type func)
+{
+    std::string in1_value = "input[gid]";
+    // Convert uintN to boolN values
+    if(func.is_in1_bool())
+    {
+        std::string i = vector_size<in_type>::value == 1 ? "" : std::to_string(vector_size<in_type>::value);
+        in1_value = "(input[gid] != (int" + i + ")(0))";
+    }
+    // No trailing ';' here: the statement terminator is appended exactly once
+    // in the kernel template below (it used to be emitted twice on this path).
+    std::string function_call = func.str() + "(" + in1_value + ")";
+    // Convert boolN result of function func_type to uintN
+    if(func.is_out_bool())
+    {
+        std::string i = vector_size<out_type>::value == 1 ? "" : std::to_string(vector_size<out_type>::value);
+        function_call = "convert_int" + i + "(" + func.str() + "(" + in1_value + "))";
+    }
+    return
+        "__kernel void " + func.get_kernel_name() + "(global " + type_name<in_type>() + " *input, global " + type_name<out_type>() + " *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+}
+#else
+// Builds the OpenCL C++ source of a kernel computing
+//     output[gid] = func(input[gid]);
+// The functor's defs() and headers() are emitted first; <opencl_convert>
+// is added when a bool conversion is needed and it is not listed yet.
+template <class func_type, class in_type, class out_type>
+std::string generate_kernel_unary(func_type func)
+{
+    std::string headers = func.headers();
+
+    // Bool arguments arrive as integers: compare with zero to get boolN.
+    std::string argument = "input[gid]";
+    if(func.is_in1_bool())
+    {
+        const auto lanes = vector_size<in_type>::value;
+        const std::string suffix = (lanes == 1) ? std::string() : std::to_string(lanes);
+        argument = "(input[gid] != (int" + suffix + ")(0))";
+    }
+
+    // Bool results are converted back to intN before they are stored.
+    std::string call = func.str() + "(" + argument + ")";
+    if(func.is_out_bool())
+    {
+        const auto lanes = vector_size<out_type>::value;
+        const std::string suffix = (lanes == 1) ? std::string() : std::to_string(lanes);
+        call = "convert_cast<int" + suffix + ">(" + func.str() + "(" + argument + "))";
+    }
+
+    const bool uses_conversion = func.is_out_bool() || func.is_in1_bool();
+    if(uses_conversion && headers.find("#include <opencl_convert>") == std::string::npos)
+    {
+        headers += "#include <opencl_convert>\n";
+    }
+
+    std::string source = "" + func.defs();
+    source += headers;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void ";
+    source += func.get_kernel_name();
+    source += "(global_ptr<";
+    source += type_name<in_type>();
+    source += "[]> input,";
+    source += "global_ptr<";
+    source += type_name<out_type>();
+    source += "[]> output)\n";
+    source += "{\n";
+    source += "    size_t gid = get_global_id(0);\n";
+    source += "    output[gid] = " + call + ";\n";
+    source += "}\n";
+    return source;
+}
+#endif
+
+// Compares every device result out[i] with the host reference op(in[i]).
+// The first mismatch (judged by are_equal() with the tolerance from
+// op.delta()) is reported through print_error_msg() and ends the check.
+// Returns true when all elements match.
+template<class INPUT, class OUTPUT, class unary_op>
+bool verify_unary(const std::vector<INPUT> &in, const std::vector<OUTPUT> &out, unary_op op)
+{
+    const size_t total = in.size();
+    for(size_t idx = 0; idx != total; ++idx)
+    {
+        auto expected = op(in[idx]);
+        if(are_equal(expected, out[idx], op.delta(in[idx], expected), op))
+        {
+            continue;
+        }
+        print_error_msg(expected, out[idx], idx, op);
+        return false;
+    }
+    return true;
+}
+
+// Runs one unary-function test on the device and checks it against the host.
+//
+// Steps: generate the kernel source for op, build it, upload count generated
+// inputs, launch one work-item per element, read the results back and compare
+// them with verify_unary().
+//
+// device/context/queue - OpenCL objects the test runs on.
+// count                - number of elements (and global work size).
+// op                   - functor describing the tested function.
+//
+// Returns CL_SUCCESS (also when a type is unsupported and the test is
+// skipped); failures leave through the RETURN_ON_* macros.
+//
+// NOTE(review): the RETURN_ON_* macros are defined elsewhere and presumably
+// return from this function on failure; if so, the buffers, kernel and
+// program created up to that point are not released on those paths - confirm.
+template <class unary_op>
+int test_unary_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, unary_op op)
+{
+    // buffers[0] holds the input, buffers[1] the output.
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+
+    typedef typename unary_op::in_type INPUT;
+    typedef typename unary_op::out_type OUTPUT;
+
+    // Don't run test for unsupported types
+    if(!(type_supported<INPUT>(device) && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS;
+    }
+
+    std::string code_str = generate_kernel_unary<unary_op, INPUT, OUTPUT>(op);
+    std::string kernel_name = op.get_kernel_name();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+    // In this configuration the function ends here; nothing is executed on the device.
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+#endif
+
+    // Host-side data: inputs within [op.min1(), op.max1()] plus the special
+    // cases requested by op, and an output vector of count elements.
+    std::vector<INPUT> input = generate_input<INPUT>(count, op.min1(), op.max1(), op.in_special_cases());
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
+
+    buffers[0] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT) * input.size(), NULL,  &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    buffers[1] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL,  &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    // Blocking write (CL_TRUE): the input is on the device when this returns.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    // Error codes are OR-ed together, so only "some call failed" is detected here.
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    // One work-item per element; the local size is left to the implementation.
+    work_size[0] = count;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read (CL_TRUE): output is complete when this returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    if (!verify_unary(input, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    }
+    log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_UNARY_HPP

diff --git a/test_conformance/clcpp/vload_vstore/CMakeLists.txt b/test_conformance/clcpp/vload_vstore/CMakeLists.txt
new file mode 100644
index 0000000..c66cb6f
--- /dev/null
+++ b/test_conformance/clcpp/vload_vstore/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Test module for the OpenCL C++ vload/vstore functions.
+set(MODULE_NAME CPP_VLOAD_VSTORE_FUNCS)
+
+# main.cpp is the only translation unit; the tests live in the headers it includes.
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+)
+
+# Shared build rules; NOTE(review): presumably consumes MODULE_NAME and
+# ${MODULE_NAME}_SOURCES set above - confirm in CMakeCommon.txt.
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/vload_vstore/common.hpp b/test_conformance/clcpp/vload_vstore/common.hpp
new file mode 100644
index 0000000..84cd539
--- /dev/null
+++ b/test_conformance/clcpp/vload_vstore/common.hpp

@@ -0,0 +1,81 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Include guard named after this header (vload_vstore/common.hpp). The
+// previous RELATIONAL_FUNCS name belongs to another test directory and would
+// make one header hide the other if both were ever included together.
+#ifndef TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_COMMON_HPP
+#define TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_COMMON_HPP
+
+#include <type_traits>
+#include <cmath>
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include "half_utils.hpp"
+
+// Generates count cl_half test inputs.
+//
+// count          - number of values to return.
+// min, max       - range of the float values that are drawn uniformly and then
+//                  converted to half with float2half_rte().
+// special_cases  - values placed at the front of the result; if there are
+//                  more than count of them, the surplus is dropped.
+//
+// Returns a vector of exactly count elements.
+//
+// Declared inline because the definition lives in a header; special_cases is
+// taken by reference to avoid copying the vector on every call.
+inline std::vector<cl_half> generate_half_input(size_t count,
+                                                const cl_float& min,
+                                                const cl_float& max,
+                                                const std::vector<cl_half>& special_cases)
+{
+    std::vector<cl_half> input(count);
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<cl_float> dis(min, max);
+    for(auto& i : input)
+    {
+        i = float2half_rte(dis(gen));
+    }
+
+    // Put the special cases first, then cut back to the requested size.
+    input.insert(input.begin(), special_cases.begin(), special_cases.end());
+    input.resize(count);
+    return input;
+}
+
+// Input generator for the vload/vstore tests, cl_half overload.
+//
+// generate_input<type>(...) cannot be used blindly here: cl_half is a typedef
+// of a 16-bit integer type, so the type alone does not say whether plain
+// integers or half-precision bit patterns are wanted. The caller states it
+// through generate_half.
+template <class type>
+std::vector<type> vload_vstore_generate_input(size_t count,
+                                              const type& min,
+                                              const type& max,
+                                              const std::vector<type> special_cases,
+                                              const bool generate_half,
+                                              typename std::enable_if<
+                                                  std::is_same<type, cl_half>::value
+                                              >::type* = 0)
+{
+    if(generate_half)
+    {
+        // Half data ignores min/max and uses a quarter of the half range.
+        return generate_half_input(count, -(CL_HALF_MAX/4.f), (CL_HALF_MAX/4.f), special_cases);
+    }
+    // Plain 16-bit integers: the generic generator is fine.
+    return generate_input<type>(count, min, max, special_cases);
+}
+
+// Input generator for the vload/vstore tests, overload for every type other
+// than cl_half: simply forwards to generate_input<type>(...).
+template <class type>
+std::vector<type> vload_vstore_generate_input(size_t count,
+                                              const type& min,
+                                              const type& max,
+                                              const std::vector<type> special_cases,
+                                              const bool generate_half,
+                                              typename std::enable_if<
+                                                  !std::is_same<type, cl_half>::value
+                                              >::type* = 0)
+{
+    // generate_half only matters for cl_half data.
+    static_cast<void>(generate_half);
+    std::vector<type> values = generate_input<type>(count, min, max, special_cases);
+    return values;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP

diff --git a/test_conformance/clcpp/vload_vstore/half_utils.hpp b/test_conformance/clcpp/vload_vstore/half_utils.hpp
new file mode 100644
index 0000000..5c60599
--- /dev/null
+++ b/test_conformance/clcpp/vload_vstore/half_utils.hpp

@@ -0,0 +1,136 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_HALF_UTILS_HPP
+#define TEST_CONFORMANCE_CLCPP_HALF_UTILS_HPP
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include <cmath>
+
+namespace detail 
+{
+
+// Counts the leading zero bits of x, measured in the width of INT_TYPE.
+//
+// x - integer value (signed or unsigned, up to 64 bits wide).
+//
+// Returns the number of zero bits above the most significant set bit;
+// for x == 0 this is the full bit width of INT_TYPE (the loop used to run
+// forever in that case).
+template<class INT_TYPE>
+inline int clz(INT_TYPE x)
+{
+    const int width = static_cast<int>(8 * sizeof(x));
+
+    // Work on an unsigned 64-bit copy for signed and unsigned inputs alike:
+    // shifting unsigned values is well defined, and the bits that sign
+    // extension adds above the original width are shifted out right away.
+    uint64_t value = static_cast<uint64_t>(x);
+    value <<= 8 * sizeof(value) - (8 * sizeof(x));
+
+    const uint64_t top_bit = static_cast<uint64_t>(1) << 63;
+    int count = 0;
+    // Bounded by width so that an all-zero value terminates.
+    while(count < width && 0 == (value & top_bit))
+    {
+        value <<= 1;
+        count++;
+    }
+    return count;
+}
+
+} // namespace detail 
+
+// Converts a half-precision bit pattern to the cl_float with the same value.
+//
+// us - 16 bits: 1 sign bit, 5 exponent bits, 10 mantissa bits.
+//
+// Returns the float; zeros keep their sign, half denormals become normal
+// floats, infinities stay infinities and NaNs stay NaNs.
+inline cl_float half2float(cl_half us)
+{
+    uint32_t u = us;
+    // Move each field to the position it has in a 32-bit float.
+    uint32_t sign = (u << 16) & 0x80000000;
+    int32_t exponent = (u & 0x7c00) >> 10;
+    uint32_t mantissa = (u & 0x03ff) << 13;
+    // Used to reinterpret the assembled bits as a float.
+    union{ cl_uint u; cl_float f;}uu;
+
+    if( exponent == 0 )
+    {
+        // Zero: only the sign matters.
+        if( mantissa == 0 )
+            return sign ? -0.0f : 0.0f;
+
+        // Half denormal: shift the leading set bit up to bit 23 (the implicit
+        // one of a normal float), lower the exponent accordingly and drop
+        // that implicit bit again.
+        int shift = detail::clz( mantissa ) - 8;
+        exponent -= shift-1;
+        mantissa <<= shift;
+        mantissa &= 0x007fffff;
+    }
+    else
+        if( exponent == 31)
+        {
+            // All exponent bits set: infinity (mantissa == 0) or NaN.
+            uu.u = mantissa | sign;
+            if( mantissa )
+                uu.u |= 0x7fc00000;
+            else
+                uu.u |= 0x7f800000;
+
+            return uu.f;
+        }
+
+    // Re-bias the exponent from half (15) to float (127) and assemble the bits.
+    exponent += 127 - 15;
+    exponent <<= 23;
+
+    exponent |= mantissa;
+    uu.u = exponent | sign;
+
+    return uu.f;
+}
+
+// Converts a cl_float to a half-precision bit pattern, rounding to nearest
+// even (per the function name and the halfway note below).
+//
+// f - value to convert.
+//
+// Returns the 16 half bits; magnitudes that are too large give a signed
+// infinity, magnitudes that are too small give a signed zero.
+//
+// NOTE(review): MAKE_HEX_FLOAT is defined elsewhere; its first argument is the
+// constant as a hex float literal, the other two look like the same value as
+// mantissa and exponent - confirm.
+inline cl_ushort float2half_rte(cl_float f)
+{
+    union{ cl_float f; cl_uint u; } u = {f};
+    // Float sign bit (bit 31) moved to the half sign position (bit 15).
+    cl_uint sign = (u.u >> 16) & 0x8000;
+    cl_float x = fabsf(f);
+
+    //Nan
+    if( x != x )
+    {
+        // Keep the upper payload bits and force a mantissa bit on.
+        u.u >>= (24-11);
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+
+    // overflow
+    // 0x7c00 is the half infinity pattern (all exponent bits, no mantissa).
+    if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
+        return 0x7c00 | sign;
+
+    // underflow
+    if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+
+    // very small
+    // Above the halfway point to the smallest half denormal: return that denormal.
+    if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
+        return sign | 1;
+
+    // half denormal
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        // Scaling by 2^-125 maps a multiple k of 2^-24 (the half denormal
+        // step) to k * 2^-149, a float denormal whose bit pattern is k.
+        // NOTE(review): presumably relies on float denormals not being
+        // flushed to zero and on the default rounding mode - confirm.
+        u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);
+        return sign | u.u;
+    }
+
+    // Normal range. u.f becomes the power of two 2^13 times the exponent part
+    // of f; adding it to x and subtracting it again rounds x to the number of
+    // significand bits a half can hold.
+    u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
+    u.u &= 0x7f800000;
+    x += u.f;
+    u.f = x - u.f;
+    // Re-bias the exponent (127 - 15 = 112) before extracting the bits.
+    u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);
+
+    return (u.u >> (24-11)) | sign;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_HALF_UTILS_HPP

diff --git a/test_conformance/clcpp/vload_vstore/main.cpp b/test_conformance/clcpp/vload_vstore/main.cpp
new file mode 100644
index 0000000..3893905
--- /dev/null
+++ b/test_conformance/clcpp/vload_vstore/main.cpp

@@ -0,0 +1,25 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "vload_funcs.hpp"
+#include "vstore_funcs.hpp"
+
+// Entry point of the vload/vstore test binary: passes the tests stored in the
+// global autotest suite to runTestHarness() and returns its result.
+int main(int argc, const char *argv[])
+{
+    // NOTE(review): presumably filled by the AUTO_TEST_CASE definitions in the
+    // headers included above - confirm against the autotest helper.
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    // NOTE(review): the meaning of the trailing "false, false, 0" arguments is
+    // defined by runTestHarness(), which is not visible here.
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/vload_vstore/vload_funcs.hpp b/test_conformance/clcpp/vload_vstore/vload_funcs.hpp
new file mode 100644
index 0000000..f0bbcfc
--- /dev/null
+++ b/test_conformance/clcpp/vload_vstore/vload_funcs.hpp

@@ -0,0 +1,363 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VLOAD_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VLOAD_FUNCS_HPP
+
+#include <iterator>
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include "common.hpp"
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the OpenCL C source of a kernel in which every work-item loads one
+// N-element vector:
+//     output[gid] = <func>N(gid, input);
+template <class func_type, class in_type, class out_type, size_t N>
+std::string generate_kernel_vload(func_type func)
+{
+    // Half data is declared as "half" in the kernel, whatever the host type is.
+    std::string source_type = func.is_in1_half() ? std::string("half") : type_name<in_type>();
+
+    // For 3-element vectors the last character of the type name is replaced by '3'.
+    std::string result_type = type_name<out_type>();
+    if(N == 3)
+    {
+        result_type.back() = '3';
+    }
+
+    std::string source = "__kernel void test_";
+    source += func.str();
+    source += "(global " + source_type + " *input, global " + result_type + " *output)\n";
+    source += "{\n";
+    source += "    size_t gid = get_global_id(0);\n";
+    source += "    output[gid] = ";
+    source += func.str();
+    source += std::to_string(N);
+    source += "(gid, input);\n";
+    source += "}\n";
+    return source;
+}
+#else
+// Builds the OpenCL C++ source of a kernel in which every work-item loads one
+// N-element vector:
+//     output[gid] = <func><N>(gid, input.get());
+// The functor's defs() and headers() are emitted in front of the kernel.
+template <class func_type, class in_type, class out_type, size_t N>
+std::string generate_kernel_vload(func_type func)
+{
+    // Half data is declared as "half" in the kernel, whatever the host type is.
+    std::string source_type = func.is_in1_half() ? std::string("half") : type_name<in_type>();
+
+    // For 3-element vectors the last character of the type name is replaced by '3'.
+    std::string result_type = type_name<out_type>();
+    if(N == 3)
+    {
+        result_type.back() = '3';
+    }
+
+    std::string source = "" + func.defs();
+    source += func.headers();
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_";
+    source += func.str();
+    source += "(global_ptr<" + source_type + "[]> input,";
+    source += "global_ptr<" + result_type + "[]> output)\n";
+    source += "{\n";
+    source += "    size_t gid = get_global_id(0);\n";
+    source += "    output[gid] = ";
+    source += func.str();
+    source += "<" + std::to_string(N) + ">(gid, input.get());\n";
+    source += "}\n";
+    return source;
+}
+#endif
+
+// Checks every loaded vector out[i] against the host reference
+// op(i, in.begin()), component by component. The first mismatch is reported
+// through print_error_msg() and ends the check.
+// Returns true when all components of all vectors match.
+template<class INPUT, class OUTPUT, class vload_op>
+bool verify_vload(const std::vector<INPUT> &in, const std::vector<OUTPUT> &out, vload_op op)
+{
+    for(size_t vec = 0; vec != out.size(); ++vec)
+    {
+        auto expected = op(vec, in.begin());
+        // Index of the input scalar handed to op.delta() for component 0.
+        const size_t base = vec * vector_size<OUTPUT>::value;
+        for(size_t lane = 0; lane < vload_op::vector_size; ++lane)
+        {
+            if(are_equal(expected.s[lane], out[vec].s[lane], op.delta(in[base + lane], expected.s[lane]), op))
+            {
+                continue;
+            }
+            print_error_msg(expected, out[vec], vec, op);
+            return false;
+        }
+    }
+    return true;
+}
+
+// Runs one vload test on the device and checks it against the host.
+//
+// Steps: generate and build the kernel for op, upload the scalar input
+// (count * vector_size<OUTPUT>::value elements), launch one work-item per
+// output vector, read the vectors back and compare with verify_vload().
+//
+// device/context/queue - OpenCL objects the test runs on.
+// count                - number of output vectors (and global work size).
+// op                   - functor describing the tested vload variant.
+//
+// Returns CL_SUCCESS (also when a type is unsupported and the test is
+// skipped); failures leave through the RETURN_ON_* macros.
+//
+// NOTE(review): the RETURN_ON_* macros are defined elsewhere and presumably
+// return from this function on failure; if so, the buffers, kernel and
+// program created up to that point are not released on those paths - confirm.
+template <class vload_op>
+int test_vload_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, vload_op op)
+{
+    // buffers[0] holds the scalar input, buffers[1] the loaded vectors.
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+
+    typedef typename vload_op::in_type INPUT;
+    typedef typename vload_op::out_type OUTPUT;
+
+    // Don't run test for unsupported types
+    if(!(type_supported<INPUT>(device) && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS;
+    }
+
+    std::string code_str = generate_kernel_vload<vload_op, INPUT, OUTPUT, vload_op::vector_size>(op);
+    // Must match the "test_" + func.str() name used by generate_kernel_vload().
+    std::string kernel_name("test_"); kernel_name += op.str();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+    // In this configuration the function ends here; nothing is executed on the device.
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+#endif
+
+    // vector_size<OUTPUT>::value scalars are generated per output vector;
+    // op.is_in1_half() selects half-precision bit patterns for cl_half input.
+    std::vector<INPUT> input = vload_vstore_generate_input<INPUT>(
+        count * vector_size<OUTPUT>::value, op.min1(), op.max1(), op.in_special_cases(), op.is_in1_half()
+    );
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    // Blocking write (CL_TRUE): the input is on the device when this returns.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    // Error codes are OR-ed together, so only "some call failed" is detected here.
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    // One work-item per output vector; the local size is left to the implementation.
+    work_size[0] = count;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read (CL_TRUE): output is complete when this returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    if (!verify_vload(input, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed",
+            op.str().c_str(),
+            type_name<OUTPUT>().c_str(),
+            type_name<INPUT>().c_str()
+        );
+    }
+    log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+
+// Host-side reference for vload<N>: copies N consecutive IN1 scalars,
+// starting at element offset * N, into an IN1N vector.
+template <class IN1, cl_int N /* Vector size */>
+struct vload_func : public unary_func<
+                        IN1,
+                        typename make_vector_type<IN1, N>::type /* create IN1N type */
+                    >
+{
+    using result_type = typename make_vector_type<IN1, N>::type;
+    static const size_t vector_size = N;
+
+    // Name of the tested function as used in the generated kernel.
+    std::string str()
+    {
+        return std::string("vload");
+    }
+
+    // Header the OpenCL C++ kernel has to include.
+    std::string headers()
+    {
+        return std::string("#include <opencl_vector_load_store>\n");
+    }
+
+    // Reference result for work-item "offset"; x points at the first input scalar.
+    template<class Iterator>
+    result_type operator()(const size_t offset, Iterator x)
+    {
+        using element_type = typename std::iterator_traits<Iterator>::value_type;
+        using diff_type = typename std::iterator_traits<Iterator>::difference_type;
+
+        static_assert(
+            !is_vector_type<IN1>::value,
+            "IN1 must be scalar type"
+        );
+        static_assert(
+            std::is_same<element_type, IN1>::value,
+            "std::iterator_traits<Iterator>::value_type must be IN1"
+        );
+
+        result_type r;
+        Iterator src = x + static_cast<diff_type>(offset * N);
+        for(size_t lane = 0; lane < N; ++lane, ++src)
+        {
+            r.s[lane] = *src;
+        }
+        return r;
+    }
+
+    // The input buffer holds plain IN1 values, not half-precision data.
+    bool is_in1_half()
+    {
+        return false;
+    }
+};
+
+// Host-side reference for vload_half<N>: reads N consecutive cl_half values,
+// starting at element offset * N, and widens each one to float.
+template <cl_int N /* Vector size */>
+struct vload_half_func : public unary_func<
+                            cl_half,
+                            typename make_vector_type<cl_float, N>::type /* create IN1N type */
+                         >
+{
+    using result_type = typename make_vector_type<cl_float, N>::type;
+    static const size_t vector_size = N;
+
+    // Name of the tested function as used in the generated kernel.
+    std::string str()
+    {
+        return std::string("vload_half");
+    }
+
+    // Header the OpenCL C++ kernel has to include.
+    std::string headers()
+    {
+        return std::string("#include <opencl_vector_load_store>\n");
+    }
+
+    // Reference result for work-item "offset"; x points at the first input half.
+    template<class Iterator>
+    result_type operator()(const size_t offset, Iterator x)
+    {
+        using element_type = typename std::iterator_traits<Iterator>::value_type;
+        using diff_type = typename std::iterator_traits<Iterator>::difference_type;
+
+        static_assert(
+            std::is_same<element_type, cl_half>::value,
+            "std::iterator_traits<Iterator>::value_type must be cl_half"
+        );
+
+        result_type r;
+        Iterator src = x + static_cast<diff_type>(offset * N);
+        for(size_t lane = 0; lane < N; ++lane, ++src)
+        {
+            r.s[lane] = half2float(*src);
+        }
+        return r;
+    }
+
+    // The input buffer holds half-precision data.
+    bool is_in1_half()
+    {
+        return true;
+    }
+};
+
+// Host-side reference for vloada_half<N>: like vload_half, but consecutive
+// vectors are "stride" elements apart, where stride is 4 for N == 3 and N
+// otherwise. Only the first N elements of each group are read.
+template <cl_int N /* Vector size */>
+struct vloada_half_func : public unary_func<
+                            cl_half,
+                            typename make_vector_type<cl_float, N>::type /* create IN1N type */
+                         >
+{
+    using result_type = typename make_vector_type<cl_float, N>::type;
+    static const size_t vector_size = N;
+
+    // Name of the tested function as used in the generated kernel.
+    std::string str()
+    {
+        return std::string("vloada_half");
+    }
+
+    // Header the OpenCL C++ kernel has to include.
+    std::string headers()
+    {
+        return std::string("#include <opencl_vector_load_store>\n");
+    }
+
+    // Reference result for work-item "offset"; x points at the first input half.
+    template<class Iterator>
+    result_type operator()(const size_t offset, Iterator x)
+    {
+        using element_type = typename std::iterator_traits<Iterator>::value_type;
+        using diff_type = typename std::iterator_traits<Iterator>::difference_type;
+
+        static_assert(
+            std::is_same<element_type, cl_half>::value,
+            "std::iterator_traits<Iterator>::value_type must be cl_half"
+        );
+
+        result_type r;
+        // Distance, in elements, between the starts of two consecutive vectors.
+        size_t stride = N == 3 ? 4 : N;
+        Iterator src = x + static_cast<diff_type>(offset * stride);
+        for(size_t lane = 0; lane < N; ++lane, ++src)
+        {
+            r.s[lane] = half2float(*src);
+        }
+        return r;
+    }
+
+    // The input buffer holds half-precision data.
+    bool is_in1_half()
+    {
+        return true;
+    }
+};
+
+// Test case covering vload, vload_half and vloada_half for several vector
+// sizes. Each variant is run through test_vload_func(); the individual
+// results are OR-ed into error.
+//
+// Returns CL_SUCCESS when every variant succeeded and -1 otherwise.
+//
+// NOTE(review): AUTO_TEST_CASE and CHECK_ERROR are macros defined elsewhere;
+// AUTO_TEST_CASE presumably registers this function in the global test suite
+// used by main() - confirm.
+AUTO_TEST_CASE(test_vload_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+
+// Runs one functor instance (CLASS) and accumulates its result.
+#define TEST_VLOAD_FUNC_MACRO(CLASS) \
+    last_error = test_vload_func( \
+        device, context, queue, n_elems, CLASS \
+    ); \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+
+    // vload<N> on plain scalar types
+    TEST_VLOAD_FUNC_MACRO((vload_func<cl_uint,  2>()))
+    TEST_VLOAD_FUNC_MACRO((vload_func<cl_float, 4>()))
+    TEST_VLOAD_FUNC_MACRO((vload_func<cl_short, 8>()))
+    TEST_VLOAD_FUNC_MACRO((vload_func<cl_int, 16>()))
+
+    // vload_half<N>
+    TEST_VLOAD_FUNC_MACRO((vload_half_func<2>()))
+    TEST_VLOAD_FUNC_MACRO((vload_half_func<3>()))
+    TEST_VLOAD_FUNC_MACRO((vload_half_func<4>()))
+    TEST_VLOAD_FUNC_MACRO((vload_half_func<8>()))
+    TEST_VLOAD_FUNC_MACRO((vload_half_func<16>()))
+
+    // vloada_half<N>
+    TEST_VLOAD_FUNC_MACRO((vloada_half_func<2>()))
+    TEST_VLOAD_FUNC_MACRO((vloada_half_func<3>()))
+    TEST_VLOAD_FUNC_MACRO((vloada_half_func<4>()))
+    TEST_VLOAD_FUNC_MACRO((vloada_half_func<8>()))
+    TEST_VLOAD_FUNC_MACRO((vloada_half_func<16>()))
+
+#undef TEST_VLOAD_FUNC_MACRO
+
+    // error is an OR of several codes, so it is collapsed to a plain -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VLOAD_FUNCS_HPP

diff --git a/test_conformance/clcpp/vload_vstore/vstore_funcs.hpp b/test_conformance/clcpp/vload_vstore/vstore_funcs.hpp
new file mode 100644
index 0000000..5bf83f5
--- /dev/null
+++ b/test_conformance/clcpp/vload_vstore/vstore_funcs.hpp

@@ -0,0 +1,348 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VSTORE_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VSTORE_FUNCS_HPP
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include <iterator>
+
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+
+#include "common.hpp"
+
+// -----------------------------------------------------------------------------------
+// -------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT -------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C variant: returns the source of kernel "test_<func.str()>", in which
+// every work-item calls <func.str()><N>(input[gid], gid, output).
+template <class func_type, class in_type, class out_type, size_t N>
+std::string generate_kernel_vstore(func_type func)
+{
+    // For 3-element vectors the last character of the input type name is
+    // overwritten with '3'.
+    std::string src_type = type_name<in_type>();
+    if(N == 3)
+    {
+        src_type.back() = '3';
+    }
+
+    // Functors that report a half output write through a "half" pointer.
+    std::string dst_type = type_name<out_type>();
+    if(func.is_out_half())
+    {
+        dst_type = "half";
+    }
+
+    const std::string func_name = func.str();
+
+    std::string source;
+    source += "__kernel void test_" + func_name + "(global " + src_type + " *input, global " + dst_type + " *output)\n";
+    source += "{\n";
+    source += "    size_t gid = get_global_id(0);\n";
+    source += "    " + func_name + std::to_string(N) + "(input[gid], gid, output);\n";
+    source += "}\n";
+    return source;
+}
+#else
+// OpenCL C++ variant: returns the source of kernel "test_<func.str()>", prefixed
+// with the functor's defs() and headers(), in which every work-item calls
+// <func.str()>(input[gid], gid, output.get()).
+template <class func_type, class in_type, class out_type, size_t N>
+std::string generate_kernel_vstore(func_type func)
+{
+    // For 3-element vectors the last character of the input type name is
+    // overwritten with '3'.
+    std::string src_type = type_name<in_type>();
+    if(N == 3)
+    {
+        src_type.back() = '3';
+    }
+
+    // Functors that report a half output write through a "half" pointer.
+    std::string dst_type = type_name<out_type>();
+    if(func.is_out_half())
+    {
+        dst_type = "half";
+    }
+
+    // Preamble supplied by the functor, followed by the fixed includes.
+    std::string source = func.defs();
+    source += func.headers();
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "using namespace cl;\n";
+
+    const std::string func_name = func.str();
+    source += "__kernel void test_" + func_name + "(global_ptr<" + src_type + "[]> input,";
+    source += "global_ptr<" + dst_type + "[]> output)\n";
+    source += "{\n";
+    source += "    size_t gid = get_global_id(0);\n";
+    source += "    " + func_name + "(input[gid], gid, output.get());\n";
+    source += "}\n";
+    return source;
+}
+#endif
+
+// Compares the buffer written by the kernel with the host-side reference.
+//
+// For input element i the reference vector is op(in[i]); its lane j is
+// expected at out[i * vload_op::vec_alignment + j]. Lanes are compared with
+// are_equal using the tolerance returned by op.delta. The first mismatch is
+// reported through print_error_msg and makes the function return false.
+template<class INPUT, class OUTPUT, class vload_op>
+bool verify_vstore(const std::vector<INPUT> &in, const std::vector<OUTPUT> &out, vload_op op)
+{
+    const size_t lanes = vload_op::vector_size;
+    const size_t stride = vload_op::vec_alignment;
+
+    for(size_t elem = 0; elem < in.size(); ++elem)
+    {
+        auto expected = op(in[elem]);
+        const size_t base = elem * stride;
+        for(size_t lane = 0; lane < lanes; ++lane)
+        {
+            const size_t idx = base + lane;
+            if(are_equal(expected.s[lane], out[idx], op.delta(in[elem], expected).s[lane], op))
+            {
+                continue;
+            }
+            print_error_msg(expected.s[lane], out[idx], idx, op);
+            return false;
+        }
+    }
+    return true;
+}
+
+template <class vload_op> // Builds, runs and verifies the store kernel generated for functor `op`.
+int test_vstore_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, vload_op op) // `count` is used as the global work size.
+{
+    cl_mem buffers[2]; // [0] = kernel input, [1] = kernel output
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+    // Input/output element types are taken from the functor.
+    typedef typename vload_op::in_type INPUT;
+    typedef typename vload_op::out_type OUTPUT;
+
+    // Don't run test for unsupported types
+    if(!(type_supported<INPUT>(device) && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS;
+    }
+    // The kernel source is generated per functor; the kernel is named "test_" + op.str().
+    std::string code_str = generate_kernel_vstore<vload_op, INPUT, OUTPUT, vload_op::vector_size>(op);
+    std::string kernel_name("test_"); kernel_name += op.str();
+
+// -----------------------------------------------------------------------------------
+// -------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT -------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+    return err; // compile-only mode: nothing is executed or verified
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+#endif
+    // Host-side data; generate_output is asked for count * vector_size<INPUT>::value elements.
+    std::vector<INPUT> input = generate_input<INPUT>(count, op.min1(), op.max1(), op.in_special_cases());
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count * vector_size<INPUT>::value);
+    // NOTE(review): the RETURN_ON_* macros presumably return on failure; if so, objects created earlier are not released on those paths - confirm.
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    // Upload the generated input (blocking write).
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    // Launch `count` work-items; the local size is left to the implementation (NULL).
+    work_size[0] = count;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    // Blocking read of everything the kernel stored.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    // Compare the stored data with the host-side reference computed through op.
+    if (!verify_vstore(input, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    }
+    log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    // Release the OpenCL objects created above.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err; // result of the last OpenCL call (the blocking read)
+}
+
+// Functor describing the vstore test for an N-element vector of scalar T.
+// The host-side reference result is the input vector itself.
+template <class T, cl_int N /* Vector size */>
+struct vstore_func : public unary_func<
+                        typename make_vector_type<T, N>::type,
+                        T
+                     >
+{
+    using input1_type = typename make_vector_type<T, N>::type;
+    using result_type = typename make_vector_type<T, N>::type;
+    static const size_t vector_size = N;
+    // Distance, in output elements, between two consecutive stored vectors.
+    static const size_t vec_alignment = N;
+
+    // Name of the tested function.
+    std::string str()
+    {
+        return "vstore";
+    }
+
+    // Include line emitted into the OpenCL C++ kernel source.
+    std::string headers()
+    {
+        return "#include <opencl_vector_load_store>\n";
+    }
+
+    // Reference implementation: the stored data equals the input.
+    result_type operator()(const input1_type& in)
+    {
+        static_assert(
+            !is_vector_type<T>::value,
+            "T must be scalar type"
+        );
+        return in;
+    }
+
+    // The output buffer holds T elements, not halfs.
+    bool is_out_half()
+    {
+        return false;
+    }
+};
+
+// Functor describing the vstore_half test for an N-element float vector.
+// The host-side reference converts every lane with float2half_rte.
+template <cl_int N /* Vector size */>
+struct vstore_half_func : public unary_func<
+                            typename make_vector_type<cl_float, N>::type,
+                            cl_half
+                          >
+{
+    using input1_type = typename make_vector_type<cl_float, N>::type;
+    using result_type = typename make_vector_type<cl_half, N>::type;
+    static const size_t vector_size = N;
+    // Distance, in output elements, between two consecutive stored vectors.
+    static const size_t vec_alignment = N;
+
+    // Name of the tested function.
+    std::string str()
+    {
+        return "vstore_half";
+    }
+
+    // Include line emitted into the OpenCL C++ kernel source.
+    std::string headers()
+    {
+        return "#include <opencl_vector_load_store>\n";
+    }
+
+    // Reference implementation: lane-wise float -> half conversion.
+    result_type operator()(const input1_type& in)
+    {
+        result_type converted;
+        for(size_t lane = 0; lane < vector_size; ++lane)
+        {
+            converted.s[lane] = float2half_rte(in.s[lane]);
+        }
+        return converted;
+    }
+
+    // Lower bound passed to the input generator.
+    input1_type min1()
+    {
+        return detail::make_value<input1_type>(-512.f);
+    }
+
+    // Upper bound passed to the input generator.
+    input1_type max1()
+    {
+        return detail::make_value<input1_type>(512.f);
+    }
+
+    // The kernel writes through a half pointer.
+    bool is_out_half()
+    {
+        return true;
+    }
+};
+
+// Functor describing the vstorea_half test for an N-element float vector.
+// Identical to the vstore_half reference except that 3-element vectors are
+// expected at a stride of 4 output elements.
+template <cl_int N /* Vector size */>
+struct vstorea_half_func : public unary_func<
+                            typename make_vector_type<cl_float, N>::type,
+                            cl_half
+                          >
+{
+    using input1_type = typename make_vector_type<cl_float, N>::type;
+    using result_type = typename make_vector_type<cl_half, N>::type;
+    static const size_t vector_size = N;
+    // Distance, in output elements, between two consecutive stored vectors.
+    static const size_t vec_alignment = (N == 3) ? 4 : N;
+
+    // Name of the tested function.
+    std::string str()
+    {
+        return "vstorea_half";
+    }
+
+    // Include line emitted into the OpenCL C++ kernel source.
+    std::string headers()
+    {
+        return "#include <opencl_vector_load_store>\n";
+    }
+
+    // Reference implementation: lane-wise float -> half conversion.
+    result_type operator()(const input1_type& in)
+    {
+        result_type converted;
+        for(size_t lane = 0; lane < vector_size; ++lane)
+        {
+            converted.s[lane] = float2half_rte(in.s[lane]);
+        }
+        return converted;
+    }
+
+    // Lower bound passed to the input generator.
+    input1_type min1()
+    {
+        return detail::make_value<input1_type>(-512.f);
+    }
+
+    // Upper bound passed to the input generator.
+    input1_type max1()
+    {
+        return detail::make_value<input1_type>(512.f);
+    }
+
+    // The kernel writes through a half pointer.
+    bool is_out_half()
+    {
+        return true;
+    }
+};
+
+// Runs test_vstore_func once for every vstore, vstore_half and vstorea_half
+// configuration listed below and folds the results into one status.
+AUTO_TEST_CASE(test_vstore_funcs)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int error = CL_SUCCESS;      // OR of all per-configuration results
+    int last_error = CL_SUCCESS; // result of the most recent configuration
+
+// FUNCTOR is passed wrapped in an extra pair of parentheses so that a comma
+// inside its template argument list is not taken as a macro argument separator.
+#define RUN_VSTORE_CASE(FUNCTOR) \
+    last_error = test_vstore_func(device, context, queue, n_elems, FUNCTOR); \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+
+    // vstore
+    RUN_VSTORE_CASE((vstore_func<cl_uint, 2>()))
+    RUN_VSTORE_CASE((vstore_func<cl_uint, 3>()))
+    RUN_VSTORE_CASE((vstore_func<cl_int, 4>()))
+    RUN_VSTORE_CASE((vstore_func<cl_float, 8>()))
+    RUN_VSTORE_CASE((vstore_func<cl_uchar, 16>()))
+
+    // vstore_half
+    RUN_VSTORE_CASE((vstore_half_func<2>()))
+    RUN_VSTORE_CASE((vstore_half_func<3>()))
+    RUN_VSTORE_CASE((vstore_half_func<4>()))
+    RUN_VSTORE_CASE((vstore_half_func<8>()))
+    RUN_VSTORE_CASE((vstore_half_func<16>()))
+
+    // vstorea_half
+    RUN_VSTORE_CASE((vstorea_half_func<2>()))
+    RUN_VSTORE_CASE((vstorea_half_func<3>()))
+
+#undef RUN_VSTORE_CASE
+
+    // Any accumulated failure is reported as -1, success as CL_SUCCESS.
+    return (error != CL_SUCCESS) ? -1 : error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VSTORE_FUNCS_HPP

diff --git a/test_conformance/clcpp/workgroups/CMakeLists.txt b/test_conformance/clcpp/workgroups/CMakeLists.txt
new file mode 100644
index 0000000..812e982
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Module name; it also prefixes the sources variable below.
+set(MODULE_NAME CPP_WORKGROUPS)
+
+# main.cpp is the only translation unit listed for this module.
+set(${MODULE_NAME}_SOURCES main.cpp)
+
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/workgroups/common.hpp b/test_conformance/clcpp/workgroups/common.hpp
new file mode 100644
index 0000000..ab7b100
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/common.hpp

@@ -0,0 +1,97 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_COMMON_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_COMMON_HPP
+
+#include <string>
+#include <vector>
+#include <limits>
+
+// Operations that the work-group reduce/scan tests can be instantiated for.
+enum class work_group_op : int {
+    add, min, max
+};
+
+// Returns the lower-case name of `op` ("add", "min" or "max"), or an empty
+// string for a value outside the enumeration.
+//
+// Declared inline: this header is a shared include, and a non-inline function
+// definition here would produce multiple-definition link errors as soon as two
+// translation units include it.
+inline std::string to_string(work_group_op op)
+{
+    switch (op)
+    {
+        case work_group_op::add:
+            return "add";
+        case work_group_op::min:
+            return "min";
+        case work_group_op::max:
+            return "max";
+        default:
+            break;
+    }
+    return "";
+}
+
+// Produces `count` input values for the work-group operation `op`:
+//   add - every element is 1
+//   min - repeating countdown wg_size, wg_size - 1, ..., 1
+//   max - repeating count-up 0, 1, ..., wg_size - 1
+template <class CL_INT_TYPE, work_group_op op>
+std::vector<CL_INT_TYPE> generate_input(size_t count, size_t wg_size)
+{
+    std::vector<CL_INT_TYPE> values(count, CL_INT_TYPE(1));
+
+    if(op == work_group_op::min)
+    {
+        size_t next = wg_size;
+        for(size_t pos = 0; pos < count; ++pos)
+        {
+            values[pos] = static_cast<CL_INT_TYPE>(next);
+            --next;
+            if(next == 0)
+            {
+                next = wg_size; // restart the countdown
+            }
+        }
+    }
+    else if(op == work_group_op::max)
+    {
+        size_t next = 0;
+        for(size_t pos = 0; pos < count; ++pos)
+        {
+            values[pos] = static_cast<CL_INT_TYPE>(next);
+            ++next;
+            if(next == wg_size)
+            {
+                next = 0; // restart the count-up
+            }
+        }
+    }
+
+    // work_group_op::add (and anything else) keeps the all-ones vector.
+    return values;
+}
+
+// Produces the initial contents of the output buffer for operation `op`:
+// `count` copies of 0 for add, of the type's maximum for min and of the
+// type's minimum for max. wg_size is not used.
+template <class CL_INT_TYPE, work_group_op op>
+std::vector<CL_INT_TYPE> generate_output(size_t count, size_t wg_size)
+{
+    CL_INT_TYPE fill = CL_INT_TYPE(0);
+    if(op == work_group_op::min)
+    {
+        fill = (std::numeric_limits<CL_INT_TYPE>::max)();
+    }
+    else if(op == work_group_op::max)
+    {
+        fill = (std::numeric_limits<CL_INT_TYPE>::min)();
+    }
+    return std::vector<CL_INT_TYPE>(count, fill);
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_COMMON_HPP

diff --git a/test_conformance/clcpp/workgroups/main.cpp b/test_conformance/clcpp/workgroups/main.cpp
new file mode 100644
index 0000000..508753c
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/main.cpp

@@ -0,0 +1,29 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_wg_all.hpp"
+#include "test_wg_any.hpp"
+#include "test_wg_broadcast.hpp"
+#include "test_wg_reduce.hpp"
+#include "test_wg_scan_inclusive.hpp"
+#include "test_wg_scan_exclusive.hpp"
+
+// Entry point: passes the test definitions held by the global autotest suite
+// to runTestHarness and returns its result.
+int main(int argc, const char *argv[])
+{
+    auto &registered = autotest::test_suite::global_test_suite().test_defs;
+    const auto total = registered.size();
+    return runTestHarness(argc, argv, total, registered.data(), false, false, 0);
+}

diff --git a/test_conformance/clcpp/workgroups/test_wg_all.hpp b/test_conformance/clcpp/workgroups/test_wg_all.hpp
new file mode 100644
index 0000000..103ce2b
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/test_wg_all.hpp

@@ -0,0 +1,218 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+
+// -----------------------------------------------------------------------------------
+// -------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT -------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C source of kernel test_wg_all: every work-item writes 1 when
+// work_group_all(input[tid] < input[tid+1]) is non-zero, otherwise 0.
+std::string generate_wg_all_kernel_code()
+{
+    std::string source;
+    source += "__kernel void test_wg_all(global uint *input, global uint *output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "\n";
+    source += "    int result = work_group_all(input[tid] < input[tid+1]);\n";
+    source += "    if(result == 0) {\n";
+    source += "        output[tid] = 0;\n";
+    source += "        return;\n";
+    source += "    }\n";
+    source += "    output[tid] = 1;\n";
+    source += "}\n";
+    return source;
+}
+#else
+// OpenCL C++ source of kernel test_wg_all: every work-item writes 1 when
+// work_group_all(input[tid] < input[tid+1]) is true, otherwise 0.
+std::string generate_wg_all_kernel_code()
+{
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_wg_all(global_ptr<uint[]> input, global_ptr<uint[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    bool result = work_group_all(input[tid] < input[tid+1]);\n";
+    source += "    if(!result) {\n";
+    source += "        output[tid] = 0;\n";
+    source += "        return;\n";
+    source += "    }\n";
+    source += "    output[tid] = 1;\n";
+    source += "}\n";
+    return source;
+}
+#endif
+
+// Host-side check of the test_wg_all kernel output.
+//
+// in      - kernel input; in[k + 1] is read for every checked k, so it must
+//           hold at least count + 1 values
+// out     - values written by the kernel, one per work-item
+// count   - number of work-items to check
+// wg_size - work-group size used for the launch
+//
+// For each group of wg_size consecutive work-items the expected flag is 1 when
+// in[k] < in[k + 1] holds for every k of the group and 0 otherwise. Returns
+// CL_SUCCESS when every work-item stored the expected flag; otherwise logs the
+// first mismatch and returns -1.
+int verify_wg_all(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size)
+{
+    for (size_t i = 0; i < count; i += wg_size)
+    {
+        // The last group is clamped to the remaining work-items.
+        const size_t group_items = (std::min)(wg_size, count - i);
+
+        // Work-group all
+        bool all = true;
+        for (size_t j = 0; j < group_items; j++)
+        {
+            if(!(in[i+j] < in[i+j+1]))
+            {
+                all = false;
+                break;
+            }
+        }
+
+        // Convert bool to uint
+        const cl_uint all_uint = all ? 1 : 0;
+        // Check if all work-items in work-group stored correct value
+        for (size_t j = 0; j < group_items; j++)
+        {
+            if (all_uint != out[i + j])
+            {
+                // %lu expects unsigned long; size_t is a different type on
+                // LLP64 targets, so every argument is converted explicitly.
+                log_info(
+                    "work_group_all %s: Error at %lu: expected = %lu, got = %lu\n",
+                    type_name<cl_uint>().c_str(),
+                    static_cast<unsigned long>(i + j),
+                    static_cast<unsigned long>(all_uint),
+                    static_cast<unsigned long>(out[i + j]));
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Builds `count` input values for the work_group_all test: element i holds i,
+// except that for positions past count/2, whenever the wg_size-long countdown
+// reaches wg_size/2, the element repeats its predecessor. At such a spot
+// input[tid] < input[tid+1] is false, so work_group_all() should return false
+// for the work-group containing it.
+std::vector<cl_uint> generate_input_wg_all(size_t count, size_t wg_size)
+{
+    std::vector<cl_uint> values(count, cl_uint(0));
+    const size_t trigger = wg_size / 2;
+    const size_t half_count = count / 2;
+
+    size_t countdown = wg_size;
+    for(size_t pos = 0; pos < count; ++pos)
+    {
+        const bool repeat_previous = (countdown == trigger) && (pos > half_count);
+        values[pos] = repeat_previous ? values[pos - 1] : static_cast<cl_uint>(pos);
+
+        if(--countdown == 0)
+        {
+            countdown = wg_size;
+        }
+    }
+    return values;
+}
+
+// Initial contents of the output buffer: `count` ones. wg_size is unused.
+std::vector<cl_uint> generate_output_wg_all(size_t count, size_t wg_size)
+{
+    static_cast<void>(wg_size);
+    std::vector<cl_uint> initial(count, cl_uint(1));
+    return initial;
+}
+
+// Builds and runs the test_wg_all kernel, then verifies its output on the host.
+//
+// `count` is rounded up to a whole number of work-groups of the size reported
+// by clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE). The input buffer
+// holds one element more than the global work size because every work-item
+// also reads input[tid + 1].
+//
+// Returns the status of the last OpenCL call when the run and the verification
+// succeed.
+// NOTE(review): the RETURN_ON_* macros presumably return on failure; if so,
+// objects created earlier are not released on those paths - confirm.
+int work_group_all(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    cl_mem buffers[2]; // [0] = kernel input, [1] = kernel output
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_wg_all_kernel_code();
+// -----------------------------------------------------------------------------------
+// -------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT -------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all", "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all");
+    RETURN_ON_ERROR(err)
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Calculate global work size: round count up to a multiple of wg_size.
+    // Integer arithmetic is exact for every size_t value and does not rely on
+    // <cmath> being included by some other header.
+    const size_t wg_number = count / wg_size + ((count % wg_size) != 0 ? 1 : 0);
+    const size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    // One extra input element so the last work-item can read input[tid + 1].
+    std::vector<cl_uint> input = generate_input_wg_all(flat_work_size + 1, wg_size);
+    std::vector<cl_uint> output = generate_output_wg_all(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    // Upload the generated input (blocking write).
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    // The local size is fixed to wg_size so the host check can reproduce the
+    // work-group boundaries.
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read of everything the kernel stored.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    if (verify_wg_all(input, output, flat_work_size, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_all failed");
+    }
+    log_info("work_group_all passed\n");
+
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+
+// Test-case wrapper around work_group_all(): any failure is reported as -1.
+AUTO_TEST_CASE(test_work_group_all)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int err = work_group_all(device, context, queue, n_elems);
+    CHECK_ERROR(err)
+
+    return (err == CL_SUCCESS) ? CL_SUCCESS : -1;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP

diff --git a/test_conformance/clcpp/workgroups/test_wg_any.hpp b/test_conformance/clcpp/workgroups/test_wg_any.hpp
new file mode 100644
index 0000000..724b3ce
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/test_wg_any.hpp

@@ -0,0 +1,218 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ANY_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ANY_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+
+// -----------------------------------------------------------------------------------
+// -------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT -------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C source of kernel test_wg_any: every work-item writes 1 when
+// work_group_any(input[tid] == input[tid+1]) is non-zero, otherwise 0.
+std::string generate_wg_any_kernel_code()
+{
+    std::string source;
+    source += "__kernel void test_wg_any(global uint *input, global uint *output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "\n";
+    source += "    int result = work_group_any(input[tid] == input[tid+1]);\n";
+    source += "    if(result == 0) {\n";
+    source += "        output[tid] = 0;\n";
+    source += "        return;\n";
+    source += "    }\n";
+    source += "    output[tid] = 1;\n";
+    source += "}\n";
+    return source;
+}
+#else
+// OpenCL C++ source of kernel test_wg_any: every work-item writes 1 when
+// work_group_any(input[tid] == input[tid+1]) is true, otherwise 0.
+std::string generate_wg_any_kernel_code()
+{
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_wg_any(global_ptr<uint[]> input, global_ptr<uint[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    bool result = work_group_any(input[tid] == input[tid+1]);\n";
+    source += "    if(!result) {\n";
+    source += "        output[tid] = 0;\n";
+    source += "        return;\n";
+    source += "    }\n";
+    source += "    output[tid] = 1;\n";
+    source += "}\n";
+    return source;
+}
+#endif
+
+// Host-side check of the test_wg_any kernel output.
+//
+// in      - kernel input; in[k + 1] is read for every checked k, so it must
+//           hold at least count + 1 values
+// out     - values written by the kernel, one per work-item
+// count   - number of work-items to check
+// wg_size - work-group size used for the launch
+//
+// For each group of wg_size consecutive work-items the expected flag is 1 when
+// in[k] == in[k + 1] holds for at least one k of the group and 0 otherwise.
+// Returns CL_SUCCESS when every work-item stored the expected flag; otherwise
+// logs the first mismatch and returns -1.
+int verify_wg_any(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size)
+{
+    for (size_t i = 0; i < count; i += wg_size)
+    {
+        // The last group is clamped to the remaining work-items.
+        const size_t group_items = (std::min)(wg_size, count - i);
+
+        // Work-group any
+        bool any = false;
+        for (size_t j = 0; j < group_items; j++)
+        {
+            if(in[i+j] == in[i+j+1])
+            {
+                any = true;
+                break;
+            }
+        }
+
+        // Convert bool to uint
+        const cl_uint any_uint = any ? 1 : 0;
+        // Check if all work-items in work-group stored correct value
+        for (size_t j = 0; j < group_items; j++)
+        {
+            if (any_uint != out[i + j])
+            {
+                // %lu expects unsigned long; size_t is a different type on
+                // LLP64 targets, so every argument is converted explicitly.
+                log_info(
+                    "work_group_any %s: Error at %lu: expected = %lu, got = %lu\n",
+                    type_name<cl_uint>().c_str(),
+                    static_cast<unsigned long>(i + j),
+                    static_cast<unsigned long>(any_uint),
+                    static_cast<unsigned long>(out[i + j]));
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Builds `count` input values for the work_group_any test: element i holds i,
+// except that for positions past count/2, whenever the wg_size-long countdown
+// reaches wg_size/2, the element repeats its predecessor. At such a spot
+// input[tid] == input[tid+1] is true, so work_group_any() should return true
+// for the work-group containing it.
+std::vector<cl_uint> generate_input_wg_any(size_t count, size_t wg_size)
+{
+    std::vector<cl_uint> values(count, cl_uint(0));
+    const size_t trigger = wg_size / 2;
+    const size_t half_count = count / 2;
+
+    size_t countdown = wg_size;
+    for(size_t pos = 0; pos < count; ++pos)
+    {
+        const bool repeat_previous = (countdown == trigger) && (pos > half_count);
+        values[pos] = repeat_previous ? values[pos - 1] : static_cast<cl_uint>(pos);
+
+        if(--countdown == 0)
+        {
+            countdown = wg_size;
+        }
+    }
+    return values;
+}
+
+// Initial contents of the output buffer: `count` ones. wg_size is unused.
+std::vector<cl_uint> generate_output_wg_any(size_t count, size_t wg_size)
+{
+    static_cast<void>(wg_size);
+    std::vector<cl_uint> initial(count, cl_uint(1));
+    return initial;
+}
+
+// Builds and runs the test_wg_any kernel, then verifies its output on the host.
+//
+// `count` is rounded up to a whole number of work-groups of the size reported
+// by clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE). The input buffer
+// holds one element more than the global work size because every work-item
+// also reads input[tid + 1].
+//
+// Returns the status of the last OpenCL call when the run and the verification
+// succeed.
+// NOTE(review): the RETURN_ON_* macros presumably return on failure; if so,
+// objects created earlier are not released on those paths - confirm.
+int work_group_any(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    cl_mem buffers[2]; // [0] = kernel input, [1] = kernel output
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_wg_any_kernel_code();
+// -----------------------------------------------------------------------------------
+// -------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT -------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any", "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any");
+    RETURN_ON_ERROR(err)
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Calculate global work size: round count up to a multiple of wg_size.
+    // Integer arithmetic is exact for every size_t value and does not rely on
+    // <cmath> being included by some other header.
+    const size_t wg_number = count / wg_size + ((count % wg_size) != 0 ? 1 : 0);
+    const size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    // One extra input element so the last work-item can read input[tid + 1].
+    std::vector<cl_uint> input = generate_input_wg_any(flat_work_size + 1, wg_size);
+    std::vector<cl_uint> output = generate_output_wg_any(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    // Upload the generated input (blocking write).
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    // The local size is fixed to wg_size so the host check can reproduce the
+    // work-group boundaries.
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read of everything the kernel stored.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    if (verify_wg_any(input, output, flat_work_size, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_any failed");
+    }
+    log_info("work_group_any passed\n");
+
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+
+// Test-case wrapper around work_group_any(): any failure is reported as -1.
+AUTO_TEST_CASE(test_work_group_any)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    int err = work_group_any(device, context, queue, n_elems);
+    CHECK_ERROR(err)
+
+    return (err == CL_SUCCESS) ? CL_SUCCESS : -1;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ANY_HPP

diff --git a/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp b/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp
new file mode 100644
index 0000000..4dc5559
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp

@@ -0,0 +1,458 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C source of the 1D work_group_broadcast test kernel.
+std::string generate_wg_broadcast_1D_kernel_code()
+{
+    // The broadcasting work-item is the one whose local id equals
+    // get_group_id(0) % get_local_size(0).
+    std::string source =
+        "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid = get_global_id(0);\n"
+        "    uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n"
+        "    output[tid] = result;\n"
+        "}\n";
+    return source;
+}
+// OpenCL C source of the 2D work_group_broadcast test kernel.
+std::string generate_wg_broadcast_2D_kernel_code()
+{
+    // Elements are addressed row-major (row length = get_global_size(0));
+    // the broadcaster's local id is (group id % local size) per dimension.
+    std::string source =
+        "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid_x = get_global_id(0);\n"
+        "    ulong tid_y = get_global_id(1);\n"
+        "    size_t x = get_group_id(0) % get_local_size(0);\n"
+        "    size_t y = get_group_id(1) % get_local_size(1);\n"
+        "    size_t idx = (tid_y * get_global_size(0)) + tid_x;\n"
+        "    uint result = work_group_broadcast(input[idx], x, y);\n"
+        "    output[idx] = result;\n"
+        "}\n";
+    return source;
+}
+// OpenCL C source of the 3D work_group_broadcast test kernel.
+std::string generate_wg_broadcast_3D_kernel_code()
+{
+    // Elements are addressed as z * (size_y * size_x) + y * size_x + x;
+    // the broadcaster's local id is (group id % local size) per dimension.
+    std::string source =
+        "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid_x = get_global_id(0);\n"
+        "    ulong tid_y = get_global_id(1);\n"
+        "    ulong tid_z = get_global_id(2);\n"
+        "    size_t x = get_group_id(0) % get_local_size(0);\n"
+        "    size_t y = get_group_id(1) % get_local_size(1);\n"
+        "    size_t z = get_group_id(2) % get_local_size(2);\n"
+        "    ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n"
+        "    uint result = work_group_broadcast(input[idx], x, y, z);\n"
+        "    output[idx] = result;\n"
+        "}\n";
+    return source;
+}
+#else
+// OpenCL C++ source of the 1D work_group_broadcast test kernel.
+std::string generate_wg_broadcast_1D_kernel_code()
+{
+    // The broadcasting work-item is the one whose local id equals
+    // get_group_id(0) % get_local_size(0).
+    std::string source =
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "#include <opencl_work_group>\n"
+        "using namespace cl;\n"
+        "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+        "{\n"
+        "    ulong tid = get_global_id(0);\n"
+        "    uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n"
+        "    output[tid] = result;\n"
+        "}\n";
+    return source;
+}
+// OpenCL C++ source of the 2D work_group_broadcast test kernel.
+std::string generate_wg_broadcast_2D_kernel_code()
+{
+    // Elements are addressed row-major (row length = get_global_size(0));
+    // the broadcaster's local id is (group id % local size) per dimension.
+    std::string source =
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "#include <opencl_work_group>\n"
+        "using namespace cl;\n"
+        "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+        "{\n"
+        "    ulong tid_x = get_global_id(0);\n"
+        "    ulong tid_y = get_global_id(1);\n"
+        "    size_t x = get_group_id(0) % get_local_size(0);\n"
+        "    size_t y = get_group_id(1) % get_local_size(1);\n"
+        "    size_t idx = (tid_y * get_global_size(0)) + tid_x;\n"
+        "    uint result = work_group_broadcast(input[idx], x, y);\n"
+        "    output[idx] = result;\n"
+        "}\n";
+    return source;
+}
+// OpenCL C++ source of the 3D work_group_broadcast test kernel.
+std::string generate_wg_broadcast_3D_kernel_code()
+{
+    // Elements are addressed as z * (size_y * size_x) + y * size_x + x;
+    // the broadcaster's local id is (group id % local size) per dimension.
+    std::string source =
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "#include <opencl_work_group>\n"
+        "using namespace cl;\n"
+        "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+        "{\n"
+        "    ulong tid_x = get_global_id(0);\n"
+        "    ulong tid_y = get_global_id(1);\n"
+        "    ulong tid_z = get_global_id(2);\n"
+        "    size_t x = get_group_id(0) % get_local_size(0);\n"
+        "    size_t y = get_group_id(1) % get_local_size(1);\n"
+        "    size_t z = get_group_id(2) % get_local_size(2);\n"
+        "    ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n"
+        "    uint result = work_group_broadcast(input[idx], x, y, z);\n"
+        "    output[idx] = result;\n"
+        "}\n";
+    return source;
+}
+#endif
+
+// Host-side check of the 1D work_group_broadcast kernel output.
+//
+// in, out - kernel input and output; each must hold at least n elements
+// n       - global work size (number of work-items)
+// wg_size - local work size used for the launch
+//
+// Returns CL_SUCCESS when every work-item of every work-group holds the value
+// broadcast inside its group, -1 on the first mismatch or on invalid arguments.
+int
+verify_wg_broadcast_1D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t n, size_t wg_size)
+{
+    // A zero work-group size would never advance the loop below, and vectors
+    // shorter than n would be read out of bounds.
+    if (wg_size == 0 || in.size() < n || out.size() < n)
+    {
+        log_info("work_group_broadcast: invalid verification parameters\n");
+        return -1;
+    }
+
+    size_t group_id = 0;
+    for (size_t i = 0; i < n; i += wg_size, group_id++)
+    {
+        // size_t rather than int: keeps the modulo and the loop comparison
+        // unsigned. The last group may be smaller than wg_size.
+        const size_t local_size = (n - i) > wg_size ? wg_size : (n - i);
+        // Mirrors the kernel: the broadcaster's local id is
+        // get_group_id(0) % get_local_size(0).
+        const cl_uint broadcast_result = in[i + (group_id % local_size)];
+        for (size_t j = 0; j < local_size; j++)
+        {
+            if (broadcast_result != out[i + j])
+            {
+                // Cast so that the argument type matches %lu on every platform.
+                log_info("work_group_broadcast: Error at %lu: expected = %u, got = %u\n",
+                         static_cast<unsigned long>(i + j), broadcast_result, out[i + j]);
+                return -1;
+            }
+        }
+    }
+
+    return CL_SUCCESS;
+}
+
+// Host-side check of the 2D work_group_broadcast kernel output.
+//
+// in, out              - kernel input and output, row-major with row length nx;
+//                        each must hold at least nx * ny elements
+// nx, ny               - global work size per dimension
+// wg_size_x, wg_size_y - local work size per dimension
+//
+// Returns CL_SUCCESS when every work-item holds the value broadcast inside its
+// work-group, -1 on the first mismatch or on invalid arguments.
+int
+verify_wg_broadcast_2D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out,
+                       size_t nx, size_t ny,
+                       size_t wg_size_x, size_t wg_size_y)
+{
+    // Zero work-group sizes would never advance the loops below, and short
+    // vectors would be read out of bounds.
+    if (wg_size_x == 0 || wg_size_y == 0 || in.size() < nx * ny || out.size() < nx * ny)
+    {
+        log_info("work_group_broadcast: invalid verification parameters\n");
+        return -1;
+    }
+
+    size_t group_id_y = 0;
+    for (size_t i = 0; i < ny; i += wg_size_y, group_id_y++)
+    {
+        // Actual size of this row of work-groups (the last one may be partial).
+        const size_t local_size_y = (ny - i) > wg_size_y ? wg_size_y : (ny - i);
+        // Mirrors the kernel (get_group_id % get_local_size), consistent with
+        // the 1D verifier; identical to "% wg_size_y" for full groups.
+        const size_t y = group_id_y % local_size_y;
+        for (size_t li = 0; li < local_size_y; li++)
+        {
+            size_t group_id_x = 0;
+            for (size_t j = 0; j < nx; j += wg_size_x, group_id_x++)
+            {
+                const size_t local_size_x = (nx - j) > wg_size_x ? wg_size_x : (nx - j);
+                const size_t x = group_id_x % local_size_x;
+                // Flat index of the element owned by the broadcasting work-item.
+                const size_t source_indx = (i + y) * nx + (j + x);
+                const cl_uint broadcast_result = in[source_indx];
+                for (size_t lj = 0; lj < local_size_x; lj++)
+                {
+                    const size_t indx = (i + li) * nx + (j + lj);
+                    if ( broadcast_result != out[indx] )
+                    {
+                        // Casts make the arguments match %lu on every platform.
+                        log_info("work_group_broadcast: output index = %lu, source index = %lu, output size = %lu\n",
+                                 static_cast<unsigned long>(indx),
+                                 static_cast<unsigned long>(source_indx),
+                                 static_cast<unsigned long>(out.size()));
+                        log_info("work_group_broadcast: Error at (%lu, %lu): expected = %u, got = %u\n",
+                                 static_cast<unsigned long>(j + lj),
+                                 static_cast<unsigned long>(i + li),
+                                 broadcast_result, out[indx]);
+                        return -1;
+                    }
+                }
+            }
+        }
+    }
+
+    return CL_SUCCESS;
+}
+
+// Host-side check of the 3D work_group_broadcast kernel output.
+//
+// in, out    - kernel input and output, laid out as
+//              z * (ny * nx) + y * nx + x; each must hold at least
+//              nx * ny * nz elements
+// nx, ny, nz - global work size per dimension
+// wg_size_*  - local work size per dimension
+//
+// Returns CL_SUCCESS when every work-item holds the value broadcast inside its
+// work-group, -1 on the first mismatch or on invalid arguments.
+int
+verify_wg_broadcast_3D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out,
+                       size_t nx, size_t ny, size_t nz,
+                       size_t wg_size_x, size_t wg_size_y, size_t wg_size_z)
+{
+    // Zero work-group sizes would never advance the loops below, and short
+    // vectors would be read out of bounds.
+    const size_t total = nx * ny * nz;
+    if (wg_size_x == 0 || wg_size_y == 0 || wg_size_z == 0
+        || in.size() < total || out.size() < total)
+    {
+        log_info("work_group_broadcast: invalid verification parameters\n");
+        return -1;
+    }
+
+    // Number of elements in one z-plane.
+    const size_t plane = ny * nx;
+
+    size_t group_id_z = 0;
+    for (size_t i = 0; i < nz; i += wg_size_z, group_id_z++)
+    {
+        // Actual group extent (the last group per dimension may be partial);
+        // the modulo mirrors the kernel's get_group_id % get_local_size.
+        const size_t local_size_z = (nz - i) > wg_size_z ? wg_size_z : (nz - i);
+        const size_t z = group_id_z % local_size_z;
+        for (size_t li = 0; li < local_size_z; li++)
+        {
+            size_t group_id_y = 0;
+            for (size_t j = 0; j < ny; j += wg_size_y, group_id_y++)
+            {
+                const size_t local_size_y = (ny - j) > wg_size_y ? wg_size_y : (ny - j);
+                const size_t y = group_id_y % local_size_y;
+                for (size_t lj = 0; lj < local_size_y; lj++)
+                {
+                    size_t group_id_x = 0;
+                    for (size_t k = 0; k < nx; k += wg_size_x, group_id_x++)
+                    {
+                        const size_t local_size_x = (nx - k) > wg_size_x ? wg_size_x : (nx - k);
+                        const size_t x = group_id_x % local_size_x;
+                        // The z stride is ny * nx, the same layout used for the
+                        // output index below and by the kernel.
+                        const cl_uint broadcast_result = in[(i + z) * plane + (j + y) * nx + (k + x)];
+                        for (size_t lk = 0; lk < local_size_x; lk++)
+                        {
+                            const size_t indx = (i + li) * plane + (j + lj) * nx + (k + lk);
+                            if ( broadcast_result != out[indx] )
+                            {
+                                // Casts make the arguments match %lu on every platform.
+                                log_info(
+                                    "work_group_broadcast: Error at (%lu, %lu, %lu): expected = %u, got = %u\n",
+                                    static_cast<unsigned long>(k + lk),
+                                    static_cast<unsigned long>(j + lj),
+                                    static_cast<unsigned long>(i + li),
+                                    broadcast_result, out[indx]);
+                                return -1;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Builds the kernel input: a repeating countdown wg_size, wg_size - 1, ..., 1.
+std::vector<cl_uint> generate_input_wg_broadcast(size_t count, size_t wg_size)
+{
+    std::vector<cl_uint> input(count, cl_uint(0));
+    size_t countdown = wg_size;
+    for (auto &element : input)
+    {
+        element = static_cast<cl_uint>(countdown);
+        // Restart the countdown right after the value 1 has been written.
+        countdown = (countdown == 1) ? wg_size : countdown - 1;
+    }
+    return input;
+}
+
+// Builds the initial contents of the output buffer: count elements set to 1.
+// wg_size is accepted for signature symmetry with the input generator only.
+std::vector<cl_uint> generate_output_wg_broadcast(size_t count, size_t wg_size)
+{
+    static_cast<void>(wg_size);
+    std::vector<cl_uint> output;
+    output.assign(count, cl_uint(1));
+    return output;
+}
+
+// Builds the work_group_broadcast test kernel for the requested dimensionality,
+// runs it once and verifies the result on the host.
+//
+// device, context, queue - OpenCL objects used to build and run the kernel
+// count                  - requested number of elements; the global work size
+//                          is derived from it and rounded up (see below)
+// dim                    - dim > 2 selects the 3D variant, dim > 1 the 2D
+//                          variant, anything else the 1D variant
+//
+// Returns the last OpenCL status (err) when the end of the function is reached.
+// NOTE(review): the RETURN_ON_* macros are defined outside this file;
+// presumably they return early on failure. If so, the buffers, kernel and
+// program created up to that point are not released on those paths - confirm.
+int work_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, size_t dim)
+{
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t flat_wg_size;
+    // Per-dimension local and global sizes; unused dimensions stay at 1.
+    size_t wg_size[] = { 1, 1, 1};
+    size_t work_size[] = { 1, 1, 1};
+    int err;
+
+    // Get kernel source code
+    std::string code_str;
+    if(dim > 2) code_str = generate_wg_broadcast_3D_kernel_code();
+    else if(dim > 1) code_str = generate_wg_broadcast_2D_kernel_code();
+    else code_str = generate_wg_broadcast_1D_kernel_code();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    // In this configuration the function returns right after the build step;
+    // nothing below is executed.
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast", "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast");
+    RETURN_ON_ERROR(err)
+#endif
+
+    // Get max flat workgroup size
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &flat_wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Set local work size
+    // 1D uses the reported maximum directly; 2D/3D pick a square/cubic
+    // work-group whose total size does not exceed that maximum.
+    wg_size[0] = flat_wg_size;
+    if(dim > 2)
+    {
+        // Cube edge 8, 4, 2 or 1 (8^3 = 512, 4^3 = 64, 2^3 = 8).
+        if (flat_wg_size >=512)
+        {
+            wg_size[0] = wg_size[1] = wg_size[2] = 8;
+        }
+        else if (flat_wg_size >= 64)
+        {
+            wg_size[0] = wg_size[1] = wg_size[2] = 4;
+        }
+        else if (flat_wg_size >= 8)
+        {
+            wg_size[0] = wg_size[1] = wg_size[2] = 2;
+        }
+        else
+        {
+            wg_size[0] = wg_size[1] = wg_size[2] = 1;
+        }
+    }
+    else if(dim > 1)
+    {
+        // Square edge 16, 8, 4 or 1 (16^2 = 256, 8^2 = 64, 4^2 = 16).
+        if (flat_wg_size >= 256)
+        {
+            wg_size[0] = wg_size[1] = 16;
+        }
+        else if (flat_wg_size >=64)
+        {
+            wg_size[0] = wg_size[1] = 8;
+        }
+        else if (flat_wg_size >= 16)
+        {
+            wg_size[0] = wg_size[1] = 4;
+        }
+        else
+        {
+            wg_size[0] = wg_size[1] = 1;
+        }
+    }
+
+    // Calculate flat local work size
+    flat_wg_size = wg_size[0];
+    if(dim > 1) flat_wg_size *= wg_size[1];
+    if(dim > 2) flat_wg_size *= wg_size[2];
+
+    // Calculate global work size
+    // In the 2D/3D cases wg_number is applied to every dimension, so the
+    // flattened size is wg_number^dim times the flat work-group size.
+    // NOTE(review): "count / 3" and "count / 2" are integer divisions that
+    // truncate before the conversion to double - confirm this is intended.
+    // NOTE(review): std::ceil needs <cmath>, which this header does not include
+    // itself; presumably it arrives through common.hpp - confirm.
+    size_t flat_work_size = count;
+    // 3D
+    if(dim > 2)
+    {
+        size_t wg_number = static_cast<size_t>(
+            std::ceil(static_cast<double>(count / 3) / (wg_size[0] * wg_size[1] * wg_size[2]))
+        );
+        work_size[0] = wg_number * wg_size[0];
+        work_size[1] = wg_number * wg_size[1];
+        work_size[2] = wg_number * wg_size[2];
+        flat_work_size = work_size[0] * work_size[1] * work_size[2];
+    }
+    // 2D
+    else if(dim > 1)
+    {
+        size_t wg_number = static_cast<size_t>(
+            std::ceil(static_cast<double>(count / 2) / (wg_size[0] * wg_size[1]))
+        );
+        work_size[0] = wg_number * wg_size[0];
+        work_size[1] = wg_number * wg_size[1];
+        flat_work_size = work_size[0] * work_size[1];
+    }
+    // 1D
+    else
+    {
+        // Round count up to a whole number of work-groups.
+        size_t wg_number = static_cast<size_t>(
+            std::ceil(static_cast<double>(count) / wg_size[0])
+        );
+        flat_work_size = wg_number * wg_size[0];
+        work_size[0] = flat_work_size;
+    }
+
+    // Host-side data: one element per work-item.
+    std::vector<cl_uint> input = generate_input_wg_broadcast(flat_work_size, flat_wg_size);
+    std::vector<cl_uint> output = generate_output_wg_broadcast(flat_work_size, flat_wg_size);
+
+    // buffers[0] holds the kernel input, buffers[1] receives the output.
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    // Blocking write (CL_TRUE): the input is on the device before the launch.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    // The two status codes are OR-ed so a single check covers both calls.
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read (CL_TRUE): output is complete when the call returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // Host-side verification with the verifier matching the dimensionality.
+    int result = CL_SUCCESS;
+    // 3D
+    if(dim > 2)
+    {
+        result = verify_wg_broadcast_3D(
+            input, output,
+            work_size[0], work_size[1], work_size[2],
+            wg_size[0], wg_size[1], wg_size[2]
+        );
+    }
+    // 2D
+    else if(dim > 1)
+    {
+        result = verify_wg_broadcast_2D(
+            input, output,
+            work_size[0], work_size[1],
+            wg_size[0], wg_size[1]
+        );
+    }
+    // 1D
+    else
+    {
+        result = verify_wg_broadcast_1D(
+            input, output,
+            work_size[0],
+            wg_size[0]
+        );
+    }
+
+    // NOTE(review): presumably a no-op when result is CL_SUCCESS - confirm
+    // against the macro definition.
+    RETURN_ON_ERROR_MSG(result, "work_group_broadcast_%luD failed", dim);
+    log_info("work_group_broadcast_%luD passed\n", dim);
+
+    // Release the OpenCL objects created by this function.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+
+AUTO_TEST_CASE(test_work_group_broadcast)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Exercise the 1D, 2D and 3D variants in turn, accumulating their
+    // status codes into a single value.
+    int combined = CL_SUCCESS;
+    for (size_t dim = 1; dim <= 3; dim++)
+    {
+        int status = work_group_broadcast(device, context, queue, n_elems, dim);
+        CHECK_ERROR(status)
+        combined |= status;
+    }
+
+    // Any failure code is collapsed to -1 for the caller.
+    return (combined == CL_SUCCESS) ? CL_SUCCESS : -1;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP

diff --git a/test_conformance/clcpp/workgroups/test_wg_reduce.hpp b/test_conformance/clcpp/workgroups/test_wg_reduce.hpp
new file mode 100644
index 0000000..616cbdb
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/test_wg_reduce.hpp

@@ -0,0 +1,331 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C source of the work_group_reduce_<op> test kernel for CL_INT_TYPE.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_reduce_kernel_code()
+{
+    // Element type name as spelled in the kernel source.
+    const std::string type = type_name<CL_INT_TYPE>();
+
+    std::string source;
+    source += "__kernel void test_wg_reduce(global " + type + " *input, global " + type + " *output)\n";
+    source += "{\n"
+              "    ulong tid = get_global_id(0);\n"
+              "\n";
+    source += "    " + type + " result = work_group_reduce_" + to_string(op) + "(input[tid]);\n";
+    source += "    output[tid] = result;\n"
+              "}\n";
+    return source;
+}
+#else
+// OpenCL C++ source of the work_group_reduce<op> test kernel for CL_INT_TYPE.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_reduce_kernel_code()
+{
+    // Element type name as spelled in the kernel source.
+    const std::string type = type_name<CL_INT_TYPE>();
+
+    std::string source =
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "#include <opencl_work_group>\n"
+        "using namespace cl;\n";
+    source += "__kernel void test_wg_reduce(global_ptr<" + type + "[]> input, "
+              "global_ptr<" + type + "[]> output)\n";
+    source += "{\n"
+              "    ulong tid = get_global_id(0);\n";
+    source += "    " + type + " result = work_group_reduce<work_group_op::" + to_string(op) + ">(input[tid]);\n";
+    source += "    output[tid] = result;\n"
+              "}\n";
+    return source;
+}
+#endif
+
+// Checks that every work-item of each work-group stored the sum of that
+// group's input elements. Returns 0 on success, -1 on the first mismatch.
+template <class CL_INT_TYPE>
+int verify_wg_reduce_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    const size_t total = in.size();
+    for (size_t base = 0; base < total; base += wg_size)
+    {
+        // The last work-group may cover fewer than wg_size elements.
+        const size_t remaining = total - base;
+        const size_t group_len = (remaining > wg_size) ? wg_size : remaining;
+
+        // Reference sum over the work-group.
+        CL_INT_TYPE expected = 0;
+        for (size_t k = 0; k < group_len; k++)
+            expected += in[base + k];
+
+        // Every work-item of the group must hold the reference value.
+        for (size_t k = 0; k < group_len; k++)
+        {
+            if (expected == out[base + k])
+                continue;
+
+            log_info(
+                "work_group_reduce_add %s: Error at %lu: expected = %lu, got = %lu\n",
+                type_name<CL_INT_TYPE>().c_str(),
+                base + k,
+                static_cast<size_t>(expected),
+                static_cast<size_t>(out[base + k]));
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Checks that every work-item of each work-group stored the minimum of that
+// group's input elements. Returns 0 on success, -1 on the first mismatch.
+template <class CL_INT_TYPE>
+int verify_wg_reduce_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    const size_t total = in.size();
+    for (size_t base = 0; base < total; base += wg_size)
+    {
+        // The last work-group may cover fewer than wg_size elements.
+        const size_t remaining = total - base;
+        const size_t group_len = (remaining > wg_size) ? wg_size : remaining;
+
+        // Reference minimum, seeded with the largest representable value.
+        CL_INT_TYPE expected = (std::numeric_limits<CL_INT_TYPE>::max)();
+        for (size_t k = 0; k < group_len; k++)
+        {
+            if (in[base + k] < expected)
+                expected = in[base + k];
+        }
+
+        // Every work-item of the group must hold the reference value.
+        for (size_t k = 0; k < group_len; k++)
+        {
+            if (expected == out[base + k])
+                continue;
+
+            log_info(
+                "work_group_reduce_min %s: Error at %lu: expected = %lu, got = %lu\n",
+                type_name<CL_INT_TYPE>().c_str(),
+                base + k,
+                static_cast<size_t>(expected),
+                static_cast<size_t>(out[base + k]));
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Checks that every work-item of each work-group stored the maximum of that
+// group's input elements. Returns 0 on success, -1 on the first mismatch.
+template <class CL_INT_TYPE>
+int verify_wg_reduce_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    const size_t total = in.size();
+    for (size_t base = 0; base < total; base += wg_size)
+    {
+        // The last work-group may cover fewer than wg_size elements.
+        const size_t remaining = total - base;
+        const size_t group_len = (remaining > wg_size) ? wg_size : remaining;
+
+        // Reference maximum, seeded with the smallest representable value.
+        CL_INT_TYPE expected = (std::numeric_limits<CL_INT_TYPE>::min)();
+        for (size_t k = 0; k < group_len; k++)
+        {
+            if (expected < in[base + k])
+                expected = in[base + k];
+        }
+
+        // Every work-item of the group must hold the reference value.
+        for (size_t k = 0; k < group_len; k++)
+        {
+            if (expected == out[base + k])
+                continue;
+
+            log_info(
+                "work_group_reduce_max %s: Error at %lu: expected = %lu, got = %lu\n",
+                type_name<CL_INT_TYPE>().c_str(),
+                base + k,
+                static_cast<size_t>(expected),
+                static_cast<size_t>(out[base + k]));
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Dispatches to the verifier that matches the compile-time reduction operator.
+// Returns that verifier's result, or -1 for an operator without a verifier.
+template <class CL_INT_TYPE, work_group_op op>
+int verify_wg_reduce(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    if (op == work_group_op::add)
+        return verify_wg_reduce_add(in, out, wg_size);
+    if (op == work_group_op::min)
+        return verify_wg_reduce_min(in, out, wg_size);
+    if (op == work_group_op::max)
+        return verify_wg_reduce_max(in, out, wg_size);
+    return -1;
+}
+
+// Builds the work_group_reduce test kernel for element type CL_INT_TYPE and
+// operator op, runs it once as a 1D launch and verifies the result on the host.
+//
+// device, context, queue - OpenCL objects used to build and run the kernel
+// count                  - requested number of elements; rounded up to a whole
+//                          number of work-groups
+//
+// Returns CL_SUCCESS immediately when type_supported reports the type as
+// unsupported; otherwise the last OpenCL status (err) when the end is reached.
+// NOTE(review): the RETURN_ON_* macros are defined outside this file;
+// presumably they return early on failure. If so, the buffers, kernel and
+// program created up to that point are not released on those paths - confirm.
+template <class CL_INT_TYPE, work_group_op op>
+int work_group_reduce(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_wg_reduce_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    // In this configuration the function returns right after the build step;
+    // nothing below is executed.
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce", "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce");
+    RETURN_ON_ERROR(err)
+#endif
+
+    // The kernel's reported CL_KERNEL_WORK_GROUP_SIZE is used as the local size.
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Calculate global work size
+    // count is rounded up to a whole number of work-groups.
+    // NOTE(review): std::ceil needs <cmath>, which this header does not include
+    // itself; presumably it arrives through common.hpp - confirm.
+    size_t flat_work_size;
+    size_t wg_number = static_cast<size_t>(
+        std::ceil(static_cast<double>(count) / wg_size)
+    );
+    flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    // Host-side data: one element per work-item (generators defined elsewhere).
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+
+    // buffers[0] holds the kernel input, buffers[1] receives the output.
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    // Blocking write (CL_TRUE): the input is on the device before the launch.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    // The two status codes are OR-ed so a single check covers both calls.
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read (CL_TRUE): output is complete when the call returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // Host-side verification against a reference reduction per work-group.
+    if (verify_wg_reduce<CL_INT_TYPE, op>(input, output, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_reduce_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("work_group_reduce_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+
+    // Release the OpenCL objects created by this function.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+
+AUTO_TEST_CASE(test_work_group_reduce_add)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Runs the add reduction for cl_int, cl_uint, cl_long and cl_ulong and
+    // remembers whether any of them reported a non-success status.
+    bool failed = false;
+    int status = CL_SUCCESS;
+
+    status = work_group_reduce<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    if (status != CL_SUCCESS) failed = true;
+
+    status = work_group_reduce<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    if (status != CL_SUCCESS) failed = true;
+
+    status = work_group_reduce<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    if (status != CL_SUCCESS) failed = true;
+
+    status = work_group_reduce<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    if (status != CL_SUCCESS) failed = true;
+
+    return failed ? -1 : CL_SUCCESS;
+}
+
+AUTO_TEST_CASE(test_work_group_reduce_min)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Runs the min reduction for cl_int, cl_uint, cl_long and cl_ulong and
+    // remembers whether any of them reported a non-success status.
+    bool failed = false;
+    int status = CL_SUCCESS;
+
+    status = work_group_reduce<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    if (status != CL_SUCCESS) failed = true;
+
+    status = work_group_reduce<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    if (status != CL_SUCCESS) failed = true;
+
+    status = work_group_reduce<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    if (status != CL_SUCCESS) failed = true;
+
+    status = work_group_reduce<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    if (status != CL_SUCCESS) failed = true;
+
+    return failed ? -1 : CL_SUCCESS;
+}
+
+AUTO_TEST_CASE(test_work_group_reduce_max)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Runs the max reduction for cl_int, cl_uint, cl_long and cl_ulong and
+    // remembers whether any of them reported a non-success status.
+    bool failed = false;
+    int status = CL_SUCCESS;
+
+    status = work_group_reduce<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    if (status != CL_SUCCESS) failed = true;
+
+    status = work_group_reduce<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    if (status != CL_SUCCESS) failed = true;
+
+    status = work_group_reduce<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    if (status != CL_SUCCESS) failed = true;
+
+    status = work_group_reduce<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(status)
+    if (status != CL_SUCCESS) failed = true;
+
+    return failed ? -1 : CL_SUCCESS;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP

diff --git a/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp b/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp
new file mode 100644
index 0000000..35ec4b1
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp

@@ -0,0 +1,324 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_EXCLUSIVE_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_EXCLUSIVE_HPP
+
+#include <vector>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the OpenCL C source of the test_wg_scan_exclusive kernel: every
+// work-item feeds input[tid] to work_group_scan_exclusive_<op> and stores the
+// result in output[tid]. The element type is taken from CL_INT_TYPE.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_scan_exclusive_kernel_code()
+{
+    const std::string elem_type = type_name<CL_INT_TYPE>();
+
+    std::string source;
+    source += "__kernel void test_wg_scan_exclusive(global " + elem_type + " *input, global " + elem_type + " *output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "\n";
+    source += "    " + elem_type + " result = work_group_scan_exclusive_" + to_string(op) + "(input[tid]);\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+}
+#else
+// Builds the OpenCL C++ source of the test_wg_scan_exclusive kernel: every
+// work-item feeds input[tid] to work_group_scan_exclusive<work_group_op::<op>>
+// and stores the result in output[tid]. The element type is taken from
+// CL_INT_TYPE.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_scan_exclusive_kernel_code()
+{
+    const std::string elem_type = type_name<CL_INT_TYPE>();
+
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_wg_scan_exclusive(global_ptr<" + elem_type + "[]> input, ";
+    source += "global_ptr<" + elem_type + "[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    " + elem_type + " result = work_group_scan_exclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+}
+#endif
+
+// Host-side reference check for the exclusive "add" work-group scan.
+//
+// `in` and `out` are processed as consecutive chunks of `wg_size` elements (the
+// last chunk may be shorter). Within a chunk, out[k] must equal the sum of the
+// chunk's inputs that precede position k, so the first element of every chunk
+// must be 0.
+//
+// Returns CL_SUCCESS when every element matches; otherwise logs the first
+// mismatch (or the invalid arguments) and returns -1.
+template <class CL_INT_TYPE>
+int verify_wg_scan_exclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    // Reject arguments that would keep the chunk loop from advancing
+    // (wg_size == 0) or make it read past the end of `out`.
+    if (out.size() < in.size() || (wg_size == 0 && !in.empty()))
+    {
+        log_info(
+            "work_group_scan_exclusive_add %s: invalid verification arguments\n",
+            type_name<CL_INT_TYPE>().c_str());
+        return -1;
+    }
+
+    for (size_t group_start = 0; group_start < in.size(); group_start += wg_size)
+    {
+        // The last work-group may cover fewer than wg_size elements.
+        const size_t group_len = (std::min<size_t>)(wg_size, in.size() - group_start);
+        CL_INT_TYPE expected = 0;
+
+        // Check if all work-items in work-group wrote correct value
+        for (size_t k = 0; k < group_len; k++)
+        {
+            const size_t idx = group_start + k;
+            if (expected != out[idx])
+            {
+                // Format through std::to_string: "%lu" with size_t arguments is
+                // not portable, and casting the values to size_t misprints
+                // negative numbers and can truncate 64-bit ones.
+                log_info(
+                    "work_group_scan_exclusive_add %s: Error at %s: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    std::to_string(idx).c_str(),
+                    std::to_string(expected).c_str(),
+                    std::to_string(out[idx]).c_str());
+                return -1;
+            }
+            // Exclusive scan: the current input only affects later positions.
+            expected += in[idx];
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side reference check for the exclusive "min" work-group scan.
+//
+// `in` and `out` are processed as consecutive chunks of `wg_size` elements (the
+// last chunk may be shorter). Within a chunk, out[k] must equal the minimum of
+// the chunk's inputs that precede position k; the first element of every chunk
+// must therefore be the largest value representable by CL_INT_TYPE.
+//
+// Returns CL_SUCCESS when every element matches; otherwise logs the first
+// mismatch (or the invalid arguments) and returns -1.
+template <class CL_INT_TYPE>
+int verify_wg_scan_exclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    // Reject arguments that would keep the chunk loop from advancing
+    // (wg_size == 0) or make it read past the end of `out`.
+    if (out.size() < in.size() || (wg_size == 0 && !in.empty()))
+    {
+        log_info(
+            "work_group_scan_exclusive_min %s: invalid verification arguments\n",
+            type_name<CL_INT_TYPE>().c_str());
+        return -1;
+    }
+
+    for (size_t group_start = 0; group_start < in.size(); group_start += wg_size)
+    {
+        // The last work-group may cover fewer than wg_size elements.
+        const size_t group_len = (std::min<size_t>)(wg_size, in.size() - group_start);
+        // Identity of min: parenthesised to stay safe from min/max macros.
+        CL_INT_TYPE expected = (std::numeric_limits<CL_INT_TYPE>::max)();
+
+        // Check if all work-items in work-group wrote correct value
+        for (size_t k = 0; k < group_len; k++)
+        {
+            const size_t idx = group_start + k;
+            if (expected != out[idx])
+            {
+                // Format through std::to_string: "%lu" with size_t arguments is
+                // not portable, and casting the values to size_t misprints
+                // negative numbers and can truncate 64-bit ones.
+                log_info(
+                    "work_group_scan_exclusive_min %s: Error at %s: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    std::to_string(idx).c_str(),
+                    std::to_string(expected).c_str(),
+                    std::to_string(out[idx]).c_str());
+                return -1;
+            }
+            // Exclusive scan: the current input only affects later positions.
+            expected = (std::min)(expected, in[idx]);
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side reference check for the exclusive "max" work-group scan.
+//
+// `in` and `out` are processed as consecutive chunks of `wg_size` elements (the
+// last chunk may be shorter). Within a chunk, out[k] must equal the maximum of
+// the chunk's inputs that precede position k; the first element of every chunk
+// must therefore be the smallest value representable by CL_INT_TYPE.
+//
+// Returns CL_SUCCESS when every element matches; otherwise logs the first
+// mismatch (or the invalid arguments) and returns -1.
+template <class CL_INT_TYPE>
+int verify_wg_scan_exclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    // Reject arguments that would keep the chunk loop from advancing
+    // (wg_size == 0) or make it read past the end of `out`.
+    if (out.size() < in.size() || (wg_size == 0 && !in.empty()))
+    {
+        log_info(
+            "work_group_scan_exclusive_max %s: invalid verification arguments\n",
+            type_name<CL_INT_TYPE>().c_str());
+        return -1;
+    }
+
+    for (size_t group_start = 0; group_start < in.size(); group_start += wg_size)
+    {
+        // The last work-group may cover fewer than wg_size elements.
+        const size_t group_len = (std::min<size_t>)(wg_size, in.size() - group_start);
+        // Identity of max: parenthesised to stay safe from min/max macros.
+        CL_INT_TYPE expected = (std::numeric_limits<CL_INT_TYPE>::min)();
+
+        // Check if all work-items in work-group wrote correct value
+        for (size_t k = 0; k < group_len; k++)
+        {
+            const size_t idx = group_start + k;
+            if (expected != out[idx])
+            {
+                // Format through std::to_string: "%lu" with size_t arguments is
+                // not portable, and casting the values to size_t misprints
+                // negative numbers and can truncate 64-bit ones.
+                log_info(
+                    "work_group_scan_exclusive_max %s: Error at %s: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    std::to_string(idx).c_str(),
+                    std::to_string(expected).c_str(),
+                    std::to_string(out[idx]).c_str());
+                return -1;
+            }
+            // Exclusive scan: the current input only affects later positions.
+            expected = (std::max)(expected, in[idx]);
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Forwards to the exclusive-scan verifier that matches the compile-time
+// operation `op` (add, min or max). Any other operation yields -1.
+template <class CL_INT_TYPE, work_group_op op>
+int verify_wg_scan_exclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    if (op == work_group_op::add)
+    {
+        return verify_wg_scan_exclusive_add(in, out, wg_size);
+    }
+    if (op == work_group_op::min)
+    {
+        return verify_wg_scan_exclusive_min(in, out, wg_size);
+    }
+    if (op == work_group_op::max)
+    {
+        return verify_wg_scan_exclusive_max(in, out, wg_size);
+    }
+    // Unknown operation: nothing to verify against.
+    return -1;
+}
+
+// Runs one exclusive work-group scan test for element type CL_INT_TYPE and
+// operation `op`.
+//
+// Steps: build the kernel, query its CL_KERNEL_WORK_GROUP_SIZE, round `count`
+// up to a whole number of work-groups, upload the generated input, run the
+// kernel with that work-group size, read the output back and compare it with
+// the host reference (verify_wg_scan_exclusive).
+//
+// Returns CL_SUCCESS when type_supported<CL_INT_TYPE>(device) is false (test
+// skipped) or when verification passes; a non-success code otherwise.
+// NOTE(review): the RETURN_ON_* macros are defined elsewhere; this function
+// assumes they log and return when the given code is not CL_SUCCESS - confirm.
+template <class CL_INT_TYPE, work_group_op op>
+int work_group_scan_exclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+
+    // Handles start out as NULL so that cleanup can tell which objects exist.
+    cl_mem buffers[2] = { NULL, NULL };
+    cl_program program = NULL;
+    cl_kernel kernel = NULL;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+
+    // Releases every OpenCL object created so far. Called on each failure path
+    // after kernel creation and on success, so no handle is leaked.
+    auto release_resources = [&]() {
+        if (buffers[0] != NULL) clReleaseMemObject(buffers[0]);
+        if (buffers[1] != NULL) clReleaseMemObject(buffers[1]);
+        if (kernel != NULL) clReleaseKernel(kernel);
+        if (program != NULL) clReleaseProgram(program);
+    };
+
+    std::string code_str = generate_wg_scan_exclusive_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive", "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive");
+    RETURN_ON_ERROR(err)
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Calculate global work size: round count up to a multiple of wg_size using
+    // exact integer arithmetic (no floating-point round trip).
+    const size_t wg_number = count / wg_size + ((count % wg_size != 0) ? 1 : 0);
+    const size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    // Blocking write: the input is on the device before the kernel is enqueued.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read: the results are on the host once this call returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    if (verify_wg_scan_exclusive<CL_INT_TYPE, op>(input, output, wg_size) != CL_SUCCESS)
+    {
+        release_resources();
+        RETURN_ON_ERROR_MSG(-1, "work_group_scan_exclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("work_group_scan_exclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+
+    release_resources();
+    return err;
+}
+
+// Test case: runs work_group_scan_exclusive<T, work_group_op::add> for
+// T = cl_int, cl_uint, cl_long and cl_ulong. The per-type results are OR-ed
+// together; the case returns -1 if any of them is not CL_SUCCESS and
+// CL_SUCCESS otherwise.
+// NOTE(review): CHECK_ERROR is defined elsewhere - presumably it only reports
+// the failure and lets the remaining types run; confirm.
+AUTO_TEST_CASE(test_work_group_scan_exclusive_add)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // error accumulates all per-type results; local_error holds the latest one.
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_scan_exclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    // Collapse any accumulated error bits into a single generic failure code.
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
+}
+
+// Test case: runs work_group_scan_exclusive<T, work_group_op::min> for
+// T = cl_int, cl_uint, cl_long and cl_ulong. The per-type results are OR-ed
+// together; the case returns -1 if any of them is not CL_SUCCESS and
+// CL_SUCCESS otherwise.
+// NOTE(review): CHECK_ERROR is defined elsewhere - presumably it only reports
+// the failure and lets the remaining types run; confirm.
+AUTO_TEST_CASE(test_work_group_scan_exclusive_min)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // error accumulates all per-type results; local_error holds the latest one.
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_scan_exclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    // Collapse any accumulated error bits into a single generic failure code.
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
+}
+
+// Test case: runs work_group_scan_exclusive<T, work_group_op::max> for
+// T = cl_int, cl_uint, cl_long and cl_ulong. The per-type results are OR-ed
+// together; the case returns -1 if any of them is not CL_SUCCESS and
+// CL_SUCCESS otherwise.
+// NOTE(review): CHECK_ERROR is defined elsewhere - presumably it only reports
+// the failure and lets the remaining types run; confirm.
+AUTO_TEST_CASE(test_work_group_scan_exclusive_max)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // error accumulates all per-type results; local_error holds the latest one.
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_scan_exclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_exclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    // Collapse any accumulated error bits into a single generic failure code.
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_EXCLUSIVE_HPP

diff --git a/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp b/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp
new file mode 100644
index 0000000..34096eb
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp

@@ -0,0 +1,324 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP
+#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP
+
+#include <vector>
+#include <algorithm>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the OpenCL C source of the test_wg_scan_inclusive kernel: every
+// work-item feeds input[tid] to work_group_scan_inclusive_<op> and stores the
+// result in output[tid]. The element type is taken from CL_INT_TYPE.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_scan_inclusive_kernel_code()
+{
+    const std::string elem_type = type_name<CL_INT_TYPE>();
+
+    std::string source;
+    source += "__kernel void test_wg_scan_inclusive(global " + elem_type + " *input, global " + elem_type + " *output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "\n";
+    source += "    " + elem_type + " result = work_group_scan_inclusive_" + to_string(op) + "(input[tid]);\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+}
+#else
+// Builds the OpenCL C++ source of the test_wg_scan_inclusive kernel: every
+// work-item feeds input[tid] to work_group_scan_inclusive<work_group_op::<op>>
+// and stores the result in output[tid]. The element type is taken from
+// CL_INT_TYPE.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_scan_inclusive_kernel_code()
+{
+    const std::string elem_type = type_name<CL_INT_TYPE>();
+
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_wg_scan_inclusive(global_ptr<" + elem_type + "[]> input, ";
+    source += "global_ptr<" + elem_type + "[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    " + elem_type + " result = work_group_scan_inclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+}
+#endif
+
+// Host-side reference check for the inclusive "add" work-group scan.
+//
+// `in` and `out` are processed as consecutive chunks of `wg_size` elements (the
+// last chunk may be shorter). Within a chunk, out[k] must equal the sum of the
+// chunk's inputs up to and including position k.
+//
+// Returns CL_SUCCESS when every element matches; otherwise logs the first
+// mismatch (or the invalid arguments) and returns -1.
+template <class CL_INT_TYPE>
+int verify_wg_scan_inclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    // Reject arguments that would keep the chunk loop from advancing
+    // (wg_size == 0) or make it read past the end of `out`.
+    if (out.size() < in.size() || (wg_size == 0 && !in.empty()))
+    {
+        log_info(
+            "work_group_scan_inclusive_add %s: invalid verification arguments\n",
+            type_name<CL_INT_TYPE>().c_str());
+        return -1;
+    }
+
+    for (size_t group_start = 0; group_start < in.size(); group_start += wg_size)
+    {
+        // The last work-group may cover fewer than wg_size elements.
+        const size_t group_len = (std::min<size_t>)(wg_size, in.size() - group_start);
+        CL_INT_TYPE expected = 0;
+
+        // Check if all work-items in work-group wrote correct value
+        for (size_t k = 0; k < group_len; k++)
+        {
+            const size_t idx = group_start + k;
+            // Inclusive scan: the current input is part of its own result.
+            expected += in[idx];
+            if (expected != out[idx])
+            {
+                // Format through std::to_string: "%lu" with size_t arguments is
+                // not portable, and casting the values to size_t misprints
+                // negative numbers and can truncate 64-bit ones.
+                log_info(
+                    "work_group_scan_inclusive_add %s: Error at %s: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    std::to_string(idx).c_str(),
+                    std::to_string(expected).c_str(),
+                    std::to_string(out[idx]).c_str());
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side reference check for the inclusive "min" work-group scan.
+//
+// `in` and `out` are processed as consecutive chunks of `wg_size` elements (the
+// last chunk may be shorter). Within a chunk, out[k] must equal the minimum of
+// the chunk's inputs up to and including position k.
+//
+// Returns CL_SUCCESS when every element matches; otherwise logs the first
+// mismatch (or the invalid arguments) and returns -1.
+template <class CL_INT_TYPE>
+int verify_wg_scan_inclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    // Reject arguments that would keep the chunk loop from advancing
+    // (wg_size == 0) or make it read past the end of `out`.
+    if (out.size() < in.size() || (wg_size == 0 && !in.empty()))
+    {
+        log_info(
+            "work_group_scan_inclusive_min %s: invalid verification arguments\n",
+            type_name<CL_INT_TYPE>().c_str());
+        return -1;
+    }
+
+    for (size_t group_start = 0; group_start < in.size(); group_start += wg_size)
+    {
+        // The last work-group may cover fewer than wg_size elements.
+        const size_t group_len = (std::min<size_t>)(wg_size, in.size() - group_start);
+        // Identity of min: parenthesised to stay safe from min/max macros.
+        CL_INT_TYPE expected = (std::numeric_limits<CL_INT_TYPE>::max)();
+
+        // Check if all work-items in work-group wrote correct value
+        for (size_t k = 0; k < group_len; k++)
+        {
+            const size_t idx = group_start + k;
+            // Inclusive scan: the current input is part of its own result.
+            expected = (std::min)(expected, in[idx]);
+            if (expected != out[idx])
+            {
+                // Format through std::to_string: "%lu" with size_t arguments is
+                // not portable, and casting the values to size_t misprints
+                // negative numbers and can truncate 64-bit ones.
+                log_info(
+                    "work_group_scan_inclusive_min %s: Error at %s: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    std::to_string(idx).c_str(),
+                    std::to_string(expected).c_str(),
+                    std::to_string(out[idx]).c_str());
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Host-side reference check for the inclusive "max" work-group scan.
+//
+// `in` and `out` are processed as consecutive chunks of `wg_size` elements (the
+// last chunk may be shorter). Within a chunk, out[k] must equal the maximum of
+// the chunk's inputs up to and including position k.
+//
+// Returns CL_SUCCESS when every element matches; otherwise logs the first
+// mismatch (or the invalid arguments) and returns -1.
+template <class CL_INT_TYPE>
+int verify_wg_scan_inclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    // Reject arguments that would keep the chunk loop from advancing
+    // (wg_size == 0) or make it read past the end of `out`.
+    if (out.size() < in.size() || (wg_size == 0 && !in.empty()))
+    {
+        log_info(
+            "work_group_scan_inclusive_max %s: invalid verification arguments\n",
+            type_name<CL_INT_TYPE>().c_str());
+        return -1;
+    }
+
+    for (size_t group_start = 0; group_start < in.size(); group_start += wg_size)
+    {
+        // The last work-group may cover fewer than wg_size elements.
+        const size_t group_len = (std::min<size_t>)(wg_size, in.size() - group_start);
+        // Identity of max: parenthesised to stay safe from min/max macros.
+        CL_INT_TYPE expected = (std::numeric_limits<CL_INT_TYPE>::min)();
+
+        // Check if all work-items in work-group wrote correct value
+        for (size_t k = 0; k < group_len; k++)
+        {
+            const size_t idx = group_start + k;
+            // Inclusive scan: the current input is part of its own result.
+            expected = (std::max)(expected, in[idx]);
+            if (expected != out[idx])
+            {
+                // Format through std::to_string: "%lu" with size_t arguments is
+                // not portable, and casting the values to size_t misprints
+                // negative numbers and can truncate 64-bit ones.
+                log_info(
+                    "work_group_scan_inclusive_max %s: Error at %s: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    std::to_string(idx).c_str(),
+                    std::to_string(expected).c_str(),
+                    std::to_string(out[idx]).c_str());
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Forwards to the inclusive-scan verifier that matches the compile-time
+// operation `op` (add, min or max). Any other operation yields -1.
+template <class CL_INT_TYPE, work_group_op op>
+int verify_wg_scan_inclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    if (op == work_group_op::add)
+    {
+        return verify_wg_scan_inclusive_add(in, out, wg_size);
+    }
+    if (op == work_group_op::min)
+    {
+        return verify_wg_scan_inclusive_min(in, out, wg_size);
+    }
+    if (op == work_group_op::max)
+    {
+        return verify_wg_scan_inclusive_max(in, out, wg_size);
+    }
+    // Unknown operation: nothing to verify against.
+    return -1;
+}
+
+// Runs one inclusive work-group scan test for element type CL_INT_TYPE and
+// operation `op`.
+//
+// Steps: build the kernel, query its CL_KERNEL_WORK_GROUP_SIZE, round `count`
+// up to a whole number of work-groups, upload the generated input, run the
+// kernel with that work-group size, read the output back and compare it with
+// the host reference (verify_wg_scan_inclusive).
+//
+// Returns CL_SUCCESS when type_supported<CL_INT_TYPE>(device) is false (test
+// skipped) or when verification passes; a non-success code otherwise.
+// NOTE(review): the RETURN_ON_* macros are defined elsewhere; this function
+// assumes they log and return when the given code is not CL_SUCCESS - confirm.
+template <class CL_INT_TYPE, work_group_op op>
+int work_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+
+    // Handles start out as NULL so that cleanup can tell which objects exist.
+    cl_mem buffers[2] = { NULL, NULL };
+    cl_program program = NULL;
+    cl_kernel kernel = NULL;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+
+    // Releases every OpenCL object created so far. Called on each failure path
+    // after kernel creation and on success, so no handle is leaked.
+    auto release_resources = [&]() {
+        if (buffers[0] != NULL) clReleaseMemObject(buffers[0]);
+        if (buffers[1] != NULL) clReleaseMemObject(buffers[1]);
+        if (kernel != NULL) clReleaseKernel(kernel);
+        if (program != NULL) clReleaseProgram(program);
+    };
+
+    std::string code_str = generate_wg_scan_inclusive_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive");
+    RETURN_ON_ERROR(err)
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive", "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err)
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive");
+    RETURN_ON_ERROR(err)
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Calculate global work size: round count up to a multiple of wg_size using
+    // exact integer arithmetic (no floating-point round trip).
+    const size_t wg_number = count / wg_size + ((count % wg_size != 0) ? 1 : 0);
+    const size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    // Blocking write: the input is on the device before the kernel is enqueued.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read: the results are on the host once this call returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    if (err != CL_SUCCESS) release_resources();
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    if (verify_wg_scan_inclusive<CL_INT_TYPE, op>(input, output, wg_size) != CL_SUCCESS)
+    {
+        release_resources();
+        RETURN_ON_ERROR_MSG(-1, "work_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("work_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+
+    release_resources();
+    return err;
+}
+
+// Test case: runs work_group_scan_inclusive<T, work_group_op::add> for
+// T = cl_int, cl_uint, cl_long and cl_ulong. The per-type results are OR-ed
+// together; the case returns -1 if any of them is not CL_SUCCESS and
+// CL_SUCCESS otherwise.
+// NOTE(review): CHECK_ERROR is defined elsewhere - presumably it only reports
+// the failure and lets the remaining types run; confirm.
+AUTO_TEST_CASE(test_work_group_scan_inclusive_add)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // error accumulates all per-type results; local_error holds the latest one.
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_scan_inclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_inclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_inclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_inclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    // Collapse any accumulated error bits into a single generic failure code.
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
+}
+
+// Test case: runs work_group_scan_inclusive<T, work_group_op::min> for
+// T = cl_int, cl_uint, cl_long and cl_ulong. The per-type results are OR-ed
+// together; the case returns -1 if any of them is not CL_SUCCESS and
+// CL_SUCCESS otherwise.
+// NOTE(review): CHECK_ERROR is defined elsewhere - presumably it only reports
+// the failure and lets the remaining types run; confirm.
+AUTO_TEST_CASE(test_work_group_scan_inclusive_min)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // error accumulates all per-type results; local_error holds the latest one.
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_scan_inclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_inclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_inclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_inclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    // Collapse any accumulated error bits into a single generic failure code.
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
+}
+
+// Test case: runs work_group_scan_inclusive<T, work_group_op::max> for
+// T = cl_int, cl_uint, cl_long and cl_ulong. The per-type results are OR-ed
+// together; the case returns -1 if any of them is not CL_SUCCESS and
+// CL_SUCCESS otherwise.
+// NOTE(review): CHECK_ERROR is defined elsewhere - presumably it only reports
+// the failure and lets the remaining types run; confirm.
+AUTO_TEST_CASE(test_work_group_scan_inclusive_max)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // error accumulates all per-type results; local_error holds the latest one.
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+
+    local_error = work_group_scan_inclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_inclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_inclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    local_error = work_group_scan_inclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+
+    // Collapse any accumulated error bits into a single generic failure code.
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP

diff --git a/test_conformance/clcpp/workitems/CMakeLists.txt b/test_conformance/clcpp/workitems/CMakeLists.txt
new file mode 100644
index 0000000..0035933
--- /dev/null
+++ b/test_conformance/clcpp/workitems/CMakeLists.txt

@@ -0,0 +1,7 @@
+# Build description for the clcpp/workitems conformance test module.
+set(MODULE_NAME CPP_WORKITEMS)
+
+# Source files that make up this module.
+set(${MODULE_NAME}_SOURCES main.cpp)
+
+# Shared build rules; presumably they consume MODULE_NAME and
+# ${MODULE_NAME}_SOURCES - confirm against CMakeCommon.txt.
+include(../../CMakeCommon.txt)

diff --git a/test_conformance/clcpp/workitems/main.cpp b/test_conformance/clcpp/workitems/main.cpp
new file mode 100644
index 0000000..1c322df
--- /dev/null
+++ b/test_conformance/clcpp/workitems/main.cpp

@@ -0,0 +1,25 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../common.hpp"
+
+#include "test_workitems.hpp"
+
+
+// Entry point: passes the test definitions stored in the global autotest suite
+// to runTestHarness and returns its result as the process exit code.
+// NOTE(review): the meaning of the trailing (false, false, 0) arguments is
+// defined by runTestHarness, which is declared elsewhere.
+int main(int argc, const char *argv[])
+{
+    auto &registered_tests = autotest::test_suite::global_test_suite().test_defs;
+    const auto test_count = registered_tests.size();
+    auto test_table = registered_tests.data();
+
+    return runTestHarness(argc, argv, test_count, test_table, false, false, 0);
+}

diff --git a/test_conformance/clcpp/workitems/test_workitems.hpp b/test_conformance/clcpp/workitems/test_workitems.hpp
new file mode 100644
index 0000000..099ef34
--- /dev/null
+++ b/test_conformance/clcpp/workitems/test_workitems.hpp

@@ -0,0 +1,417 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef TEST_CONFORMANCE_CLCPP_WI_TEST_WORKITEMS_HPP
+#define TEST_CONFORMANCE_CLCPP_WI_TEST_WORKITEMS_HPP
+
+#include <vector>
+#include <algorithm>
+#include <random>
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+
+
+namespace test_workitems {
+
+// Parameters for one test_workitems() run.
+struct test_options
+{
+    // When true, each global size is passed through get_uniform_global_size() together with
+    // the chosen local size before the kernel is enqueued.
+    bool uniform_work_group_size;
+    // Upper bound of the random "count" drawn for each NDRange; the per-dimension global
+    // size is then drawn from [1, count^(1/dim)].
+    size_t max_count;
+    // Number of iterations of the outer test loop; every iteration runs a 1-D, a 2-D and a
+    // 3-D NDRange.
+    size_t num_tests;
+};
+
+// Host-side view of the record the test kernel writes for each work-item; test_workitems()
+// reads the output buffer back into a vector of these. Field names and order are the same
+// as in the device-side declaration held in source_common. The trailing comments name the
+// work-item function whose result the kernel stores in each field (d = dimension index).
+struct output_type
+{
+    cl_uint  work_dim;                // get_work_dim()
+    cl_ulong global_size[3];          // get_global_size(d)
+    cl_ulong global_id[3];            // get_global_id(d)
+    cl_ulong local_size[3];           // get_local_size(d)
+    cl_ulong enqueued_local_size[3];  // get_enqueued_local_size(d)
+    cl_ulong local_id[3];             // get_local_id(d)
+    cl_ulong num_groups[3];           // get_num_groups(d)
+    cl_ulong group_id[3];             // get_group_id(d)
+    cl_ulong global_offset[3];        // get_global_offset(d)
+    cl_ulong global_linear_id;        // get_global_linear_id()
+    cl_ulong local_linear_id;         // get_local_linear_id()
+    // Sub-group fields: the OpenCL C kernel variant writes them only when cl_khr_subgroups
+    // is defined; the buffer is zero-filled before each launch.
+    cl_ulong sub_group_size;          // get_sub_group_size()
+    cl_ulong max_sub_group_size;      // get_max_sub_group_size()
+    cl_ulong num_sub_groups;          // get_num_sub_groups()
+    cl_ulong enqueued_num_sub_groups; // get_enqueued_num_sub_groups()
+    cl_ulong sub_group_id;            // get_sub_group_id()
+    cl_ulong sub_group_local_id;      // get_sub_group_local_id()
+};
+
+// Kernel-source fragment that declares the device-side `output_type`. It is concatenated
+// into both variants of `source`. The host reads the kernel's buffer back as its own
+// output_type, so the two declarations list the same fields in the same order.
+const std::string source_common = R"(
+struct output_type
+{
+    uint  work_dim;
+    ulong global_size[3];
+    ulong global_id[3];
+    ulong local_size[3];
+    ulong enqueued_local_size[3];
+    ulong local_id[3];
+    ulong num_groups[3];
+    ulong group_id[3];
+    ulong global_offset[3];
+    ulong global_linear_id;
+    ulong local_linear_id;
+    ulong sub_group_size;
+    ulong max_sub_group_size;
+    ulong num_sub_groups;
+    ulong enqueued_num_sub_groups;
+    ulong sub_group_id;
+    ulong sub_group_local_id;
+};
+)";
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
+// -----------------------------------------------------------------------------------
+// `source` is the full program text of the `test` kernel. In both variants every work-item
+// stores the result of each work-item query into output[get_global_linear_id()].
+//
+// DEVELOPMENT + USE_OPENCLC_KERNELS: the kernel is written in OpenCL C, and the sub-group
+// queries are compiled only when cl_khr_subgroups is defined.
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+const std::string source =
+    source_common +
+    R"(
+        #ifdef cl_khr_subgroups
+        #pragma OPENCL EXTENSION cl_khr_subgroups : enable
+        #endif
+
+        kernel void test(global struct output_type *output)
+        {
+           const ulong gid = get_global_linear_id();
+           output[gid].work_dim = get_work_dim();
+           for (uint dimindx = 0; dimindx < 3; dimindx++)
+           {
+               output[gid].global_size[dimindx] = get_global_size(dimindx);
+               output[gid].global_id[dimindx] = get_global_id(dimindx);
+               output[gid].local_size[dimindx] = get_local_size(dimindx);
+               output[gid].enqueued_local_size[dimindx] = get_enqueued_local_size(dimindx);
+               output[gid].local_id[dimindx] = get_local_id(dimindx);
+               output[gid].num_groups[dimindx] = get_num_groups(dimindx);
+               output[gid].group_id[dimindx] = get_group_id(dimindx);
+               output[gid].global_offset[dimindx] = get_global_offset(dimindx);
+           }
+           output[gid].global_linear_id = get_global_linear_id();
+           output[gid].local_linear_id = get_local_linear_id();
+        #ifdef cl_khr_subgroups
+           output[gid].sub_group_size = get_sub_group_size();
+           output[gid].max_sub_group_size = get_max_sub_group_size();
+           output[gid].num_sub_groups = get_num_sub_groups();
+           output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups();
+           output[gid].sub_group_id = get_sub_group_id();
+           output[gid].sub_group_local_id = get_sub_group_local_id();
+        #endif
+        }
+    )";
+#else
+// Default: the kernel is written in OpenCL C++ (it includes <opencl_memory> and
+// <opencl_work_item>) and calls the sub-group queries unconditionally.
+const std::string source =
+    R"(
+        #include <opencl_memory>
+        #include <opencl_work_item>
+        using namespace cl;
+    )" +
+    source_common +
+    R"(
+
+        kernel void test(global_ptr<output_type[]> output)
+        {
+           const size_t gid = get_global_linear_id();
+           output[gid].work_dim = get_work_dim();
+           for (uint dimindx = 0; dimindx < 3; dimindx++)
+           {
+               output[gid].global_size[dimindx] = get_global_size(dimindx);
+               output[gid].global_id[dimindx] = get_global_id(dimindx);
+               output[gid].local_size[dimindx] = get_local_size(dimindx);
+               output[gid].enqueued_local_size[dimindx] = get_enqueued_local_size(dimindx);
+               output[gid].local_id[dimindx] = get_local_id(dimindx);
+               output[gid].num_groups[dimindx] = get_num_groups(dimindx);
+               output[gid].group_id[dimindx] = get_group_id(dimindx);
+               output[gid].global_offset[dimindx] = get_global_offset(dimindx);
+           }
+           output[gid].global_linear_id = get_global_linear_id();
+           output[gid].local_linear_id = get_local_linear_id();
+           output[gid].sub_group_size = get_sub_group_size();
+           output[gid].max_sub_group_size = get_max_sub_group_size();
+           output[gid].num_sub_groups = get_num_sub_groups();
+           output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups();
+           output[gid].sub_group_id = get_sub_group_id();
+           output[gid].sub_group_local_id = get_sub_group_local_id();
+        }
+
+    )";
+#endif
+
+// Reports a failure through RETURN_ON_ERROR_MSG (code -1) when `result` differs from
+// `expected`, printing both values via format_value(). RETURN_ON_ERROR_MSG is defined
+// outside this file; it presumably returns from the enclosing function (confirm there).
+// The macro expands to a complete if-block, so call sites omit the trailing semicolon.
+// `result` and `expected` are parenthesized in the comparison so that an expression
+// argument containing an operator of lower precedence than != is compared as a whole.
+#define CHECK_EQUAL(result, expected, func_name) \
+    if ((result) != (expected)) \
+    { \
+        RETURN_ON_ERROR_MSG(-1, \
+            "Function %s failed. Expected: %s, got: %s", func_name, \
+            format_value(expected).c_str(), format_value(result).c_str() \
+        ); \
+    }
+
+// Reports a failure through RETURN_ON_ERROR_MSG (code -1) when `expression` is TRUE:
+// the argument is the failure condition, not the condition that is expected to hold.
+// The macro expands to a complete if-block, so call sites omit the trailing semicolon.
+#define CHECK(expression, func_name) \
+    if (expression) \
+    { \
+        RETURN_ON_ERROR_MSG(-1, \
+            "Function %s returned incorrect result", func_name \
+        ); \
+    }
+
+// Enqueues the `test` kernel over randomly sized 1-D, 2-D and 3-D NDRanges and compares the
+// values every work-item recorded with values recomputed on the host.
+//
+// device, context, queue - OpenCL objects the test runs on.
+// options                - see test_options (taken by value).
+//
+// Returns CL_SUCCESS when every iteration passes. The RETURN_ON_* and CHECK* macros are
+// expected to leave the function early on failure (CHECK* pass -1 as the code); they are
+// defined outside this function, so confirm the exact return values there.
+//
+// The program, the kernel and the per-iteration output buffer are owned by a local scope
+// guard, so those early exits do not leak OpenCL objects.
+int test_workitems(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+{
+    int error = CL_SUCCESS;
+
+    cl_program program = NULL;
+    cl_kernel kernel = NULL;
+
+    // Releases whatever it still holds when the function exits, on any path.
+    struct cl_object_guard
+    {
+        cl_program program = NULL;
+        cl_kernel kernel = NULL;
+        cl_mem buffer = NULL;
+        ~cl_object_guard()
+        {
+            if (buffer != NULL) clReleaseMemObject(buffer);
+            if (kernel != NULL) clReleaseKernel(kernel);
+            if (program != NULL) clReleaseProgram(program);
+        }
+    } guard;
+
+    std::string kernel_name = "test";
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "-cl-std=CL2.0", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+#else
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+#endif
+
+    // Take ownership only after creation succeeded; what create_opencl_kernel leaves in the
+    // out-parameters on failure is not visible from here.
+    guard.program = program;
+    guard.kernel = kernel;
+
+    size_t max_work_group_size;
+    size_t max_local_sizes[3];
+    error = get_max_allowed_work_group_size(context, kernel, &max_work_group_size, max_local_sizes);
+    RETURN_ON_ERROR(error)
+
+    // The OpenCL C++ kernel always records the sub-group values; the OpenCL C variant only
+    // does so when cl_khr_subgroups is available.
+    bool check_sub_groups = true;
+    bool check_sub_groups_limits = true;
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    check_sub_groups = false;
+    check_sub_groups_limits = false;
+    if (is_extension_available(device, "cl_khr_subgroups"))
+    {
+        Version version = get_device_cl_version(device);
+        RETURN_ON_ERROR(error)
+        check_sub_groups_limits = (version >= Version(2,1)); // clGetKernelSubGroupInfo is from 2.1
+        check_sub_groups = true;
+    }
+#endif
+
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    // uniform_int_distribution requires lower <= upper, so a max_count of 0 is treated as 1.
+    std::uniform_int_distribution<size_t> count_dis(1, (std::max)((size_t)1, options.max_count));
+
+    for (size_t test = 0; test < options.num_tests; test++)
+    {
+        for (size_t dim = 1; dim <= 3; dim++)
+        {
+            // Dimensions at index >= dim keep size 1 and offset 0.
+            size_t global_size[3] = { 1, 1, 1 };
+            size_t global_offset[3] = { 0, 0, 0 };
+            size_t enqueued_local_size[3] = { 1, 1, 1 };
+            size_t count = count_dis(gen);
+            // Per-dimension sizes are drawn from [1, count^(1/dim)] so that their product
+            // stays around `count`.
+            std::uniform_int_distribution<size_t> global_size_dis(1, static_cast<size_t>(pow(count, 1.0 / dim)));
+            for (size_t d = 0; d < dim; d++)
+            {
+                std::uniform_int_distribution<size_t> enqueued_local_size_dis(1, max_local_sizes[d]);
+                global_size[d] = global_size_dis(gen);
+                global_offset[d] = global_size_dis(gen);
+                enqueued_local_size[d] = enqueued_local_size_dis(gen);
+            }
+            // Local work size must not exceed CL_KERNEL_WORK_GROUP_SIZE for this kernel
+            while (enqueued_local_size[0] * enqueued_local_size[1] * enqueued_local_size[2] > max_work_group_size)
+            {
+                // otherwise halve every used dimension (never below 1) until it fits
+                for (size_t d = 0; d < dim; d++)
+                {
+                    enqueued_local_size[d] = (std::max)((size_t)1, enqueued_local_size[d] / 2);
+                }
+            }
+            if (options.uniform_work_group_size)
+            {
+                for (size_t d = 0; d < dim; d++)
+                {
+                    global_size[d] = get_uniform_global_size(global_size[d], enqueued_local_size[d]);
+                }
+            }
+            // From here on `count` is the actual number of work-items (one record each).
+            count = global_size[0] * global_size[1] * global_size[2];
+
+            cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * count, NULL, &error);
+            RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+            guard.buffer = output_buffer;
+
+            // Zero the records so that fields the kernel does not write read back as 0.
+            const char pattern = 0;
+            error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL);
+            RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer")
+
+            error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+            error = clEnqueueNDRangeKernel(queue, kernel, dim, global_offset, global_size, enqueued_local_size, 0, NULL, NULL);
+            RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+
+            // Blocking read (CL_TRUE): `output` is complete once the call returns.
+            std::vector<output_type> output(count);
+            error = clEnqueueReadBuffer(
+                queue, output_buffer, CL_TRUE,
+                0, sizeof(output_type) * count,
+                static_cast<void *>(output.data()),
+                0, NULL, NULL
+            );
+            RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+
+            // Hand the buffer back from the guard before the explicit, checked release so a
+            // failing release is not followed by a second release in the guard.
+            guard.buffer = NULL;
+            error = clReleaseMemObject(output_buffer);
+            RETURN_ON_CL_ERROR(error, "clReleaseMemObject")
+
+            size_t sub_group_count_for_ndrange = 0;
+            size_t max_sub_group_size_for_ndrange = 0;
+            if (check_sub_groups_limits)
+            {
+                error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
+                    sizeof(size_t) * dim, enqueued_local_size,
+                    sizeof(size_t), &sub_group_count_for_ndrange, NULL);
+                RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+
+                error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+                    sizeof(size_t) * dim, enqueued_local_size,
+                    sizeof(size_t), &max_sub_group_size_for_ndrange, NULL);
+                RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+            }
+
+            // Work-groups per dimension: integer ceiling of global size / local size.
+            size_t num_groups[3];
+            for (size_t d = 0; d < 3; d++)
+                num_groups[d] = (global_size[d] + enqueued_local_size[d] - 1) / enqueued_local_size[d];
+
+            // Walk every work-group, then every work-item inside it.
+            size_t group_id[3];
+            for (group_id[0] = 0; group_id[0] < num_groups[0]; group_id[0]++)
+            for (group_id[1] = 0; group_id[1] < num_groups[1]; group_id[1]++)
+            for (group_id[2] = 0; group_id[2] < num_groups[2]; group_id[2]++)
+            {
+                // The last group of a dimension holds only the remainder (non-uniform case).
+                size_t local_size[3];
+                for (size_t d = 0; d < 3; d++)
+                {
+                    if (group_id[d] == num_groups[d] - 1)
+                        local_size[d] = global_size[d] - group_id[d] * enqueued_local_size[d];
+                    else
+                        local_size[d] = enqueued_local_size[d];
+                }
+
+                size_t local_id[3];
+                for (local_id[0] = 0; local_id[0] < local_size[0]; local_id[0]++)
+                for (local_id[1] = 0; local_id[1] < local_size[1]; local_id[1]++)
+                for (local_id[2] = 0; local_id[2] < local_size[2]; local_id[2]++)
+                {
+                    size_t global_id_wo_offset[3];
+                    size_t global_id[3];
+                    for (size_t d = 0; d < 3; d++)
+                    {
+                        global_id_wo_offset[d] = group_id[d] * enqueued_local_size[d] + local_id[d];
+                        global_id[d] = global_id_wo_offset[d] + global_offset[d];
+                    }
+
+                    // Ignore if the current work-item is outside of global work size (i.e. the work-group is non-uniform).
+                    // local_size is already clipped for the last group, so this is a safety net;
+                    // note that the break leaves only the innermost (local_id[2]) loop.
+                    if (global_id_wo_offset[0] >= global_size[0] ||
+                        global_id_wo_offset[1] >= global_size[1] ||
+                        global_id_wo_offset[2] >= global_size[2]) break;
+
+                    // Linear ids with dimension 0 varying fastest; the global one excludes the offset.
+                    const size_t global_linear_id =
+                        global_id_wo_offset[2] * global_size[1] * global_size[0] +
+                        global_id_wo_offset[1] * global_size[0] +
+                        global_id_wo_offset[0];
+                    const size_t local_linear_id =
+                        local_id[2] * local_size[1] * local_size[0] +
+                        local_id[1] * local_size[0] +
+                        local_id[0];
+
+                    // The kernel indexes its output by get_global_linear_id().
+                    const output_type &o = output[global_linear_id];
+
+                    CHECK_EQUAL(o.work_dim, dim, "get_work_dim")
+                    for (size_t d = 0; d < 3; d++)
+                    {
+                        CHECK_EQUAL(o.global_size[d], global_size[d], "get_global_size")
+                        CHECK_EQUAL(o.global_id[d], global_id[d], "get_global_id")
+                        CHECK_EQUAL(o.local_size[d], local_size[d], "get_local_size")
+                        CHECK_EQUAL(o.enqueued_local_size[d], enqueued_local_size[d], "get_enqueued_local_size")
+                        CHECK_EQUAL(o.local_id[d], local_id[d], "get_local_id")
+                        CHECK_EQUAL(o.num_groups[d], num_groups[d], "get_num_groups")
+                        CHECK_EQUAL(o.group_id[d], group_id[d], "get_group_id")
+                        CHECK_EQUAL(o.global_offset[d], global_offset[d], "get_global_offset")
+                    }
+
+                    CHECK_EQUAL(o.global_linear_id, global_linear_id, "get_global_linear_id")
+                    CHECK_EQUAL(o.local_linear_id, local_linear_id, "get_local_linear_id")
+
+                    // A few (but not all possible) sub-groups related checks.
+                    // CHECK takes the FAILURE condition as its first argument.
+                    if (check_sub_groups)
+                    {
+                        if (check_sub_groups_limits)
+                        {
+                            CHECK_EQUAL(o.max_sub_group_size, max_sub_group_size_for_ndrange, "get_max_sub_group_size")
+                            CHECK_EQUAL(o.enqueued_num_sub_groups, sub_group_count_for_ndrange, "get_enqueued_num_sub_groups")
+                        }
+                        CHECK(o.sub_group_size == 0 || o.sub_group_size > o.max_sub_group_size, "get_sub_group_size or get_max_sub_group_size")
+                        CHECK(o.num_sub_groups == 0 || o.num_sub_groups > o.enqueued_num_sub_groups, "get_enqueued_num_sub_groups")
+                        CHECK(o.sub_group_id >= o.num_sub_groups, "get_sub_group_id or get_num_sub_groups")
+                        CHECK(o.sub_group_local_id >= o.sub_group_size, "get_sub_group_local_id or get_sub_group_size")
+                    }
+                }
+            }
+        }
+    }
+
+    // The kernel and the program are released by `guard` as the function returns.
+    return error;
+}
+
+#undef CHECK_EQUAL
+#undef CHECK
+
+// Runs test_workitems() with uniform_work_group_size set: 1000 iterations, random count
+// bounded by num_elements.
+AUTO_TEST_CASE(test_workitems_uniform)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Initializer order follows test_options: uniform_work_group_size, max_count, num_tests.
+    const test_options options = { true, static_cast<size_t>(num_elements), 1000 };
+    return test_workitems(device, context, queue, options);
+}
+
+// Runs test_workitems() with uniform_work_group_size cleared, so global sizes are used as
+// drawn: 1000 iterations, random count bounded by num_elements.
+AUTO_TEST_CASE(test_workitems_non_uniform)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    // Initializer order follows test_options: uniform_work_group_size, max_count, num_tests.
+    const test_options options = { false, static_cast<size_t>(num_elements), 1000 };
+    return test_workitems(device, context, queue, options);
+}
+
+} // namespace
+
+#endif // TEST_CONFORMANCE_CLCPP_WI_TEST_WORKITEMS_HPP

diff --git a/test_conformance/commonfns/main.cpp b/test_conformance/commonfns/main.cpp
index b8364d5..739e09e 100644
--- a/test_conformance/commonfns/main.cpp
+++ b/test_conformance/commonfns/main.cpp

@@ -58,6 +58,6 @@
 int main(int argc, const char *argv[])
 {
     initVecSizes();
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
 }
 

diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp
index b40bf1f..51d7365 100644
--- a/test_conformance/commonfns/test_binary_fn.cpp
+++ b/test_conformance/commonfns/test_binary_fn.cpp

@@ -86,19 +86,15 @@
 
     for( i = 0; i < 3; i++ )
     {
-        streams[i] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, &err);
+        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, &err );
         test_error( err, "clCreateBuffer failed");
     }
 
     if (test_double)
         for( i = 3; i < 6; i++ )
         {
-            streams[i] =
-                clCreateBuffer(context, CL_MEM_READ_WRITE,
-                               sizeof(cl_double) * num_elements, NULL, &err);
-            test_error(err, "clCreateBuffer failed");
+          streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, &err );
+          test_error( err, "clCreateBuffer failed");
         }
 
     d = init_genrand( gRandomSeed );

diff --git a/test_conformance/commonfns/test_clamp.cpp b/test_conformance/commonfns/test_clamp.cpp
index bbb8364..5d01900 100644
--- a/test_conformance/commonfns/test_clamp.cpp
+++ b/test_conformance/commonfns/test_clamp.cpp

@@ -152,9 +152,7 @@
     // why does this go from 0 to 3?
     for( i = 0; i < 4; i++ )
     {
-        streams[i] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, NULL);
+        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
         if (!streams[0])
         {
             log_error("clCreateBuffer failed\n");
@@ -164,10 +162,8 @@
     if (test_double)
     for( i = 4; i < 8; i++ )
         {
-            streams[i] =
-                clCreateBuffer(context, CL_MEM_READ_WRITE,
-                               sizeof(cl_double) * num_elements, NULL, NULL);
-            if (!streams[0])
+        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
+        if (!streams[0])
             {
             log_error("clCreateBuffer failed\n");
             return -1;

diff --git a/test_conformance/commonfns/test_degrees.cpp b/test_conformance/commonfns/test_degrees.cpp
index 7360c03..d6593db 100644
--- a/test_conformance/commonfns/test_degrees.cpp
+++ b/test_conformance/commonfns/test_degrees.cpp

@@ -130,16 +130,14 @@
 
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -361,16 +359,14 @@
 
     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/commonfns/test_fmax.cpp b/test_conformance/commonfns/test_fmax.cpp
index 2441e69..462f9e4 100644
--- a/test_conformance/commonfns/test_fmax.cpp
+++ b/test_conformance/commonfns/test_fmax.cpp

@@ -103,22 +103,19 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[2])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/commonfns/test_fmaxf.cpp b/test_conformance/commonfns/test_fmaxf.cpp
index 1aed539..2ed1bcc 100644
--- a/test_conformance/commonfns/test_fmaxf.cpp
+++ b/test_conformance/commonfns/test_fmaxf.cpp

@@ -109,25 +109,20 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
         {
             log_error("clCreateBuffer failed\n");
             return -1;
         }
-        streams[1] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, NULL);
-        if (!streams[1])
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+    if (!streams[1])
         {
             log_error("clCreateBuffer failed\n");
             return -1;
         }
-        streams[2] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, NULL);
-        if (!streams[2])
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+    if (!streams[2])
         {
             log_error("clCreateBuffer failed\n");
             return -1;

diff --git a/test_conformance/commonfns/test_fmin.cpp b/test_conformance/commonfns/test_fmin.cpp
index 19bc7b6..7efbb8f 100644
--- a/test_conformance/commonfns/test_fmin.cpp
+++ b/test_conformance/commonfns/test_fmin.cpp

@@ -108,23 +108,20 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[2])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/commonfns/test_fminf.cpp b/test_conformance/commonfns/test_fminf.cpp
index e0e455a..f04fb1e 100644
--- a/test_conformance/commonfns/test_fminf.cpp
+++ b/test_conformance/commonfns/test_fminf.cpp

@@ -104,22 +104,19 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[2])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp
index 51baac4..d773f76 100644
--- a/test_conformance/commonfns/test_mix.cpp
+++ b/test_conformance/commonfns/test_mix.cpp

@@ -66,30 +66,26 @@
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[2])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[3])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/commonfns/test_radians.cpp b/test_conformance/commonfns/test_radians.cpp
index 0a580c1..9ff53a6 100644
--- a/test_conformance/commonfns/test_radians.cpp
+++ b/test_conformance/commonfns/test_radians.cpp

@@ -131,16 +131,14 @@
 
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -363,16 +361,14 @@
 
     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/commonfns/test_sign.cpp b/test_conformance/commonfns/test_sign.cpp
index 1b842e3..bf8d8c7 100644
--- a/test_conformance/commonfns/test_sign.cpp
+++ b/test_conformance/commonfns/test_sign.cpp

@@ -117,16 +117,14 @@
 
   input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
   output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[0])
   {
     log_error("clCreateBuffer failed\n");
     return -1;
   }
 
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[1])
   {
     log_error("clCreateBuffer failed\n");
@@ -333,16 +331,14 @@
 
   input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
   output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_double) * num_elements, NULL, NULL);
+  streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
   if (!streams[0])
   {
     log_error("clCreateBuffer failed\n");
     return -1;
   }
 
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_double) * num_elements, NULL, NULL);
+  streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
   if (!streams[1])
   {
     log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp
index c0cc1d4..19201fd 100644
--- a/test_conformance/commonfns/test_smoothstep.cpp
+++ b/test_conformance/commonfns/test_smoothstep.cpp

@@ -116,30 +116,26 @@
   input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
   input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
   output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[0])
   {
     log_error("clCreateBuffer failed\n");
     return -1;
   }
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[1])
   {
     log_error("clCreateBuffer failed\n");
     return -1;
   }
-  streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[2])
   {
     log_error("clCreateBuffer failed\n");
     return -1;
   }
 
-  streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[3])
   {
     log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/commonfns/test_smoothstepf.cpp b/test_conformance/commonfns/test_smoothstepf.cpp
index ac09e9e..7d708de 100644
--- a/test_conformance/commonfns/test_smoothstepf.cpp
+++ b/test_conformance/commonfns/test_smoothstepf.cpp

@@ -93,30 +93,26 @@
   input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
   input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
   output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[0])
   {
     log_error("clCreateBuffer failed\n");
     return -1;
   }
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[1])
   {
     log_error("clCreateBuffer failed\n");
     return -1;
   }
-  streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[2])
   {
     log_error("clCreateBuffer failed\n");
     return -1;
   }
 
-  streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[3])
   {
     log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp
index 0e3cfe0..3b1a5ba 100644
--- a/test_conformance/commonfns/test_step.cpp
+++ b/test_conformance/commonfns/test_step.cpp

@@ -109,22 +109,19 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[2])
     {
         log_error("clCreateBuffer failed\n");
@@ -376,22 +373,19 @@
     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
     input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[2])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/commonfns/test_stepf.cpp b/test_conformance/commonfns/test_stepf.cpp
index efada22..ba7d2e1 100644
--- a/test_conformance/commonfns/test_stepf.cpp
+++ b/test_conformance/commonfns/test_stepf.cpp

@@ -111,22 +111,19 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[2])
     {
         log_error("clCreateBuffer failed\n");
@@ -380,22 +377,19 @@
     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
     input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[2])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/compiler/CMakeLists.txt b/test_conformance/compiler/CMakeLists.txt
index e703471..69ad51c 100644
--- a/test_conformance/compiler/CMakeLists.txt
+++ b/test_conformance/compiler/CMakeLists.txt

@@ -7,27 +7,9 @@
     test_async_build.cpp
     test_build_options.cpp
     test_preprocessor.cpp
-    test_opencl_c_versions.cpp
     test_image_macro.cpp
     test_compiler_defines_for_extensions.cpp
     test_pragma_unroll.cpp
-    test_unload_platform_compiler.cpp
-    test_feature_macro.cpp
 )
 
 include(../CMakeCommon.txt)
-
-# Copy the required test include directories into the build directory.
-if(NOT DEFINED COMPILER_TEST_RESOURCES)
-    set(COMPILER_TEST_RESOURCES $<TARGET_FILE_DIR:${${MODULE_NAME}_OUT}>)
-endif()
-add_custom_command(
-        COMMENT "Copying compiler test resources..."
-        TARGET ${${MODULE_NAME}_OUT}
-        PRE_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy_directory
-        ${CLConform_SOURCE_DIR}/test_conformance/compiler/includeTestDirectory
-        ${COMPILER_TEST_RESOURCES}/includeTestDirectory
-        COMMAND ${CMAKE_COMMAND} -E copy_directory
-        ${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory
-        ${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory)

diff --git a/test_conformance/compiler/main.cpp b/test_conformance/compiler/main.cpp
index f0a9ef3..dd97b08 100644
--- a/test_conformance/compiler/main.cpp
+++ b/test_conformance/compiler/main.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -15,97 +15,85 @@
 //
 #include "harness/compat.h"
 
-#include "harness/testHarness.h"
-#include "procs.h"
 #include <stdio.h>
 #include <string.h>
+#include "procs.h"
+#include "harness/testHarness.h"
 
 #if !defined(_WIN32)
 #include <unistd.h>
 #endif
 
 test_definition test_list[] = {
-    ADD_TEST(load_program_source),
-    ADD_TEST(load_multistring_source),
-    ADD_TEST(load_two_kernel_source),
-    ADD_TEST(load_null_terminated_source),
-    ADD_TEST(load_null_terminated_multi_line_source),
-    ADD_TEST(load_null_terminated_partial_multi_line_source),
-    ADD_TEST(load_discreet_length_source),
-    ADD_TEST(get_program_source),
-    ADD_TEST(get_program_build_info),
-    ADD_TEST(get_program_info),
+    ADD_TEST( load_program_source ),
+    ADD_TEST( load_multistring_source ),
+    ADD_TEST( load_two_kernel_source ),
+    ADD_TEST( load_null_terminated_source ),
+    ADD_TEST( load_null_terminated_multi_line_source ),
+    ADD_TEST( load_null_terminated_partial_multi_line_source ),
+    ADD_TEST( load_discreet_length_source ),
+    ADD_TEST( get_program_source ),
+    ADD_TEST( get_program_build_info ),
+    ADD_TEST( get_program_info ),
 
-    ADD_TEST(large_compile),
-    ADD_TEST(async_build),
+    ADD_TEST( large_compile ),
+    ADD_TEST( async_build ),
 
-    ADD_TEST(options_build_optimizations),
-    ADD_TEST(options_build_macro),
-    ADD_TEST(options_build_macro_existence),
-    ADD_TEST(options_include_directory),
-    ADD_TEST(options_denorm_cache),
+    ADD_TEST( options_build_optimizations ),
+    ADD_TEST( options_build_macro ),
+    ADD_TEST( options_build_macro_existence ),
+    ADD_TEST( options_include_directory ),
+    ADD_TEST( options_denorm_cache ),
 
-    ADD_TEST(preprocessor_define_udef),
-    ADD_TEST(preprocessor_include),
-    ADD_TEST(preprocessor_line_error),
-    ADD_TEST(preprocessor_pragma),
+    ADD_TEST( preprocessor_define_udef ),
+    ADD_TEST( preprocessor_include ),
+    ADD_TEST( preprocessor_line_error ),
+    ADD_TEST( preprocessor_pragma ),
 
-    ADD_TEST(opencl_c_versions),
-    ADD_TEST(compiler_defines_for_extensions),
-    ADD_TEST(image_macro),
+    ADD_TEST( compiler_defines_for_extensions ),
+    ADD_TEST( image_macro ),
 
-    ADD_TEST(simple_compile_only),
-    ADD_TEST(simple_static_compile_only),
-    ADD_TEST(simple_extern_compile_only),
-    ADD_TEST(simple_compile_with_callback),
-    ADD_TEST(simple_embedded_header_compile),
-    ADD_TEST(simple_link_only),
-    ADD_TEST(two_file_regular_variable_access),
-    ADD_TEST(two_file_regular_struct_access),
-    ADD_TEST(two_file_regular_function_access),
-    ADD_TEST(simple_link_with_callback),
-    ADD_TEST(simple_embedded_header_link),
-    ADD_TEST(execute_after_simple_compile_and_link),
-    ADD_TEST(execute_after_simple_compile_and_link_no_device_info),
-    ADD_TEST(execute_after_simple_compile_and_link_with_defines),
-    ADD_TEST(execute_after_simple_compile_and_link_with_callbacks),
-    ADD_TEST(execute_after_simple_library_with_link),
-    ADD_TEST(execute_after_two_file_link),
-    ADD_TEST(execute_after_embedded_header_link),
-    ADD_TEST(execute_after_included_header_link),
-    ADD_TEST(execute_after_serialize_reload_object),
-    ADD_TEST(execute_after_serialize_reload_library),
-    ADD_TEST(simple_library_only),
-    ADD_TEST(simple_library_with_callback),
-    ADD_TEST(simple_library_with_link),
-    ADD_TEST(two_file_link),
-    ADD_TEST(multi_file_libraries),
-    ADD_TEST(multiple_files),
-    ADD_TEST(multiple_libraries),
-    ADD_TEST(multiple_files_multiple_libraries),
-    ADD_TEST(multiple_embedded_headers),
+    ADD_TEST( simple_compile_only ),
+    ADD_TEST( simple_static_compile_only ),
+    ADD_TEST( simple_extern_compile_only ),
+    ADD_TEST( simple_compile_with_callback ),
+    ADD_TEST( simple_embedded_header_compile ),
+    ADD_TEST( simple_link_only ),
+    ADD_TEST( two_file_regular_variable_access ),
+    ADD_TEST( two_file_regular_struct_access ),
+    ADD_TEST( two_file_regular_function_access ),
+    ADD_TEST( simple_link_with_callback ),
+    ADD_TEST( simple_embedded_header_link ),
+    ADD_TEST( execute_after_simple_compile_and_link ),
+    ADD_TEST( execute_after_simple_compile_and_link_no_device_info ),
+    ADD_TEST( execute_after_simple_compile_and_link_with_defines ),
+    ADD_TEST( execute_after_simple_compile_and_link_with_callbacks ),
+    ADD_TEST( execute_after_simple_library_with_link ),
+    ADD_TEST( execute_after_two_file_link ),
+    ADD_TEST( execute_after_embedded_header_link ),
+    ADD_TEST( execute_after_included_header_link ),
+    ADD_TEST( execute_after_serialize_reload_object ),
+    ADD_TEST( execute_after_serialize_reload_library ),
+    ADD_TEST( simple_library_only ),
+    ADD_TEST( simple_library_with_callback ),
+    ADD_TEST( simple_library_with_link ),
+    ADD_TEST( two_file_link ),
+    ADD_TEST( multi_file_libraries ),
+    ADD_TEST( multiple_files ),
+    ADD_TEST( multiple_libraries ),
+    ADD_TEST( multiple_files_multiple_libraries ),
+    ADD_TEST( multiple_embedded_headers ),
 
-    ADD_TEST(program_binary_type),
-    ADD_TEST(compile_and_link_status_options_log),
+    ADD_TEST( program_binary_type ),
+    ADD_TEST( compile_and_link_status_options_log ),
 
-    ADD_TEST_VERSION(pragma_unroll, Version(2, 0)),
-
-    ADD_TEST_VERSION(features_macro, Version(3, 0)),
-    ADD_TEST(unload_valid),
-    // ADD_TEST(unload_invalid), // disabling temporarily, see GitHub #977
-    ADD_TEST(unload_repeated),
-    ADD_TEST(unload_compile_unload_link),
-    ADD_TEST(unload_build_unload_create_kernel),
-    ADD_TEST(unload_link_different),
-    ADD_TEST(unload_build_threaded),
-    ADD_TEST(unload_build_info),
-    ADD_TEST(unload_program_binaries),
-
+    ADD_TEST_VERSION( pragma_unroll, Version(2, 0) ),
 };
 
-const int test_num = ARRAY_SIZE(test_list);
+const int test_num = ARRAY_SIZE( test_list );
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
 }
+

diff --git a/test_conformance/compiler/procs.h b/test_conformance/compiler/procs.h
index 10ae142..1ba655a 100644
--- a/test_conformance/compiler/procs.h
+++ b/test_conformance/compiler/procs.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,249 +13,76 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/conversions.h"
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/mt19937.h"
 #include "harness/typeWrappers.h"
+#include "harness/conversions.h"
+#include "harness/mt19937.h"
 
-// This is a macro rather than a function to be able to use and act like the
-// existing test_error macro.
-//
-// Not all compiler tests need to use this macro, only those that don't use the
-// test harness compiler helpers.
-#define check_compiler_available(DEVICE)                                       \
-    {                                                                          \
-        cl_bool compilerAvailable = CL_FALSE;                                  \
-        cl_int error = clGetDeviceInfo((DEVICE), CL_DEVICE_COMPILER_AVAILABLE, \
-                                       sizeof(compilerAvailable),              \
-                                       &compilerAvailable, NULL);              \
-        test_error(error, "Unable to query CL_DEVICE_COMPILER_AVAILABLE");     \
-        if (compilerAvailable == CL_FALSE)                                     \
-        {                                                                      \
-            log_info("Skipping test - no compiler is available.\n");           \
-            return TEST_SKIPPED_ITSELF;                                        \
-        }                                                                      \
-    }
+extern int        test_load_program_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_multistring_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_two_kernel_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_null_terminated_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_null_terminated_multi_line_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_null_terminated_partial_multi_line_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_discreet_length_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_get_program_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_get_program_build_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_get_program_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
-extern int test_load_program_source(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
-extern int test_load_multistring_source(cl_device_id deviceID,
-                                        cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements);
-extern int test_load_two_kernel_source(cl_device_id deviceID,
-                                       cl_context context,
-                                       cl_command_queue queue,
-                                       int num_elements);
-extern int test_load_null_terminated_source(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
-extern int test_load_null_terminated_multi_line_source(cl_device_id deviceID,
-                                                       cl_context context,
-                                                       cl_command_queue queue,
-                                                       int num_elements);
-extern int test_load_null_terminated_partial_multi_line_source(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_load_discreet_length_source(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
-extern int test_get_program_source(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements);
-extern int test_get_program_build_info(cl_device_id deviceID,
-                                       cl_context context,
-                                       cl_command_queue queue,
-                                       int num_elements);
-extern int test_get_program_info(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements);
+extern int        test_large_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_async_build(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
-extern int test_large_compile(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements);
-extern int test_async_build(cl_device_id deviceID, cl_context context,
-                            cl_command_queue queue, int num_elements);
+extern int        test_options_build_optimizations(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_options_build_macro(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_options_build_macro_existence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_options_include_directory(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_options_denorm_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
-extern int test_options_build_optimizations(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
-extern int test_options_build_macro(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
-extern int test_options_build_macro_existence(cl_device_id deviceID,
-                                              cl_context context,
-                                              cl_command_queue queue,
-                                              int num_elements);
-extern int test_options_include_directory(cl_device_id deviceID,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements);
-extern int test_options_denorm_cache(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements);
+extern int        test_preprocessor_define_udef(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_preprocessor_include(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_preprocessor_line_error(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_preprocessor_pragma(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
-extern int test_preprocessor_define_udef(cl_device_id deviceID,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         int num_elements);
-extern int test_preprocessor_include(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements);
-extern int test_preprocessor_line_error(cl_device_id deviceID,
-                                        cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements);
-extern int test_preprocessor_pragma(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
+extern int      test_compiler_defines_for_extensions(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
+extern int test_image_macro(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
-extern int test_opencl_c_versions(cl_device_id device, cl_context context,
-                                  cl_command_queue queue, int num_elements);
-extern int test_compiler_defines_for_extensions(cl_device_id device,
-                                                cl_context context,
-                                                cl_command_queue queue,
-                                                int n_elems);
-extern int test_image_macro(cl_device_id deviceID, cl_context context,
-                            cl_command_queue queue, int num_elements);
+extern int      test_simple_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_static_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_extern_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_compile_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_embedded_header_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
-extern int test_simple_compile_only(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
-extern int test_simple_static_compile_only(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_simple_extern_compile_only(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_simple_compile_with_callback(cl_device_id deviceID,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements);
-extern int test_simple_embedded_header_compile(cl_device_id deviceID,
-                                               cl_context context,
-                                               cl_command_queue queue,
-                                               int num_elements);
+extern int      test_simple_link_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_two_file_regular_variable_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_two_file_regular_struct_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_two_file_regular_function_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
-extern int test_simple_link_only(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements);
-extern int test_two_file_regular_variable_access(cl_device_id deviceID,
-                                                 cl_context context,
-                                                 cl_command_queue queue,
-                                                 int num_elements);
-extern int test_two_file_regular_struct_access(cl_device_id deviceID,
-                                               cl_context context,
-                                               cl_command_queue queue,
-                                               int num_elements);
-extern int test_two_file_regular_function_access(cl_device_id deviceID,
-                                                 cl_context context,
-                                                 cl_command_queue queue,
-                                                 int num_elements);
+extern int      test_simple_link_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
-extern int test_simple_link_with_callback(cl_device_id deviceID,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements);
-extern int test_simple_embedded_header_link(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
+extern int      test_execute_after_simple_compile_and_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_simple_compile_and_link_no_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_simple_compile_and_link_with_defines(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_simple_compile_and_link_with_callbacks(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_included_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_serialize_reload_object(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_serialize_reload_library(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
-extern int test_execute_after_simple_compile_and_link(cl_device_id deviceID,
-                                                      cl_context context,
-                                                      cl_command_queue queue,
-                                                      int num_elements);
-extern int test_execute_after_simple_compile_and_link_no_device_info(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_execute_after_simple_compile_and_link_with_defines(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_execute_after_simple_compile_and_link_with_callbacks(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_execute_after_simple_library_with_link(cl_device_id deviceID,
-                                                       cl_context context,
-                                                       cl_command_queue queue,
-                                                       int num_elements);
-extern int test_execute_after_two_file_link(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
-extern int test_execute_after_embedded_header_link(cl_device_id deviceID,
-                                                   cl_context context,
-                                                   cl_command_queue queue,
-                                                   int num_elements);
-extern int test_execute_after_included_header_link(cl_device_id deviceID,
-                                                   cl_context context,
-                                                   cl_command_queue queue,
-                                                   int num_elements);
-extern int test_execute_after_serialize_reload_object(cl_device_id deviceID,
-                                                      cl_context context,
-                                                      cl_command_queue queue,
-                                                      int num_elements);
-extern int test_execute_after_serialize_reload_library(cl_device_id deviceID,
-                                                       cl_context context,
-                                                       cl_command_queue queue,
-                                                       int num_elements);
+extern int      test_simple_library_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_library_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_multi_file_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_multiple_files(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_multiple_files_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_multiple_embedded_headers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
-extern int test_simple_library_only(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
-extern int test_simple_library_with_callback(cl_device_id deviceID,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements);
-extern int test_simple_library_with_link(cl_device_id deviceID,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         int num_elements);
-extern int test_two_file_link(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements);
-extern int test_multi_file_libraries(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements);
-extern int test_multiple_libraries(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements);
-extern int test_multiple_files(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_multiple_files_multiple_libraries(cl_device_id deviceID,
-                                                  cl_context context,
-                                                  cl_command_queue queue,
-                                                  int num_elements);
-extern int test_multiple_embedded_headers(cl_device_id deviceID,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements);
+extern int      test_program_binary_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_compile_and_link_status_options_log(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
-extern int test_program_binary_type(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
-extern int test_compile_and_link_status_options_log(cl_device_id deviceID,
-                                                    cl_context context,
-                                                    cl_command_queue queue,
-                                                    int num_elements);
-
-extern int test_pragma_unroll(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements);
-extern int test_features_macro(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_unload_valid(cl_device_id deviceID, cl_context context,
-                             cl_command_queue queue, int num_elements);
-extern int test_unload_invalid(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_unload_repeated(cl_device_id deviceID, cl_context context,
-                                cl_command_queue queue, int num_elements);
-extern int test_unload_compile_unload_link(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_unload_build_unload_create_kernel(cl_device_id deviceID,
-                                                  cl_context context,
-                                                  cl_command_queue queue,
-                                                  int num_elements);
-extern int test_unload_link_different(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements);
-extern int test_unload_build_threaded(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements);
-extern int test_unload_build_info(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements);
-extern int test_unload_program_binaries(cl_device_id deviceID,
-                                        cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements);
+extern int      test_pragma_unroll(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

diff --git a/test_conformance/compiler/test_async_build.cpp b/test_conformance/compiler/test_async_build.cpp
index d153362..3002422 100644
--- a/test_conformance/compiler/test_async_build.cpp
+++ b/test_conformance/compiler/test_async_build.cpp

@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017-2020 The Khronos Group Inc.
-//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -19,142 +19,75 @@
 #include <unistd.h>
 #endif
 
-#include <atomic>
-#include <string>
-
-namespace {
 
 const char *sample_async_kernel[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    size_t tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = (int)src[tid];\n"
-    "\n"
-    "}\n"
-};
+"__kernel void sample_test(__global float *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = (int)src[tid];\n"
+"\n"
+"}\n" };
 
-const char *sample_async_kernel_error[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    size_t tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = badcodehere;\n"
-    "\n"
-    "}\n"
-};
+volatile int       buildNotificationSent;
 
-// Data passed to a program completion callback
-struct TestData
+void CL_CALLBACK test_notify_build_complete( cl_program program, void *userData )
 {
-    cl_device_id device;
-    cl_build_status expectedStatus;
-};
-
-std::atomic<int> callbackResult;
-
-}
-
-void CL_CALLBACK test_notify_build_complete(cl_program program, void *userData)
-{
-    TestData *data = reinterpret_cast<TestData *>(userData);
-
-    // Check user data is valid
-    if (data == nullptr)
+    if( userData == NULL || strcmp( (char *)userData, "userData" ) != 0 )
     {
-        log_error("ERROR: User data passed to callback was not valid!\n");
-        callbackResult = -1;
-        return;
-    }
-
-    // Get program build status
-    cl_build_status status;
-    cl_int err =
-        clGetProgramBuildInfo(program, data->device, CL_PROGRAM_BUILD_STATUS,
-                              sizeof(cl_build_status), &status, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_info("ERROR: failed to get build status from callback\n");
-        callbackResult = -1;
-        return;
-    }
-
-    log_info("Program completion callback received build status %d\n", status);
-
-    // Check program build status matches expectation
-    if (status != data->expectedStatus)
-    {
-        log_info("ERROR: build status %d != expected status %d\n", status,
-                 data->expectedStatus);
-        callbackResult = -1;
+        log_error( "ERROR: User data passed in to build notify function was not correct!\n" );
+        buildNotificationSent = -1;
     }
     else
-    {
-        callbackResult = 1;
-    }
+        buildNotificationSent = 1;
+    log_info( "\n   <-- program successfully built\n" );
 }
 
-int test_async_build(cl_device_id deviceID, cl_context context,
-                     cl_command_queue queue, int num_elements)
+int test_async_build(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
-    cl_int error;
+    int error;
+    cl_program program;
+    cl_build_status status;
 
-    struct TestDef
+
+    buildNotificationSent = 0;
+
+    /* First, test by doing the slow method of the individual calls */
+    error = create_single_kernel_helper_create_program(context, &program, 1, sample_async_kernel);
+    test_error(error, "Unable to create program from source");
+
+    /* Compile the program */
+    error = clBuildProgram( program, 1, &deviceID, NULL, test_notify_build_complete, (void *)"userData" );
+    test_error( error, "Unable to build program source" );
+
+    /* Wait for build to complete (just keep polling, since we're just a test */
+    if( ( error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ) ) != CL_SUCCESS )
     {
-        const char **source;
-        cl_build_status expectedStatus;
-    };
-
-    TestDef testDefs[] = { { sample_async_kernel, CL_BUILD_SUCCESS },
-                           { sample_async_kernel_error, CL_BUILD_ERROR } };
-    for (TestDef &testDef : testDefs)
-    {
-        log_info("\nTesting program that should produce status %d\n",
-                 testDef.expectedStatus);
-
-        // Create the program
-        clProgramWrapper program;
-        error = create_single_kernel_helper_create_program(context, &program, 1,
-                                                           testDef.source);
-        test_error(error, "Unable to create program from source");
-
-        // Start an asynchronous build, registering the completion callback
-        TestData testData = { deviceID, testDef.expectedStatus };
-        callbackResult = 0;
-        error = clBuildProgram(program, 1, &deviceID, NULL,
-                               test_notify_build_complete, (void *)&testData);
-        // Allow implementations to return synchronous build failures.
-        // They still need to call the callback.
-        if (!(error == CL_BUILD_PROGRAM_FAILURE
-              && testDef.expectedStatus == CL_BUILD_ERROR))
-            test_error(error, "Unable to start build");
-
-        // Wait for callback to fire
-        int timeout = 20;
-        while (callbackResult == 0)
-        {
-            if (timeout < 0)
-            {
-                log_error("Timeout while waiting for callback to fire.\n\n");
-                return -1;
-            }
-
-            log_info(" -- still waiting for callback...\n");
-            sleep(1);
-            timeout--;
-        }
-
-        // Check the callback result
-        if (callbackResult == 1)
-        {
-            log_error("Test passed.\n\n");
-        }
-        else
-        {
-            log_error("Async build callback indicated test failure.\n\n");
-            return -1;
-        }
+        print_error( error, "Unable to get program build status" );
+        return -1;
     }
+    while( (int)status == CL_BUILD_IN_PROGRESS )
+    {
+        log_info( "\n  -- still waiting for build... (status is %d)", status );
+        sleep( 1 );
+        error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL );
+        test_error( error, "Unable to get program build status" );
+    }
+
+    if( status != CL_BUILD_SUCCESS )
+    {
+        log_error( "ERROR: build failed! (status: %d)\n", (int)status );
+        return -1;
+    }
+
+    if( buildNotificationSent == 0 )
+    {
+        log_error( "ERROR: Async build completed, but build notification was not sent!\n" );
+        return -1;
+    }
+
+    error = clReleaseProgram( program );
+    test_error( error, "Unable to release program object" );
 
     return 0;
 }

diff --git a/test_conformance/compiler/test_build_helpers.cpp b/test_conformance/compiler/test_build_helpers.cpp
index c5ebb80..6cfdcf2 100644
--- a/test_conformance/compiler/test_build_helpers.cpp
+++ b/test_conformance/compiler/test_build_helpers.cpp

@@ -428,8 +428,7 @@
     int error;
     char buffer[10240];
     size_t length;
-    size_t line_length = strlen(sample_kernel_code_single_line[0]);
-    bool online_compilation = (gCompilationMode == kOnline);
+
 
     error = create_single_kernel_helper_create_program(context, &program, 1, sample_kernel_code_single_line);
     if( program == NULL )
@@ -441,7 +440,7 @@
     /* Try getting the length */
     error = clGetProgramInfo( program, CL_PROGRAM_SOURCE, 0, NULL, &length );
     test_error( error, "Unable to get program source length" );
-    if (length != line_length + 1 && online_compilation)
+    if (length != strlen(sample_kernel_code_single_line[0]) + 1 && gCompilationMode == kOnline)
     {
         log_error( "ERROR: Length returned for program source is incorrect!\n" );
         return -1;
@@ -450,7 +449,7 @@
     /* Try normal source */
     error = clGetProgramInfo( program, CL_PROGRAM_SOURCE, sizeof( buffer ), buffer, NULL );
     test_error( error, "Unable to get program source" );
-    if (strlen(buffer) != line_length && online_compilation)
+    if (strlen(buffer) != strlen(sample_kernel_code_single_line[0]) && gCompilationMode == kOnline)
     {
         log_error( "ERROR: Length of program source is incorrect!\n" );
         return -1;
@@ -459,12 +458,12 @@
     /* Try both at once */
     error = clGetProgramInfo( program, CL_PROGRAM_SOURCE, sizeof( buffer ), buffer, &length );
     test_error( error, "Unable to get program source" );
-    if (strlen(buffer) != line_length && online_compilation)
+    if (strlen(buffer) != strlen(sample_kernel_code_single_line[0]) && gCompilationMode == kOnline)
     {
         log_error( "ERROR: Length of program source is incorrect!\n" );
         return -1;
     }
-    if (length != line_length + 1 && online_compilation)
+    if (length != strlen(sample_kernel_code_single_line[0]) + 1 && gCompilationMode == kOnline)
     {
         log_error( "ERROR: Returned length of program source is incorrect!\n" );
         return -1;

diff --git a/test_conformance/compiler/test_build_options.cpp b/test_conformance/compiler/test_build_options.cpp
index c25fd10..71f84c3 100644
--- a/test_conformance/compiler/test_build_options.cpp
+++ b/test_conformance/compiler/test_build_options.cpp

@@ -43,12 +43,11 @@
 "}\n" };
 
 const char *options_test_kernel[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    size_t tid = get_global_id(0);\n"
-    "    dst[tid] = (int)src[tid];\n"
-    "}\n"
-};
+"__kernel void sample_test(__global float *src, __global int *dst)\n"
+"{\n"
+"    size_t tid = get_global_id(0);\n"
+"    dst[tid] = src[tid];\n"
+"}\n" };
 
 const char *optimization_options[] = {
     "-cl-single-precision-constant",
@@ -61,6 +60,10 @@
     "-cl-fast-relaxed-math",
     "-w",
     "-Werror",
+#if defined( __APPLE__ )
+    "-cl-opt-enable",
+    "-cl-auto-vectorize-enable"
+#endif
     };
 
 cl_int get_result_from_program( cl_context context, cl_command_queue queue, cl_program program, cl_int *outValue )
@@ -70,8 +73,7 @@
     test_error( error, "Unable to create kernel from program" );
 
     clMemWrapper outStream;
-    outStream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL,
-                               &error);
+    outStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &error );
     test_error( error, "Unable to create test buffer" );
 
     error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream );
@@ -310,8 +312,7 @@
     clKernelWrapper kernel = clCreateKernel( program, "sample_test", &error );
     test_error( error, "Unable to create kernel from program" );
 
-    clMemWrapper outStream = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                            sizeof(cl_float), NULL, &error);
+    clMemWrapper outStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float), NULL, &error );
     test_error( error, "Unable to create test buffer" );
 
     error = clSetKernelArg( kernel, 0, sizeof( cl_float ), &inA );

diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
index 483adac..3d50d1f 100644
--- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
+++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp

@@ -21,6 +21,8 @@
 #endif
 
 
+
+
 const char *known_extensions[] = {
     "cl_khr_byte_addressable_store",
     "cl_khr_3d_image_writes",
@@ -42,16 +44,8 @@
     "cl_khr_mipmap_image_writes",
     "cl_khr_srgb_image_writes",
     "cl_khr_subgroup_named_barrier",
-    "cl_khr_subgroup_extended_types",
-    "cl_khr_subgroup_non_uniform_vote",
-    "cl_khr_subgroup_ballot",
-    "cl_khr_subgroup_non_uniform_arithmetic",
-    "cl_khr_subgroup_shuffle",
-    "cl_khr_subgroup_shuffle_relative",
-    "cl_khr_subgroup_clustered_reduce",
 
-    // API-only extensions after this point.  If you add above here, modify
-    // first_API_extension below.
+    //API-only extensions after this point.  If you add above here, modify first_API_extension below.
     "cl_khr_icd",
     "cl_khr_gl_sharing",
     "cl_khr_gl_event",
@@ -70,11 +64,10 @@
     "cl_khr_throttle_hints",
     "cl_khr_spirv_no_integer_wrap_decoration",
     "cl_khr_extended_versioning",
-    "cl_khr_device_uuid",
 };
 
 size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*);
-size_t first_API_extension = 27;
+size_t first_API_extension = 20;
 
 const char *known_embedded_extensions[] = {
     "cles_khr_int64",
@@ -335,15 +328,14 @@
     strcat(kernel_code, kernel_strings[4]);
 
     // Now we need to execute the kernel
-    clMemWrapper defines;
+    cl_mem defines;
     cl_int *data;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
+    cl_program program;
+    cl_kernel kernel;
 
     Version version = get_device_cl_version(device);
 
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        (const char **)&kernel_code, "test");
+    error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test", version < Version(2,0) ? "" : "-cl-std=CL2.0");
     test_error(error, "create_single_kernel_helper failed");
 
     data = (cl_int*)malloc(sizeof(cl_int)*(num_not_supported_extensions+num_of_supported_extensions));
@@ -432,6 +424,10 @@
       free(extensions_supported[i]);
     }
     free(extensions);
+    if( defines ) {
+        error = clReleaseMemObject( defines );
+        test_error( error, "Unable to release memory object" );
+    }
 
     if (total_errors)
         return -1;

diff --git a/test_conformance/compiler/test_feature_macro.cpp b/test_conformance/compiler/test_feature_macro.cpp
deleted file mode 100644
index ac355dd..0000000
--- a/test_conformance/compiler/test_feature_macro.cpp
+++ /dev/null

@@ -1,755 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#include <vector>
-#include <algorithm>
-#include "errorHelpers.h"
-
-const char* macro_supported_source = R"(kernel void enabled(global int * buf) {
-        int n = get_global_id(0);
-        buf[n] = 0;
-        #ifndef %s
-            #error Feature macro was not defined
-        #endif
-})";
-
-const char* macro_not_supported_source =
-    R"(kernel void not_enabled(global int * buf) {
-        int n = get_global_id(0);
-        buf[n] = 0;
-        #ifdef %s
-            #error Feature macro was defined
-        #endif
-})";
-
-template <typename T>
-cl_int check_api_feature_info_capabilities(cl_device_id deviceID,
-                                           cl_context context, cl_bool& status,
-                                           cl_device_info check_property,
-                                           cl_bitfield check_cap)
-{
-    cl_int error = CL_SUCCESS;
-    T response;
-    error = clGetDeviceInfo(deviceID, check_property, sizeof(response),
-                            &response, NULL);
-    test_error(error, "clGetDeviceInfo failed.\n");
-
-    if ((response & check_cap) == check_cap)
-    {
-        status = CL_TRUE;
-    }
-    else
-    {
-        status = CL_FALSE;
-    }
-    return error;
-}
-
-cl_int check_api_feature_info_support(cl_device_id deviceID, cl_context context,
-                                      cl_bool& status,
-                                      cl_device_info check_property)
-{
-    cl_int error = CL_SUCCESS;
-    cl_bool response;
-    error = clGetDeviceInfo(deviceID, check_property, sizeof(response),
-                            &response, NULL);
-    test_error(error, "clGetDeviceInfo failed.\n");
-    status = response;
-    return error;
-}
-
-template <typename T>
-cl_int check_api_feature_info_number(cl_device_id deviceID, cl_context context,
-                                     cl_bool& status,
-                                     cl_device_info check_property)
-{
-    cl_int error = CL_SUCCESS;
-    T response;
-    error = clGetDeviceInfo(deviceID, check_property, sizeof(response),
-                            &response, NULL);
-    test_error(error, "clGetDeviceInfo failed.\n");
-    if (response > 0)
-    {
-        status = CL_TRUE;
-    }
-    else
-    {
-        status = CL_FALSE;
-    }
-    return error;
-}
-
-cl_int check_api_feature_info_supported_image_formats(cl_device_id deviceID,
-                                                      cl_context context,
-                                                      cl_bool& status)
-{
-    cl_int error = CL_SUCCESS;
-    cl_uint response = 0;
-    cl_uint image_format_count;
-    error = clGetSupportedImageFormats(context, CL_MEM_WRITE_ONLY,
-                                       CL_MEM_OBJECT_IMAGE3D, 0, NULL,
-                                       &image_format_count);
-    test_error(error, "clGetSupportedImageFormats failed");
-    response += image_format_count;
-    error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
-                                       CL_MEM_OBJECT_IMAGE3D, 0, NULL,
-                                       &image_format_count);
-    test_error(error, "clGetSupportedImageFormats failed");
-    response += image_format_count;
-    error = clGetSupportedImageFormats(context, CL_MEM_KERNEL_READ_AND_WRITE,
-                                       CL_MEM_OBJECT_IMAGE3D, 0, NULL,
-                                       &image_format_count);
-    test_error(error, "clGetSupportedImageFormats failed");
-    response += image_format_count;
-    if (response > 0)
-    {
-        status = CL_TRUE;
-    }
-    else
-    {
-        status = CL_FALSE;
-    }
-    return error;
-}
-
-cl_int check_compiler_feature_info(cl_device_id deviceID, cl_context context,
-                                   std::string feature_macro, cl_bool& status)
-{
-    cl_int error = CL_SUCCESS;
-    clProgramWrapper program_supported;
-    clProgramWrapper program_not_supported;
-    char kernel_supported_src[1024];
-    char kernel_not_supported_src[1024];
-    sprintf(kernel_supported_src, macro_supported_source,
-            feature_macro.c_str());
-    const char* ptr_supported = kernel_supported_src;
-    const char* build_options = "-cl-std=CL3.0";
-
-    error = create_single_kernel_helper_create_program(
-        context, &program_supported, 1, &ptr_supported, build_options);
-    test_error(error, "create_single_kernel_helper_create_program failed.\n");
-
-    sprintf(kernel_not_supported_src, macro_not_supported_source,
-            feature_macro.c_str());
-    const char* ptr_not_supported = kernel_not_supported_src;
-    error = create_single_kernel_helper_create_program(
-        context, &program_not_supported, 1, &ptr_not_supported,
-        "-cl-std=CL3.0");
-    test_error(error, "create_single_kernel_helper_create_program failed.\n");
-
-    cl_int status_supported = CL_SUCCESS;
-    cl_int status_not_supported = CL_SUCCESS;
-    status_supported = clBuildProgram(program_supported, 1, &deviceID,
-                                      build_options, NULL, NULL);
-    status_not_supported = clBuildProgram(program_not_supported, 1, &deviceID,
-                                          build_options, NULL, NULL);
-    if (status_supported != status_not_supported)
-    {
-        if (status_not_supported == CL_SUCCESS)
-        {
-            // kernel which verifies not supporting return passed
-            status = CL_FALSE;
-        }
-        else
-        {
-            // kernel which verifies supporting return passed
-            status = CL_TRUE;
-        }
-    }
-    else
-    {
-        log_error("Error: The macro feature is defined and undefined "
-                  "in the same time\n");
-        error = OutputBuildLogs(program_supported, 1, &deviceID);
-        test_error(error, "OutputBuildLogs failed.\n");
-        error = OutputBuildLogs(program_not_supported, 1, &deviceID);
-        test_error(error, "OutputBuildLogs failed.\n");
-        return TEST_FAIL;
-    }
-    return error;
-}
-
-int feature_macro_verify_results(std::string test_macro_name,
-                                 cl_bool api_status, cl_bool compiler_status,
-                                 cl_bool& supported)
-{
-    cl_int error = TEST_PASS;
-    log_info("Feature status: API - %s, compiler - %s\n",
-             api_status == CL_TRUE ? "supported" : "not supported",
-             compiler_status == CL_TRUE ? "supported" : "not supported");
-    if (api_status != compiler_status)
-    {
-        log_info("%s - failed\n", test_macro_name.c_str());
-        supported = CL_FALSE;
-        return TEST_FAIL;
-    }
-    else
-    {
-        log_info("%s - passed\n", test_macro_name.c_str());
-    }
-    supported = api_status;
-    return error;
-}
-
-int test_feature_macro_atomic_order_acq_rel(cl_device_id deviceID,
-                                            cl_context context,
-                                            std::string test_macro_name,
-                                            cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_capabilities<cl_device_atomic_capabilities>(
-        deviceID, context, api_status, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-        CL_DEVICE_ATOMIC_ORDER_ACQ_REL);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_atomic_order_seq_cst(cl_device_id deviceID,
-                                            cl_context context,
-                                            std::string test_macro_name,
-                                            cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-
-    error = check_api_feature_info_capabilities<cl_device_atomic_capabilities>(
-        deviceID, context, api_status, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-        CL_DEVICE_ATOMIC_ORDER_SEQ_CST);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_atomic_scope_device(cl_device_id deviceID,
-                                           cl_context context,
-                                           std::string test_macro_name,
-                                           cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_capabilities<cl_device_atomic_capabilities>(
-        deviceID, context, api_status, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-        CL_DEVICE_ATOMIC_SCOPE_DEVICE);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_atomic_scope_all_devices(cl_device_id deviceID,
-                                                cl_context context,
-                                                std::string test_macro_name,
-                                                cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_capabilities<cl_device_atomic_capabilities>(
-        deviceID, context, api_status, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-        CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_3d_image_writes(cl_device_id deviceID,
-                                       cl_context context,
-                                       std::string test_macro_name,
-                                       cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_supported_image_formats(deviceID, context,
-                                                           api_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_device_enqueue(cl_device_id deviceID, cl_context context,
-                                      std::string test_macro_name,
-                                      cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_capabilities<
-        cl_device_device_enqueue_capabilities>(
-        deviceID, context, api_status, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES,
-        CL_DEVICE_QUEUE_SUPPORTED);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_generic_address_space(cl_device_id deviceID,
-                                             cl_context context,
-                                             std::string test_macro_name,
-                                             cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_support(
-        deviceID, context, api_status, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_pipes(cl_device_id deviceID, cl_context context,
-                             std::string test_macro_name, cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_support(deviceID, context, api_status,
-                                           CL_DEVICE_PIPE_SUPPORT);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_program_scope_global_variables(
-    cl_device_id deviceID, cl_context context, std::string test_macro_name,
-    cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_number<size_t>(
-        deviceID, context, api_status, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_read_write_images(cl_device_id deviceID,
-                                         cl_context context,
-                                         std::string test_macro_name,
-                                         cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_number<cl_uint>(
-        deviceID, context, api_status, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_subgroups(cl_device_id deviceID, cl_context context,
-                                 std::string test_macro_name,
-                                 cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_number<cl_uint>(
-        deviceID, context, api_status, CL_DEVICE_MAX_NUM_SUB_GROUPS);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_work_group_collective_functions(
-    cl_device_id deviceID, cl_context context, std::string test_macro_name,
-    cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_support(
-        deviceID, context, api_status,
-        CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_images(cl_device_id deviceID, cl_context context,
-                              std::string test_macro_name, cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_support(deviceID, context, api_status,
-                                           CL_DEVICE_IMAGE_SUPPORT);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_fp64(cl_device_id deviceID, cl_context context,
-                            std::string test_macro_name, cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_capabilities<cl_device_fp_config>(
-        deviceID, context, api_status, CL_DEVICE_DOUBLE_FP_CONFIG,
-        CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN | CL_FP_DENORM);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_feature_macro_int64(cl_device_id deviceID, cl_context context,
-                             std::string test_macro_name, cl_bool& supported)
-{
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    cl_int full_profile = 0;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    size_t ret_len;
-    char profile[32] = { 0 };
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile),
-                            profile, &ret_len);
-    test_error(error, "clGetDeviceInfo(CL_DEVICE_PROFILE) failed");
-    if (ret_len < sizeof(profile) && strcmp(profile, "FULL_PROFILE") == 0)
-    {
-        full_profile = 1;
-    }
-    else if (ret_len < sizeof(profile)
-             && strcmp(profile, "EMBEDDED_PROFILE") == 0)
-    {
-        full_profile = 0;
-    }
-    else
-    {
-        log_error("Unknown device profile: %s\n", profile);
-        return TEST_FAIL;
-    }
-
-    if (full_profile)
-    {
-        api_status = CL_TRUE;
-    }
-    else
-    {
-        if (is_extension_available(deviceID, "cles_khr_int64"))
-        {
-            api_status = CL_TRUE;
-        }
-        else
-        {
-            cl_bool double_supported = CL_FALSE;
-            error = check_api_feature_info_capabilities<cl_device_fp_config>(
-                deviceID, context, double_supported, CL_DEVICE_DOUBLE_FP_CONFIG,
-                CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN
-                    | CL_FP_DENORM);
-            test_error(error, "checking CL_DEVICE_DOUBLE_FP_CONFIG failed");
-            if (double_supported == CL_FALSE)
-            {
-                api_status = CL_FALSE;
-            }
-            else
-            {
-                log_error("FP double type is supported and cles_khr_int64 "
-                          "extension not supported\n");
-                return TEST_FAIL;
-            }
-        }
-    }
-
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-}
-
-int test_consistency_c_features_list(cl_device_id deviceID,
-                                     std::vector<std::string> vec_to_cmp)
-{
-    log_info("\nComparison list of features: CL_DEVICE_OPENCL_C_FEATURES vs "
-             "API/compiler queries.\n");
-    cl_int error;
-    size_t config_size;
-    std::vector<cl_name_version> vec_device_feature;
-    std::vector<std::string> vec_device_feature_names;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_OPENCL_C_FEATURES, 0, NULL,
-                            &config_size);
-
-    test_error(
-        error,
-        "clGetDeviceInfo asking for CL_DEVICE_OPENCL_C_FEATURES failed.\n");
-    if (config_size == 0)
-    {
-        log_info("Empty list of CL_DEVICE_OPENCL_C_FEATURES returned by "
-                 "clGetDeviceInfo on this device.\n");
-    }
-    else
-    {
-        int vec_elements = config_size / sizeof(cl_name_version);
-        vec_device_feature.resize(vec_elements);
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_OPENCL_C_FEATURES,
-                                config_size, vec_device_feature.data(), 0);
-        test_error(
-            error,
-            "clGetDeviceInfo asking for CL_DEVICE_OPENCL_C_FEATURES failed.\n");
-    }
-    for (auto each_f : vec_device_feature)
-    {
-        vec_device_feature_names.push_back(each_f.name);
-    }
-    sort(vec_to_cmp.begin(), vec_to_cmp.end());
-    sort(vec_device_feature_names.begin(), vec_device_feature_names.end());
-
-    if (vec_device_feature_names == vec_to_cmp)
-    {
-        log_info("Comparison list of features - passed\n");
-    }
-    else
-    {
-        log_info("Comparison list of features - failed\n");
-        error = TEST_FAIL;
-    }
-    log_info(
-        "Supported features based on CL_DEVICE_OPENCL_C_FEATURES API query:\n");
-    for (auto each_f : vec_device_feature_names)
-    {
-        log_info("%s\n", each_f.c_str());
-    }
-
-    log_info("\nSupported features based on queries to API/compiler :\n");
-    for (auto each_f : vec_to_cmp)
-    {
-        log_info("%s\n", each_f.c_str());
-    }
-
-    return error;
-}
-
-#define NEW_FEATURE_MACRO_TEST(feat)                                           \
-    test_macro_name = "__opencl_c_" #feat;                                     \
-    error |= test_feature_macro_##feat(deviceID, context, test_macro_name,     \
-                                       supported);                             \
-    if (supported) supported_features_vec.push_back(test_macro_name);
-
-
-int test_features_macro(cl_device_id deviceID, cl_context context,
-                        cl_command_queue queue, int num_elements)
-{
-
-    // Note: Not checking that the feature array is empty for the compiler not
-    // available case because the specification says "For devices that do not
-    // support compilation from OpenCL C source, this query may return an empty
-    // array."  It "may" return an empty array implies that an implementation
-    // also "may not".
-    check_compiler_available(deviceID);
-
-    int error = TEST_PASS;
-    cl_bool supported = CL_FALSE;
-    std::string test_macro_name = "";
-    std::vector<std::string> supported_features_vec;
-    NEW_FEATURE_MACRO_TEST(program_scope_global_variables);
-    NEW_FEATURE_MACRO_TEST(3d_image_writes);
-    NEW_FEATURE_MACRO_TEST(atomic_order_acq_rel);
-    NEW_FEATURE_MACRO_TEST(atomic_order_seq_cst);
-    NEW_FEATURE_MACRO_TEST(atomic_scope_device);
-    NEW_FEATURE_MACRO_TEST(atomic_scope_all_devices);
-    NEW_FEATURE_MACRO_TEST(device_enqueue);
-    NEW_FEATURE_MACRO_TEST(generic_address_space);
-    NEW_FEATURE_MACRO_TEST(pipes);
-    NEW_FEATURE_MACRO_TEST(read_write_images);
-    NEW_FEATURE_MACRO_TEST(subgroups);
-    NEW_FEATURE_MACRO_TEST(work_group_collective_functions);
-    NEW_FEATURE_MACRO_TEST(images);
-    NEW_FEATURE_MACRO_TEST(fp64);
-    NEW_FEATURE_MACRO_TEST(int64);
-
-    error |= test_consistency_c_features_list(deviceID, supported_features_vec);
-
-    return error;
-}

diff --git a/test_conformance/compiler/test_opencl_c_versions.cpp b/test_conformance/compiler/test_opencl_c_versions.cpp
deleted file mode 100644
index d750366..0000000
--- a/test_conformance/compiler/test_opencl_c_versions.cpp
+++ /dev/null

@@ -1,308 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "testBase.h"
-#include "harness/featureHelpers.h"
-
-#include <vector>
-
-static const char* test_kernel = R"CLC(
-__kernel void test(__global int* dst) {
-    dst[0] = 0;
-}
-)CLC";
-
-// This sub-test checks that CL_DEVICE_OPENCL_C_VERSION meets any API
-// requirements and that programs can be built for the reported OpenCL C version
-// and all previous versions.
-static int test_CL_DEVICE_OPENCL_C_VERSION(cl_device_id device,
-                                           cl_context context)
-{
-    const Version latest_version = Version(3, 0);
-
-    const Version api_version = get_device_cl_version(device);
-    const Version clc_version = get_device_cl_c_version(device);
-
-    if (api_version > latest_version)
-    {
-        log_info("CL_DEVICE_VERSION is %s, which is bigger than %s.\n"
-                 "Need to update the opencl_c_versions test!\n",
-                 api_version.to_string().c_str(),
-                 latest_version.to_string().c_str());
-    }
-
-    if (clc_version > latest_version)
-    {
-        log_info("CL_DEVICE_OPENCL_C_VERSION is %s, which is bigger than %s.\n"
-                 "Need to update the opencl_c_versions test!\n",
-                 clc_version.to_string().c_str(),
-                 latest_version.to_string().c_str());
-    }
-
-    // For OpenCL 3.0, the minimum required OpenCL C version is OpenCL C 1.2.
-    // For OpenCL 2.x, the minimum required OpenCL C version is OpenCL C 2.0.
-    // For other OpenCL versions, the minimum required OpenCL C version is
-    // the same as the API version.
-    const Version min_clc_version = api_version == Version(3, 0)
-        ? Version(1, 2)
-        : api_version >= Version(2, 0) ? Version(2, 0) : api_version;
-    if (clc_version < min_clc_version)
-    {
-        log_error("The minimum required OpenCL C version for API version %s is "
-                  "%s (got %s)!\n",
-                  api_version.to_string().c_str(),
-                  min_clc_version.to_string().c_str(),
-                  clc_version.to_string().c_str());
-        return TEST_FAIL;
-    }
-
-    log_info("  testing compilation based on CL_DEVICE_OPENCL_C_VERSION\n");
-
-    struct TestCase
-    {
-        Version version;
-        const char* buildOptions;
-    };
-
-    std::vector<TestCase> tests;
-    tests.push_back({ Version(1, 1), "-cl-std=CL1.1" });
-    tests.push_back({ Version(1, 2), "-cl-std=CL1.2" });
-    tests.push_back({ Version(2, 0), "-cl-std=CL2.0" });
-    tests.push_back({ Version(3, 0), "-cl-std=CL3.0" });
-
-    for (const auto& testcase : tests)
-    {
-        if (clc_version >= testcase.version)
-        {
-            clProgramWrapper program;
-            cl_int error =
-                create_single_kernel_helper_create_program_for_device(
-                    context, device, &program, 1, &test_kernel,
-                    testcase.buildOptions);
-            test_error(error, "Unable to build program!");
-
-            log_info("    successfully built program with build options '%s'\n",
-                     testcase.buildOptions);
-        }
-    }
-
-    return TEST_PASS;
-}
-
-// This sub-test checks that CL_DEVICE_OPENCL_C_ALL_VERSIONS includes any
-// requirements for the API version, and that programs can be built for all
-// reported versions.
-static int test_CL_DEVICE_OPENCL_C_ALL_VERSIONS(cl_device_id device,
-                                                cl_context context)
-{
-    // For now, the required OpenCL C version is the same as the API version.
-    const Version api_version = get_device_cl_version(device);
-    bool found_api_version = false;
-
-    log_info(
-        "  testing compilation based on CL_DEVICE_OPENCL_C_ALL_VERSIONS\n");
-
-    cl_int error = CL_SUCCESS;
-
-    size_t sz = 0;
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, NULL, &sz);
-    test_error(error, "Unable to query CL_DEVICE_OPENCL_C_ALL_VERSIONS size");
-
-    std::vector<cl_name_version> clc_versions(sz / sizeof(cl_name_version));
-    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, sz,
-                            clc_versions.data(), NULL);
-    test_error(error, "Unable to query CL_DEVICE_OPENCL_C_FEATURES");
-
-    for (const auto& clc_version : clc_versions)
-    {
-        const unsigned major = CL_VERSION_MAJOR(clc_version.version);
-        const unsigned minor = CL_VERSION_MINOR(clc_version.version);
-
-        if (strcmp(clc_version.name, "OpenCL C") == 0)
-        {
-            if (api_version == Version(major, minor))
-            {
-                found_api_version = true;
-            }
-
-            if (major == 1 && minor == 0)
-            {
-                log_info(
-                    "    skipping OpenCL C 1.0, there is no -cl-std=CL1.0.\n");
-                continue;
-            }
-
-            std::string buildOptions = "-cl-std=CL";
-            buildOptions += std::to_string(major);
-            buildOptions += ".";
-            buildOptions += std::to_string(minor);
-
-            clProgramWrapper program;
-            error = create_single_kernel_helper_create_program_for_device(
-                context, device, &program, 1, &test_kernel,
-                buildOptions.c_str());
-            test_error(error, "Unable to build program!");
-
-            log_info("    successfully built program with build options '%s'\n",
-                     buildOptions.c_str());
-        }
-        else
-        {
-            log_error("    unknown OpenCL C name '%s'.\n", clc_version.name);
-            return TEST_FAIL;
-        }
-    }
-
-    if (!found_api_version)
-    {
-        log_error("    didn't find required OpenCL C version '%s'!\n",
-                  api_version.to_string().c_str());
-        return TEST_FAIL;
-    }
-
-    return TEST_PASS;
-}
-
-// This sub-test checks that any required features are present for a specific
-// CL_DEVICE_OPENCL_C_VERSION.
-static int test_CL_DEVICE_OPENCL_C_VERSION_features(cl_device_id device,
-                                                    cl_context context)
-{
-    log_info("  testing for OPENCL_C_VERSION required features\n");
-
-    OpenCLCFeatures features;
-    int error = get_device_cl_c_features(device, features);
-    if (error)
-    {
-        log_error("Couldn't query OpenCL C features for the device!\n");
-        return TEST_FAIL;
-    }
-
-    const Version clc_version = get_device_cl_c_version(device);
-    if (clc_version >= Version(2, 0))
-    {
-        bool has_all_OpenCL_C_20_features =
-            features.supports__opencl_c_atomic_order_acq_rel
-            && features.supports__opencl_c_atomic_order_seq_cst
-            && features.supports__opencl_c_atomic_scope_device
-            && features.supports__opencl_c_atomic_scope_all_devices
-            && features.supports__opencl_c_device_enqueue
-            && features.supports__opencl_c_generic_address_space
-            && features.supports__opencl_c_pipes
-            && features.supports__opencl_c_program_scope_global_variables
-            && features.supports__opencl_c_work_group_collective_functions;
-
-        if (features.supports__opencl_c_images)
-        {
-            has_all_OpenCL_C_20_features = has_all_OpenCL_C_20_features
-                && features.supports__opencl_c_3d_image_writes
-                && features.supports__opencl_c_read_write_images;
-        }
-
-        test_assert_error(
-            has_all_OpenCL_C_20_features,
-            "At least one required OpenCL C 2.0 feature is missing!");
-    }
-
-    return TEST_PASS;
-}
-
-// This sub-test checks that all required OpenCL C versions are present for a
-// specific CL_DEVICE_OPENCL_C_VERSION.
-static int test_CL_DEVICE_OPENCL_C_VERSION_versions(cl_device_id device,
-                                                    cl_context context)
-{
-    log_info("  testing for OPENCL_C_VERSION required versions\n");
-
-    const Version device_clc_version = get_device_cl_c_version(device);
-
-    std::vector<Version> test_clc_versions;
-    test_clc_versions.push_back(Version(1, 0));
-    test_clc_versions.push_back(Version(1, 1));
-    test_clc_versions.push_back(Version(1, 2));
-    test_clc_versions.push_back(Version(2, 0));
-    test_clc_versions.push_back(Version(3, 0));
-
-    cl_int error = CL_SUCCESS;
-
-    size_t sz = 0;
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, NULL, &sz);
-    test_error(error, "Unable to query CL_DEVICE_OPENCL_C_ALL_VERSIONS size");
-
-    std::vector<cl_name_version> device_clc_versions(sz
-                                                     / sizeof(cl_name_version));
-    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, sz,
-                            device_clc_versions.data(), NULL);
-    test_error(error, "Unable to query CL_DEVICE_OPENCL_C_FEATURES");
-
-    for (const auto& test_clc_version : test_clc_versions)
-    {
-        if (device_clc_version >= test_clc_version)
-        {
-            bool found = false;
-            for (const auto& check : device_clc_versions)
-            {
-                const unsigned major = CL_VERSION_MAJOR(check.version);
-                const unsigned minor = CL_VERSION_MINOR(check.version);
-
-                if (strcmp(check.name, "OpenCL C") == 0
-                    && test_clc_version == Version(major, minor))
-                {
-                    found = true;
-                    break;
-                }
-            }
-
-            if (found)
-            {
-                log_info("    found OpenCL C version '%s'\n",
-                         test_clc_version.to_string().c_str());
-            }
-            else
-            {
-                log_error("Didn't find OpenCL C version '%s'!\n",
-                          test_clc_version.to_string().c_str());
-                return TEST_FAIL;
-            }
-        }
-    }
-
-
-    return TEST_PASS;
-}
-
-int test_opencl_c_versions(cl_device_id device, cl_context context,
-                           cl_command_queue queue, int num_elements)
-{
-    check_compiler_available(device);
-
-    const Version version = get_device_cl_version(device);
-
-    int result = TEST_PASS;
-
-    result |= test_CL_DEVICE_OPENCL_C_VERSION(device, context);
-
-    if (version >= Version(3, 0))
-    {
-        result |= test_CL_DEVICE_OPENCL_C_ALL_VERSIONS(device, context);
-        result |= test_CL_DEVICE_OPENCL_C_VERSION_features(device, context);
-        result |= test_CL_DEVICE_OPENCL_C_VERSION_versions(device, context);
-    }
-
-    return result;
-}

diff --git a/test_conformance/compiler/test_pragma_unroll.cpp b/test_conformance/compiler/test_pragma_unroll.cpp
index 67f4b93..3a5cbf9 100644
--- a/test_conformance/compiler/test_pragma_unroll.cpp
+++ b/test_conformance/compiler/test_pragma_unroll.cpp

@@ -258,13 +258,9 @@
   for (size_t kernelIdx = 0; kernelIdx < KERNEL_NUM; ++kernelIdx) {
     clProgramWrapper program;
     clKernelWrapper kernel;
-    if (create_single_kernel_helper(
-            context, &program, &kernel, 1,
-            (const char **)&pragma_unroll_kernels[kernelIdx], "pragma_unroll"))
-    {
-        log_error("The program we attempted to compile was: \n%s\n",
-                  pragma_unroll_kernels[kernelIdx]);
-        return -1;
+    if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, (const char **)&pragma_unroll_kernels[kernelIdx], "pragma_unroll", "-cl-std=CL2.0" ) ) {
+      log_error("The program we attempted to compile was: \n%s\n", pragma_unroll_kernels[kernelIdx]);
+      return -1;
     }
 
     clMemWrapper buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, ELEMENT_NUM * sizeof(cl_uint), NULL, &error);

diff --git a/test_conformance/compiler/test_unload_platform_compiler.cpp b/test_conformance/compiler/test_unload_platform_compiler.cpp
deleted file mode 100644
index 039d472..0000000
--- a/test_conformance/compiler/test_unload_platform_compiler.cpp
+++ /dev/null

@@ -1,980 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#include "test_unload_platform_compiler_resources.hpp"
-
-#include <cassert>
-#include <chrono>
-#include <functional>
-#include <future>
-#include <initializer_list>
-#include <stdexcept>
-#include <string>
-#include <thread>
-#include <vector>
-
-namespace {
-
-class unload_test_failure : public std::runtime_error {
-public:
-    using std::runtime_error::runtime_error;
-
-    explicit unload_test_failure(const std::string &function, cl_int error)
-        : std::runtime_error(function + " == " + std::to_string(error))
-    {}
-};
-
-class build_base {
-public:
-    build_base(cl_context context, cl_device_id device)
-        : m_context{ context }, m_device{ device }
-    {}
-    virtual ~build_base() { reset(); }
-    build_base(const build_base &) = delete;
-    build_base &operator=(const build_base &) = delete;
-
-    virtual void create() = 0;
-
-    virtual void compile()
-    {
-        assert(nullptr != m_program);
-
-        const cl_int err = clCompileProgram(m_program, 1, &m_device, nullptr, 0,
-                                            nullptr, nullptr, nullptr, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCompileProgram()", err);
-    }
-
-    virtual void link()
-    {
-        assert(nullptr != m_program);
-
-        cl_int err = CL_INVALID_PLATFORM;
-        m_executable = clLinkProgram(m_context, 1, &m_device, nullptr, 1,
-                                     &m_program, nullptr, nullptr, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clLinkProgram()", err);
-        if (nullptr == m_executable)
-            throw unload_test_failure("clLinkProgram returned nullptr");
-    }
-
-    virtual void verify()
-    {
-        assert(nullptr != m_executable);
-
-        cl_int err = CL_INVALID_VALUE;
-
-        const clKernelWrapper kernel =
-            clCreateKernel(m_executable, "write_kernel", &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateKernel()", err);
-
-        const clCommandQueueWrapper queue =
-            clCreateCommandQueue(m_context, m_device, 0, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateCommandQueue()", err);
-
-        const clMemWrapper buffer = clCreateBuffer(
-            m_context, CL_MEM_READ_WRITE, sizeof(cl_uint), nullptr, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateBuffer()", err);
-
-        cl_uint value = 0;
-
-        err = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clSetKernelArg()", err);
-
-        static const size_t work_size = 1;
-        err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &work_size,
-                                     nullptr, 0, nullptr, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clEnqueueNDRangeKernel()", err);
-
-        err = clEnqueueReadBuffer(queue, buffer, CL_BLOCKING, 0,
-                                  sizeof(cl_uint), &value, 0, nullptr, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clEnqueueReadBuffer()", err);
-
-        err = clFinish(queue);
-        if (CL_SUCCESS != err) throw unload_test_failure("clFinish()", err);
-
-        if (42 != value)
-        {
-            throw unload_test_failure("Kernel wrote " + std::to_string(value)
-                                      + ", expected 42");
-        }
-    }
-
-    void reset()
-    {
-        if (m_program)
-        {
-            clReleaseProgram(m_program);
-            m_program = nullptr;
-        }
-        if (m_executable)
-        {
-            clReleaseProgram(m_executable);
-            m_executable = nullptr;
-        }
-    }
-
-    void build()
-    {
-        compile();
-        link();
-    }
-
-protected:
-    const cl_context m_context;
-    const cl_device_id m_device;
-    cl_program m_program{};
-    cl_program m_executable{};
-};
-
-/**
- * @brief initializer_list type for constructing loops over build tests.
- */
-using build_list = std::initializer_list<std::reference_wrapper<build_base>>;
-
-class build_with_source : public build_base {
-public:
-    using build_base::build_base;
-
-    void create() final
-    {
-        assert(nullptr == m_program);
-
-        static const char *sources[] = { write_kernel_source };
-
-        cl_int err = CL_INVALID_PLATFORM;
-        m_program =
-            clCreateProgramWithSource(m_context, 1, sources, nullptr, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateProgramWithSource()", err);
-        if (nullptr == m_program)
-            throw unload_test_failure(
-                "clCreateProgramWithSource returned nullptr");
-    }
-};
-
-class build_with_binary : public build_base {
-public:
-    build_with_binary(const cl_context context, const cl_device_id device,
-                      const std::vector<unsigned char> &binary)
-        : build_base{ context, device }, m_binary{ binary }
-    {}
-
-    build_with_binary(const cl_context context, const cl_device_id device)
-        : build_base{ context, device }
-    {
-        cl_int err = CL_INVALID_VALUE;
-
-        /* Build the program from source */
-        static const char *sources[] = { write_kernel_source };
-        clProgramWrapper program =
-            clCreateProgramWithSource(m_context, 1, sources, nullptr, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateProgramWithSource()", err);
-
-        err = clCompileProgram(program, 1, &m_device, nullptr, 0, nullptr,
-                               nullptr, nullptr, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCompileProgram()", err);
-
-        const clProgramWrapper executable =
-            clLinkProgram(m_context, 1, &m_device, nullptr, 1, &program,
-                          nullptr, nullptr, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clLinkProgram()", err);
-
-        size_t binary_size;
-        err = clGetProgramInfo(executable, CL_PROGRAM_BINARY_SIZES,
-                               sizeof(binary_size), &binary_size, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clGetProgramInfo()", err);
-
-        m_binary.resize(binary_size);
-
-        /* Grab the program binary */
-        unsigned char *binaries[] = { m_binary.data() };
-        err = clGetProgramInfo(executable, CL_PROGRAM_BINARIES,
-                               sizeof(unsigned char *), binaries, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clGetProgramInfo()", err);
-    }
-
-    void create() final
-    {
-        assert(nullptr == m_executable);
-
-        const unsigned char *binaries[] = { m_binary.data() };
-        const size_t binary_sizes[] = { m_binary.size() };
-
-        cl_int err = CL_INVALID_PLATFORM;
-        m_executable = clCreateProgramWithBinary(
-            m_context, 1, &m_device, binary_sizes, binaries, nullptr, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateProgramWithBinary()", err);
-        if (nullptr == m_executable)
-            throw unload_test_failure(
-                "clCreateProgramWithBinary returned nullptr");
-    }
-
-    void compile() final
-    {
-        assert(nullptr != m_executable);
-
-        /* Program created from binary, there is nothing to do */
-    }
-
-    void link() final
-    {
-        assert(nullptr != m_executable);
-
-        const cl_int err = clBuildProgram(m_executable, 1, &m_device, nullptr,
-                                          nullptr, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clBuildProgram()", err);
-    }
-
-private:
-    std::vector<unsigned char> m_binary;
-};
-
-class build_with_il : public build_base {
-public:
-    build_with_il(const cl_context context, const cl_platform_id platform,
-                  const cl_device_id device)
-        : build_base{ context, device }
-    {
-        /* Disable build_with_il if neither core nor extension functionality is
-         * available */
-        m_enabled = false;
-
-        Version version = get_device_cl_version(device);
-        if (version >= Version(2, 1))
-        {
-            std::string sILVersion = get_device_il_version_string(device);
-            if (version < Version(3, 0) || !sILVersion.empty())
-            {
-                m_enabled = true;
-            }
-
-            m_CreateProgramWithIL = clCreateProgramWithIL;
-        }
-        else if (is_extension_available(device, "cl_khr_il_program"))
-        {
-            m_CreateProgramWithIL = (decltype(m_CreateProgramWithIL))
-                clGetExtensionFunctionAddressForPlatform(
-                    platform, "clCreateProgramWithILKHR");
-            if (nullptr == m_CreateProgramWithIL)
-            {
-                throw unload_test_failure("cl_khr_il_program supported, but "
-                                          "function address is nullptr");
-            }
-            m_enabled = true;
-        }
-
-        cl_uint address_bits{};
-        const cl_int err =
-            clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
-                            &address_bits, nullptr);
-        if (CL_SUCCESS != err)
-        {
-            throw unload_test_failure("Failure getting device address bits");
-        }
-
-        switch (address_bits)
-        {
-            case 32:
-                m_spirv_binary = write_kernel_32_spv.data();
-                m_spirv_size = write_kernel_32_spv.size();
-                break;
-            case 64:
-                m_spirv_binary = write_kernel_64_spv.data();
-                m_spirv_size = write_kernel_64_spv.size();
-                break;
-            default: throw unload_test_failure("Invalid address bits");
-        }
-    }
-
-    void create() final
-    {
-        if (!m_enabled) return;
-
-        assert(nullptr == m_program);
-
-        cl_int err = CL_INVALID_PLATFORM;
-        m_program = m_CreateProgramWithIL(m_context, m_spirv_binary,
-                                          m_spirv_size, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateProgramWithIL()", err);
-        if (nullptr == m_program)
-            throw unload_test_failure("clCreateProgramWithIL returned nullptr");
-    }
-
-    void compile() final
-    {
-        if (!m_enabled) return;
-        build_base::compile();
-    }
-
-    void link() final
-    {
-        if (!m_enabled) return;
-        build_base::link();
-    }
-
-    void verify() final
-    {
-        if (!m_enabled) return;
-        build_base::verify();
-    }
-
-private:
-    void *m_spirv_binary;
-    size_t m_spirv_size;
-    bool m_enabled;
-
-    using CreateProgramWithIL_fn = decltype(&clCreateProgramWithIL);
-    CreateProgramWithIL_fn m_CreateProgramWithIL;
-};
-}
-
-static cl_platform_id device_platform(cl_device_id device)
-{
-    cl_platform_id platform;
-    const cl_int err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
-                                       sizeof(platform), &platform, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Failure getting platform of tested device\n");
-        return nullptr;
-    }
-
-    return platform;
-}
-
-static void unload_platform_compiler(const cl_platform_id platform)
-{
-    const cl_int err = clUnloadPlatformCompiler(platform);
-    if (CL_SUCCESS != err)
-        throw unload_test_failure("clUnloadPlatformCompiler()", err);
-}
-
-/* Test calling the function with a valid platform */
-int test_unload_valid(cl_device_id device, cl_context, cl_command_queue, int)
-{
-    const cl_platform_id platform = device_platform(device);
-    const long int err = clUnloadPlatformCompiler(platform);
-
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clUnloadPlatformCompiler() == %ld\n", err);
-        return 1;
-    }
-
-    return 0;
-}
-
-/* Test calling the function with invalid platform */
-int test_unload_invalid(cl_device_id, cl_context, cl_command_queue, int)
-{
-    const long int err = clUnloadPlatformCompiler(nullptr);
-
-    if (CL_INVALID_PLATFORM != err)
-    {
-        log_error("Test failure: clUnloadPlatformCompiler() == %ld\n", err);
-        return 1;
-    }
-
-    return 0;
-}
-
-/* Test calling the function multiple times in a row */
-int test_unload_repeated(cl_device_id device, cl_context context,
-                         cl_command_queue, int)
-{
-    check_compiler_available(device);
-
-    const cl_platform_id platform = device_platform(device);
-    try
-    {
-        build_with_source source(context, device);
-        build_with_binary binary(context, device);
-        build_with_il il(context, platform, device);
-
-        for (build_base &test : build_list{ source, binary, il })
-        {
-            unload_platform_compiler(platform);
-            unload_platform_compiler(platform);
-
-            test.create();
-            test.build();
-            test.verify();
-        }
-    } catch (const unload_test_failure &e)
-    {
-        log_error("Test failure: %s\n", e.what());
-        return 1;
-    }
-
-    return 0;
-}
-
-/* Test calling the function between compilation and linking of programs */
-int test_unload_compile_unload_link(cl_device_id device, cl_context context,
-                                    cl_command_queue, int)
-{
-    check_compiler_available(device);
-
-    const cl_platform_id platform = device_platform(device);
-    try
-    {
-        build_with_source source(context, device);
-        build_with_binary binary(context, device);
-        build_with_il il(context, platform, device);
-
-        for (build_base &test : build_list{ source, binary, il })
-        {
-            unload_platform_compiler(platform);
-            test.create();
-            test.compile();
-            unload_platform_compiler(platform);
-            test.link();
-            test.verify();
-        }
-    } catch (const unload_test_failure &e)
-    {
-        log_error("Test failure: %s\n", e.what());
-        return 1;
-    }
-
-    return 0;
-}
-
-/* Test calling the function between program build and kernel creation */
-int test_unload_build_unload_create_kernel(cl_device_id device,
-                                           cl_context context, cl_command_queue,
-                                           int)
-{
-    check_compiler_available(device);
-
-    const cl_platform_id platform = device_platform(device);
-    try
-    {
-        build_with_source source(context, device);
-        build_with_binary binary(context, device);
-        build_with_il il(context, platform, device);
-
-        for (build_base &test : build_list{ source, binary, il })
-        {
-            unload_platform_compiler(platform);
-            test.create();
-            test.build();
-            unload_platform_compiler(platform);
-            test.verify();
-        }
-    } catch (const unload_test_failure &e)
-    {
-        log_error("Test failure: %s\n", e.what());
-        return 1;
-    }
-
-    return 0;
-}
-
-/* Test linking together two programs that were built with a call to the unload
- * function in between */
-int test_unload_link_different(cl_device_id device, cl_context context,
-                               cl_command_queue, int)
-{
-    check_compiler_available(device);
-
-    const cl_platform_id platform = device_platform(device);
-
-    static const char *sources_1[] = { "unsigned int a() { return 42; }" };
-    static const char *sources_2[] = { R"(
-		unsigned int a();
-		kernel void test(global unsigned int *p)
-		{
-			*p = a();
-		})" };
-
-    cl_int err = CL_INVALID_PLATFORM;
-
-    /* Create and compile program 1 */
-    const clProgramWrapper program_1 =
-        clCreateProgramWithSource(context, 1, sources_1, nullptr, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateProgramWithSource() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    err = clCompileProgram(program_1, 1, &device, nullptr, 0, nullptr, nullptr,
-                           nullptr, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCompileProgram() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    /* Unload the platform compiler */
-    err = clUnloadPlatformCompiler(platform);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clUnloadPlatformCompiler() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    /* Create and compile program 2 with the new compiler context */
-    const clProgramWrapper program_2 =
-        clCreateProgramWithSource(context, 1, sources_2, nullptr, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateProgramWithSource() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    err = clCompileProgram(program_2, 1, &device, nullptr, 0, nullptr, nullptr,
-                           nullptr, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCompileProgram() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    /* Link the two programs into an executable program */
-    const cl_program compiled_programs[] = { program_1, program_2 };
-
-    const clProgramWrapper executable =
-        clLinkProgram(context, 1, &device, nullptr, 2, compiled_programs,
-                      nullptr, nullptr, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clLinkProgram() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    /* Verify execution of a kernel from the linked executable */
-    const clKernelWrapper kernel = clCreateKernel(executable, "test", &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateKernel() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    const clCommandQueueWrapper queue =
-        clCreateCommandQueue(context, device, 0, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateCommandQueue() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    const clMemWrapper buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                               sizeof(cl_uint), nullptr, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateBuffer() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    cl_uint value = 0;
-
-    err = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clSetKernelArg() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    static const size_t work_size = 1;
-    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &work_size, nullptr,
-                                 0, nullptr, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clEnqueueNDRangeKernel() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    err = clEnqueueReadBuffer(queue, buffer, CL_BLOCKING, 0, sizeof(cl_uint),
-                              &value, 0, nullptr, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clEnqueueReadBuffer() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    err = clFinish(queue);
-    if (CL_SUCCESS != err) throw unload_test_failure("clFinish()", err);
-
-    if (42 != value)
-    {
-        log_error("Test failure: Kernel wrote %lu, expected 42)\n",
-                  static_cast<long unsigned>(value));
-        return 1;
-    }
-
-    return 0;
-}
-
-/* Test calling the function in a thread while others threads are building
- * programs */
-int test_unload_build_threaded(cl_device_id device, cl_context context,
-                               cl_command_queue, int)
-{
-    using clock = std::chrono::steady_clock;
-
-    check_compiler_available(device);
-
-    const cl_platform_id platform = device_platform(device);
-
-    const auto end = clock::now() + std::chrono::seconds(5);
-
-    const auto unload_thread = [&end, platform] {
-        bool success = true;
-
-        /* Repeatedly unload the compiler */
-        try
-        {
-            while (clock::now() < end)
-            {
-                unload_platform_compiler(platform);
-            }
-        } catch (const unload_test_failure &e)
-        {
-            log_error("Test failure: %s\n", e.what());
-            success = false;
-        }
-
-        return success;
-    };
-
-    const auto build_thread = [&end](build_base *build) {
-        bool success = true;
-
-        try
-        {
-            while (clock::now() < end)
-            {
-                build->create();
-                build->build();
-                build->verify();
-                build->reset();
-            }
-        } catch (unload_test_failure &e)
-        {
-            log_error("Test failure: %s\n", e.what());
-            success = false;
-        }
-
-        return success;
-    };
-
-    build_with_source build_source(context, device);
-    build_with_binary build_binary(context, device);
-    build_with_il build_il(context, platform, device);
-
-    /* Run all threads in parallel and wait for them to finish */
-    std::future<bool> unload_result =
-        std::async(std::launch::async, unload_thread);
-    std::future<bool> build_source_result =
-        std::async(std::launch::async, build_thread, &build_source);
-    std::future<bool> build_binary_result =
-        std::async(std::launch::async, build_thread, &build_binary);
-    std::future<bool> build_il_result =
-        std::async(std::launch::async, build_thread, &build_il);
-
-    bool success = true;
-    if (!unload_result.get())
-    {
-        log_error("unload_thread failed\n");
-        success = false;
-    }
-    if (!build_source_result.get())
-    {
-        log_error("build_with_source failed\n");
-        success = false;
-    }
-    if (!build_binary_result.get())
-    {
-        log_error("build_with_binary failed\n");
-        success = false;
-    }
-    if (!build_il_result.get())
-    {
-        log_error("build_with_il failed\n");
-        success = false;
-    }
-
-    return success ? 0 : 1;
-}
-
-/* Test grabbing program build information after calling the unload function */
-int test_unload_build_info(cl_device_id device, cl_context context,
-                           cl_command_queue, int)
-{
-    check_compiler_available(device);
-
-    const cl_platform_id platform = device_platform(device);
-
-    static const char *sources[] = { write_kernel_source };
-
-    cl_int err = CL_INVALID_PLATFORM;
-    /* Create and build the initial program from source */
-    const clProgramWrapper program =
-        clCreateProgramWithSource(context, 1, sources, nullptr, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateProgramWithSource() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    static const std::string options("-Dtest");
-
-    err =
-        clBuildProgram(program, 1, &device, options.c_str(), nullptr, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCompileProgram() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    /* Unload the compiler */
-    err = clUnloadPlatformCompiler(platform);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clUnloadPlatformCompiler() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    std::vector<cl_program_build_info> infos{ CL_PROGRAM_BUILD_STATUS,
-                                              CL_PROGRAM_BUILD_OPTIONS,
-                                              CL_PROGRAM_BUILD_LOG,
-                                              CL_PROGRAM_BINARY_TYPE };
-
-    if (get_device_cl_version(device) >= Version(2, 0))
-    {
-        infos.push_back(CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE);
-    }
-
-    /* Try grabbing the infos after the compiler unload */
-    for (cl_program_build_info info : infos)
-    {
-        size_t info_size = 0;
-        err = clGetProgramBuildInfo(program, device, info, 0, nullptr,
-                                    &info_size);
-        if (CL_SUCCESS != err)
-        {
-            log_error("Test failure: clGetProgramBuildInfo() == %ld\n",
-                      static_cast<long int>(err));
-            return 1;
-        }
-
-        std::vector<char> info_value(info_size);
-
-        size_t written_size = 0;
-        err = clGetProgramBuildInfo(program, device, info, info_size,
-                                    &info_value[0], &written_size);
-        if (CL_SUCCESS != err)
-        {
-            log_error("Test failure: clGetProgramBuildInfo() == %ld\n",
-                      static_cast<long int>(err));
-            return 1;
-        }
-        else if (written_size != info_size)
-        {
-            log_error("Test failure: Written info value size (%zu) was "
-                      "different from "
-                      "queried size (%zu).\n",
-                      written_size, info_size);
-            return 1;
-        }
-
-        /* Verify the information we know the answer to */
-        switch (info)
-        {
-            case CL_PROGRAM_BUILD_STATUS: {
-                constexpr size_t value_size = sizeof(cl_build_status);
-                if (value_size != info_size)
-                {
-                    log_error("Test failure: Expected CL_PROGRAM_BUILD_STATUS "
-                              "of size %zu, "
-                              "but got %zu\n",
-                              value_size, info_size);
-                    return 1;
-                }
-                cl_build_status value;
-                memcpy(&value, &info_value[0], value_size);
-                if (CL_BUILD_SUCCESS != value)
-                {
-                    log_error(
-                        "Test failure: CL_PROGRAM_BUILD_STATUS did not return "
-                        "CL_BUILD_SUCCESS (%ld), but %ld\n",
-                        static_cast<long int>(CL_BUILD_SUCCESS),
-                        static_cast<long int>(value));
-                    return 1;
-                }
-            }
-            break;
-
-            case CL_PROGRAM_BUILD_OPTIONS: {
-                const size_t value_size = options.length() + 1;
-                if (value_size != info_size)
-                {
-                    log_error("Test failure: Expected CL_PROGRAM_BUILD_OPTIONS "
-                              "of size "
-                              "%zu, but got %zu\n",
-                              value_size, info_size);
-                    return 1;
-                }
-                else if (options != &info_value[0])
-                {
-                    log_error("Test failure: CL_PROGRAM_BUILD_OPTIONS returned "
-                              "\"%s\" "
-                              "instead of \"%s\"\n",
-                              &info_value[0], options.c_str());
-                    return 1;
-                }
-            }
-            break;
-
-            case CL_PROGRAM_BINARY_TYPE: {
-                constexpr size_t value_size = sizeof(cl_program_binary_type);
-                if (value_size != info_size)
-                {
-                    log_error("Test failure: Expected CL_PROGRAM_BINARY_TYPE "
-                              "of size %zu, "
-                              "but got %zu\n",
-                              value_size, info_size);
-                    return 1;
-                }
-                cl_program_binary_type value;
-                memcpy(&value, &info_value[0], value_size);
-                if (CL_PROGRAM_BINARY_TYPE_EXECUTABLE != value)
-                {
-                    log_error(
-                        "Test failure: CL_PROGRAM_BINARY_TYPE did not return "
-                        "CL_PROGRAM_BINARY_TYPE_EXECUTABLE (%ld), but %ld\n",
-                        static_cast<long int>(
-                            CL_PROGRAM_BINARY_TYPE_EXECUTABLE),
-                        static_cast<long int>(value));
-                    return 1;
-                }
-            }
-            break;
-        }
-    }
-
-    return 0;
-}
-
-/* Test calling the unload function between program building and fetching the
- * program binaries */
-int test_unload_program_binaries(cl_device_id device, cl_context context,
-                                 cl_command_queue, int)
-{
-    check_compiler_available(device);
-
-    const cl_platform_id platform = device_platform(device);
-
-    static const char *sources[] = { write_kernel_source };
-
-    cl_int err = CL_INVALID_PLATFORM;
-    /* Create and build the initial program from source */
-    const clProgramWrapper program =
-        clCreateProgramWithSource(context, 1, sources, nullptr, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateProgramWithSource() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCompileProgram() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    /* Unload the compiler */
-    err = clUnloadPlatformCompiler(platform);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clUnloadPlatformCompiler() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    /* Grab the built executable binary after the compiler unload */
-    size_t binary_size;
-    err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
-                           sizeof(binary_size), &binary_size, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clGetProgramInfo() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    std::vector<unsigned char> binary(binary_size);
-
-    unsigned char *binaries[] = { binary.data() };
-    err = clGetProgramInfo(program, CL_PROGRAM_BINARIES,
-                           sizeof(unsigned char *), binaries, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clGetProgramInfo() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-
-    /* Create a new program from the binary and test its execution */
-    try
-    {
-        build_with_binary build_binary(context, device, binary);
-        build_binary.create();
-        build_binary.build();
-        build_binary.verify();
-    } catch (unload_test_failure &e)
-    {
-        log_error("Test failure: %s\n", e.what());
-        return 1;
-    }
-
-    return 0;
-}

diff --git a/test_conformance/compiler/test_unload_platform_compiler_resources.hpp b/test_conformance/compiler/test_unload_platform_compiler_resources.hpp
deleted file mode 100644
index 82f87ff..0000000
--- a/test_conformance/compiler/test_unload_platform_compiler_resources.hpp
+++ /dev/null

@@ -1,50 +0,0 @@
-#include <array>
-
-static const char write_kernel_source[] = R"(
-	kernel void write_kernel(global unsigned int *p) {
-		*p = 42;
-	})";
-
-/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm64 */
-static std::array<unsigned char, 216> write_kernel_64_spv{
-    { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00,
-      0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00,
-      0x0e, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-      0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
-      0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
-      0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
-      0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
-      0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
-      0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
-      0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00,
-      0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }
-};
-
-/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm32 */
-static std::array<unsigned char, 216> write_kernel_32_spv{
-    { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00,
-      0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00,
-      0x0e, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-      0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
-      0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
-      0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
-      0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
-      0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
-      0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
-      0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00,
-      0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }
-};

diff --git a/test_conformance/compiler/write_kernel.spvasm32 b/test_conformance/compiler/write_kernel.spvasm32
deleted file mode 100644
index b6a3fc1..0000000
--- a/test_conformance/compiler/write_kernel.spvasm32
+++ /dev/null

@@ -1,24 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 11
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-;         %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %6 "write_kernel"
-;        %10 = OpString "kernel_arg_type.write_kernel.uint*,"
-;              OpSource OpenCL_C 200000
-;              OpDecorate %7 FuncParamAttr NoCapture
-       %uint = OpTypeInt 32 0
-    %uint_42 = OpConstant %uint 42
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
-          %5 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-          %6 = OpFunction %void None %5
-          %7 = OpFunctionParameter %_ptr_CrossWorkgroup_uint
-          %8 = OpLabel
-               OpStore %7 %uint_42 Aligned 4
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/compiler/write_kernel.spvasm64 b/test_conformance/compiler/write_kernel.spvasm64
deleted file mode 100644
index 0923bc1..0000000
--- a/test_conformance/compiler/write_kernel.spvasm64
+++ /dev/null

@@ -1,24 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 11
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-;         %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %6 "write_kernel"
-;        %10 = OpString "kernel_arg_type.write_kernel.uint*,"
-;              OpSource OpenCL_C 200000
-;              OpDecorate %7 FuncParamAttr NoCapture
-       %uint = OpTypeInt 32 0
-    %uint_42 = OpConstant %uint 42
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
-          %5 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-          %6 = OpFunction %void None %5
-          %7 = OpFunctionParameter %_ptr_CrossWorkgroup_uint
-          %8 = OpLabel
-               OpStore %7 %uint_42 Aligned 4
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/computeinfo/CMakeLists.txt b/test_conformance/computeinfo/CMakeLists.txt
index 207223a..c952b17 100644
--- a/test_conformance/computeinfo/CMakeLists.txt
+++ b/test_conformance/computeinfo/CMakeLists.txt

@@ -2,9 +2,7 @@
 
 set(${MODULE_NAME}_SOURCES
         main.cpp
-        device_uuid.cpp
         extended_versioning.cpp
-        conforming_version.cpp
 )
 
 include(../CMakeCommon.txt)

diff --git a/test_conformance/computeinfo/conforming_version.cpp b/test_conformance/computeinfo/conforming_version.cpp
deleted file mode 100644
index 624cf85..0000000
--- a/test_conformance/computeinfo/conforming_version.cpp
+++ /dev/null

@@ -1,37 +0,0 @@
-
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <regex>
-#include "harness/testHarness.h"
-#include "harness/deviceInfo.h"
-
-int test_conformance_version(cl_device_id deviceID, cl_context context,
-                             cl_command_queue ignoreQueue, int num_elements)
-{
-    std::string version_string{ get_device_info_string(
-        deviceID, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED) };
-
-    // Latest conformance version passed should match vYYYY-MM-DD-XX, where XX
-    // is a number
-    std::regex valid_format("^v\\d{4}-(((0)[1-9])|((1)[0-2]))-((0)[1-9]|[1-2]["
-                            "0-9]|(3)[0-1])-\\d{2}$");
-    test_assert_error(
-        std::regex_match(version_string, valid_format),
-        "CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED does not return "
-        "valid format vYYYY-MM-DD-XX");
-
-    return TEST_PASS;
-}

diff --git a/test_conformance/computeinfo/device_uuid.cpp b/test_conformance/computeinfo/device_uuid.cpp
deleted file mode 100644
index 1ef9dad..0000000
--- a/test_conformance/computeinfo/device_uuid.cpp
+++ /dev/null

@@ -1,211 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <array>
-#include <bitset>
-
-#include "harness/testHarness.h"
-#include "harness/deviceInfo.h"
-
-using uuid = std::array<cl_uchar, CL_UUID_SIZE_KHR>;
-using luid = std::array<cl_uchar, CL_LUID_SIZE_KHR>;
-
-template <typename T> static void log_info_uuid(const T &id)
-{
-    for (const cl_uchar c : id)
-    {
-        log_info("%02x", static_cast<unsigned>(c));
-    }
-}
-
-template <typename T> static void log_error_uuid(const T &id)
-{
-    for (const cl_uchar c : id)
-    {
-        log_error("%02x", static_cast<unsigned>(c));
-    }
-}
-
-static bool check_device_info_returns(const cl_int err, const size_t size,
-                                      const size_t expected_size)
-{
-    if (err != CL_SUCCESS)
-    {
-        print_error(err, "clGetDeviceInfo failed");
-        return false;
-    }
-    else if (size != expected_size)
-    {
-        log_error("Invalid size written by clGetDeviceInfo (%zu != %zu)\n",
-                  size, expected_size);
-        return false;
-    }
-
-    return true;
-}
-
-template <typename T>
-static bool get_uuid(const cl_device_id device, const cl_device_info info,
-                     T &id, const bool twice = true)
-{
-    const size_t id_size = id.size() * sizeof(id[0]);
-
-    size_t size_ret;
-    cl_int err = clGetDeviceInfo(device, info, id_size, id.data(), &size_ret);
-    if (!check_device_info_returns(err, size_ret, id_size))
-    {
-        return false;
-    }
-
-    /* Check that IDs are (at the very least) stable across two successive
-     * clGetDeviceInfo calls. Check conditionally, as it is undefined what the
-     * query for CL_DEVICE_LUID_KHR returns if CL_DEVICE_LUID_VALID_KHR returns
-     * false. */
-    if (twice)
-    {
-        T id_2;
-        size_t size_ret_2;
-        err = clGetDeviceInfo(device, info, id_size, id_2.data(), &size_ret_2);
-        if (!check_device_info_returns(err, size_ret_2, id_size))
-        {
-            return false;
-        }
-
-        if (id != id_2)
-        {
-            log_error("Got different IDs from the same ID device info (");
-            log_error_uuid(id);
-            log_error(" != ");
-            log_error_uuid(id_2);
-            log_error(")\n");
-            return false;
-        }
-    }
-
-    return true;
-}
-
-int test_device_uuid(cl_device_id deviceID, cl_context context,
-                     cl_command_queue ignoreQueue, int num_elements)
-{
-    if (!is_extension_available(deviceID, "cl_khr_device_uuid"))
-    {
-        log_info("cl_khr_device_uuid not supported. Skipping test...\n");
-        return 0;
-    }
-
-    int total_errors = 0;
-
-    /* CL_DEVICE_UUID_KHR */
-    uuid device_uuid;
-    bool success = get_uuid(deviceID, CL_DEVICE_UUID_KHR, device_uuid);
-    if (!success)
-    {
-        log_error("Error getting device UUID\n");
-        ++total_errors;
-    }
-    else
-    {
-        log_info("\tDevice UUID: ");
-        log_info_uuid(device_uuid);
-        log_info("\n");
-    }
-
-    /* CL_DRIVER_UUID_KHR */
-    uuid driver_uuid;
-    success = get_uuid(deviceID, CL_DRIVER_UUID_KHR, driver_uuid);
-    if (!success)
-    {
-        log_error("Error getting driver UUID\n");
-        ++total_errors;
-    }
-    else
-    {
-        log_info("\tDriver UUID: ");
-        log_info_uuid(driver_uuid);
-        log_info("\n");
-    }
-
-    size_t size_ret{};
-
-    /* CL_DEVICE_LUID_VALID_KHR */
-    cl_bool device_luid_valid{};
-    cl_int err = clGetDeviceInfo(deviceID, CL_DEVICE_LUID_VALID_KHR,
-                                 sizeof(device_luid_valid), &device_luid_valid,
-                                 &size_ret);
-    if (!check_device_info_returns(err, size_ret, sizeof(device_luid_valid)))
-    {
-        log_error("Error getting device LUID validity\n");
-        ++total_errors;
-        device_luid_valid = false;
-    }
-    else
-    {
-        log_info("\tDevice LUID validity is %s\n",
-                 device_luid_valid ? "true" : "false");
-    }
-
-    /* CL_DEVICE_LUID_KHR */
-    luid device_luid;
-    success =
-        get_uuid(deviceID, CL_DEVICE_LUID_KHR, device_luid, device_luid_valid);
-    if (!success)
-    {
-        log_error("Error getting device LUID\n");
-        ++total_errors;
-    }
-    else
-    {
-        log_info("\tDevice LUID: ");
-        log_info_uuid(device_luid);
-        log_info("\n");
-    }
-
-    /* CL_DEVICE_NODE_MASK_KHR */
-    cl_uint device_node_mask{};
-    err =
-        clGetDeviceInfo(deviceID, CL_DEVICE_NODE_MASK_KHR,
-                        sizeof(device_node_mask), &device_node_mask, &size_ret);
-    if (!check_device_info_returns(err, size_ret, sizeof(device_node_mask)))
-    {
-        log_error("Error getting device node mask\n");
-        ++total_errors;
-    }
-    else
-    {
-        log_info("\tNode mask  : %08lx\n",
-                 static_cast<unsigned long>(device_node_mask));
-
-        /* If the LUID is valid, there must be one and only one bit set in the
-         * node mask */
-        if (device_luid_valid)
-        {
-            static constexpr size_t cl_uint_size_in_bits = 32;
-            const size_t bit_count =
-                std::bitset<cl_uint_size_in_bits>(device_node_mask).count();
-            if (1 != bit_count)
-            {
-                log_error("Wrong amount of bits set in node mask (%zu != 1) "
-                          "with valid LUID\n",
-                          bit_count);
-                ++total_errors;
-            }
-        }
-    }
-
-    return total_errors;
-}

diff --git a/test_conformance/computeinfo/extended_versioning.cpp b/test_conformance/computeinfo/extended_versioning.cpp
index 179e902..f9c29b9 100644
--- a/test_conformance/computeinfo/extended_versioning.cpp
+++ b/test_conformance/computeinfo/extended_versioning.cpp

@@ -17,7 +17,6 @@
 
 #include <vector>
 #include <set>
-#include <iterator>
 #include <algorithm>
 #include <cstring>
 #include "harness/testHarness.h"
@@ -243,8 +242,7 @@
 
 /* Check that CL_DEVICE{,_OPENCL_C}_NUMERIC_VERSION_KHR return the same versions
  * as CL_DEVICE{,_OPENCL_C}_VERSION */
-static int test_extended_versioning_device_versions(bool ext,
-                                                    cl_device_id deviceID)
+static int test_extended_versioning_device_versions(cl_device_id deviceID)
 {
     log_info("Device versions:\n");
 
@@ -261,15 +259,6 @@
 
     for (const auto& query : device_version_queries)
     {
-        // CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR is only supported by
-        // cl_khr_extended_versioning:
-        if (!ext
-            && query.param_name_numeric
-                == CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR)
-        {
-            continue;
-        }
-
         const std::vector<char> version_string(
             get_device_string(deviceID, query.param_name_string));
         if (version_string.empty())
@@ -698,41 +687,14 @@
     return 0;
 }
 
-// Assumes the core enums, structures, and macros exactly match
-// the extension enums, structures, and macros:
-
-static_assert(CL_PLATFORM_NUMERIC_VERSION == CL_PLATFORM_NUMERIC_VERSION_KHR,
-              "CL_PLATFORM_NUMERIC_VERSION mismatch");
-static_assert(CL_PLATFORM_EXTENSIONS_WITH_VERSION
-                  == CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR,
-              "CL_PLATFORM_EXTENSIONS_WITH_VERSION mismatch");
-
-static_assert(CL_DEVICE_NUMERIC_VERSION == CL_DEVICE_NUMERIC_VERSION_KHR,
-              "CL_DEVICE_NUMERIC_VERSION mismatch");
-static_assert(CL_DEVICE_EXTENSIONS_WITH_VERSION
-                  == CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR,
-              "CL_DEVICE_EXTENSIONS_WITH_VERSION mismatch");
-static_assert(CL_DEVICE_ILS_WITH_VERSION == CL_DEVICE_ILS_WITH_VERSION_KHR,
-              "CL_DEVICE_ILS_WITH_VERSION mismatch");
-static_assert(CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION
-                  == CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR,
-              "CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION mismatch");
-
-static_assert(sizeof(cl_name_version) == sizeof(cl_name_version_khr),
-              "cl_name_version mismatch");
-
-static_assert(CL_MAKE_VERSION(1, 2, 3) == CL_MAKE_VERSION_KHR(1, 2, 3),
-              "CL_MAKE_VERSION mismatch");
-
 int test_extended_versioning(cl_device_id deviceID, cl_context context,
                              cl_command_queue ignoreQueue, int num_elements)
 {
-    bool ext = is_extension_available(deviceID, "cl_khr_extended_versioning");
-    bool core = get_device_cl_version(deviceID) >= Version(3, 0);
-
-    if (!ext && !core)
+    if (!is_extension_available(deviceID, "cl_khr_extended_versioning"))
     {
-        return TEST_SKIPPED_ITSELF;
+        log_info(
+            "cl_khr_extended_versioning not supported. Skipping test...\n");
+        return 0;
     }
 
     cl_platform_id platform;
@@ -743,7 +705,7 @@
     int total_errors = 0;
     total_errors += test_extended_versioning_platform_version(platform);
     total_errors += test_extended_versioning_platform_extensions(platform);
-    total_errors += test_extended_versioning_device_versions(ext, deviceID);
+    total_errors += test_extended_versioning_device_versions(deviceID);
     total_errors += test_extended_versioning_device_extensions(deviceID);
     total_errors += test_extended_versioning_device_il(deviceID);
     total_errors += test_extended_versioning_device_built_in_kernels(deviceID);

diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp
index 4860b44..0bc04a9 100644
--- a/test_conformance/computeinfo/main.cpp
+++ b/test_conformance/computeinfo/main.cpp

@@ -59,10 +59,6 @@
     type_cl_ulong,
     type_string,
     type_cl_device_svm_capabilities,
-    type_cl_device_atomic_capabilities,
-    type_cl_device_device_enqueue_capabilities,
-    type_cl_name_version_array,
-    type_cl_name_version,
 };
 
 typedef union {
@@ -80,10 +76,6 @@
     cl_ulong ull;
     char* string;
     cl_device_svm_capabilities svmCapabilities;
-    cl_device_atomic_capabilities atomicCapabilities;
-    cl_device_device_enqueue_capabilities deviceEnqueueCapabilities;
-    cl_name_version* cl_name_version_array;
-    cl_name_version cl_name_version_single;
 } config_data;
 
 struct _version
@@ -125,7 +117,6 @@
     const char* opcode_name;
     int config_type;
     config_data config;
-    size_t opcode_ret_size;
 } config_info;
 
 #define CONFIG_INFO(major, minor, opcode, type)                                \
@@ -227,7 +218,9 @@
     CONFIG_INFO(1, 1, CL_DEVICE_VENDOR, string),
     CONFIG_INFO(1, 1, CL_DRIVER_VERSION, string),
     CONFIG_INFO(1, 1, CL_DEVICE_PROFILE, string),
+    CONFIG_INFO(1, 1, CL_DEVICE_VERSION, string),
     CONFIG_INFO(1, 1, CL_DEVICE_OPENCL_C_VERSION, string),
+    CONFIG_INFO(1, 1, CL_DEVICE_EXTENSIONS, string),
 
     CONFIG_INFO(2, 0, CL_DEVICE_MAX_PIPE_ARGS, cl_uint),
     CONFIG_INFO(2, 0, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, cl_uint),
@@ -255,25 +248,6 @@
     CONFIG_INFO(2, 1, CL_DEVICE_MAX_NUM_SUB_GROUPS, cl_uint),
     CONFIG_INFO(2, 1, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
                 cl_uint),
-    CONFIG_INFO(3, 0, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-                cl_device_atomic_capabilities),
-    CONFIG_INFO(3, 0, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
-                cl_device_atomic_capabilities),
-    CONFIG_INFO(3, 0, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, cl_uint),
-    CONFIG_INFO(3, 0, CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_t),
-    CONFIG_INFO(3, 0, CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT,
-                cl_uint),
-    CONFIG_INFO(3, 0, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT, cl_uint),
-    CONFIG_INFO(3, 0, CL_DEVICE_OPENCL_C_FEATURES, cl_name_version_array),
-    CONFIG_INFO(3, 0, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES,
-                cl_device_device_enqueue_capabilities),
-    CONFIG_INFO(3, 0, CL_DEVICE_PIPE_SUPPORT, cl_uint),
-    CONFIG_INFO(3, 0, CL_DEVICE_NUMERIC_VERSION, cl_name_version),
-    CONFIG_INFO(3, 0, CL_DEVICE_EXTENSIONS_WITH_VERSION, cl_name_version_array),
-    CONFIG_INFO(3, 0, CL_DEVICE_OPENCL_C_ALL_VERSIONS, cl_name_version_array),
-    CONFIG_INFO(3, 0, CL_DEVICE_ILS_WITH_VERSION, cl_name_version_array),
-    CONFIG_INFO(3, 0, CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION,
-                cl_name_version_array),
 };
 
 #define ENTRY(major, minor, T)                                                 \
@@ -396,62 +370,6 @@
 
     return num_errors;
 }
-int getPlatformConfigInfo(cl_platform_id platform, config_info* info)
-{
-    int err = CL_SUCCESS;
-    int size_err = 0;
-    size_t config_size_set;
-    size_t config_size_ret;
-    switch (info->config_type)
-    {
-        case type_string:
-            err = clGetPlatformInfo(platform, info->opcode, 0, NULL,
-                                    &config_size_set);
-            info->config.string = NULL;
-            if (err == CL_SUCCESS && config_size_set > 0)
-            {
-                info->config.string = (char*)malloc(config_size_set);
-                err = clGetPlatformInfo(platform, info->opcode, config_size_set,
-                                        info->config.string, &config_size_ret);
-                size_err = config_size_set != config_size_ret;
-            }
-            break;
-        case type_cl_name_version_array:
-            err = clGetPlatformInfo(platform, info->opcode, 0, NULL,
-                                    &config_size_set);
-            info->config.cl_name_version_array = NULL;
-            if (err == CL_SUCCESS && config_size_set > 0)
-            {
-                info->config.cl_name_version_array = (cl_name_version*)malloc(
-                    config_size_set * sizeof(cl_name_version));
-                err = clGetPlatformInfo(platform, info->opcode, config_size_set,
-                                        info->config.cl_name_version_array,
-                                        &config_size_ret);
-                size_err = config_size_set != config_size_ret;
-                info->opcode_ret_size = config_size_ret;
-            }
-            break;
-        case type_cl_name_version:
-            err = clGetPlatformInfo(platform, info->opcode, 0, NULL,
-                                    &config_size_set);
-            if (err == CL_SUCCESS && config_size_set > 0)
-            {
-                err = clGetPlatformInfo(platform, info->opcode, config_size_set,
-                                        &info->config.cl_name_version_single,
-                                        &config_size_ret);
-            }
-            size_err = config_size_set != config_size_ret;
-            break;
-        default:
-            log_error("Unknown config type: %d\n", info->config_type);
-            break;
-    }
-    if (err || size_err)
-        log_error("\tFailed clGetPlatformInfo for %s.\n", info->opcode_name);
-    if (err) print_error(err, "\t\tclGetPlatformInfo failed.");
-    if (size_err) log_error("\t\tWrong size return from clGetPlatformInfo.\n");
-    return err || size_err;
-}
 
 int getConfigInfo(cl_device_id device, config_info* info)
 {
@@ -551,43 +469,6 @@
                 device, info->opcode, sizeof(info->config.svmCapabilities),
                 &info->config.svmCapabilities, &config_size_ret);
             break;
-        case type_cl_device_device_enqueue_capabilities:
-            err = clGetDeviceInfo(
-                device, info->opcode,
-                sizeof(info->config.deviceEnqueueCapabilities),
-                &info->config.deviceEnqueueCapabilities, &config_size_ret);
-            break;
-        case type_cl_device_atomic_capabilities:
-            err = clGetDeviceInfo(
-                device, info->opcode, sizeof(info->config.atomicCapabilities),
-                &info->config.atomicCapabilities, &config_size_ret);
-            break;
-        case type_cl_name_version_array:
-            err = clGetDeviceInfo(device, info->opcode, 0, NULL,
-                                  &config_size_set);
-            info->config.cl_name_version_array = NULL;
-            if (err == CL_SUCCESS && config_size_set > 0)
-            {
-                info->config.cl_name_version_array = (cl_name_version*)malloc(
-                    config_size_set * sizeof(cl_name_version));
-                err = clGetDeviceInfo(device, info->opcode, config_size_set,
-                                      info->config.cl_name_version_array,
-                                      &config_size_ret);
-                size_err = config_size_set != config_size_ret;
-                info->opcode_ret_size = config_size_ret;
-            }
-            break;
-        case type_cl_name_version:
-            err = clGetDeviceInfo(device, info->opcode, 0, NULL,
-                                  &config_size_set);
-            if (err == CL_SUCCESS && config_size_set > 0)
-            {
-                err = clGetDeviceInfo(device, info->opcode, config_size_set,
-                                      &info->config.cl_name_version_single,
-                                      &config_size_ret);
-            }
-            size_err = config_size_set != config_size_ret;
-            break;
         default:
             log_error("Unknown config type: %d\n", info->config_type);
             break;
@@ -599,7 +480,7 @@
     return err || size_err;
 }
 
-void dumpConfigInfo(config_info* info)
+void dumpConfigInfo(cl_device_id device, config_info* info)
 {
     // We should not error if we find an unknown configuration since vendors
     // may specify their own options beyond the list in the specification.
@@ -826,111 +707,6 @@
                         (info->config.svmCapabilities & ~all_svm_capabilities));
             }
             break;
-        case type_cl_device_device_enqueue_capabilities:
-            log_info("\t%s == %s|%s\n", info->opcode_name,
-                     (info->config.deviceEnqueueCapabilities
-                      & CL_DEVICE_QUEUE_SUPPORTED)
-                         ? "CL_DEVICE_QUEUE_SUPPORTED"
-                         : "",
-                     (info->config.deviceEnqueueCapabilities
-                      & CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT)
-                         ? "CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT"
-                         : "");
-            {
-                cl_device_device_enqueue_capabilities
-                    all_device_enqueue_capabilities = CL_DEVICE_QUEUE_SUPPORTED
-                    | CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT;
-                if (info->config.deviceEnqueueCapabilities
-                    & ~all_device_enqueue_capabilities)
-                    log_info("WARNING: %s unknown bits found 0x%08" PRIX64,
-                             info->opcode_name,
-                             (info->config.deviceEnqueueCapabilities
-                              & ~all_device_enqueue_capabilities));
-            }
-            break;
-        case type_cl_device_atomic_capabilities:
-            log_info("\t%s == %s|%s|%s|%s|%s|%s|%s\n", info->opcode_name,
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_ORDER_RELAXED)
-                         ? "CL_DEVICE_ATOMIC_ORDER_RELAXED"
-                         : "",
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_ORDER_ACQ_REL)
-                         ? "CL_DEVICE_ATOMIC_ORDER_ACQ_REL"
-                         : "",
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_ORDER_SEQ_CST)
-                         ? "CL_DEVICE_ATOMIC_ORDER_SEQ_CST"
-                         : "",
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM)
-                         ? "CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM"
-                         : "",
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP)
-                         ? "CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP"
-                         : "",
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_SCOPE_DEVICE)
-                         ? "CL_DEVICE_ATOMIC_SCOPE_DEVICE"
-                         : "",
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES)
-                         ? "CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES"
-                         : "");
-            {
-                cl_device_atomic_capabilities all_atomic_capabilities =
-                    CL_DEVICE_ATOMIC_ORDER_RELAXED
-                    | CL_DEVICE_ATOMIC_ORDER_ACQ_REL
-                    | CL_DEVICE_ATOMIC_ORDER_SEQ_CST
-                    | CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM
-                    | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP
-                    | CL_DEVICE_ATOMIC_SCOPE_DEVICE
-                    | CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES;
-                if (info->config.atomicCapabilities & ~all_atomic_capabilities)
-                    log_info("WARNING: %s unknown bits found 0x%08" PRIX64,
-                             info->opcode_name,
-                             (info->config.atomicCapabilities
-                              & ~all_atomic_capabilities));
-            }
-            break;
-        case type_cl_name_version_array: {
-            int number_of_version_items = info->opcode_ret_size
-                / sizeof(*info->config.cl_name_version_array);
-            log_info("\t%s supported name and version:\n", info->opcode_name);
-            if (number_of_version_items == 0)
-            {
-                log_info("\t\t\"\"\n");
-            }
-            else
-            {
-                for (int f = 0; f < number_of_version_items; f++)
-                {
-                    cl_name_version new_version_item =
-                        info->config.cl_name_version_array[f];
-                    cl_version new_version_major =
-                        CL_VERSION_MAJOR_KHR(new_version_item.version);
-                    cl_version new_version_minor =
-                        CL_VERSION_MINOR_KHR(new_version_item.version);
-                    cl_version new_version_patch =
-                        CL_VERSION_PATCH_KHR(new_version_item.version);
-                    log_info("\t\t\"%s\" %d.%d.%d\n", new_version_item.name,
-                             CL_VERSION_MAJOR_KHR(new_version_item.version),
-                             CL_VERSION_MINOR_KHR(new_version_item.version),
-                             CL_VERSION_PATCH_KHR(new_version_item.version));
-                }
-            }
-            break;
-        }
-        case type_cl_name_version:
-            log_info("\t%s == %d.%d.%d\n", info->opcode_name,
-                     CL_VERSION_MAJOR_KHR(
-                         info->config.cl_name_version_single.version),
-                     CL_VERSION_MINOR_KHR(
-                         info->config.cl_name_version_single.version),
-                     CL_VERSION_PATCH_KHR(
-                         info->config.cl_name_version_single.version));
-            break;
     }
 }
 
@@ -1125,7 +901,7 @@
             err = getConfigInfo(device, &info);
             if (!err)
             {
-                dumpConfigInfo(&info);
+                dumpConfigInfo(device, &info);
                 if (info.opcode == CL_DEVICE_VERSION)
                 {
                     err = parseVersion(info.config.string, &version);
@@ -1150,10 +926,6 @@
                 {
                     free(info.config.string);
                 }
-                if (info.config_type == type_cl_name_version_array)
-                {
-                    free(info.config.cl_name_version_array);
-                }
             }
             else
             {
@@ -1179,7 +951,7 @@
                 err = getConfigInfo(device, &info);
                 if (!err)
                 {
-                    dumpConfigInfo(&info);
+                    dumpConfigInfo(device, &info);
                 }
                 else
                 {
@@ -1194,67 +966,6 @@
     return total_errors;
 }
 
-config_info config_platform_infos[] = {
-    // CL_PLATFORM_VERSION has to be first defined with version 0 0.
-    CONFIG_INFO(0, 0, CL_PLATFORM_VERSION, string),
-    CONFIG_INFO(1, 1, CL_PLATFORM_PROFILE, string),
-    CONFIG_INFO(1, 1, CL_PLATFORM_NAME, string),
-    CONFIG_INFO(1, 1, CL_PLATFORM_VENDOR, string),
-    CONFIG_INFO(1, 1, CL_PLATFORM_EXTENSIONS, string),
-    CONFIG_INFO(3, 0, CL_PLATFORM_EXTENSIONS_WITH_VERSION,
-                cl_name_version_array),
-    CONFIG_INFO(3, 0, CL_PLATFORM_NUMERIC_VERSION, cl_name_version)
-};
-
-int getPlatformCapabilities(cl_platform_id platform)
-{
-    int total_errors = 0;
-    version_t version = { 0, 0 }; // Version of the device. Will get real value
-                                  // on the first loop iteration.
-    int err;
-    for (unsigned onConfigInfo = 0; onConfigInfo
-         < sizeof(config_platform_infos) / sizeof(config_platform_infos[0]);
-         onConfigInfo++)
-    {
-        config_info info = config_platform_infos[onConfigInfo];
-
-        if (vercmp(version, info.version) >= 0)
-        {
-            err = getPlatformConfigInfo(platform, &info);
-            if (!err)
-            {
-                dumpConfigInfo(&info);
-                if (info.opcode == CL_PLATFORM_VERSION)
-                {
-                    err = parseVersion(info.config.string, &version);
-                    if (err)
-                    {
-                        total_errors++;
-                        free(info.config.string);
-                        break;
-                    }
-                }
-                if (info.config_type == type_string)
-                {
-                    free(info.config.string);
-                }
-                if (info.config_type == type_cl_name_version_array)
-                {
-                    free(info.config.cl_name_version_array);
-                }
-            }
-            else
-            {
-                total_errors++;
-            }
-        }
-        else
-        {
-            log_info("\tSkipped: %s.\n", info.opcode_name);
-        }
-    }
-    return total_errors;
-}
 
 int test_computeinfo(cl_device_id deviceID, cl_context context,
                      cl_command_queue ignoreQueue, int num_elements)
@@ -1265,11 +976,23 @@
 
     err = clGetPlatformIDs(1, &platform, NULL);
     test_error(err, "clGetPlatformIDs failed");
+    if (err != CL_SUCCESS)
+    {
+        total_errors++;
+    }
 
     // print platform info
     log_info("\nclGetPlatformInfo:\n------------------\n");
-    err = getPlatformCapabilities(platform);
-    test_error(err, "getPlatformCapabilities failed");
+    print_platform_string_selector(platform, "CL_PLATFORM_PROFILE",
+                                   CL_PLATFORM_PROFILE);
+    print_platform_string_selector(platform, "CL_PLATFORM_VERSION",
+                                   CL_PLATFORM_VERSION);
+    print_platform_string_selector(platform, "CL_PLATFORM_NAME",
+                                   CL_PLATFORM_NAME);
+    print_platform_string_selector(platform, "CL_PLATFORM_VENDOR",
+                                   CL_PLATFORM_VENDOR);
+    print_platform_string_selector(platform, "CL_PLATFORM_EXTENSIONS",
+                                   CL_PLATFORM_EXTENSIONS);
     log_info("\n");
 
     // Check to see if this test is being run on a specific device
@@ -1420,16 +1143,10 @@
 
 extern int test_extended_versioning(cl_device_id, cl_context, cl_command_queue,
                                     int);
-extern int test_device_uuid(cl_device_id, cl_context, cl_command_queue, int);
-
-extern int test_conformance_version(cl_device_id, cl_context, cl_command_queue,
-                                    int);
 
 test_definition test_list[] = {
     ADD_TEST(computeinfo),
     ADD_TEST(extended_versioning),
-    ADD_TEST(device_uuid),
-    ADD_TEST_VERSION(conformance_version, Version(3, 0)),
 };
 
 const int test_num = ARRAY_SIZE(test_list);
@@ -1459,5 +1176,6 @@
         }
     }
 
-    return runTestHarness(argCount, argList, test_num, test_list, true, 0);
+    return runTestHarness(argCount, argList, test_num, test_list, false, true,
+                          0);
 }

diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp
index dddebb4..6d80dee 100644
--- a/test_conformance/contractions/contractions.cpp
+++ b/test_conformance/contractions/contractions.cpp

@@ -576,39 +576,56 @@
         "}\n"
         "\n" };
 
-    for (i = 0; i < sizeof(sizeNames) / sizeof(sizeNames[0]); i++)
+    for( i = 0; i < sizeof( sizeNames ) / sizeof( sizeNames[0] ); i++ )
     {
-        size_t strCount = sizeof(kernels) / sizeof(kernels[0]);
+        size_t strCount = sizeof( kernels ) / sizeof( kernels[0] );
         kernels[0] = "";
 
-        for (j = 2; j < strCount; j += 2) kernels[j] = sizeNames[i];
-        error = create_single_kernel_helper(gContext, &gProgram[i], nullptr,
-                                            strCount, kernels, nullptr);
-        if (CL_SUCCESS != error || nullptr == gProgram[i])
+        for( j = 2; j < strCount; j += 2 )
+            kernels[j] = sizeNames[i];
+
+        gProgram[i] = clCreateProgramWithSource(gContext, strCount, kernels, NULL, &error);
+        if( NULL == gProgram[i] )
         {
-            log_error("Error: Unable to create test program! (%s) (in %s:%d)\n",
-                      IGetErrorString(error), __FILE__, __LINE__);
+            vlog_error( "clCreateProgramWithSource failed\n" );
+            return TEST_FAIL;
+        }
+
+        if(( error = clBuildProgram(gProgram[i], 1, &device, NULL, NULL, NULL) ))
+        {
+            vlog_error( "clBuildProgramExecutable failed\n" );
+            char build_log[2048] = "";
+
+            clGetProgramBuildInfo(gProgram[i], device, CL_PROGRAM_BUILD_LOG, sizeof(build_log), build_log, NULL);
+            vlog_error( "Log:\n%s\n", build_log );
             return TEST_FAIL;
         }
     }
 
-    if (gHasDouble)
+    if( gHasDouble )
     {
         kernels[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
-        for (i = 0; i < sizeof(sizeNames_double) / sizeof(sizeNames_double[0]);
-             i++)
+        for( i = 0; i < sizeof( sizeNames_double ) / sizeof( sizeNames_double[0] ); i++ )
         {
-            size_t strCount = sizeof(kernels) / sizeof(kernels[0]);
+            size_t strCount = sizeof( kernels ) / sizeof( kernels[0] );
 
-            for (j = 2; j < strCount; j += 2) kernels[j] = sizeNames_double[i];
-            error = create_single_kernel_helper(gContext, &gProgram_double[i],
-                                                nullptr, strCount, kernels,
-                                                nullptr);
-            if (CL_SUCCESS != error || nullptr == gProgram_double[i])
+            for( j = 2; j < strCount; j += 2 )
+                kernels[j] = sizeNames_double[i];
+
+            gProgram_double[i] = clCreateProgramWithSource(gContext, strCount, kernels, NULL, &error);
+            if( NULL == gProgram_double[i] )
             {
-                log_error(
-                    "Error: Unable to create test program! (%s) (in %s:%d)\n",
-                    IGetErrorString(error), __FILE__, __LINE__);
+                vlog_error( "clCreateProgramWithSource failed\n" );
+                return TEST_FAIL;
+            }
+
+            if(( error = clBuildProgram(gProgram_double[i], 1, &device, NULL, NULL, NULL) ))
+            {
+                vlog_error( "clBuildProgramExecutable failed\n" );
+                char build_log[2048] = "";
+
+                clGetProgramBuildInfo(gProgram_double[i], device, CL_PROGRAM_BUILD_LOG, sizeof(build_log), build_log, NULL);
+                vlog_error( "Log:\n%s\n", build_log );
                 return TEST_FAIL;
             }
         }

diff --git a/test_conformance/conversions/CMakeLists.txt b/test_conformance/conversions/CMakeLists.txt
index 523b6ea..2dd0d83 100644
--- a/test_conformance/conversions/CMakeLists.txt
+++ b/test_conformance/conversions/CMakeLists.txt

@@ -4,7 +4,7 @@
       Sleep.cpp test_conversions.cpp basic_test_conversions.cpp
 )
 
-if("${CLConform_TARGET_ARCH}" STREQUAL "ARM" OR "${CLConform_TARGET_ARCH}" STREQUAL "ARM64")
+if(CMAKE_COMPILER_IS_GNUCXX AND "${CLConform_TARGET_ARCH}" STREQUAL "ARM")
     list(APPEND ${MODULE_NAME}_SOURCES fplib.cpp)
 endif()
 

diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp
index 3299884..d32694a 100644
--- a/test_conformance/conversions/basic_test_conversions.cpp
+++ b/test_conformance/conversions/basic_test_conversions.cpp

@@ -21,11 +21,11 @@
 
 #include "harness/mt19937.h"
 
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+#if defined( __arm__ ) && defined( __GNUC__ )
 #include "fplib.h"
 #endif
 
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+#if defined( __arm__ ) && defined( __GNUC__ )
 /* Rounding modes and saturation for use with qcom 64 bit to float conversion library */
     bool            qcom_sat;
     roundingMode    qcom_rm;
@@ -678,8 +678,7 @@
 static void uint2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uint*) in)[0]; }
 static void uint2float( void *out, void *in)
 {
-    // Use volatile to prevent optimization by Clang compiler
-    volatile cl_uint l = ((cl_uint *)in)[0];
+    cl_uint l = ((cl_uint*) in)[0];
     ((float*) out)[0] = (l == 0 ? 0.0f : (float) l);        // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
 }
 static void uint2double( void *out, void *in)
@@ -760,18 +759,12 @@
     ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result));
 #else
     cl_ulong l = ((cl_ulong*) in)[0];
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
-    /* ARM VFP doesn't have hardware instruction for converting from 64-bit
-     * integer to float types, hence GCC ARM uses the floating-point emulation
-     * code despite which -mfloat-abi setting it is. But the emulation code in
-     * libgcc.a has only one rounding mode (round to nearest even in this case)
+#if defined( __arm__ ) && defined( __GNUC__ )
+    /* ARM VFP doesn't have hardware instruction for converting from 64-bit integer to float types, hence GCC ARM uses the floating-point emulation code
+     * despite which -mfloat-abi setting it is. But the emulation code in libgcc.a has only one rounding mode (round to nearest even in this case)
      * and ignores the user rounding mode setting in hardware.
-     * As a result setting rounding modes in hardware won't give correct
-     * rounding results for type covert from 64-bit integer to float using GCC
-     * for ARM compiler so for testing different rounding modes, we need to use
-     * alternative reference function. ARM64 does have an instruction, however
-     * we cannot guarantee the compiler will use it.  On all ARM architechures
-     * use emulation to calculate reference.*/
+     * As a result setting rounding modes in hardware won't give correct rounding results for type covert from 64-bit integer to float using GCC for ARM compiler
+     * so for testing different rounding modes, we need to use alternative reference function */
     ((float*) out)[0] = qcom_u64_2_f32(l, qcom_sat, qcom_rm);
 #else
     ((float*) out)[0] = (l == 0 ? 0.0f : (float) l);        // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
@@ -792,8 +785,7 @@
 #endif
     ((double*) out)[0] = (l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result));
 #else
-    // Use volatile to prevent optimization by Clang compiler
-    volatile cl_ulong l = ((cl_ulong *)in)[0];
+    cl_ulong l = ((cl_ulong*) in)[0];
     ((double*) out)[0] = (l == 0 ? 0.0 : (double) l);      // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
 #endif
 }
@@ -814,18 +806,12 @@
     ((float*) out)[0] = (l == 0 ? 0.0f : result);        // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
 #else
     cl_long l = ((cl_long*) in)[0];
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
-    /* ARM VFP doesn't have hardware instruction for converting from 64-bit
-     * integer to float types, hence GCC ARM uses the floating-point emulation
-     * code despite which -mfloat-abi setting it is. But the emulation code in
-     * libgcc.a has only one rounding mode (round to nearest even in this case)
+#if defined( __arm__ ) && defined( __GNUC__ )
+    /* ARM VFP doesn't have hardware instruction for converting from 64-bit integer to float types, hence GCC ARM uses the floating-point emulation code
+     * despite which -mfloat-abi setting it is. But the emulation code in libgcc.a has only one rounding mode (round to nearest even in this case)
      * and ignores the user rounding mode setting in hardware.
-     * As a result setting rounding modes in hardware won't give correct
-     * rounding results for type covert from 64-bit integer to float using GCC
-     * for ARM compiler so for testing different rounding modes, we need to use
-     * alternative reference function. ARM64 does have an instruction, however
-     * we cannot guarantee the compiler will use it.  On all ARM architechures
-     * use emulation to calculate reference.*/
+     * As a result setting rounding modes in hardware won't give correct rounding results for type covert from 64-bit integer to float using GCC for ARM compiler
+     * so for testing different rounding modes, we need to use alternative reference function */
     ((float*) out)[0] = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm));
 #else
     ((float*) out)[0] = (l == 0 ? 0.0f : (float) l);        // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0

diff --git a/test_conformance/conversions/fplib.cpp b/test_conformance/conversions/fplib.cpp
index e739b9a..a18b919 100644
--- a/test_conformance/conversions/fplib.cpp
+++ b/test_conformance/conversions/fplib.cpp

@@ -17,9 +17,7 @@
 #include <math.h>
 #include "fplib.h"
 
-#if !defined(FLT_MANT_DIG)
 #define FLT_MANT_DIG    24
-#endif
 #define as_float(x)     (*((float *)(&x)))
 #define as_long(x)      (*((int64_t *)(&x)))
 
@@ -29,8 +27,8 @@
 
     for( num_zeros = 0; num_zeros < (sizeof(uint64_t)*8); num_zeros++)
     {
-        volatile uint64_t v = 0x8000000000000000ull & (value << num_zeros);
-        if (v) break;
+        if(0x8000000000000000 & (value << num_zeros))
+            break;
     }
     return num_zeros;
 }
@@ -147,9 +145,6 @@
                     return as_float(result);
             }
         }
-        case qcomRoundingModeCount: {
-            break; // Avoid build error for unhandled enum value
-        }
     }
     return 0.0f;
 }
@@ -221,9 +216,6 @@
             uint32_t result = exponent | mantissa;
             return as_float(result); // for positive inputs return RTZ result
         }
-        case qcomRoundingModeCount: {
-            break; // Avoid build error for unhandled enum value
-        }
     }
     return 0.0f;
 }

diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp
index 87b8ead..d73df06 100644
--- a/test_conformance/conversions/test_conversions.cpp
+++ b/test_conformance/conversions/test_conversions.cpp

@@ -19,7 +19,7 @@
 #include "harness/testHarness.h"
 #include "harness/kernelHelpers.h"
 #include "harness/parseParameters.h"
-#if defined(__APPLE__)
+#if !defined(_WIN32) && !defined(__ANDROID__)
 #include <sys/sysctl.h>
 #endif
 
@@ -50,6 +50,8 @@
 #include "Sleep.h"
 #include "basic_test_conversions.h"
 
+#pragma STDC FENV_ACCESS ON
+
 #if (defined(_WIN32) && defined (_MSC_VER))
 // need for _controlfp_s and rouinding modes in RoundingMode
 #include "harness/testHarness.h"
@@ -65,7 +67,7 @@
 
 #define      kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */)
 
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+#if defined( __arm__ ) && defined( __GNUC__ )
 #include "fplib.h"
     extern bool            qcom_sat;
     extern roundingMode    qcom_rm;
@@ -317,11 +319,10 @@
     int ret = runTestHarnessWithCheck( 1, arg, test_num, test_list, true, 0, InitCL );
 
     free_mtdata( gMTdata );
-    if (gQueue)
-    {
-        error = clFinish(gQueue);
-        if (error) vlog_error("clFinish failed: %d\n", error);
-    }
+
+    error = clFinish(gQueue);
+    if (error)
+        vlog_error("clFinish failed: %d\n", error);
 
     clReleaseMemObject(gInBuffer);
 
@@ -884,18 +885,12 @@
         if( info->sat )
             f = gSaturatedConversions[ outType ][ inType ];
 
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
-        /* ARM VFP doesn't have hardware instruction for converting from 64-bit
-         * integer to float types, hence GCC ARM uses the floating-point
-         * emulation code despite which -mfloat-abi setting it is. But the
-         * emulation code in libgcc.a has only one rounding mode (round to
-         * nearest even in this case) and ignores the user rounding mode setting
-         * in hardware. As a result setting rounding modes in hardware won't
-         * give correct rounding results for type covert from 64-bit integer to
-         * float using GCC for ARM compiler so for testing different rounding
-         * modes, we need to use alternative reference function. ARM64 does have
-         * an instruction, however we cannot guarantee the compiler will use it.
-         * On all ARM architechures use emulation to calculate reference.*/
+#if defined( __arm__ ) && defined( __GNUC__ )
+       /* ARM VFP doesn't have hardware instruction for converting from 64-bit integer to float types, hence GCC ARM uses the floating-point emulation code
+        * despite which -mfloat-abi setting it is. But the emulation code in libgcc.a has only one rounding mode (round to nearest even in this case)
+        * and ignores the user rounding mode setting in hardware.
+        * As a result setting rounding modes in hardware won't give correct rounding results for type covert from 64-bit integer to float using GCC for ARM compiler
+        * so for testing different rounding modes, we need to use alternative reference function */
         switch (round)
         {
             /* conversions to floating-point type use the current rounding mode.

diff --git a/test_conformance/d3d10/harness.cpp b/test_conformance/d3d10/harness.cpp
index 93f2281..ffdfea5 100644
--- a/test_conformance/d3d10/harness.cpp
+++ b/test_conformance/d3d10/harness.cpp

@@ -367,12 +367,41 @@
         const char *sourceTexts[] = {source};
         size_t sourceLengths[] = {strlen(source) };
 
-        status = create_single_kernel_helper(context, &program, &kernel, 1,
-                                             &sourceTexts[0], entrypoint);
+        status = create_single_kernel_helper_create_program(context, &program, 1, &sourceTexts[0]);
         TestRequire(
             CL_SUCCESS == status,
             "clCreateProgramWithSource failed");
     }
+    status = clBuildProgram(
+        program,
+        0,
+        NULL,
+        NULL,
+        NULL,
+        NULL);
+    if (CL_SUCCESS != status)
+    {
+        char log[2048] = {0};
+        status = clGetProgramBuildInfo(
+            program,
+            device,
+            CL_PROGRAM_BUILD_LOG,
+            sizeof(log),
+            log,
+            NULL);
+        TestPrint("error: %s\n", log);
+        TestRequire(
+            CL_SUCCESS == status,
+            "Compilation error log:\n%s\n", log);
+    }
+
+    kernel = clCreateKernel(
+        program,
+        entrypoint,
+        &status);
+    TestRequire(
+        CL_SUCCESS == status,
+        "clCreateKernel failed");
 
     clReleaseProgram(program);
     *outKernel = kernel;

diff --git a/test_conformance/d3d11/harness.cpp b/test_conformance/d3d11/harness.cpp
index 90ba200..687c6da 100644
--- a/test_conformance/d3d11/harness.cpp
+++ b/test_conformance/d3d11/harness.cpp

@@ -400,10 +400,41 @@
         const char *sourceTexts[] = {source};
         size_t sourceLengths[] = {strlen(source) };
 
-        status = create_single_kernel_helper(context, &program, &kernel, 1,
-                                             &sourceTexts[0], entrypoint);
-        TestRequire(CL_SUCCESS == status, "Kernel creation failed");
+        status = create_single_kernel_helper_create_program(context, &program, 1, &sourceTexts[0]);
+        TestRequire(
+            CL_SUCCESS == status,
+            "clCreateProgramWithSource failed");
     }
+    status = clBuildProgram(
+        program,
+        0,
+        NULL,
+        NULL,
+        NULL,
+        NULL);
+    if (CL_SUCCESS != status)
+    {
+        char log[2048] = {0};
+        status = clGetProgramBuildInfo(
+            program,
+            device,
+            CL_PROGRAM_BUILD_LOG,
+            sizeof(log),
+            log,
+            NULL);
+        TestPrint("error: %s\n", log);
+        TestRequire(
+            CL_SUCCESS == status,
+            "Compilation error log:\n%s\n", log);
+    }
+
+    kernel = clCreateKernel(
+        program,
+        entrypoint,
+        &status);
+    TestRequire(
+        CL_SUCCESS == status,
+        "clCreateKernel failed");
 
     clReleaseProgram(program);
     *outKernel = kernel;

diff --git a/test_conformance/device_execution/CMakeLists.txt b/test_conformance/device_execution/CMakeLists.txt
index 5e9e30e..d99ba21 100644
--- a/test_conformance/device_execution/CMakeLists.txt
+++ b/test_conformance/device_execution/CMakeLists.txt

@@ -8,7 +8,6 @@
     enqueue_multi_queue.cpp
     enqueue_ndrange.cpp
     enqueue_wg_size.cpp
-    enqueue_profiling.cpp
     execute_block.cpp
     host_multi_queue.cpp
     host_queue_order.cpp

diff --git a/test_conformance/device_execution/enqueue_ndrange.cpp b/test_conformance/device_execution/enqueue_ndrange.cpp
index 8ced662..84ac339 100644
--- a/test_conformance/device_execution/enqueue_ndrange.cpp
+++ b/test_conformance/device_execution/enqueue_ndrange.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -27,316 +27,271 @@
 
 #ifdef CL_VERSION_2_0
 extern int gWimpyMode;
-static const char *helper_ndrange_1d_glo[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
-    "memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
-    "atomic_uint* val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int i = 0; i < n; i++)" NL,
-    "  {" NL,
-    "    ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);" NL,
-    "    int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_1d_glo[] =
+{
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int i = 0; i < n; i++)"
+    NL, "  {"
+    NL, "    ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);"
+    NL, "    int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "  }"
+    NL, "}"
+    NL
 };
 
-static const char *helper_ndrange_1d_loc[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
-    "memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
-    "atomic_uint* val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int k = 0; k < n; k++)" NL,
-    "  {" NL,
-    "    for(int i = 0; i < n; i++)" NL,
-    "    {" NL,
-    "      if (glob_size_arr[i] >= loc_size_arr[k])" NL,
-    "      {" NL,
-    "        ndrange_t ndrange = ndrange_1D(glob_size_arr[i], "
-    "loc_size_arr[k]);" NL,
-    "        int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "        if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "      }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_1d_loc[] =
+{
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int k = 0; k < n; k++)"
+    NL, "  {"
+    NL, "    for(int i = 0; i < n; i++)"
+    NL, "    {"
+    NL, "      if (glob_size_arr[i] >= loc_size_arr[k])"
+    NL, "      {"
+    NL, "        ndrange_t ndrange = ndrange_1D(glob_size_arr[i], loc_size_arr[k]);"
+    NL, "        int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "        if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "      }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
 };
 
-static const char *helper_ndrange_1d_ofs[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[(get_global_offset(0) + "
-    "get_global_linear_id()) % len], 1u, memory_order_relaxed, "
-    "memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
-    "atomic_uint* val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int l = 0; l < n; l++)" NL,
-    "  {" NL,
-    "    for(int k = 0; k < n; k++)" NL,
-    "    {" NL,
-    "      for(int i = 0; i < n; i++)" NL,
-    "      {" NL,
-    "        if (glob_size_arr[i] >= loc_size_arr[k])" NL,
-    "        {" NL,
-    "          ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], "
-    "loc_size_arr[k]);" NL,
-    "          int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "          if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "        }" NL,
-    "      }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_1d_ofs[] =
+{
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[(get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int l = 0; l < n; l++)"
+    NL, "  {"
+    NL, "    for(int k = 0; k < n; k++)"
+    NL, "    {"
+    NL, "      for(int i = 0; i < n; i++)"
+    NL, "      {"
+    NL, "        if (glob_size_arr[i] >= loc_size_arr[k])"
+    NL, "        {"
+    NL, "          ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], loc_size_arr[k]);"
+    NL, "          int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "          if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "        }"
+    NL, "      }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
 };
 
-static const char *helper_ndrange_2d_glo[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
-    "memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int i = 0; i < n; i++)" NL,
-    "  {" NL,
-    "    size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] "
-    "};" NL,
-    "    ndrange_t ndrange = ndrange_2D(glob_size);" NL,
-    "    int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_2d_glo[] =
+{
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int i = 0; i < n; i++)"
+    NL, "  {"
+    NL, "    size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };"
+    NL, "    ndrange_t ndrange = ndrange_2D(glob_size);"
+    NL, "    int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "  }"
+    NL, "}"
+    NL
 };
 
-static const char *helper_ndrange_2d_loc[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
-    "memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int k = 0; k < n; k++)" NL,
-    "  {" NL,
-    "    for(int i = 0; i < n; i++)" NL,
-    "    {" NL,
-    "      if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" NL,
-    "      {" NL,
-    "        size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
-    "n] };" NL,
-    "        size_t loc_size[] = { 1, loc_size_arr[k] };" NL,
-    "" NL,
-    "        ndrange_t ndrange = ndrange_2D(glob_size, loc_size);" NL,
-    "        int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "        if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "      }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_2d_loc[] =
+{
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int k = 0; k < n; k++)"
+    NL, "  {"
+    NL, "    for(int i = 0; i < n; i++)"
+    NL, "    {"
+    NL, "      if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])"
+    NL, "      {"
+    NL, "        size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };"
+    NL, "        size_t loc_size[] = { 1, loc_size_arr[k] };"
+    NL, ""
+    NL, "        ndrange_t ndrange = ndrange_2D(glob_size, loc_size);"
+    NL, "        int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "        if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "      }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
 };
 
 
-static const char *helper_ndrange_2d_ofs[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[(get_global_offset(1) * "
-    "get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % "
-    "len], 1u, memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int l = 0; l < n; l++)" NL,
-    "  {" NL,
-    "    for(int k = 0; k < n; k++)" NL,
-    "    {" NL,
-    "      for(int i = 0; i < n; i++)" NL,
-    "      {" NL,
-    "        if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" NL,
-    "        {" NL,
-    "          size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) "
-    "% n]};" NL,
-    "          size_t loc_size[] = { 1, loc_size_arr[k] };" NL,
-    "          size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };" NL,
-    "" NL,
-    "          ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);" NL,
-    "          int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "          if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "        }" NL,
-    "      }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_2d_ofs[] =
+{
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[(get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int l = 0; l < n; l++)"
+    NL, "  {"
+    NL, "    for(int k = 0; k < n; k++)"
+    NL, "    {"
+    NL, "      for(int i = 0; i < n; i++)"
+    NL, "      {"
+    NL, "        if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])"
+    NL, "        {"
+    NL, "          size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n]};"
+    NL, "          size_t loc_size[] = { 1, loc_size_arr[k] };"
+    NL, "          size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };"
+    NL, ""
+    NL, "          ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);"
+    NL, "          int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "          if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "        }"
+    NL, "      }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
 };
 
 
-static const char *helper_ndrange_3d_glo[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
-    "memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int i = 0; i < n; i++)" NL,
-    "  {" NL,
-    "    uint global_work_size = glob_size_arr[i] *  glob_size_arr[(i + 1) % "
-    "n] * glob_size_arr[(i + 2) % n];" NL,
-    "    if (global_work_size <= (len * len))" NL,
-    "    {" NL,
-    "      size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
-    "n], glob_size_arr[(i + 2) % n] };" NL,
-    "      ndrange_t ndrange = ndrange_3D(glob_size);" NL,
-    "      int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "      if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_3d_glo[] =
+{
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int i = 0; i < n; i++)"
+    NL, "  {"
+    NL, "    uint global_work_size = glob_size_arr[i] *  glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
+    NL, "    if (global_work_size <= (len * len))"
+    NL, "    {"
+    NL, "      size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };"
+    NL, "      ndrange_t ndrange = ndrange_3D(glob_size);"
+    NL, "      int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "      if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
 };
 
 
-static const char *helper_ndrange_3d_loc[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
-    "memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int k = 0; k < n; k++)" NL,
-    "  {" NL,
-    "    for(int i = 0; i < n; i++)" NL,
-    "    {" NL,
-    "      uint global_work_size = glob_size_arr[i] *  glob_size_arr[(i + 1) % "
-    "n] * glob_size_arr[(i + 2) % n];" NL,
-    "      if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && "
-    "global_work_size <= (len * len))" NL,
-    "      {" NL,
-    "        size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
-    "n], glob_size_arr[(i + 2) % n] };" NL,
-    "        size_t loc_size[] = { 1, 1, loc_size_arr[k] };" NL,
-    "        ndrange_t ndrange = ndrange_3D(glob_size,loc_size);" NL,
-    "        int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "      " NL,
-    "        if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "      }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_3d_loc[] =
+{
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int k = 0; k < n; k++)"
+    NL, "  {"
+    NL, "    for(int i = 0; i < n; i++)"
+    NL, "    {"
+    NL, "      uint global_work_size = glob_size_arr[i] *  glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
+    NL, "      if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))"
+    NL, "      {"
+    NL, "        size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };"
+    NL, "        size_t loc_size[] = { 1, 1, loc_size_arr[k] };"
+    NL, "        ndrange_t ndrange = ndrange_3D(glob_size,loc_size);"
+    NL, "        int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "      "
+    NL, "        if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "      }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
 };
 
-static const char *helper_ndrange_3d_ofs[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[(get_global_offset(2) * "
-    "get_global_size(0) * get_global_size(1) + get_global_offset(1) * "
-    "get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % "
-    "len], 1u, memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int l = 0; l < n; l++)" NL,
-    "  {" NL,
-    "    for(int k = 0; k < n; k++)" NL,
-    "    {" NL,
-    "      for(int i = 0; i < n; i++)" NL,
-    "      {" NL,
-    "        uint global_work_size = glob_size_arr[i] *  glob_size_arr[(i + 1) "
-    "% n] * glob_size_arr[(i + 2) % n];" NL,
-    "        if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && "
-    "global_work_size <= (len * len))" NL,
-    "        {" NL,
-    "          size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) "
-    "% n], glob_size_arr[(i + 2) % n]};" NL,
-    "          size_t loc_size[3] = { 1, 1, loc_size_arr[k] };" NL,
-    "          size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l "
-    "+ 2) % n] };" NL,
-    "          ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);" NL,
-    "          int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "          if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "        }" NL,
-    "      }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_3d_ofs[] =
+{
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[(get_global_offset(2) * get_global_size(0) * get_global_size(1) + get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int l = 0; l < n; l++)"
+    NL, "  {"
+    NL, "    for(int k = 0; k < n; k++)"
+    NL, "    {"
+    NL, "      for(int i = 0; i < n; i++)"
+    NL, "      {"
+    NL, "        uint global_work_size = glob_size_arr[i] *  glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
+    NL, "        if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))"
+    NL, "        {"
+    NL, "          size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n]};"
+    NL, "          size_t loc_size[3] = { 1, 1, loc_size_arr[k] };"
+    NL, "          size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l + 2) % n] };"
+    NL, "          ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);"
+    NL, "          int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "          if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "        }"
+    NL, "      }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
 };
 
 static const kernel_src_dim_check sources_ndrange_Xd[] =

diff --git a/test_conformance/device_execution/enqueue_profiling.cpp b/test_conformance/device_execution/enqueue_profiling.cpp
deleted file mode 100644
index b9e1a17..0000000
--- a/test_conformance/device_execution/enqueue_profiling.cpp
+++ /dev/null

@@ -1,160 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <string.h>
-#include "harness/testHarness.h"
-#include "harness/typeWrappers.h"
-
-#include <vector>
-
-#include "procs.h"
-#include "utils.h"
-#include <time.h>
-
-static int max_nestingLevel = 10;
-
-static const char* enqueue_multi_level = R"(
-    void block_fn(__global int* res, int level)
-    {
-      queue_t def_q = get_default_queue();
-      if(--level < 0) return;
-      void (^kernelBlock)(void) = ^{ block_fn(res, level); };
-      ndrange_t ndrange = ndrange_1D(1);
-      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
-      if(enq_res != CLK_SUCCESS) { (*res) = -1; return; }
-      else if(*res != -1) { (*res)++; }
-    }
-    kernel void enqueue_multi_level(__global int* res, int level)
-    {
-      *res = 0;
-      block_fn(res, level);
-    })";
-
-int test_enqueue_profiling(cl_device_id device, cl_context context,
-                           cl_command_queue queue, int num_elements)
-{
-    cl_int err_ret, res = 0;
-    clCommandQueueWrapper dev_queue;
-    clCommandQueueWrapper host_queue;
-
-    cl_uint maxQueueSize = 0;
-    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
-                              sizeof(maxQueueSize), &maxQueueSize, 0);
-    test_error(err_ret,
-               "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
-
-    cl_queue_properties dev_queue_prop_def[] = {
-        CL_QUEUE_PROPERTIES,
-        CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE
-            | CL_QUEUE_ON_DEVICE_DEFAULT | CL_QUEUE_PROFILING_ENABLE,
-        CL_QUEUE_SIZE, maxQueueSize, 0
-    };
-
-    dev_queue = clCreateCommandQueueWithProperties(
-        context, device, dev_queue_prop_def, &err_ret);
-    test_error(err_ret,
-               "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_"
-               "DEFAULT) failed");
-
-    cl_queue_properties host_queue_prop_def[] = { CL_QUEUE_PROPERTIES,
-                                                  CL_QUEUE_PROFILING_ENABLE,
-                                                  0 };
-
-    host_queue = clCreateCommandQueueWithProperties(
-        context, device, host_queue_prop_def, &err_ret);
-    test_error(err_ret,
-               "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_"
-               "DEFAULT) failed");
-
-    cl_int status;
-    size_t size = 1;
-    cl_int result = 0;
-
-    clMemWrapper res_mem;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    cl_event kernel_event;
-
-    err_ret = create_single_kernel_helper(context, &program, &kernel, 1,
-                                          &enqueue_multi_level,
-                                          "enqueue_multi_level");
-    if (check_error(err_ret, "Create single kernel failed")) return -1;
-
-    res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                             sizeof(result), &result, &err_ret);
-    test_error(err_ret, "clCreateBuffer() failed");
-
-    err_ret = clSetKernelArg(kernel, 0, sizeof(res_mem), &res_mem);
-    test_error(err_ret, "clSetKernelArg(0) failed");
-
-    for (int level = 0; level < max_nestingLevel; level++)
-    {
-        err_ret = clSetKernelArg(kernel, 1, sizeof(level), &level);
-        test_error(err_ret, "clSetKernelArg(1) failed");
-
-        err_ret = clEnqueueNDRangeKernel(host_queue, kernel, 1, NULL, &size,
-                                         &size, 0, NULL, &kernel_event);
-        test_error(err_ret,
-                   "clEnqueueNDRangeKernel('enqueue_multi_level') failed");
-
-        err_ret = clEnqueueReadBuffer(host_queue, res_mem, CL_TRUE, 0,
-                                      sizeof(result), &result, 0, NULL, NULL);
-        test_error(err_ret, "clEnqueueReadBuffer() failed");
-
-        if (result != level)
-        {
-            log_error("Kernel execution should return the maximum nesting "
-                      " level (got %d instead of %d)",
-                      result, level);
-            return -1;
-        }
-
-        err_ret =
-            clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS,
-                           sizeof(status), &status, NULL);
-        test_error(err_ret, "clGetEventInfo() failed");
-
-        if (check_error(status, "Kernel execution status %d", status))
-            return status;
-
-        cl_ulong end;
-        err_ret = clGetEventProfilingInfo(
-            kernel_event, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL);
-        test_error(err_ret, "clGetEventProfilingInfo() failed");
-
-        cl_ulong complete;
-        err_ret =
-            clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_COMPLETE,
-                                    sizeof(complete), &complete, NULL);
-        test_error(err_ret, "clGetEventProfilingInfo() failed");
-
-        if (end > complete)
-        {
-            log_error(
-                "Profiling END should be smaller than or equal to COMPLETE for "
-                "kernels that use the on-device queue");
-            return -1;
-        }
-
-        log_info("Profiling info for '%s' kernel is OK for level %d.\n",
-                 "enqueue_multi_level", level);
-
-        clReleaseEvent(kernel_event);
-    }
-
-    return res;
-}

diff --git a/test_conformance/device_execution/host_multi_queue.cpp b/test_conformance/device_execution/host_multi_queue.cpp
index 661d33d..e9a675c 100644
--- a/test_conformance/device_execution/host_multi_queue.cpp
+++ b/test_conformance/device_execution/host_multi_queue.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -184,11 +184,7 @@
                 global = 16;
             }
 
-            err_ret |= create_single_kernel_helper(
-                context, &program[i], &kernel[i],
-                sources_multi_queue_block[i].num_lines,
-                sources_multi_queue_block[i].lines,
-                sources_multi_queue_block[i].kernel_name);
+            err_ret |= create_single_kernel_helper_with_build_options(context, &program[i], &kernel[i], sources_multi_queue_block[i].num_lines, sources_multi_queue_block[i].lines, sources_multi_queue_block[i].kernel_name, "-cl-std=CL2.0");
             if(check_error(err_ret, "Create single kernel failed")) { res = -1; break; }
 
             mem[i] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(kernel_results), kernel_results, &err_ret);

diff --git a/test_conformance/device_execution/host_queue_order.cpp b/test_conformance/device_execution/host_queue_order.cpp
index 2b5688d..5dce160 100644
--- a/test_conformance/device_execution/host_queue_order.cpp
+++ b/test_conformance/device_execution/host_queue_order.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -133,14 +133,10 @@
 
     cl_event kernel_event;
 
-    err_ret = create_single_kernel_helper(
-        context, &program1, &kernel1, arr_size(enqueue_block_first_kernel),
-        enqueue_block_first_kernel, "enqueue_block_first_kernel");
+    err_ret = create_single_kernel_helper_with_build_options(context, &program1, &kernel1,  arr_size(enqueue_block_first_kernel), enqueue_block_first_kernel, "enqueue_block_first_kernel", "-cl-std=CL2.0");
     if(check_error(err_ret, "Create single kernel failed")) return -1;
 
-    err_ret = create_single_kernel_helper(
-        context, &program2, &kernel2, arr_size(enqueue_block_second_kernel),
-        enqueue_block_second_kernel, "enqueue_block_second_kernel");
+    err_ret = create_single_kernel_helper_with_build_options(context, &program2, &kernel2, arr_size(enqueue_block_second_kernel), enqueue_block_second_kernel, "enqueue_block_second_kernel", "-cl-std=CL2.0");
     if(check_error(err_ret, "Create single kernel failed")) return -1;
 
     res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, sizeof(kernel_results), kernel_results, &err_ret);

diff --git a/test_conformance/device_execution/main.cpp b/test_conformance/device_execution/main.cpp
index a3d0d8d..4c0b692 100644
--- a/test_conformance/device_execution/main.cpp
+++ b/test_conformance/device_execution/main.cpp

@@ -31,22 +31,18 @@
 test_status InitCL(cl_device_id device) {
   auto version = get_device_cl_version(device);
   auto expected_min_version = Version(2, 0);
-  if (version < expected_min_version)
-  {
-      version_expected_info("Test", "OpenCL",
-                            expected_min_version.to_string().c_str(),
-                            version.to_string().c_str());
-      return TEST_SKIP;
+  if (version < expected_min_version) {
+    version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
+    return TEST_SKIP;
   }
 
   int error;
   cl_uint max_queues_size;
   error = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES,
                           sizeof(max_queues_size), &max_queues_size, NULL);
-  if (error != CL_SUCCESS)
-  {
-      print_error(error, "Unable to get max queues on device");
-      return TEST_FAIL;
+  if (error != CL_SUCCESS) {
+    print_error(error, "Unable to get max queues on device");
+    return TEST_FAIL;
   }
 
   if ((max_queues_size == 0) && (version >= Version(3, 0)))
@@ -58,12 +54,17 @@
 }
 
 test_definition test_list[] = {
-    ADD_TEST(device_info),           ADD_TEST(device_queue),
-    ADD_TEST(execute_block),         ADD_TEST(enqueue_block),
-    ADD_TEST(enqueue_nested_blocks), ADD_TEST(enqueue_wg_size),
-    ADD_TEST(enqueue_flags),         ADD_TEST(enqueue_multi_queue),
-    ADD_TEST(host_multi_queue),      ADD_TEST(enqueue_ndrange),
-    ADD_TEST(host_queue_order),      ADD_TEST(enqueue_profiling),
+    ADD_TEST( device_info ),
+    ADD_TEST( device_queue ),
+    ADD_TEST( execute_block ),
+    ADD_TEST( enqueue_block ),
+    ADD_TEST( enqueue_nested_blocks ),
+    ADD_TEST( enqueue_wg_size ),
+    ADD_TEST( enqueue_flags ),
+    ADD_TEST( enqueue_multi_queue ),
+    ADD_TEST( host_multi_queue ),
+    ADD_TEST( enqueue_ndrange ),
+    ADD_TEST( host_queue_order ),
 };
 
 const int test_num = ARRAY_SIZE( test_list );

diff --git a/test_conformance/device_execution/procs.h b/test_conformance/device_execution/procs.h
index 087dafc..8f668ed 100644
--- a/test_conformance/device_execution/procs.h
+++ b/test_conformance/device_execution/procs.h

@@ -26,8 +26,6 @@
 extern int test_host_multi_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_enqueue_ndrange(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_host_queue_order(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_enqueue_profiling(cl_device_id device, cl_context context,
-                                  cl_command_queue queue, int num_elements);
 
 extern int test_execution_stress(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 

diff --git a/test_conformance/device_execution/utils.cpp b/test_conformance/device_execution/utils.cpp
index 05b6949..66a2211 100644
--- a/test_conformance/device_execution/utils.cpp
+++ b/test_conformance/device_execution/utils.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -40,8 +40,7 @@
     cl_uint i;
     size_t ret_len;
 
-    err_ret = create_single_kernel_helper(context, &program, &kernel, num_lines,
-                                          source, kernel_name);
+    err_ret = create_single_kernel_helper_with_build_options(context, &program, &kernel, num_lines, source, kernel_name, "-cl-std=CL2.0");
     if(check_error(err_ret, "Create single kernel failed")) return -1;
 
     mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, res_size, results, &err_ret);

diff --git a/test_conformance/device_partition/main.cpp b/test_conformance/device_partition/main.cpp
index a8af6ff..f5f081e 100644
--- a/test_conformance/device_partition/main.cpp
+++ b/test_conformance/device_partition/main.cpp

@@ -41,5 +41,5 @@
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_num, test_list, true, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, true, 0 );
 }

diff --git a/test_conformance/device_partition/test_device_partition.cpp b/test_conformance/device_partition/test_device_partition.cpp
index f9952ec..b90fca8 100644
--- a/test_conformance/device_partition/test_device_partition.cpp
+++ b/test_conformance/device_partition/test_device_partition.cpp

@@ -265,8 +265,7 @@
     for( i = 0; i < TEST_SIZE; i++ )
         data[i] = genrand_int32(seed);
 
-    stream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                            sizeof(cl_int) * TEST_SIZE, data, &error);
+    stream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * TEST_SIZE, data, &error);
     test_error( error, "Unable to create test array" );
 
     // Update the expected results

diff --git a/test_conformance/device_timer/main.cpp b/test_conformance/device_timer/main.cpp
index 1c460af..9539143 100644
--- a/test_conformance/device_timer/main.cpp
+++ b/test_conformance/device_timer/main.cpp

@@ -30,37 +30,34 @@
     ADD_TEST( device_and_host_timers ),
 };
 
-test_status InitCL(cl_device_id device)
-{
-    auto version = get_device_cl_version(device);
-    auto expected_min_version = Version(2, 1);
-    cl_platform_id platform;
-    cl_ulong timer_res;
-    cl_int error;
+test_status InitCL(cl_device_id device) {
+	auto version = get_device_cl_version(device);
+	auto expected_min_version = Version(2, 1);
+	cl_platform_id platform;
+	cl_ulong timer_res;
+	cl_int error;
 
-    if (version < expected_min_version)
-    {
-        version_expected_info("Test", "OpenCL",
-                              expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
-        return TEST_SKIP;
-    }
+	if (version < expected_min_version)
+	{
+		version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
+		return TEST_SKIP;
+	}
 
-    error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
-                            &platform, NULL);
-    if (error != CL_SUCCESS)
-    {
-        print_error(error, "Unable to get device platform");
-        return TEST_FAIL;
-    }
+	error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
+	                        sizeof(platform), &platform, NULL);
+	if (error != CL_SUCCESS)
+	{
+		print_error(error, "Unable to get device platform");
+		return TEST_FAIL;
+	}
 
-    error = clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
-                              sizeof(timer_res), &timer_res, NULL);
-    if (error != CL_SUCCESS)
-    {
-        print_error(error, "Unable to get host timer capabilities");
-        return TEST_FAIL;
-    }
+	error = clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
+	                          sizeof(timer_res), &timer_res, NULL);
+	if (error != CL_SUCCESS)
+	{
+		print_error(error, "Unable to get host timer capabilities");
+		return TEST_FAIL;
+	}
 
     if ((timer_res == 0) && (version >= Version(3, 0)))
     {

diff --git a/test_conformance/events/action_classes.cpp b/test_conformance/events/action_classes.cpp
index d70d76b..122c21f 100644
--- a/test_conformance/events/action_classes.cpp
+++ b/test_conformance/events/action_classes.cpp

@@ -145,11 +145,9 @@
     error = get_max_common_work_group_size( context, mKernel, threads[0], &mLocalThreads[0] );
     test_error( error, "Unable to get work group size to use" );
 
-    mStreams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                 sizeof(cl_float) * 1000, NULL, &error);
+    mStreams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1000, NULL, &error );
     test_error( error, "Creating test array failed" );
-    mStreams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                 sizeof(cl_int) * 1000, NULL, &error);
+    mStreams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 1000, NULL, &error );
     test_error( error, "Creating test array failed" );
 
     /* Set the arguments */

diff --git a/test_conformance/events/main.cpp b/test_conformance/events/main.cpp
index 777d2d3..2aafb0e 100644
--- a/test_conformance/events/main.cpp
+++ b/test_conformance/events/main.cpp

@@ -62,6 +62,6 @@
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
 }
 

diff --git a/test_conformance/events/test_event_dependencies.cpp b/test_conformance/events/test_event_dependencies.cpp
index 4113654..0ab0f2a 100644
--- a/test_conformance/events/test_event_dependencies.cpp
+++ b/test_conformance/events/test_event_dependencies.cpp

@@ -203,7 +203,7 @@
     // then incremented to 5s, repeatedly. Otherwise the values may be 2s (if the first one doesn't work) or 8s
     // (if the second one doesn't work).
     if (RANDOMIZE)
-        log_info("Queues chosen randomly for each kernel execution.\n");
+        log_info("Queues chosen randomly for each kernel exection.\n");
     else
         log_info("Queues chosen alternatily for each kernel execution.\n");
 

diff --git a/test_conformance/events/test_events.cpp b/test_conformance/events/test_events.cpp
index 26693f9..c2524b1 100644
--- a/test_conformance/events/test_events.cpp
+++ b/test_conformance/events/test_events.cpp

@@ -47,11 +47,9 @@
     error = get_max_common_work_group_size( inContext, *outKernel, threads[0], &localThreads[0] );
     test_error( error, "Unable to get work group size to use" );
 
-    streams[0] = clCreateBuffer(inContext, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 1000, NULL, &error);
+    streams[0] = clCreateBuffer(inContext, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1000, NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(inContext, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * 1000, NULL, &error);
+    streams[1] = clCreateBuffer(inContext, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 1000, NULL, &error);
     test_error( error, "Creating test array failed" );
 
     /* Set the arguments */
@@ -180,8 +178,7 @@
     cl_int status;
 
 
-    stream = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                            sizeof(cl_float) * 1024 * 32, NULL, &error);
+    stream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1024 * 32, NULL, &error );
     test_error( error, "Creating test array failed" );
 
     error = clEnqueueWriteBuffer(queue, stream, CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)testArray, 0, NULL, &event);
@@ -215,8 +212,7 @@
     cl_int status;
 
 
-    stream = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                            sizeof(cl_float) * 1024 * 32, NULL, &error);
+    stream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1024 * 32, NULL, &error );
     test_error( error, "Creating test array failed" );
 
     error = clEnqueueReadBuffer(queue, stream, CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)testArray, 0, NULL, &event);
@@ -286,11 +282,9 @@
     cl_int status;
 
 
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 1024 * 32, NULL, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1024 * 32, NULL, &error );
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 1024 * 32, NULL, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1024 * 32, NULL, &error );
     test_error( error, "Creating test array failed" );
 
     error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)readArray, 0, NULL, &events[0]);
@@ -427,11 +421,9 @@
     cl_int status;
 
 
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 1024 * 32, NULL, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1024 * 32, NULL, &error );
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 1024 * 32, NULL, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1024 * 32, NULL, &error );
     test_error( error, "Creating test array failed" );
 
     error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)readArray, 0, NULL, &events[0]);
@@ -517,12 +509,8 @@
     // Create a set of streams to use as arguments
     for( i = 0; i < NUM_EVENT_RUNS; i++ )
     {
-        streams[i][0] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * threads[0], NULL, &error);
-        streams[i][1] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_int) * threads[0], NULL, &error);
+        streams[i][0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * threads[0], NULL, &error );
+        streams[i][1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * threads[0], NULL, &error );
         if( ( streams[i][0] == NULL ) || ( streams[i][1] == NULL ) )
         {
             log_error( "ERROR: Unable to allocate testing streams" );

diff --git a/test_conformance/events/test_userevents.cpp b/test_conformance/events/test_userevents.cpp
index 0a4954f..7ad348f 100644
--- a/test_conformance/events/test_userevents.cpp
+++ b/test_conformance/events/test_userevents.cpp

@@ -207,10 +207,6 @@
     log_info("Unsuccessful user event case passed.\n");
   }
 
-  clReleaseKernel(k0);
-  clReleaseProgram(program);
-  clReleaseMemObject(output);
-
   return 0;
 
 }

diff --git a/test_conformance/events/test_userevents_multithreaded.cpp b/test_conformance/events/test_userevents_multithreaded.cpp
index 51ef222..2340790 100644
--- a/test_conformance/events/test_userevents_multithreaded.cpp
+++ b/test_conformance/events/test_userevents_multithreaded.cpp

@@ -16,19 +16,28 @@
 #include "testBase.h"
 #include "action_classes.h"
 #include "harness/conversions.h"
-
-#include <thread>
+#include "harness/genericThread.h"
 
 #if !defined (_MSC_VER)
     #include <unistd.h>
 #endif // !_MSC_VER
 
-void trigger_user_event(cl_event *event)
+class releaseEvent_thread : public genericThread
 {
-    usleep(1000000);
-    log_info("\tTriggering gate from separate thread...\n");
-    clSetUserEventStatus(*event, CL_COMPLETE);
-}
+    public:
+        releaseEvent_thread( cl_event *event ) : mEvent( event ) {}
+
+        cl_event * mEvent;
+
+    protected:
+        virtual void *    IRun( void )
+        {
+            usleep( 1000000 );
+            log_info( "\tTriggering gate from separate thread...\n" );
+            clSetUserEventStatus( *mEvent, CL_COMPLETE );
+            return NULL;
+        }
+};
 
 int test_userevents_multithreaded( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
 {
@@ -57,14 +66,14 @@
     }
 
     // Now, instead of releasing the gate, we spawn a separate thread to do so
+    releaseEvent_thread thread( &gateEvent );
     log_info( "\tStarting trigger thread...\n" );
-    std::thread thread(trigger_user_event, &gateEvent);
+    thread.Start();
 
     log_info( "\tWaiting for actions...\n" );
     error = clWaitForEvents( 3, &actionEvents[ 0 ] );
     test_error( error, "Unable to wait for action events" );
 
-    thread.join();
     log_info( "\tActions completed.\n" );
 
     // If we got here without error, we're good

diff --git a/test_conformance/extensions/CMakeLists.txt b/test_conformance/extensions/CMakeLists.txt
deleted file mode 100644
index 53d77ee..0000000
--- a/test_conformance/extensions/CMakeLists.txt
+++ /dev/null

@@ -1,2 +0,0 @@
-add_subdirectory( cl_ext_cxx_for_opencl )
-add_subdirectory( cl_khr_dx9_media_sharing )

diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/CMakeLists.txt b/test_conformance/extensions/cl_ext_cxx_for_opencl/CMakeLists.txt
deleted file mode 100644
index fd397c3..0000000
--- a/test_conformance/extensions/cl_ext_cxx_for_opencl/CMakeLists.txt
+++ /dev/null

@@ -1,9 +0,0 @@
-set(MODULE_NAME CL_EXT_CXX_FOR_OPENCL)
-
-set(${MODULE_NAME}_SOURCES
-    main.cpp
-    cxx_for_opencl_ext.cpp
-    cxx_for_opencl_ver.cpp
-)
-
-include(../../CMakeCommon.txt)

diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp
deleted file mode 100644
index 1d5252c..0000000
--- a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp
+++ /dev/null

@@ -1,104 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "procs.h"
-
-
-int test_cxx_for_opencl(cl_device_id device, cl_context context,
-                        cl_command_queue queue)
-{
-    cl_int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel1;
-    clKernelWrapper kernel2;
-    clMemWrapper in_buffer;
-    clMemWrapper out_buffer;
-    cl_int value = 7;
-
-    const char *kernel_sstr =
-        R"(
-        __global int x;
-        template<typename T>
-        void execute(T &a, const T &b) {
-            a = b * 2;
-        }
-        __kernel void k1(__global int *p) {
-            execute(x, *p);
-        }
-        __kernel void k2(__global int *p) {
-            execute(*p, x);
-        })";
-
-    error = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel1, 1, &kernel_sstr, "k1", "-cl-std=CLC++");
-    test_error(error, "Failed to create k1 kernel");
-
-    kernel2 = clCreateKernel(program, "k2", &error);
-    test_error(error, "Failed to create k2 kernel");
-
-    in_buffer =
-        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                       sizeof(value), &value, &error);
-    test_error(error, "clCreateBuffer failed");
-
-    out_buffer =
-        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                       sizeof(value), &value, &error);
-    test_error(error, "clCreateBuffer failed");
-
-    error = clSetKernelArg(kernel1, 0, sizeof(in_buffer), &in_buffer);
-    test_error(error, "clSetKernelArg failed");
-
-    error = clSetKernelArg(kernel2, 0, sizeof(out_buffer), &out_buffer);
-    test_error(error, "clSetKernelArg failed");
-
-    size_t global_size = 1;
-    error = clEnqueueNDRangeKernel(queue, kernel1, 1, nullptr, &global_size,
-                                   nullptr, 0, nullptr, nullptr);
-    test_error(error, "clEnqueueNDRangeKernel failed");
-
-    error = clEnqueueNDRangeKernel(queue, kernel2, 1, nullptr, &global_size,
-                                   nullptr, 0, nullptr, nullptr);
-    test_error(error, "clEnqueueNDRangeKernel failed");
-
-    error = clEnqueueReadBuffer(queue, out_buffer, CL_BLOCKING, 0,
-                                sizeof(value), &value, 0, nullptr, nullptr);
-    test_error(error, "clEnqueueReadBuffer failed");
-
-    error = clFinish(queue);
-    test_error(error, "clFinish failed");
-
-    if (value != 28)
-    {
-        log_error("ERROR: Kernel wrote %lu, expected 28\n",
-                  static_cast<long unsigned>(value));
-        return TEST_FAIL;
-    }
-
-    return TEST_PASS;
-}
-
-int test_cxx_for_opencl_ext(cl_device_id device, cl_context context,
-                            cl_command_queue queue, int)
-{
-    if (!is_extension_available(device, "cl_ext_cxx_for_opencl"))
-    {
-        log_info("Device does not support 'cl_ext_cxx_for_opencl'. Skipping "
-                 "the test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    return test_cxx_for_opencl(device, context, queue);
-}

diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ver.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ver.cpp
deleted file mode 100644
index 0376081..0000000
--- a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ver.cpp
+++ /dev/null

@@ -1,102 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "procs.h"
-
-
-int test_cxx_for_opencl_version(cl_device_id device, cl_context context,
-                                cl_command_queue queue)
-{
-    cl_int cxx4opencl_version;
-    cl_int cxx4opencl_expected_version;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    cl_int error;
-    cl_int value = 0;
-    const char *kernel_sstr =
-        R"(
-        __kernel void k(__global int* buf) {
-            buf[0] = __OPENCL_CPP_VERSION__;
-        })";
-    const size_t lengths[1] = { std::string{ kernel_sstr }.size() };
-
-    clProgramWrapper writer_program =
-        clCreateProgramWithSource(context, 1, &kernel_sstr, lengths, &error);
-    test_error(error, "Failed to create program with source");
-
-    error = clCompileProgram(writer_program, 1, &device, "-cl-std=CLC++", 0,
-                             nullptr, nullptr, nullptr, nullptr);
-    test_error(error, "Failed to compile program");
-
-    cl_program progs[1] = { writer_program };
-    program = clLinkProgram(context, 1, &device, "", 1, progs, 0, 0, &error);
-    test_error(error, "Failed to link program");
-
-    clMemWrapper out =
-        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                       sizeof(cxx4opencl_version), &cxx4opencl_version, &error);
-    test_error(error, "clCreateBuffer failed");
-
-    kernel = clCreateKernel(program, "k", &error);
-    test_error(error, "Failed to create k kernel");
-
-    error = clSetKernelArg(kernel, 0, sizeof(out), &out);
-    test_error(error, "clSetKernelArg failed");
-
-    size_t global_size = 1;
-    error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &global_size,
-                                   nullptr, 0, nullptr, nullptr);
-    test_error(error, "clEnqueueNDRangeKernel failed");
-
-    error = clEnqueueReadBuffer(queue, out, CL_BLOCKING, 0,
-                                sizeof(cxx4opencl_version), &cxx4opencl_version,
-                                0, nullptr, nullptr);
-    test_error(error, "clEnqueueReadBuffer failed");
-
-    error = clFinish(queue);
-    test_error(error, "clFinish failed");
-
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT,
-                        sizeof(value), &value, nullptr);
-    test_error(error, "Failed to get device info");
-
-    cxx4opencl_expected_version = CL_VERSION_MAJOR_KHR(value) * 100
-        + CL_VERSION_MINOR_KHR(value) * 10 + CL_VERSION_PATCH_KHR(value);
-
-    if (cxx4opencl_version != cxx4opencl_expected_version)
-    {
-        log_error("ERROR: C++ for OpenCL version mismatch - returned %lu, "
-                  "expected %lu\n",
-                  static_cast<long unsigned>(value),
-                  static_cast<long unsigned>(cxx4opencl_expected_version));
-        return TEST_FAIL;
-    }
-
-    return TEST_PASS;
-}
-
-int test_cxx_for_opencl_ver(cl_device_id device, cl_context context,
-                            cl_command_queue queue, int)
-{
-    if (!is_extension_available(device, "cl_ext_cxx_for_opencl"))
-    {
-        log_info("Device does not support 'cl_ext_cxx_for_opencl'. Skipping "
-                 "the test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    return test_cxx_for_opencl_version(device, context, queue);
-}

diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/main.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/main.cpp
deleted file mode 100644
index 5e8c14a..0000000
--- a/test_conformance/extensions/cl_ext_cxx_for_opencl/main.cpp
+++ /dev/null

@@ -1,28 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "procs.h"
-
-test_definition test_list[] = {
-    ADD_TEST_VERSION(cxx_for_opencl_ext, Version(2, 0)),
-    ADD_TEST_VERSION(cxx_for_opencl_ver, Version(2, 0))
-};
-
-int main(int argc, const char *argv[])
-{
-    return runTestHarnessWithCheck(argc, argv, ARRAY_SIZE(test_list), test_list,
-                                   false, 0, nullptr);
-}

diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/procs.h b/test_conformance/extensions/cl_ext_cxx_for_opencl/procs.h
deleted file mode 100644
index 5665e01..0000000
--- a/test_conformance/extensions/cl_ext_cxx_for_opencl/procs.h
+++ /dev/null

@@ -1,26 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef _procs_h
-#define _procs_h
-
-#include "harness/typeWrappers.h"
-
-extern int test_cxx_for_opencl_ext(cl_device_id device, cl_context context,
-                                   cl_command_queue queue, int);
-extern int test_cxx_for_opencl_ver(cl_device_id device, cl_context context,
-                                   cl_command_queue queue, int);
-
-#endif /*_procs_h*/

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/CMakeLists.txt b/test_conformance/extensions/cl_khr_dx9_media_sharing/CMakeLists.txt
deleted file mode 100644
index 1ec2a33..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/CMakeLists.txt
+++ /dev/null

@@ -1,25 +0,0 @@
-if(WIN32)
-list(APPEND CLConform_LIBRARIES d3d9.lib dxva2.lib )
-set(MODULE_NAME MEDIA_SURFACE_SHARING)
-
-set(${MODULE_NAME}_SOURCES
-        main.cpp
-        wrappers.cpp
-        utils.cpp
-        test_create_context.cpp
-        test_functions_api.cpp
-        test_functions_kernel.cpp
-        test_get_device_ids.cpp
-        test_interop_sync.cpp
-        test_memory_access.cpp
-        test_other_data_types.cpp
-    )
-
-set_source_files_properties(
-        ${MODULE_NAME}_SOURCES
-        PROPERTIES LANGUAGE CXX)
-
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-
-include(../../CMakeCommon.txt)
-endif(WIN32)

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/main.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/main.cpp
deleted file mode 100644
index 8b70917..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/main.cpp
+++ /dev/null

@@ -1,231 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "harness/testHarness.h"
-#include "utils.h"
-#include "procs.h"
-
-
-test_definition test_list[] = { ADD_TEST(context_create),
-                                ADD_TEST(get_device_ids),
-                                ADD_TEST(api),
-                                ADD_TEST(kernel),
-                                ADD_TEST(other_data_types),
-                                ADD_TEST(memory_access),
-                                ADD_TEST(interop_user_sync) };
-
-const int test_num = ARRAY_SIZE(test_list);
-
-clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR =
-    NULL;
-clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR = NULL;
-clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR =
-    NULL;
-clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR =
-    NULL;
-
-cl_platform_id gPlatformIDdetected;
-cl_device_id gDeviceIDdetected;
-cl_device_type gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT;
-
-bool MediaSurfaceSharingExtensionInit()
-{
-    clGetDeviceIDsFromDX9MediaAdapterKHR =
-        (clGetDeviceIDsFromDX9MediaAdapterKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                gPlatformIDdetected, "clGetDeviceIDsFromDX9MediaAdapterKHR");
-    if (clGetDeviceIDsFromDX9MediaAdapterKHR == NULL)
-    {
-        log_error("clGetExtensionFunctionAddressForPlatform("
-                  "clGetDeviceIDsFromDX9MediaAdapterKHR) returned NULL.\n");
-        return false;
-    }
-
-    clCreateFromDX9MediaSurfaceKHR = (clCreateFromDX9MediaSurfaceKHR_fn)
-        clGetExtensionFunctionAddressForPlatform(
-            gPlatformIDdetected, "clCreateFromDX9MediaSurfaceKHR");
-    if (clCreateFromDX9MediaSurfaceKHR == NULL)
-    {
-        log_error("clGetExtensionFunctionAddressForPlatform("
-                  "clCreateFromDX9MediaSurfaceKHR) returned NULL.\n");
-        return false;
-    }
-
-    clEnqueueAcquireDX9MediaSurfacesKHR =
-        (clEnqueueAcquireDX9MediaSurfacesKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                gPlatformIDdetected, "clEnqueueAcquireDX9MediaSurfacesKHR");
-    if (clEnqueueAcquireDX9MediaSurfacesKHR == NULL)
-    {
-        log_error("clGetExtensionFunctionAddressForPlatform("
-                  "clEnqueueAcquireDX9MediaSurfacesKHR) returned NULL.\n");
-        return false;
-    }
-
-    clEnqueueReleaseDX9MediaSurfacesKHR =
-        (clEnqueueReleaseDX9MediaSurfacesKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                gPlatformIDdetected, "clEnqueueReleaseDX9MediaSurfacesKHR");
-    if (clEnqueueReleaseDX9MediaSurfacesKHR == NULL)
-    {
-        log_error("clGetExtensionFunctionAddressForPlatform("
-                  "clEnqueueReleaseDX9MediaSurfacesKHR) returned NULL.\n");
-        return false;
-    }
-
-    return true;
-}
-
-bool DetectPlatformAndDevice()
-{
-    std::vector<cl_platform_id> platforms;
-    cl_uint platformsNum = 0;
-    cl_int error = clGetPlatformIDs(0, 0, &platformsNum);
-    if (error != CL_SUCCESS)
-    {
-        print_error(error, "clGetPlatformIDs failed\n");
-        return false;
-    }
-
-    platforms.resize(platformsNum);
-    error = clGetPlatformIDs(platformsNum, &platforms[0], 0);
-    if (error != CL_SUCCESS)
-    {
-        print_error(error, "clGetPlatformIDs failed\n");
-        return false;
-    }
-
-    bool found = false;
-    for (size_t i = 0; i < platformsNum; ++i)
-    {
-        std::vector<cl_device_id> devices;
-        cl_uint devicesNum = 0;
-        error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, 0, 0,
-                               &devicesNum);
-        if (error != CL_SUCCESS)
-        {
-            print_error(error, "clGetDeviceIDs failed\n");
-            return false;
-        }
-
-        devices.resize(devicesNum);
-        error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, devicesNum,
-                               &devices[0], 0);
-        if (error != CL_SUCCESS)
-        {
-            print_error(error, "clGetDeviceIDs failed\n");
-            return false;
-        }
-
-        for (size_t j = 0; j < devicesNum; ++j)
-        {
-            if (is_extension_available(devices[j], "cl_khr_dx9_media_sharing"))
-            {
-                gPlatformIDdetected = platforms[i];
-                gDeviceIDdetected = devices[j];
-                found = true;
-                break;
-            }
-        }
-    }
-
-    if (!found)
-    {
-        log_info("Test was not run, because the media surface sharing "
-                 "extension is not supported for any devices.\n");
-        return false;
-    }
-
-    return true;
-}
-
-bool CmdlineParse(int argc, const char *argv[])
-{
-    char *env_mode = getenv("CL_DEVICE_TYPE");
-    if (env_mode != NULL)
-    {
-        if (strcmp(env_mode, "gpu") == 0
-            || strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0)
-            gDeviceTypeSelected = CL_DEVICE_TYPE_GPU;
-        else if (strcmp(env_mode, "cpu") == 0
-                 || strcmp(env_mode, "CL_DEVICE_TYPE_CPU") == 0)
-            gDeviceTypeSelected = CL_DEVICE_TYPE_CPU;
-        else if (strcmp(env_mode, "accelerator") == 0
-                 || strcmp(env_mode, "CL_DEVICE_TYPE_ACCELERATOR") == 0)
-            gDeviceTypeSelected = CL_DEVICE_TYPE_ACCELERATOR;
-        else if (strcmp(env_mode, "default") == 0
-                 || strcmp(env_mode, "CL_DEVICE_TYPE_DEFAULT") == 0)
-            gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT;
-        else
-        {
-            log_error("Unknown CL_DEVICE_TYPE env variable setting: "
-                      "%s.\nAborting...\n",
-                      env_mode);
-            return false;
-        }
-    }
-
-    for (int i = 0; i < argc; ++i)
-    {
-        if (strcmp(argv[i], "gpu") == 0
-            || strcmp(argv[i], "CL_DEVICE_TYPE_GPU") == 0)
-        {
-            gDeviceTypeSelected = CL_DEVICE_TYPE_GPU;
-            continue;
-        }
-        else if (strcmp(argv[i], "cpu") == 0
-                 || strcmp(argv[i], "CL_DEVICE_TYPE_CPU") == 0)
-        {
-            gDeviceTypeSelected = CL_DEVICE_TYPE_CPU;
-            continue;
-        }
-        else if (strcmp(argv[i], "accelerator") == 0
-                 || strcmp(argv[i], "CL_DEVICE_TYPE_ACCELERATOR") == 0)
-        {
-            gDeviceTypeSelected = CL_DEVICE_TYPE_ACCELERATOR;
-            continue;
-        }
-        else if (strcmp(argv[i], "CL_DEVICE_TYPE_DEFAULT") == 0)
-        {
-            gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT;
-            continue;
-        }
-        else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0)
-        {
-            CDeviceWrapper::AccelerationType(CDeviceWrapper::ACCELERATION_SW);
-        }
-    }
-
-    return true;
-}
-
-int main(int argc, const char *argv[])
-{
-    if (!CmdlineParse(argc, argv)) return TEST_FAIL;
-
-    if (!DetectPlatformAndDevice())
-    {
-        log_info("Test was not run, because the media surface sharing "
-                 "extension is not supported\n");
-        return TEST_SKIP;
-    }
-
-    if (!MediaSurfaceSharingExtensionInit()) return TEST_FAIL;
-
-    return runTestHarness(argc, argv, test_num, test_list, true, 0);
-}

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/procs.h b/test_conformance/extensions/cl_khr_dx9_media_sharing/procs.h
deleted file mode 100644
index e7fd785..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/procs.h
+++ /dev/null

@@ -1,38 +0,0 @@
-//
-// Copyright (c) 2019 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-
-#ifndef __MEDIA_SHARING_PROCS_H__
-#define __MEDIA_SHARING_PROCS_H__
-
-
-extern int test_context_create(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_get_device_ids(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_api(cl_device_id deviceID, cl_context context,
-                    cl_command_queue queue, int num_elements);
-extern int test_kernel(cl_device_id deviceID, cl_context context,
-                       cl_command_queue queue, int num_elements);
-extern int test_other_data_types(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements);
-extern int test_memory_access(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements);
-extern int test_interop_user_sync(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements);
-
-
-#endif // #ifndef __MEDIA_SHARING_PROCS_H__
\ No newline at end of file

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_create_context.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_create_context.cpp
deleted file mode 100644
index 6033ce9..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_create_context.cpp
+++ /dev/null

@@ -1,373 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "utils.h"
-
-int context_create(cl_device_id deviceID, cl_context context,
-                   cl_command_queue queue, int num_elements, unsigned int width,
-                   unsigned int height, TContextFuncType functionCreate,
-                   cl_dx9_media_adapter_type_khr adapterType,
-                   TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
-{
-    CResult result;
-
-    // create device
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-
-    // generate input data
-    std::vector<cl_uchar> bufferIn(width * height * 3 / 2, 0);
-    if (!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-
-    while (deviceWrapper->AdapterNext())
-    {
-        cl_int error;
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result,
-                                             sharedHandle)))
-        {
-            return result.Result();
-        }
-
-        if (surfaceFormat != SURFACE_FORMAT_NV12
-            && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info(
-                "Skipping test case, image format is not supported by a device "
-                "(adapter type: %s, format: %s, shared handle: %s)\n",
-                adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
-            return result.Result();
-        }
-
-        void *objectSharedHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surface;
-        if (!MediaSurfaceCreate(
-                adapterType, width, height, surfaceFormat, *deviceWrapper,
-                surface, (sharedHandle == SHARED_HANDLE_ENABLED) ? true : false,
-                &objectSharedHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-
-        cl_context_properties contextProperties[] = {
-            CL_CONTEXT_PLATFORM,
-            (cl_context_properties)gPlatformIDdetected,
-            AdapterTypeToContextInfo(adapterType),
-            (cl_context_properties)deviceWrapper->Device(),
-            0,
-        };
-
-        clContextWrapper ctx;
-        switch (functionCreate)
-        {
-            case CONTEXT_CREATE_DEFAULT:
-                ctx = clCreateContext(&contextProperties[0], 1,
-                                      &gDeviceIDdetected, NULL, NULL, &error);
-                break;
-            case CONTEXT_CREATE_FROM_TYPE:
-                ctx = clCreateContextFromType(&contextProperties[0],
-                                              gDeviceTypeSelected, NULL, NULL,
-                                              &error);
-                break;
-            default:
-                log_error("Unknown context creation function enum\n");
-                result.ResultSub(CResult::TEST_ERROR);
-                return result.Result();
-                break;
-        }
-
-        if (error != CL_SUCCESS)
-        {
-            std::string functionName;
-            FunctionContextCreateToString(functionCreate, functionName);
-            log_error("%s failed: %s\n", functionName.c_str(),
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height))
-        {
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-
-#if defined(_WIN32)
-        cl_dx9_surface_info_khr surfaceInfo;
-        surfaceInfo.resource =
-            *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
-        surfaceInfo.shared_handle = objectSharedHandle;
-#else
-        void *surfaceInfo = 0;
-        return TEST_NOT_IMPLEMENTED;
-#endif
-
-        std::vector<cl_mem> memObjList;
-        unsigned int planesNum = PlanesNum(surfaceFormat);
-        std::vector<clMemWrapper> planesList(planesNum);
-        for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-        {
-            planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx,
-                &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error(
-                    "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n",
-                    planeIdx, IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            memObjList.push_back(planesList[planeIdx]);
-        }
-
-        clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(
-            ctx, gDeviceIDdetected, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("Unable to create command queue: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        if (!ImageInfoVerify(adapterType, memObjList, width, height, surface,
-                             objectSharedHandle))
-        {
-            log_error("Image info verification failed\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        cl_event event;
-        error = clEnqueueAcquireDX9MediaSurfacesKHR(
-            cmdQueue, static_cast<cl_uint>(memObjList.size()),
-            &memObjList.at(0), 0, NULL, &event);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        cl_uint eventType = 0;
-        error = clGetEventInfo(event, CL_EVENT_COMMAND_TYPE, sizeof(eventType),
-                               &eventType, NULL);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clGetEventInfo failed: %s\n", IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        if (eventType != CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR)
-        {
-            log_error(
-                "Invalid event != CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        clReleaseEvent(event);
-
-        size_t origin[3] = { 0, 0, 0 };
-        size_t offset = 0;
-        size_t frameSize = width * height * 3 / 2;
-        std::vector<cl_uchar> out(frameSize, 0);
-        for (size_t i = 0; i < memObjList.size(); ++i)
-        {
-            size_t planeWidth = (i == 0) ? width : width / 2;
-            size_t planeHeight = (i == 0) ? height : height / 2;
-            size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-
-            error =
-                clEnqueueReadImage(cmdQueue, memObjList.at(i), CL_TRUE, origin,
-                                   regionPlane, 0, 0, &out.at(offset), 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReadImage failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-
-            offset += planeWidth * planeHeight;
-        }
-
-        if (!YUVCompare(surfaceFormat, out, bufferIn, width, height))
-        {
-            log_error("OCL object verification failed - clEnqueueReadImage\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        error = clEnqueueReleaseDX9MediaSurfacesKHR(
-            cmdQueue, static_cast<cl_uint>(memObjList.size()),
-            &memObjList.at(0), 0, NULL, &event);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        eventType = 0;
-        error = clGetEventInfo(event, CL_EVENT_COMMAND_TYPE, sizeof(eventType),
-                               &eventType, NULL);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clGetEventInfo failed: %s\n", IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        if (eventType != CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR)
-        {
-            log_error(
-                "Invalid event != CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        clReleaseEvent(event);
-
-        // object verification
-        std::vector<cl_uchar> bufferOut(frameSize, 0);
-        if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height))
-        {
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height))
-        {
-            log_error("Media surface is different than expected\n");
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-    }
-
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-
-    return result.Result();
-}
-
-int test_context_create(cl_device_id deviceID, cl_context context,
-                        cl_command_queue queue, int num_elements)
-{
-    const unsigned int WIDTH = 256;
-    const unsigned int HEIGHT = 256;
-
-    std::vector<cl_dx9_media_adapter_type_khr> adapterTypes;
-#if defined(_WIN32)
-    adapterTypes.push_back(CL_ADAPTER_D3D9_KHR);
-    adapterTypes.push_back(CL_ADAPTER_D3D9EX_KHR);
-    adapterTypes.push_back(CL_ADAPTER_DXVA_KHR);
-#endif
-
-    std::vector<TContextFuncType> contextFuncs;
-    contextFuncs.push_back(CONTEXT_CREATE_DEFAULT);
-    contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE);
-
-    std::vector<TSurfaceFormat> formats;
-    formats.push_back(SURFACE_FORMAT_NV12);
-    formats.push_back(SURFACE_FORMAT_YV12);
-
-    std::vector<TSharedHandleType> sharedHandleTypes;
-    sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED);
-#if defined(_WIN32)
-    sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED);
-#endif
-
-    CResult result;
-    for (size_t adapterTypeIdx = 0; adapterTypeIdx < adapterTypes.size();
-         ++adapterTypeIdx)
-    {
-        // iteration through all create context functions
-        for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size();
-             ++contextFuncIdx)
-        {
-            // iteration through surface formats
-            for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx)
-            {
-                // shared handle enabled or disabled
-                for (size_t sharedHandleIdx = 0;
-                     sharedHandleIdx < sharedHandleTypes.size();
-                     ++sharedHandleIdx)
-                {
-                    if (adapterTypes[adapterTypeIdx] == CL_ADAPTER_D3D9_KHR
-                        && sharedHandleTypes[sharedHandleIdx]
-                            == SHARED_HANDLE_ENABLED)
-                        continue;
-
-                    if (context_create(
-                            deviceID, context, queue, num_elements, WIDTH,
-                            HEIGHT, contextFuncs[contextFuncIdx],
-                            adapterTypes[adapterTypeIdx], formats[formatIdx],
-                            sharedHandleTypes[sharedHandleIdx])
-                        != 0)
-                    {
-                        std::string sharedHandle =
-                            (sharedHandleTypes[sharedHandleIdx]
-                             == SHARED_HANDLE_ENABLED)
-                            ? "shared handle"
-                            : "no shared handle";
-                        std::string formatStr;
-                        std::string adapterTypeStr;
-                        SurfaceFormatToString(formats[formatIdx], formatStr);
-                        AdapterToString(adapterTypes[adapterTypeIdx],
-                                        adapterTypeStr);
-
-                        log_error("\nTest case - clCreateContext (%s, %s, %s) "
-                                  "failed\n\n",
-                                  adapterTypeStr.c_str(), formatStr.c_str(),
-                                  sharedHandle.c_str());
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                }
-            }
-        }
-    }
-
-    return result.Result();
-}

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp
deleted file mode 100644
index ab92cb8..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp
+++ /dev/null

@@ -1,781 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "utils.h"
-
-int api_functions(cl_device_id deviceID, cl_context context,
-                  cl_command_queue queue, int num_elements,
-                  unsigned int iterationNum, unsigned int width,
-                  unsigned int height,
-                  cl_dx9_media_adapter_type_khr adapterType,
-                  TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
-{
-    const unsigned int FRAME_NUM = 2;
-    const cl_uchar MAX_VALUE = 255 / 2;
-    CResult result;
-
-    // create device
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-
-    // generate input and expected data
-    std::vector<std::vector<cl_uchar>> bufferRef1(FRAME_NUM);
-    std::vector<std::vector<cl_uchar>> bufferRef2(FRAME_NUM);
-    std::vector<std::vector<cl_uchar>> bufferRef3(FRAME_NUM);
-    size_t frameSize = width * height * 3 / 2;
-    cl_uchar step = MAX_VALUE / FRAME_NUM;
-    for (size_t i = 0; i < FRAME_NUM; ++i)
-    {
-        if (!YUVGenerate(surfaceFormat, bufferRef1[i], width, height,
-                         static_cast<cl_uchar>(step * i),
-                         static_cast<cl_uchar>(step * (i + 1)))
-            || !YUVGenerate(surfaceFormat, bufferRef2[i], width, height,
-                            static_cast<cl_uchar>(step * i),
-                            static_cast<cl_uchar>(step * (i + 1)), 0.2)
-            || !YUVGenerate(surfaceFormat, bufferRef3[i], width, height,
-                            static_cast<cl_uchar>(step * i),
-                            static_cast<cl_uchar>(step * (i + 1)), 0.4))
-        {
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-    }
-
-    // iterates through all devices
-    while (deviceWrapper->AdapterNext())
-    {
-        cl_int error;
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result,
-                                             sharedHandle)))
-        {
-            return result.Result();
-        }
-
-        if (surfaceFormat != SURFACE_FORMAT_NV12
-            && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info(
-                "Skipping test case, image format is not supported by a device "
-                "(adapter type: %s, format: %s, shared handle: %s)\n",
-                adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
-            return result.Result();
-        }
-
-        void *objectSharedHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surface;
-
-        // create surface
-        if (!MediaSurfaceCreate(
-                adapterType, width, height, surfaceFormat, *deviceWrapper,
-                surface, (sharedHandle == SHARED_HANDLE_ENABLED) ? true : false,
-                &objectSharedHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-
-        cl_context_properties contextProperties[] = {
-            CL_CONTEXT_PLATFORM,
-            (cl_context_properties)gPlatformIDdetected,
-            AdapterTypeToContextInfo(adapterType),
-            (cl_context_properties)deviceWrapper->Device(),
-            0,
-        };
-
-        clContextWrapper ctx = clCreateContext(
-            &contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clCreateContext failed: %s\n", IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-#if defined(_WIN32)
-        cl_dx9_surface_info_khr surfaceInfo;
-        surfaceInfo.resource =
-            *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
-        surfaceInfo.shared_handle = objectSharedHandle;
-#else
-        void *surfaceInfo = 0;
-        return TEST_NOT_IMPLEMENTED;
-#endif
-
-        std::vector<cl_mem> memObjList;
-        unsigned int planesNum = PlanesNum(surfaceFormat);
-        std::vector<clMemWrapper> planesList(planesNum);
-        for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-        {
-            planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx,
-                &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error(
-                    "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n",
-                    planeIdx, IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            memObjList.push_back(planesList[planeIdx]);
-        }
-
-        clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(
-            ctx, gDeviceIDdetected, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("Unable to create command queue: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        if (!ImageInfoVerify(adapterType, memObjList, width, height, surface,
-                             objectSharedHandle))
-        {
-            log_error("Image info verification failed\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx)
-        {
-            if (!YUVSurfaceSet(surfaceFormat, surface,
-                               bufferRef1[frameIdx % FRAME_NUM], width, height))
-            {
-                result.ResultSub(CResult::TEST_ERROR);
-                return result.Result();
-            }
-
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-
-            { // read operation
-                std::vector<cl_uchar> out(frameSize, 0);
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-
-                    error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE,
-                                               origin, regionPlane, 0, 0,
-                                               &out[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    offset += planeWidth * planeHeight;
-                }
-
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef1[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error("Frame idx: %i, OCL image is different then "
-                              "shared OCL object: clEnqueueReadImage\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-
-            { // write operation
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-
-                    error = clEnqueueWriteImage(
-                        cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane,
-                        0, 0, &bufferRef2[frameIdx % FRAME_NUM][offset], 0, 0,
-                        0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueWriteImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    offset += planeWidth * planeHeight;
-                }
-            }
-
-            { // read operation
-                std::vector<cl_uchar> out(frameSize, 0);
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-
-                    error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE,
-                                               origin, regionPlane, 0, 0,
-                                               &out[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    offset += planeWidth * planeHeight;
-                }
-
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef2[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error("Frame idx: %i, Shared OCL image verification "
-                              "after clEnqueueWriteImage failed\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-
-            { // copy operation (shared OCL to OCL)
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                std::vector<cl_uchar> out(frameSize, 0);
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-
-                    cl_image_format formatPlane;
-                    formatPlane.image_channel_data_type = CL_UNORM_INT8;
-                    formatPlane.image_channel_order =
-                        (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0) ? CL_RG
-                                                                        : CL_R;
-
-                    cl_image_desc imageDesc = { 0 };
-                    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
-                    imageDesc.image_width = planeWidth;
-                    imageDesc.image_height = planeHeight;
-
-                    clMemWrapper planeOCL =
-                        clCreateImage(ctx, CL_MEM_READ_WRITE, &formatPlane,
-                                      &imageDesc, 0, &error);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clCreateImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    error = clEnqueueCopyImage(cmdQueue, memObjList[i],
-                                               planeOCL, origin, origin,
-                                               regionPlane, 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueCopyImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    error = clEnqueueReadImage(cmdQueue, planeOCL, CL_TRUE,
-                                               origin, regionPlane, 0, 0,
-                                               &out[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    offset += planeWidth * planeHeight;
-                }
-
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef2[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error(
-                        "Frame idx: %i, OCL image verification after "
-                        "clEnqueueCopyImage (from shared OCL to OCL) failed\n",
-                        frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-
-            { // copy operation (OCL to shared OCL)
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                std::vector<cl_uchar> out(frameSize, 0);
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    size_t pitchSize =
-                        ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)
-                             ? width
-                             : planeWidth)
-                        * sizeof(cl_uchar);
-
-                    cl_image_format formatPlane;
-                    formatPlane.image_channel_data_type = CL_UNORM_INT8;
-                    formatPlane.image_channel_order =
-                        (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0) ? CL_RG
-                                                                        : CL_R;
-
-                    cl_image_desc imageDesc = { 0 };
-                    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
-                    imageDesc.image_width = planeWidth;
-                    imageDesc.image_height = planeHeight;
-                    imageDesc.image_row_pitch = pitchSize;
-
-                    clMemWrapper planeOCL = clCreateImage(
-                        ctx, CL_MEM_COPY_HOST_PTR, &formatPlane, &imageDesc,
-                        &bufferRef1[frameIdx % FRAME_NUM][offset], &error);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clCreateImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    error = clEnqueueCopyImage(cmdQueue, planeOCL,
-                                               memObjList[i], origin, origin,
-                                               regionPlane, 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueCopyImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE,
-                                               origin, regionPlane, 0, 0,
-                                               &out[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    offset += planeWidth * planeHeight;
-                }
-
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef1[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error(
-                        "Frame idx: %i, OCL image verification after "
-                        "clEnqueueCopyImage (from OCL to shared OCL) failed\n",
-                        frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-
-            { // copy from image to buffer
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                size_t bufferSize = sizeof(cl_uchar) * frameSize;
-                clMemWrapper buffer = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
-                                                     bufferSize, NULL, &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clCreateBuffer failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-
-                    error = clEnqueueCopyImageToBuffer(
-                        cmdQueue, memObjList[i], buffer, origin, regionPlane,
-                        offset, 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueCopyImageToBuffer failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    offset += planeWidth * planeHeight * sizeof(cl_uchar);
-                }
-
-                std::vector<cl_uchar> out(frameSize, 0);
-                error = clEnqueueReadBuffer(cmdQueue, buffer, CL_TRUE, 0,
-                                            bufferSize, &out[0], 0, NULL, NULL);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to read buffer");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef1[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error("Frame idx: %i, OCL buffer verification after "
-                              "clEnqueueCopyImageToBuffer (from shared OCL "
-                              "image to OCL buffer) failed\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-
-            { // copy buffer to image
-                size_t bufferSize = sizeof(cl_uchar) * frameSize;
-                clMemWrapper buffer = clCreateBuffer(
-                    ctx, CL_MEM_COPY_HOST_PTR, bufferSize,
-                    &bufferRef2[frameIdx % FRAME_NUM][0], &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clCreateBuffer failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                std::vector<cl_uchar> out(frameSize, 0);
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-
-                    error = clEnqueueCopyBufferToImage(
-                        cmdQueue, buffer, memObjList[i], offset, origin,
-                        regionPlane, 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueCopyBufferToImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE,
-                                               origin, regionPlane, 0, 0,
-                                               &out[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    offset += planeWidth * planeHeight * sizeof(cl_uchar);
-                }
-
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef2[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error("Frame idx: %i, OCL image verification after "
-                              "clEnqueueCopyBufferToImage (from OCL buffer to "
-                              "shared OCL image) failed\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-
-            { // map operation to read
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                std::vector<cl_uchar> out(frameSize, 0);
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    size_t pitchSize =
-                        ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)
-                             ? width
-                             : planeWidth);
-
-                    size_t rowPitch = 0;
-                    size_t slicePitch = 0;
-                    void *mapPtr = clEnqueueMapImage(
-                        cmdQueue, memObjList[i], CL_TRUE, CL_MAP_READ, origin,
-                        regionPlane, &rowPitch, &slicePitch, 0, 0, 0, &error);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueMapImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    for (size_t y = 0; y < planeHeight; ++y)
-                        memcpy(&out[offset + y * pitchSize],
-                               static_cast<cl_uchar *>(mapPtr)
-                                   + y * rowPitch / sizeof(cl_uchar),
-                               pitchSize * sizeof(cl_uchar));
-
-                    error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i],
-                                                    mapPtr, 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueUnmapMemObject failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    offset += pitchSize * planeHeight;
-                }
-
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef2[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error("Frame idx: %i, Mapped shared OCL image is "
-                              "different then expected\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-
-            { // map operation to write
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    size_t pitchSize =
-                        ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)
-                             ? width
-                             : planeWidth);
-
-                    size_t rowPitch = 0;
-                    size_t slicePitch = 0;
-                    void *mapPtr = clEnqueueMapImage(
-                        cmdQueue, memObjList[i], CL_TRUE, CL_MAP_WRITE, origin,
-                        regionPlane, &rowPitch, &slicePitch, 0, 0, 0, &error);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueMapImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    for (size_t y = 0; y < planeHeight; ++y)
-                        memcpy(static_cast<cl_uchar *>(mapPtr)
-                                   + y * rowPitch / sizeof(cl_uchar),
-                               &bufferRef3[frameIdx % FRAME_NUM]
-                                          [offset + y * pitchSize],
-                               pitchSize * sizeof(cl_uchar));
-
-                    error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i],
-                                                    mapPtr, 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueUnmapMemObject failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    offset += pitchSize * planeHeight;
-                }
-            }
-
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-
-            std::vector<cl_uchar> bufferOut(frameSize, 0);
-            if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width,
-                               height))
-            {
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-
-            if (!YUVCompare(surfaceFormat, bufferOut,
-                            bufferRef3[frameIdx % FRAME_NUM], width, height))
-            {
-                log_error(
-                    "Frame idx: %i, media surface is different than expected\n",
-                    frameIdx);
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-    }
-
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-
-    return result.Result();
-}
-
-int test_api(cl_device_id deviceID, cl_context context, cl_command_queue queue,
-             int num_elements)
-{ // Runs api_functions once per adapter/format/shared-handle case listed below.
-    CResult result; // each failing case is recorded here via ResultSub
-
-#if defined(_WIN32) // the whole case list is compiled only for Windows
-    // D3D9: NV12 and YV12, both without a shared handle.
-    if (api_functions(deviceID, context, queue, num_elements, 10, 256, 256,
-                      CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0) // any non-zero return is treated as a failed case
-    {
-        log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (api_functions(deviceID, context, queue, num_elements, 3, 512, 256,
-                      CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    // D3D9EX: NV12 and YV12, each with and without a shared handle.
-    if (api_functions(deviceID, context, queue, num_elements, 5, 256, 512,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (api_functions(deviceID, context, queue, num_elements, 7, 512, 256,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (api_functions(deviceID, context, queue, num_elements, 10, 256, 256,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (api_functions(deviceID, context, queue, num_elements, 15, 128, 128,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    // DXVA: NV12 and YV12, each with and without a shared handle.
-    if (api_functions(deviceID, context, queue, num_elements, 20, 128, 128,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (api_functions(deviceID, context, queue, num_elements, 40, 64, 64,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (api_functions(deviceID, context, queue, num_elements, 5, 512, 512,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (api_functions(deviceID, context, queue, num_elements, 2, 1024, 1024,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-#else // not _WIN32: no case is run
-    return TEST_NOT_IMPLEMENTED;
-#endif // defined(_WIN32)
-
-    return result.Result(); // in non-Windows builds the #else return above runs first
-}

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp
deleted file mode 100644
index a204440..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp
+++ /dev/null

@@ -1,541 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "harness/errorHelpers.h"
-#include "harness/kernelHelpers.h"
-
-#include "utils.h"
-
-int kernel_functions(cl_device_id deviceID, cl_context context,
-                     cl_command_queue queue, int num_elements,
-                     unsigned int iterationNum, unsigned int width,
-                     unsigned int height,
-                     cl_dx9_media_adapter_type_khr adapterType,
-                     TSurfaceFormat surfaceFormat,
-                     TSharedHandleType sharedHandle)
-{
-    const unsigned int FRAME_NUM = 2;
-    const cl_uchar MAX_VALUE = 255 / 2;
-    const std::string PROGRAM_STR =
-        "__kernel void TestFunction( read_only image2d_t planeIn, write_only "
-        "image2d_t planeOut, " NL "                            sampler_t "
-        "sampler, __global int *planeRes)" NL "{" NL
-        "  int w = get_global_id(0);" NL "  int h = get_global_id(1);" NL
-        "  int width = get_image_width(planeIn);" NL
-        "  int height = get_image_height(planeOut);" NL
-        "  float4 color0 = read_imagef(planeIn, sampler, (int2)(w,h)) + "
-        "0.2f;" NL "  float4 color1 = read_imagef(planeIn, sampler, "
-        "(float2)(w,h)) + 0.2f;" NL
-        "  color0 = (color0 == color1) ? color0: (float4)(0.5, 0.5, 0.5, "
-        "0.5);" NL "  write_imagef(planeOut, (int2)(w,h), color0);" NL
-        "  if(w == 0 && h == 0)" NL "  {" NL "    planeRes[0] = width;" NL
-        "    planeRes[1] = height;" NL "  }" NL "}";
-
-    CResult result;
-
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-
-    std::vector<std::vector<cl_uchar>> bufferIn(FRAME_NUM);
-    std::vector<std::vector<cl_uchar>> bufferExp(FRAME_NUM);
-    size_t frameSize = width * height * 3 / 2;
-    cl_uchar step = MAX_VALUE / FRAME_NUM;
-    for (size_t i = 0; i < FRAME_NUM; ++i)
-    {
-        if (!YUVGenerate(surfaceFormat, bufferIn[i], width, height,
-                         static_cast<cl_uchar>(step * i),
-                         static_cast<cl_uchar>(step * (i + 1)))
-            || !YUVGenerate(surfaceFormat, bufferExp[i], width, height,
-                            static_cast<cl_uchar>(step * i),
-                            static_cast<cl_uchar>(step * (i + 1)), 0.2))
-        {
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-    }
-
-    while (deviceWrapper->AdapterNext())
-    {
-        cl_int error;
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result,
-                                             sharedHandle)))
-        {
-            return result.Result();
-        }
-
-        if (surfaceFormat != SURFACE_FORMAT_NV12
-            && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info(
-                "Skipping test case, image format is not supported by a device "
-                "(adapter type: %s, format: %s, shared handle: %s)\n",
-                adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
-            return result.Result();
-        }
-
-        void *objectSrcHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surfaceSrc;
-        if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat,
-                                *deviceWrapper, surfaceSrc,
-                                (sharedHandle == SHARED_HANDLE_ENABLED) ? true
-                                                                        : false,
-                                &objectSrcHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-
-        void *objectDstHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surfaceDst;
-        if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat,
-                                *deviceWrapper, surfaceDst,
-                                (sharedHandle == SHARED_HANDLE_ENABLED) ? true
-                                                                        : false,
-                                &objectDstHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-
-        cl_context_properties contextProperties[] = {
-            CL_CONTEXT_PLATFORM,
-            (cl_context_properties)gPlatformIDdetected,
-            AdapterTypeToContextInfo(adapterType),
-            (cl_context_properties)deviceWrapper->Device(),
-            0,
-        };
-
-        clContextWrapper ctx = clCreateContext(
-            &contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clCreateContext failed: %s\n", IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-#if defined(_WIN32)
-        cl_dx9_surface_info_khr surfaceInfoSrc;
-        surfaceInfoSrc.resource =
-            *(static_cast<CD3D9SurfaceWrapper *>(surfaceSrc.get()));
-        surfaceInfoSrc.shared_handle = objectSrcHandle;
-
-        cl_dx9_surface_info_khr surfaceInfoDst;
-        surfaceInfoDst.resource =
-            *(static_cast<CD3D9SurfaceWrapper *>(surfaceDst.get()));
-        surfaceInfoDst.shared_handle = objectDstHandle;
-#else
-        void *surfaceInfoSrc = 0;
-        void *surfaceInfoDst = 0;
-        return TEST_NOT_IMPLEMENTED;
-#endif
-
-        std::vector<cl_mem> memObjSrcList;
-        std::vector<cl_mem> memObjDstList;
-        unsigned int planesNum = PlanesNum(surfaceFormat);
-        std::vector<clMemWrapper> planeSrcList(planesNum);
-        std::vector<clMemWrapper> planeDstList(planesNum);
-        for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-        {
-            planeSrcList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoSrc, planeIdx,
-                &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error(
-                    "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n",
-                    planeIdx, IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            memObjSrcList.push_back(planeSrcList[planeIdx]);
-
-            planeDstList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoDst, planeIdx,
-                &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error(
-                    "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n",
-                    planeIdx, IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            memObjDstList.push_back(planeDstList[planeIdx]);
-        }
-
-        clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(
-            ctx, gDeviceIDdetected, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("Unable to create command queue: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        if (!ImageInfoVerify(adapterType, memObjSrcList, width, height,
-                             surfaceSrc, objectSrcHandle))
-        {
-            log_error("Image info verification failed\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx)
-        {
-            if (!YUVSurfaceSet(surfaceFormat, surfaceSrc,
-                               bufferIn[frameIdx % FRAME_NUM], width, height))
-            {
-                result.ResultSub(CResult::TEST_ERROR);
-                return result.Result();
-            }
-
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjSrcList.size()),
-                &memObjSrcList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjDstList.size()),
-                &memObjDstList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-
-            clSamplerWrapper sampler = clCreateSampler(
-                ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error("Unable to create sampler\n");
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-
-            clProgramWrapper program;
-            clKernelWrapper kernel;
-            const char *progPtr = PROGRAM_STR.c_str();
-            if (create_single_kernel_helper(ctx, &program, &kernel, 1,
-                                            (const char **)&progPtr,
-                                            "TestFunction"))
-                result.ResultSub(CResult::TEST_FAIL);
-
-            size_t bufferSize = sizeof(cl_int) * 2;
-            clMemWrapper imageRes = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
-                                                   bufferSize, NULL, &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clCreateBuffer failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-
-            size_t offset = 0;
-            size_t origin[3] = { 0, 0, 0 };
-            std::vector<cl_uchar> out(frameSize, 0);
-            for (size_t i = 0; i < memObjSrcList.size(); ++i)
-            {
-                size_t planeWidth = (i == 0) ? width : width / 2;
-                size_t planeHeight = (i == 0) ? height : height / 2;
-                size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                size_t threads[2] = { planeWidth, planeHeight };
-
-                error = clSetKernelArg(kernel, 0, sizeof(memObjSrcList[i]),
-                                       &memObjSrcList[i]);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                error = clSetKernelArg(kernel, 1, sizeof(memObjDstList[i]),
-                                       &memObjDstList[i]);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                error = clSetKernelArg(kernel, 2, sizeof(sampler), &sampler);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                error = clSetKernelArg(kernel, 3, sizeof(imageRes), &imageRes);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                size_t localThreads[2];
-                error = get_max_common_2D_work_group_size(ctx, kernel, threads,
-                                                          localThreads);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to get work group size to use");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                error =
-                    clEnqueueNDRangeKernel(cmdQueue, kernel, 2, NULL, threads,
-                                           localThreads, 0, NULL, NULL);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to execute test kernel");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                std::vector<cl_uint> imageResOut(2, 0);
-                error = clEnqueueReadBuffer(cmdQueue, imageRes, CL_TRUE, 0,
-                                            bufferSize, &imageResOut[0], 0,
-                                            NULL, NULL);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to read buffer");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                if (imageResOut[0] != planeWidth)
-                {
-                    log_error("Invalid width value, test = %i, expected = %i\n",
-                              imageResOut[0], planeWidth);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                if (imageResOut[1] != planeHeight)
-                {
-                    log_error(
-                        "Invalid height value, test = %i, expected = %i\n",
-                        imageResOut[1], planeHeight);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                error = clEnqueueReadImage(cmdQueue, memObjDstList[i], CL_TRUE,
-                                           origin, regionPlane, 0, 0,
-                                           &out[offset], 0, 0, 0);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueReadImage failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                offset += planeWidth * planeHeight;
-            }
-
-            if (!YUVCompare(surfaceFormat, out, bufferExp[frameIdx % FRAME_NUM],
-                            width, height))
-            {
-                log_error(
-                    "Frame idx: %i, OCL objects are different than expected\n",
-                    frameIdx);
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjSrcList.size()),
-                &memObjSrcList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjDstList.size()),
-                &memObjDstList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-
-            std::vector<cl_uchar> bufferOut(frameSize, 0);
-            if (!YUVSurfaceGet(surfaceFormat, surfaceDst, bufferOut, width,
-                               height))
-            {
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-
-            if (!YUVCompare(surfaceFormat, bufferOut,
-                            bufferExp[frameIdx % FRAME_NUM], width, height))
-            {
-                log_error(
-                    "Frame idx: %i, media surface is different than expected\n",
-                    frameIdx);
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-    }
-
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-
-    return result.Result();
-}
-
-int test_kernel(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, int num_elements)
-{
-    CResult result;
-
-#if defined(_WIN32)
-    // D3D9
-    if (kernel_functions(deviceID, context, queue, num_elements, 10, 256, 256,
-                         CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_NV12,
-                         SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (kernel_functions(deviceID, context, queue, num_elements, 3, 256, 256,
-                         CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_YV12,
-                         SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    // D3D9EX
-    if (kernel_functions(deviceID, context, queue, num_elements, 5, 256, 512,
-                         CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12,
-                         SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (kernel_functions(deviceID, context, queue, num_elements, 7, 512, 256,
-                         CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12,
-                         SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (kernel_functions(deviceID, context, queue, num_elements, 10, 256, 256,
-                         CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12,
-                         SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (kernel_functions(deviceID, context, queue, num_elements, 15, 128, 128,
-                         CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12,
-                         SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    // DXVA
-    if (kernel_functions(deviceID, context, queue, num_elements, 20, 128, 128,
-                         CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12,
-                         SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (kernel_functions(deviceID, context, queue, num_elements, 40, 64, 64,
-                         CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12,
-                         SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (kernel_functions(deviceID, context, queue, num_elements, 5, 512, 512,
-                         CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12,
-                         SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (kernel_functions(deviceID, context, queue, num_elements, 2, 1024, 1024,
-                         CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12,
-                         SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-#else
-    return TEST_NOT_IMPLEMENTED;
-#endif
-
-    return result.Result();
-}

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_get_device_ids.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_get_device_ids.cpp
deleted file mode 100644
index 613a602..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_get_device_ids.cpp
+++ /dev/null

@@ -1,220 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "utils.h"
-
-int get_device_ids(cl_device_id deviceID, cl_context context,
-                   cl_command_queue queue, int num_elements,
-                   cl_dx9_media_adapter_type_khr adapterType)
-{
-    CResult result;
-
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-
-    cl_uint devicesExpectedNum = 0;
-    cl_int error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL, 0, 0,
-                                  &devicesExpectedNum);
-    if (error != CL_SUCCESS || devicesExpectedNum < 1)
-    {
-        log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error));
-        result.ResultSub(CResult::TEST_FAIL);
-        return result.Result();
-    }
-
-    std::vector<cl_device_id> devicesExpected(devicesExpectedNum);
-    error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL,
-                           devicesExpectedNum, &devicesExpected[0], 0);
-    if (error != CL_SUCCESS)
-    {
-        log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error));
-        result.ResultSub(CResult::TEST_FAIL);
-        return result.Result();
-    }
-
-    while (deviceWrapper->AdapterNext())
-    {
-        std::vector<cl_dx9_media_adapter_type_khr> mediaAdapterTypes;
-        mediaAdapterTypes.push_back(adapterType);
-
-        std::vector<void *> mediaDevices;
-        mediaDevices.push_back(deviceWrapper->Device());
-
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result)))
-        {
-            return result.Result();
-        }
-
-        cl_uint devicesAllNum = 0;
-        error = clGetDeviceIDsFromDX9MediaAdapterKHR(
-            gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
-            CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum);
-        if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND)
-        {
-            log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        std::vector<cl_device_id> devicesAll;
-        if (devicesAllNum > 0)
-        {
-            devicesAll.resize(devicesAllNum);
-            error = clGetDeviceIDsFromDX9MediaAdapterKHR(
-                gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
-                CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, devicesAllNum,
-                &devicesAll[0], 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-        }
-
-        cl_uint devicesPreferredNum = 0;
-        error = clGetDeviceIDsFromDX9MediaAdapterKHR(
-            gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
-            CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0,
-            &devicesPreferredNum);
-        if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND)
-        {
-            log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        std::vector<cl_device_id> devicesPreferred;
-        if (devicesPreferredNum > 0)
-        {
-            devicesPreferred.resize(devicesPreferredNum);
-            error = clGetDeviceIDsFromDX9MediaAdapterKHR(
-                gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
-                CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR,
-                devicesPreferredNum, &devicesPreferred[0], 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-        }
-
-        if (devicesAllNum < devicesPreferredNum)
-        {
-            log_error("Invalid number of preferred devices. It should be a "
-                      "subset of all devices\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        for (cl_uint i = 0; i < devicesPreferredNum; ++i)
-        {
-            cl_uint j = 0;
-            for (; j < devicesAllNum; ++j)
-            {
-                if (devicesPreferred[i] == devicesAll[j]) break;
-            }
-
-            if (j == devicesAllNum)
-            {
-                log_error("Preferred device is not a subset of all devices\n");
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-
-        for (cl_uint i = 0; i < devicesAllNum; ++i)
-        {
-            cl_uint j = 0;
-            for (; j < devicesExpectedNum; ++j)
-            {
-                if (devicesAll[i] == devicesExpected[j]) break;
-            }
-
-            if (j == devicesExpectedNum)
-            {
-                log_error("CL_ALL_DEVICES_FOR_MEDIA_ADAPTER_KHR should be a "
-                          "subset of all devices for selected platform\n");
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-    }
-
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-
-    return result.Result();
-}
-
-int test_get_device_ids(cl_device_id deviceID, cl_context context,
-                        cl_command_queue queue, int num_elements)
-{
-    CResult result;
-
-#if defined(_WIN32)
-    if (get_device_ids(deviceID, context, queue, num_elements,
-                       CL_ADAPTER_D3D9_KHR)
-        != 0)
-    {
-        log_error("\nTest case (D3D9) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (get_device_ids(deviceID, context, queue, num_elements,
-                       CL_ADAPTER_D3D9EX_KHR)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (get_device_ids(deviceID, context, queue, num_elements,
-                       CL_ADAPTER_DXVA_KHR)
-        != 0)
-    {
-        log_error("\nTest case (DXVA) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-#else
-    return TEST_NOT_IMPLEMENTED;
-#endif
-
-    return result.Result();
-}

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp
deleted file mode 100644
index fbc616e..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp
+++ /dev/null

@@ -1,419 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "utils.h"
-
-int interop_user_sync(cl_device_id deviceID, cl_context context,
-                      cl_command_queue queue, int num_elements,
-                      unsigned int width, unsigned int height,
-                      TContextFuncType functionCreate,
-                      cl_dx9_media_adapter_type_khr adapterType,
-                      TSurfaceFormat surfaceFormat,
-                      TSharedHandleType sharedHandle, cl_bool userSync)
-{
-    CResult result;
-
-    // create device
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-
-    // generate input data
-    std::vector<cl_uchar> bufferIn(width * height * 3 / 2, 0);
-    if (!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-
-    while (deviceWrapper->AdapterNext())
-    {
-        cl_int error;
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result,
-                                             sharedHandle)))
-        {
-            return result.Result();
-        }
-
-        if (surfaceFormat != SURFACE_FORMAT_NV12
-            && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string syncStr = (userSync == CL_TRUE) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info("Skipping test case, image format is not supported by a "
-                     "device (adapter type: %s, format: %s, shared handle: %s, "
-                     "user sync: %s)\n",
-                     adapterStr.c_str(), formatStr.c_str(),
-                     sharedHandleStr.c_str(), syncStr.c_str());
-            return result.Result();
-        }
-
-        void *objectSharedHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surface;
-        if (!MediaSurfaceCreate(
-                adapterType, width, height, surfaceFormat, *deviceWrapper,
-                surface, (sharedHandle == SHARED_HANDLE_ENABLED) ? true : false,
-                &objectSharedHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-
-        cl_context_properties contextProperties[] = {
-            CL_CONTEXT_PLATFORM,
-            (cl_context_properties)gPlatformIDdetected,
-            AdapterTypeToContextInfo(adapterType),
-            (cl_context_properties)deviceWrapper->Device(),
-            CL_CONTEXT_INTEROP_USER_SYNC,
-            userSync,
-            0,
-        };
-
-
-        clContextWrapper ctx;
-        switch (functionCreate)
-        {
-            case CONTEXT_CREATE_DEFAULT:
-                ctx = clCreateContext(&contextProperties[0], 1,
-                                      &gDeviceIDdetected, NULL, NULL, &error);
-                break;
-            case CONTEXT_CREATE_FROM_TYPE:
-                ctx = clCreateContextFromType(&contextProperties[0],
-                                              gDeviceTypeSelected, NULL, NULL,
-                                              &error);
-                break;
-            default:
-                log_error("Unknown context creation function enum\n");
-                result.ResultSub(CResult::TEST_ERROR);
-                return result.Result();
-                break;
-        }
-
-        if (error != CL_SUCCESS)
-        {
-            std::string functionName;
-            FunctionContextCreateToString(functionCreate, functionName);
-            log_error("%s failed: %s\n", functionName.c_str(),
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height))
-        {
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-
-#if defined(_WIN32)
-        cl_dx9_surface_info_khr surfaceInfo;
-        surfaceInfo.resource =
-            *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
-        surfaceInfo.shared_handle = objectSharedHandle;
-#else
-        void *surfaceInfo = 0;
-        return TEST_NOT_IMPLEMENTED;
-#endif
-
-        std::vector<cl_mem> memObjList;
-        unsigned int planesNum = PlanesNum(surfaceFormat);
-        std::vector<clMemWrapper> planesList(planesNum);
-        for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-        {
-            planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx,
-                &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error(
-                    "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n",
-                    planeIdx, IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            memObjList.push_back(planesList[planeIdx]);
-        }
-
-        clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(
-            ctx, gDeviceIDdetected, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("Unable to create command queue: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        if (!ImageInfoVerify(adapterType, memObjList, width, height, surface,
-                             objectSharedHandle))
-        {
-            log_error("Image info verification failed\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        if (userSync == CL_TRUE)
-        {
-#if defined(_WIN32)
-            IDirect3DQuery9 *eventQuery = NULL;
-            switch (adapterType)
-            {
-                case CL_ADAPTER_D3D9_KHR: {
-                    LPDIRECT3DDEVICE9 device =
-                        (LPDIRECT3DDEVICE9)deviceWrapper->Device();
-                    device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery);
-                    eventQuery->Issue(D3DISSUE_END);
-
-                    while (S_FALSE
-                           == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH))
-                        ;
-                }
-                break;
-                case CL_ADAPTER_D3D9EX_KHR: {
-                    LPDIRECT3DDEVICE9EX device =
-                        (LPDIRECT3DDEVICE9EX)deviceWrapper->Device();
-                    device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery);
-                    eventQuery->Issue(D3DISSUE_END);
-
-                    while (S_FALSE
-                           == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH))
-                        ;
-                }
-                break;
-                case CL_ADAPTER_DXVA_KHR: {
-                    CDXVAWrapper *DXVADevice =
-                        dynamic_cast<CDXVAWrapper *>(&(*deviceWrapper));
-                    LPDIRECT3DDEVICE9EX device =
-                        (LPDIRECT3DDEVICE9EX)(DXVADevice->D3D9()).Device();
-                    device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery);
-                    eventQuery->Issue(D3DISSUE_END);
-
-                    while (S_FALSE
-                           == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH))
-                        ;
-                }
-                break;
-                default:
-                    log_error("Unknown adapter type\n");
-                    return false;
-                    break;
-            }
-            if (eventQuery)
-            {
-                eventQuery->Release();
-            }
-#else
-            return TEST_NOT_IMPLEMENTED;
-#endif
-        }
-
-        error = clEnqueueAcquireDX9MediaSurfacesKHR(
-            cmdQueue, static_cast<cl_uint>(memObjList.size()),
-            &memObjList.at(0), 0, 0, 0);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        size_t origin[3] = { 0, 0, 0 };
-        size_t offset = 0;
-        size_t frameSize = width * height * 3 / 2;
-        std::vector<cl_uchar> out(frameSize, 0);
-        for (size_t i = 0; i < memObjList.size(); ++i)
-        {
-            size_t planeWidth = (i == 0) ? width : width / 2;
-            size_t planeHeight = (i == 0) ? height : height / 2;
-            size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-
-            error =
-                clEnqueueReadImage(cmdQueue, memObjList.at(i), CL_TRUE, origin,
-                                   regionPlane, 0, 0, &out.at(offset), 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReadImage failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-
-            offset += planeWidth * planeHeight;
-        }
-
-        if (!YUVCompare(surfaceFormat, out, bufferIn, width, height))
-        {
-            log_error("OCL object verification failed - clEnqueueReadImage\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        error = clEnqueueReleaseDX9MediaSurfacesKHR(
-            cmdQueue, static_cast<cl_uint>(memObjList.size()),
-            &memObjList.at(0), 0, 0, 0);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        if (userSync == CL_TRUE)
-        {
-            error = clFinish(cmdQueue);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clFinish failed: %s\n", IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-
-        // shared object verification
-        std::vector<cl_uchar> bufferOut(frameSize, 0);
-        if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height))
-        {
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height))
-        {
-            log_error("Media surface is different than expected\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-    }
-
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-
-    return result.Result();
-}
-
-int test_interop_user_sync(cl_device_id deviceID, cl_context context,
-                           cl_command_queue queue, int num_elements)
-{
-    const unsigned int WIDTH = 256;
-    const unsigned int HEIGHT = 256;
-
-    std::vector<cl_dx9_media_adapter_type_khr> adapters;
-#if defined(_WIN32)
-    adapters.push_back(CL_ADAPTER_D3D9_KHR);
-    adapters.push_back(CL_ADAPTER_D3D9EX_KHR);
-    adapters.push_back(CL_ADAPTER_DXVA_KHR);
-#else
-    return TEST_NOT_IMPLEMENTED;
-#endif
-
-    std::vector<TContextFuncType> contextFuncs;
-    contextFuncs.push_back(CONTEXT_CREATE_DEFAULT);
-    contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE);
-
-    std::vector<TSurfaceFormat> formats;
-    formats.push_back(SURFACE_FORMAT_NV12);
-    formats.push_back(SURFACE_FORMAT_YV12);
-
-    std::vector<TSharedHandleType> sharedHandleTypes;
-    sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED);
-    sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED);
-
-    std::vector<cl_bool> sync;
-    sync.push_back(CL_FALSE);
-    sync.push_back(CL_TRUE);
-
-    CResult result;
-    for (size_t adapterIdx = 0; adapterIdx < adapters.size(); ++adapterIdx)
-    {
-        // iteration through all create context functions
-        for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size();
-             ++contextFuncIdx)
-        {
-            // iteration through YUV formats
-            for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx)
-            {
-                // shared handle enabled or disabled
-                for (size_t sharedHandleIdx = 0;
-                     sharedHandleIdx < sharedHandleTypes.size();
-                     ++sharedHandleIdx)
-                {
-                    // user sync interop disabled or enabled
-                    for (size_t syncIdx = 0; syncIdx < sync.size(); ++syncIdx)
-                    {
-                        if (adapters[adapterIdx] == CL_ADAPTER_D3D9_KHR
-                            && sharedHandleTypes[sharedHandleIdx]
-                                == SHARED_HANDLE_ENABLED)
-                            continue;
-
-                        if (interop_user_sync(
-                                deviceID, context, queue, num_elements, WIDTH,
-                                HEIGHT, contextFuncs[contextFuncIdx],
-                                adapters[adapterIdx], formats[formatIdx],
-                                sharedHandleTypes[sharedHandleIdx],
-                                sync[syncIdx])
-                            != 0)
-                        {
-                            std::string syncStr = (sync[syncIdx] == CL_TRUE)
-                                ? "user sync enabled"
-                                : "user sync disabled";
-                            std::string sharedHandle =
-                                (sharedHandleTypes[sharedHandleIdx]
-                                 == SHARED_HANDLE_ENABLED)
-                                ? "shared handle"
-                                : "no shared handle";
-                            std::string adapterStr;
-                            std::string formatStr;
-                            SurfaceFormatToString(formats[formatIdx],
-                                                  formatStr);
-                            AdapterToString(adapters[adapterIdx], adapterStr);
-
-                            log_error("\nTest case - clCreateContext (%s, %s, "
-                                      "%s, %s) failed\n\n",
-                                      adapterStr.c_str(), formatStr.c_str(),
-                                      sharedHandle.c_str(), syncStr.c_str());
-                            result.ResultSub(CResult::TEST_FAIL);
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    return result.Result();
-}

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp
deleted file mode 100644
index 1e4e2c4..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp
+++ /dev/null

@@ -1,549 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "utils.h"
-
-int memory_access(cl_device_id deviceID, cl_context context,
-                  cl_command_queue queue, int num_elements, unsigned int width,
-                  unsigned int height,
-                  cl_dx9_media_adapter_type_khr adapterType,
-                  TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
-{
-    CResult result;
-
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    // creates device
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-
-    // generate input and expected data
-    size_t frameSize = width * height * 3 / 2;
-    std::vector<cl_uchar> bufferRef0(frameSize, 0);
-    std::vector<cl_uchar> bufferRef1(frameSize, 0);
-    std::vector<cl_uchar> bufferRef2(frameSize, 0);
-    if (!YUVGenerate(surfaceFormat, bufferRef0, width, height, 0, 90)
-        || !YUVGenerate(surfaceFormat, bufferRef1, width, height, 91, 180)
-        || !YUVGenerate(surfaceFormat, bufferRef2, width, height, 181, 255))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-
-    // iterates through all devices
-    while (deviceWrapper->AdapterNext())
-    {
-        cl_int error;
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result,
-                                             sharedHandle)))
-        {
-            return result.Result();
-        }
-
-        if (surfaceFormat != SURFACE_FORMAT_NV12
-            && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info(
-                "Skipping test case, image format is not supported by a device "
-                "(adapter type: %s, format: %s, shared handle: %s)\n",
-                adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
-            return result.Result();
-        }
-
-        void *objectSharedHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surface;
-
-        // creates surface
-        if (!MediaSurfaceCreate(
-                adapterType, width, height, surfaceFormat, *deviceWrapper,
-                surface, (sharedHandle == SHARED_HANDLE_ENABLED) ? true : false,
-                &objectSharedHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-
-        if (!YUVSurfaceSet(surfaceFormat, surface, bufferRef0, width, height))
-        {
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-
-        cl_context_properties contextProperties[] = {
-            CL_CONTEXT_PLATFORM,
-            (cl_context_properties)gPlatformIDdetected,
-            AdapterTypeToContextInfo(adapterType),
-            (cl_context_properties)deviceWrapper->Device(),
-            0,
-        };
-
-        clContextWrapper ctx = clCreateContext(
-            &contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clCreateContext failed: %s\n", IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(
-            ctx, gDeviceIDdetected, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("Unable to create command queue: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        { // memory access write
-#if defined(_WIN32)
-            cl_dx9_surface_info_khr surfaceInfo;
-            surfaceInfo.resource =
-                *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
-            surfaceInfo.shared_handle = objectSharedHandle;
-#else
-            void *surfaceInfo = 0;
-            return TEST_NOT_IMPLEMENTED;
-#endif
-
-            std::vector<cl_mem> memObjList;
-            unsigned int planesNum = PlanesNum(surfaceFormat);
-            std::vector<clMemWrapper> planesList(planesNum);
-            for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-            {
-                planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                    ctx, CL_MEM_WRITE_ONLY, adapterType, &surfaceInfo, planeIdx,
-                    &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clCreateFromDX9MediaSurfaceKHR failed for "
-                              "WRITE_ONLY plane %i: %s\n",
-                              planeIdx, IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                    return result.Result();
-                }
-                memObjList.push_back(planesList[planeIdx]);
-            }
-
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-
-            size_t offset = 0;
-            size_t origin[3] = { 0, 0, 0 };
-            for (size_t i = 0; i < memObjList.size(); ++i)
-            {
-                size_t planeWidth = (i == 0) ? width : width / 2;
-                size_t planeHeight = (i == 0) ? height : height / 2;
-                size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-
-                error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE,
-                                            origin, regionPlane, 0, 0,
-                                            &bufferRef1[offset], 0, 0, 0);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueWriteImage failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                offset += planeWidth * planeHeight;
-            }
-
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-
-        std::vector<cl_uchar> bufferOut0(frameSize, 0);
-        if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut0, width, height))
-        {
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        if (!YUVCompare(surfaceFormat, bufferOut0, bufferRef1, width, height))
-        {
-            log_error("Media surface is different than expected\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        { // memory access read
-#if defined(_WIN32)
-            cl_dx9_surface_info_khr surfaceInfo;
-            surfaceInfo.resource =
-                *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
-            surfaceInfo.shared_handle = objectSharedHandle;
-#else
-            void *surfaceInfo = 0;
-            return TEST_NOT_IMPLEMENTED;
-#endif
-
-            std::vector<cl_mem> memObjList;
-            unsigned int planesNum = PlanesNum(surfaceFormat);
-            std::vector<clMemWrapper> planesList(planesNum);
-            for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-            {
-                planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                    ctx, CL_MEM_READ_ONLY, adapterType, &surfaceInfo, planeIdx,
-                    &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clCreateFromDX9MediaSurfaceKHR failed for "
-                              "READ_ONLY plane %i: %s\n",
-                              planeIdx, IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                    return result.Result();
-                }
-                memObjList.push_back(planesList[planeIdx]);
-            }
-
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-
-            std::vector<cl_uchar> out(frameSize, 0);
-            size_t offset = 0;
-            size_t origin[3] = { 0, 0, 0 };
-
-            for (size_t i = 0; i < memObjList.size(); ++i)
-            {
-                size_t planeWidth = (i == 0) ? width : width / 2;
-                size_t planeHeight = (i == 0) ? height : height / 2;
-                size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-
-                error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE,
-                                           origin, regionPlane, 0, 0,
-                                           &out[offset], 0, 0, 0);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueReadImage failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                offset += planeWidth * planeHeight;
-            }
-
-            if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height))
-            {
-                log_error("OCL image (READ_ONLY) is different then expected\n");
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-
-        std::vector<cl_uchar> bufferOut1(frameSize, 0);
-        if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut1, width, height))
-        {
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        if (!YUVCompare(surfaceFormat, bufferOut1, bufferRef1, width, height))
-        {
-            log_error("Media surface is different than expected\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        { // memory access read write
-#if defined(_WIN32)
-            cl_dx9_surface_info_khr surfaceInfo;
-            surfaceInfo.resource =
-                *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
-            surfaceInfo.shared_handle = objectSharedHandle;
-#else
-            void *surfaceInfo = 0;
-            return TEST_NOT_IMPLEMENTED;
-#endif
-
-            std::vector<cl_mem> memObjList;
-            unsigned int planesNum = PlanesNum(surfaceFormat);
-            std::vector<clMemWrapper> planesList(planesNum);
-            for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-            {
-                planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                    ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx,
-                    &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clCreateFromDX9MediaSurfaceKHR failed for "
-                              "READ_WRITE plane %i: %s\n",
-                              planeIdx, IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                    return result.Result();
-                }
-                memObjList.push_back(planesList[planeIdx]);
-            }
-
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-
-            { // read
-                std::vector<cl_uchar> out(frameSize, 0);
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-
-                    error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE,
-                                               origin, regionPlane, 0, 0,
-                                               &out[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    offset += planeWidth * planeHeight;
-                }
-
-                if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height))
-                {
-                    log_error(
-                        "OCL image (READ_WRITE) is different then expected\n");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-
-            { // write
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-
-                    error = clEnqueueWriteImage(
-                        cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane,
-                        0, 0, &bufferRef2[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueWriteImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-
-                    offset += planeWidth * planeHeight;
-                }
-            }
-
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-
-        std::vector<cl_uchar> bufferOut2(frameSize, 0);
-        if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut2, width, height))
-        {
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        if (!YUVCompare(surfaceFormat, bufferOut2, bufferRef2, width, height))
-        {
-            log_error("Media surface is different than expected\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-    }
-
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-
-    return result.Result();
-}
-
-int test_memory_access(cl_device_id deviceID, cl_context context,
-                       cl_command_queue queue, int num_elements)
-{
-    CResult result;
-
-#if defined(_WIN32)
-    // D3D9
-    if (memory_access(deviceID, context, queue, num_elements, 256, 256,
-                      CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (memory_access(deviceID, context, queue, num_elements, 512, 256,
-                      CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    // D3D9EX
-    if (memory_access(deviceID, context, queue, num_elements, 256, 512,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (memory_access(deviceID, context, queue, num_elements, 512, 256,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (memory_access(deviceID, context, queue, num_elements, 256, 256,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (memory_access(deviceID, context, queue, num_elements, 128, 128,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    // DXVA
-    if (memory_access(deviceID, context, queue, num_elements, 128, 128,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (memory_access(deviceID, context, queue, num_elements, 64, 64,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (memory_access(deviceID, context, queue, num_elements, 512, 512,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (memory_access(deviceID, context, queue, num_elements, 1024, 1024,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-#else
-    return TEST_NOT_IMPLEMENTED;
-#endif
-
-    return result.Result();
-}

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp
deleted file mode 100644
index 0e5d1d1..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp
+++ /dev/null

@@ -1,1319 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "harness/errorHelpers.h"
-#include "harness/imageHelpers.h"
-#include "harness/kernelHelpers.h"
-
-#include "utils.h"
-
-template <typename T>
-int other_data_types(cl_device_id deviceID, cl_context context,
-                     cl_command_queue queue, int num_elements,
-                     unsigned int iterationNum, unsigned int width,
-                     unsigned int height,
-                     cl_dx9_media_adapter_type_khr adapterType,
-                     TSurfaceFormat surfaceFormat,
-                     TSharedHandleType sharedHandle)
-{
-    const unsigned int FRAME_NUM = 2;
-    const float MAX_VALUE = 0.6f;
-    const std::string PROGRAM_STR =
-        "__kernel void TestFunction( read_only image2d_t imageIn, write_only "
-        "image2d_t imageOut, " NL "                            sampler_t "
-        "sampler, __global int *imageRes)" NL "{" NL
-        "  int w = get_global_id(0);" NL "  int h = get_global_id(1);" NL
-        "  int width = get_image_width(imageIn);" NL
-        "  int height = get_image_height(imageOut);" NL
-        "  float4 color0 = read_imagef(imageIn, sampler, (int2)(w,h)) - "
-        "0.2f;" NL "  float4 color1 = read_imagef(imageIn, sampler, "
-        "(float2)(w,h)) - 0.2f;" NL
-        "  color0 = (color0 == color1) ? color0: (float4)(0.5, 0.5, 0.5, "
-        "0.5);" NL "  write_imagef(imageOut, (int2)(w,h), color0);" NL
-        "  if(w == 0 && h == 0)" NL "  {" NL "    imageRes[0] = width;" NL
-        "    imageRes[1] = height;" NL "  }" NL "}";
-
-    CResult result;
-
-    cl_image_format format;
-    if (!SurfaceFormatToOCL(surfaceFormat, format))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-
-    while (deviceWrapper->AdapterNext())
-    {
-        cl_int error;
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result,
-                                             sharedHandle)))
-        {
-            return result.Result();
-        }
-
-        cl_context_properties contextProperties[] = {
-            CL_CONTEXT_PLATFORM,
-            (cl_context_properties)gPlatformIDdetected,
-            AdapterTypeToContextInfo(adapterType),
-            (cl_context_properties)deviceWrapper->Device(),
-            0,
-        };
-
-        clContextWrapper ctx = clCreateContext(
-            &contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clCreateContext failed: %s\n", IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(
-            ctx, gDeviceIDdetected, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("Unable to create command queue: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        if (!SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info(
-                "Skipping test case, image format is not supported by a device "
-                "(adapter type: %s, format: %s, shared handle: %s)\n",
-                adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
-            return result.Result();
-        }
-
-        if (!ImageFormatCheck(ctx, CL_MEM_OBJECT_IMAGE2D, format))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info("Skipping test case, image format is not supported by OCL "
-                     "(adapter type: %s, format: %s, shared handle: %s)\n",
-                     adapterStr.c_str(), formatStr.c_str(),
-                     sharedHandleStr.c_str());
-            return result.Result();
-        }
-
-        if (format.image_channel_data_type == CL_HALF_FLOAT)
-        {
-            if (DetectFloatToHalfRoundingMode(cmdQueue))
-            {
-                log_error("Unable to detect rounding mode\n");
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-        }
-
-        std::vector<std::vector<T>> bufferIn(FRAME_NUM);
-        std::vector<std::vector<T>> bufferExp(FRAME_NUM);
-        float step = MAX_VALUE / static_cast<float>(FRAME_NUM);
-        unsigned int planeNum = ChannelNum(surfaceFormat);
-        for (size_t i = 0; i < FRAME_NUM; ++i)
-        {
-            DataGenerate(surfaceFormat, format.image_channel_data_type,
-                         bufferIn[i], width, height, planeNum, step * i,
-                         step * (i + 1));
-            DataGenerate(surfaceFormat, format.image_channel_data_type,
-                         bufferExp[i], width, height, planeNum, step * i,
-                         step * (i + 1), 0.2f);
-        }
-
-        void *objectSrcHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surfaceSrc;
-        if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat,
-                                *deviceWrapper, surfaceSrc,
-                                (sharedHandle == SHARED_HANDLE_ENABLED) ? true
-                                                                        : false,
-                                &objectSrcHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-
-        void *objectDstHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surfaceDst;
-        if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat,
-                                *deviceWrapper, surfaceDst,
-                                (sharedHandle == SHARED_HANDLE_ENABLED) ? true
-                                                                        : false,
-                                &objectDstHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-
-#if defined(_WIN32)
-        cl_dx9_surface_info_khr surfaceSrcInfo;
-        CD3D9SurfaceWrapper *dx9SurfaceSrc =
-            (static_cast<CD3D9SurfaceWrapper *>(surfaceSrc.get()));
-        surfaceSrcInfo.resource = *dx9SurfaceSrc;
-        surfaceSrcInfo.shared_handle = objectSrcHandle;
-
-        cl_dx9_surface_info_khr surfaceDstInfo;
-        CD3D9SurfaceWrapper *dx9SurfaceDst =
-            (static_cast<CD3D9SurfaceWrapper *>(surfaceDst.get()));
-        surfaceDstInfo.resource = *dx9SurfaceDst;
-        surfaceDstInfo.shared_handle = objectDstHandle;
-#else
-        void *surfaceSrcInfo = 0;
-        void *surfaceDstInfo = 0;
-        return TEST_NOT_IMPLEMENTED;
-#endif
-
-        // create OCL shared object
-        clMemWrapper objectSrcShared = clCreateFromDX9MediaSurfaceKHR(
-            ctx, CL_MEM_READ_WRITE, adapterType, &surfaceSrcInfo, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        clMemWrapper objectDstShared = clCreateFromDX9MediaSurfaceKHR(
-            ctx, CL_MEM_READ_WRITE, adapterType, &surfaceDstInfo, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-
-        std::vector<cl_mem> memObjList;
-        memObjList.push_back(objectSrcShared);
-        memObjList.push_back(objectDstShared);
-
-        if (!GetMemObjInfo(objectSrcShared, adapterType, surfaceSrc,
-                           objectSrcHandle))
-        {
-            log_error("Invalid memory object info\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        if (!GetImageInfo(objectSrcShared, format, sizeof(T) * planeNum,
-                          width * sizeof(T) * planeNum, 0, width, height, 0, 0))
-        {
-            log_error("clGetImageInfo failed\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-
-        for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx)
-        {
-            // surface set
-#if defined(_WIN32)
-            D3DLOCKED_RECT rect;
-            if (FAILED((*dx9SurfaceSrc)->LockRect(&rect, NULL, 0)))
-            {
-                log_error("Surface lock failed\n");
-                result.ResultSub(CResult::TEST_ERROR);
-                return result.Result();
-            }
-
-            size_t pitch = rect.Pitch / sizeof(T);
-            size_t lineSize = width * planeNum * sizeof(T);
-            T *ptr = static_cast<T *>(rect.pBits);
-
-            for (size_t y = 0; y < height; ++y)
-                memcpy(ptr + y * pitch,
-                       &bufferIn[frameIdx % FRAME_NUM][y * width * planeNum],
-                       lineSize);
-
-            (*dx9SurfaceSrc)->UnlockRect();
-#else
-            void *surfaceInfo = 0;
-            return TEST_NOT_IMPLEMENTED;
-#endif
-
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireMediaSurfaceKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-
-            size_t origin[3] = { 0, 0, 0 };
-            size_t region[3] = { width, height, 1 };
-
-            { // read operation
-                std::vector<T> out(planeNum * width * height, 0);
-                error =
-                    clEnqueueReadImage(cmdQueue, objectSrcShared, CL_TRUE,
-                                       origin, region, 0, 0, &out[0], 0, 0, 0);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueReadImage failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                if (!DataCompare(surfaceFormat, format.image_channel_data_type,
-                                 out, bufferIn[frameIdx % FRAME_NUM], width,
-                                 height, planeNum))
-                {
-                    log_error("Frame idx: %i, OCL object is different then "
-                              "expected\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-
-            { // write operation
-                error = clEnqueueWriteImage(
-                    cmdQueue, objectSrcShared, CL_TRUE, origin, region, 0, 0,
-                    &bufferExp[frameIdx % FRAME_NUM][0], 0, 0, 0);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueWriteImage failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-
-            { // kernel operations
-                clSamplerWrapper sampler = clCreateSampler(
-                    ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to create sampler\n");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                size_t threads[2] = { width, height };
-                clProgramWrapper program;
-                clKernelWrapper kernel;
-                const char *progPtr = PROGRAM_STR.c_str();
-                if (create_single_kernel_helper(ctx, &program, &kernel, 1,
-                                                (const char **)&progPtr,
-                                                "TestFunction"))
-                    result.ResultSub(CResult::TEST_FAIL);
-
-                error = clSetKernelArg(kernel, 0, sizeof(objectSrcShared),
-                                       &(objectSrcShared));
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                error = clSetKernelArg(kernel, 1, sizeof(objectDstShared),
-                                       &(objectDstShared));
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                error = clSetKernelArg(kernel, 2, sizeof(sampler), &sampler);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                size_t bufferSize = sizeof(cl_int) * 2;
-                clMemWrapper imageRes = clCreateBuffer(
-                    ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clCreateBuffer failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                error = clSetKernelArg(kernel, 3, sizeof(imageRes), &imageRes);
-
-                size_t localThreads[2];
-                error = get_max_common_2D_work_group_size(ctx, kernel, threads,
-                                                          localThreads);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to get work group size to use");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                error =
-                    clEnqueueNDRangeKernel(cmdQueue, kernel, 2, NULL, threads,
-                                           localThreads, 0, NULL, NULL);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to execute test kernel");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                std::vector<cl_uint> imageResOut(2, 0);
-                error = clEnqueueReadBuffer(cmdQueue, imageRes, CL_TRUE, 0,
-                                            bufferSize, &imageResOut[0], 0,
-                                            NULL, NULL);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to read buffer");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                if (imageResOut[0] != width)
-                {
-                    log_error("Invalid width value, test = %i, expected = %i\n",
-                              imageResOut[0], width);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                if (imageResOut[1] != height)
-                {
-                    log_error(
-                        "Invalid height value, test = %i, expected = %i\n",
-                        imageResOut[1], height);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-
-            { // map operation
-                size_t mapOrigin[3] = { 0, 0, 0 };
-                size_t mapRegion[3] = { width, height, 1 };
-
-                std::vector<T> out(width * height * planeNum, 0);
-                size_t rowPitch = 0;
-                size_t slicePitch = 0;
-                void *mapPtr = clEnqueueMapImage(
-                    cmdQueue, objectDstShared, CL_TRUE,
-                    CL_MAP_READ | CL_MAP_WRITE, mapOrigin, mapRegion, &rowPitch,
-                    &slicePitch, 0, 0, 0, &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueMapImage failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                for (size_t y = 0; y < height; ++y)
-                    memcpy(&out[y * width * planeNum],
-                           static_cast<T *>(mapPtr) + y * rowPitch / sizeof(T),
-                           width * planeNum * sizeof(T));
-
-                if (!DataCompare(surfaceFormat, format.image_channel_data_type,
-                                 out, bufferIn[frameIdx % FRAME_NUM], width,
-                                 height, planeNum))
-                {
-                    log_error("Frame idx: %i, Mapped OCL object is different "
-                              "then expected\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-
-                for (size_t y = 0; y < height; ++y)
-                    memcpy(
-                        static_cast<T *>(mapPtr) + y * rowPitch / sizeof(T),
-                        &bufferExp[frameIdx % FRAME_NUM][y * width * planeNum],
-                        width * planeNum * sizeof(T));
-
-                error = clEnqueueUnmapMemObject(cmdQueue, objectDstShared,
-                                                mapPtr, 0, 0, 0);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueUnmapMemObject failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseMediaSurfaceKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-
-            std::vector<T> out(width * height * planeNum, 0);
-            // surface get
-#if defined(_WIN32)
-            if (FAILED((*dx9SurfaceDst)->LockRect(&rect, NULL, 0)))
-            {
-                log_error("Surface lock failed\n");
-                result.ResultSub(CResult::TEST_ERROR);
-                return result.Result();
-            }
-
-            pitch = rect.Pitch / sizeof(T);
-            lineSize = width * planeNum * sizeof(T);
-            ptr = static_cast<T *>(rect.pBits);
-            for (size_t y = 0; y < height; ++y)
-                memcpy(&out[y * width * planeNum], ptr + y * pitch, lineSize);
-
-            (*dx9SurfaceDst)->UnlockRect();
-#else
-            return TEST_NOT_IMPLEMENTED;
-#endif
-
-            if (!DataCompare(surfaceFormat, format.image_channel_data_type, out,
-                             bufferExp[frameIdx % FRAME_NUM], width, height,
-                             planeNum))
-            {
-                log_error(
-                    "Frame idx: %i, media object is different then expected\n",
-                    frameIdx);
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-    }
-
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-
-    return result.Result();
-}
-
-int test_other_data_types(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements)
-{
-    CResult result;
-
-#if defined(_WIN32)
-    // D3D9
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   64, 256, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  256, 128, CL_ADAPTER_D3D9_KHR,
-                                  SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    512, 256, CL_ADAPTER_D3D9_KHR,
-                                    SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, L16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 512, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, A8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   1024, 32, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, L8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_float>(
-            deviceID, context, queue, num_elements, 10, 32, 1024,
-            CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, G32R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(
-            deviceID, context, queue, num_elements, 10, 64, 64,
-            CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, G16R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(
-            deviceID, context, queue, num_elements, 10, 256, 256,
-            CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, G16R16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 128, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, A8L8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   128, 512, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_A32B32G32R32F,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9, A32B32G32R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  128, 128, CL_ADAPTER_D3D9_KHR,
-                                  SURFACE_FORMAT_A16B16G16R16F,
-                                  SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9, A16B16G16R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    64, 128, CL_ADAPTER_D3D9_KHR,
-                                    SURFACE_FORMAT_A16B16G16R16,
-                                    SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9, A16B16G16R16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   128, 64, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_A8B8G8R8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, A8B8G8R8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   16, 512, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_X8B8G8R8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, X8B8G8R8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 16, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_A8R8G8B8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, A8R8G8B8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 256, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_X8R8G8B8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, X8R8G8B8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    // D3D9EX
-
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   64, 256, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   64, 256, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_R32F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, R32F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  256, 128, CL_ADAPTER_D3D9EX_KHR,
-                                  SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  256, 128, CL_ADAPTER_D3D9EX_KHR,
-                                  SURFACE_FORMAT_R16F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, R16F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    512, 256, CL_ADAPTER_D3D9EX_KHR,
-                                    SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, L16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    512, 256, CL_ADAPTER_D3D9EX_KHR,
-                                    SURFACE_FORMAT_L16, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, L16, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 512, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, A8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 512, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, A8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   1024, 32, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, L8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   1024, 32, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_L8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, L8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   32, 1024, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_G32R32F,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, G32R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   32, 1024, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_G32R32F,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, G32R32F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  64, 64, CL_ADAPTER_D3D9EX_KHR,
-                                  SURFACE_FORMAT_G16R16F,
-                                  SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, G16R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  64, 64, CL_ADAPTER_D3D9EX_KHR,
-                                  SURFACE_FORMAT_G16R16F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, G16R16F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    256, 256, CL_ADAPTER_D3D9EX_KHR,
-                                    SURFACE_FORMAT_G16R16,
-                                    SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, G16R16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(
-            deviceID, context, queue, num_elements, 10, 256, 256,
-            CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_G16R16, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, G16R16, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 128, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, A8L8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 128, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8L8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, A8L8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   128, 512, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A32B32G32R32F,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A32B32G32R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   128, 512, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A32B32G32R32F,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A32B32G32R32F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  128, 128, CL_ADAPTER_D3D9EX_KHR,
-                                  SURFACE_FORMAT_A16B16G16R16F,
-                                  SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A16B16G16R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  128, 128, CL_ADAPTER_D3D9EX_KHR,
-                                  SURFACE_FORMAT_A16B16G16R16F,
-                                  SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A16B16G16R16F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    64, 128, CL_ADAPTER_D3D9EX_KHR,
-                                    SURFACE_FORMAT_A16B16G16R16,
-                                    SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A16B16G16R16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    64, 128, CL_ADAPTER_D3D9EX_KHR,
-                                    SURFACE_FORMAT_A16B16G16R16,
-                                    SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A16B16G16R16, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   128, 64, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8B8G8R8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A8B8G8R8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   128, 64, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8B8G8R8,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, A8B8G8R8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   16, 512, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_X8B8G8R8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, X8B8G8R8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   16, 512, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_X8B8G8R8,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, X8B8G8R8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 16, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8R8G8B8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A8R8G8B8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 16, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8R8G8B8,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, A8R8G8B8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 256, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_X8R8G8B8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, X8R8G8B8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 256, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_X8R8G8B8,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, X8R8G8B8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    // DXVA
-
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   64, 256, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   64, 256, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_R32F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, R32F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  256, 128, CL_ADAPTER_DXVA_KHR,
-                                  SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  256, 128, CL_ADAPTER_DXVA_KHR,
-                                  SURFACE_FORMAT_R16F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, R16F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    512, 256, CL_ADAPTER_DXVA_KHR,
-                                    SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, L16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    512, 256, CL_ADAPTER_DXVA_KHR,
-                                    SURFACE_FORMAT_L16, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, L16, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 512, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 512, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   1024, 32, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, L8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   1024, 32, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_L8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, L8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_float>(
-            deviceID, context, queue, num_elements, 10, 32, 1024,
-            CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, G32R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_float>(
-            deviceID, context, queue, num_elements, 10, 32, 1024,
-            CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_G32R32F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, G32R32F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(
-            deviceID, context, queue, num_elements, 10, 64, 64,
-            CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, G16R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  64, 64, CL_ADAPTER_DXVA_KHR,
-                                  SURFACE_FORMAT_G16R16F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, G16R16F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(
-            deviceID, context, queue, num_elements, 10, 256, 256,
-            CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, G16R16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(
-            deviceID, context, queue, num_elements, 10, 256, 256,
-            CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_G16R16, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, G16R16, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 128, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8L8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 128, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A8L8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8L8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   128, 512, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A32B32G32R32F,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (DXVA, A32B32G32R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   128, 512, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A32B32G32R32F,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (DXVA, A32B32G32R32F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  128, 128, CL_ADAPTER_DXVA_KHR,
-                                  SURFACE_FORMAT_A16B16G16R16F,
-                                  SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (DXVA, A16B16G16R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  128, 128, CL_ADAPTER_DXVA_KHR,
-                                  SURFACE_FORMAT_A16B16G16R16F,
-                                  SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (DXVA, A16B16G16R16F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    64, 128, CL_ADAPTER_DXVA_KHR,
-                                    SURFACE_FORMAT_A16B16G16R16,
-                                    SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (DXVA, A16B16G16R16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    64, 128, CL_ADAPTER_DXVA_KHR,
-                                    SURFACE_FORMAT_A16B16G16R16,
-                                    SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A16B16G16R16, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   128, 64, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A8B8G8R8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8B8G8R8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(
-            deviceID, context, queue, num_elements, 10, 128, 64,
-            CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8B8G8R8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   16, 512, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_X8B8G8R8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, X8B8G8R8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(
-            deviceID, context, queue, num_elements, 10, 16, 512,
-            CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, X8B8G8R8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 16, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A8R8G8B8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8R8G8B8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(
-            deviceID, context, queue, num_elements, 10, 512, 16,
-            CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8R8G8B8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 256, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_X8R8G8B8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, X8R8G8B8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-    if (other_data_types<cl_uchar>(
-            deviceID, context, queue, num_elements, 10, 256, 256,
-            CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, X8R8G8B8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-
-#else
-    return TEST_NOT_IMPLEMENTED;
-#endif
-
-    return result.Result();
-}

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.cpp
deleted file mode 100644
index 87eb13c..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.cpp
+++ /dev/null

@@ -1,1664 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "utils.h"
-
-#include "harness/errorHelpers.h"
-#include "harness/imageHelpers.h"
-#include "harness/rounding_mode.h"
-
-#include <math.h>
-
-#include <CL/cl_half.h>
-
-static RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode;
-
-
-CResult::CResult(): _result(TEST_PASS), _resultLast(TEST_NORESULT) {}
-
-CResult::~CResult() {}
-
-CResult::TTestResult CResult::ResultLast() const { return _resultLast; }
-
-int CResult::Result() const
-{
-    switch (_result)
-    {
-        case TEST_NORESULT:
-        case TEST_NOTSUPPORTED:
-        case TEST_PASS: return 0; break;
-        case TEST_FAIL: return 1; break;
-        case TEST_ERROR: return 2; break;
-        default: return -1; break;
-    }
-}
-
-void CResult::ResultSub(TTestResult result)
-{
-    _resultLast = result;
-    if (static_cast<int>(result) > static_cast<int>(_result)) _result = result;
-}
-
-void FunctionContextCreateToString(TContextFuncType contextCreateFunction,
-                                   std::string &contextFunction)
-{
-    switch (contextCreateFunction)
-    {
-        case CONTEXT_CREATE_DEFAULT: contextFunction = "CreateContext"; break;
-        case CONTEXT_CREATE_FROM_TYPE:
-            contextFunction = "CreateContextFromType";
-            break;
-        default:
-            contextFunction = "Unknown";
-            log_error("FunctionContextCreateToString(): Unknown create "
-                      "function enum!");
-            break;
-    }
-}
-
-void AdapterToString(cl_dx9_media_adapter_type_khr adapterType,
-                     std::string &adapter)
-{
-    switch (adapterType)
-    {
-        case CL_ADAPTER_D3D9_KHR: adapter = "D3D9"; break;
-        case CL_ADAPTER_D3D9EX_KHR: adapter = "D3D9EX"; break;
-        case CL_ADAPTER_DXVA_KHR: adapter = "DXVA"; break;
-        default:
-            adapter = "Unknown";
-            log_error("AdapterToString(): Unknown adapter type!");
-            break;
-    }
-}
-
-cl_context_info
-AdapterTypeToContextInfo(cl_dx9_media_adapter_type_khr adapterType)
-{
-    switch (adapterType)
-    {
-        case CL_ADAPTER_D3D9_KHR: return CL_CONTEXT_ADAPTER_D3D9_KHR; break;
-        case CL_ADAPTER_D3D9EX_KHR: return CL_CONTEXT_ADAPTER_D3D9EX_KHR; break;
-        case CL_ADAPTER_DXVA_KHR: return CL_CONTEXT_ADAPTER_DXVA_KHR; break;
-        default:
-            log_error("AdapterTypeToContextInfo(): Unknown adapter type!");
-            return 0;
-            break;
-    }
-}
-
-void YUVGenerateNV12(std::vector<cl_uchar> &yuv, unsigned int width,
-                     unsigned int height, cl_uchar valueMin, cl_uchar valueMax,
-                     double valueAdd)
-{
-    yuv.clear();
-    yuv.resize(width * height * 3 / 2, 0);
-
-    double min = static_cast<double>(valueMin);
-    double max = static_cast<double>(valueMax);
-    double range = 255;
-    double add = static_cast<double>(valueAdd * range);
-    double stepX = (max - min) / static_cast<double>(width);
-    double stepY = (max - min) / static_cast<double>(height);
-
-    // generate Y plane
-    for (unsigned int i = 0; i < height; ++i)
-    {
-        unsigned int offset = i * width;
-        double valueYPlane0 = static_cast<double>(stepY * i);
-        for (unsigned int j = 0; j < width; ++j)
-        {
-            double valueXPlane0 = static_cast<double>(stepX * j);
-            yuv.at(offset + j) = static_cast<cl_uchar>(
-                min + valueXPlane0 / 2 + valueYPlane0 / 2 + add);
-        }
-    }
-
-    // generate UV planes
-    for (unsigned int i = 0; i < height / 2; ++i)
-    {
-        unsigned int offset = width * height + i * width;
-        double valueYPlane1 = static_cast<double>(stepY * i);
-        double valueYPlane2 = static_cast<double>(stepY * (height / 2 + i));
-        for (unsigned int j = 0; j < width / 2; ++j)
-        {
-            double valueXPlane1 = static_cast<double>(stepX * j);
-            double valueXPlane2 = static_cast<double>(stepX * (width / 2 + j));
-
-            yuv.at(offset + j * 2) = static_cast<cl_uchar>(
-                min + valueXPlane1 / 2 + valueYPlane1 / 2 + add);
-            yuv.at(offset + j * 2 + 1) = static_cast<cl_uchar>(
-                min + valueXPlane2 / 2 + valueYPlane2 / 2 + add);
-        }
-    }
-}
-
-void YUVGenerateYV12(std::vector<cl_uchar> &yuv, unsigned int width,
-                     unsigned int height, cl_uchar valueMin, cl_uchar valueMax,
-                     double valueAdd /*= 0.0*/)
-{
-    yuv.clear();
-    yuv.resize(width * height * 3 / 2, 0);
-
-    double min = static_cast<double>(valueMin);
-    double max = static_cast<double>(valueMax);
-    double range = 255;
-    double add = static_cast<double>(valueAdd * range);
-    double stepX = (max - min) / static_cast<double>(width);
-    double stepY = (max - min) / static_cast<double>(height);
-
-    unsigned offset = 0;
-
-    // generate Y plane
-    for (unsigned int i = 0; i < height; ++i)
-    {
-        unsigned int plane0Offset = offset + i * width;
-        double valueYPlane0 = static_cast<double>(stepY * i);
-        for (unsigned int j = 0; j < width; ++j)
-        {
-            double valueXPlane0 = static_cast<double>(stepX * j);
-            yuv.at(plane0Offset + j) = static_cast<cl_uchar>(
-                min + valueXPlane0 / 2 + valueYPlane0 / 2 + add);
-        }
-    }
-
-    // generate V plane
-    offset += width * height;
-    for (unsigned int i = 0; i < height / 2; ++i)
-    {
-        unsigned int plane1Offset = offset + i * width / 2;
-        double valueYPlane1 = static_cast<double>(stepY * i);
-        for (unsigned int j = 0; j < width / 2; ++j)
-        {
-            double valueXPlane1 = static_cast<double>(stepX * j);
-            yuv.at(plane1Offset + j) = static_cast<cl_uchar>(
-                min + valueXPlane1 / 2 + valueYPlane1 / 2 + add);
-        }
-    }
-
-    // generate U plane
-    offset += width * height / 4;
-    for (unsigned int i = 0; i < height / 2; ++i)
-    {
-        unsigned int plane2Offset = offset + i * width / 2;
-        double valueYPlane2 = static_cast<double>(stepY * (height / 2 + i));
-        for (unsigned int j = 0; j < width / 2; ++j)
-        {
-            double valueXPlane2 = static_cast<double>(stepX * j);
-            yuv.at(plane2Offset + j) = static_cast<cl_uchar>(
-                min + valueXPlane2 / 2 + valueYPlane2 / 2 + add);
-        }
-    }
-}
-
-
-bool YUVGenerate(TSurfaceFormat surfaceFormat, std::vector<cl_uchar> &yuv,
-                 unsigned int width, unsigned int height, cl_uchar valueMin,
-                 cl_uchar valueMax, double valueAdd /*= 0.0*/)
-{
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_NV12:
-            YUVGenerateNV12(yuv, width, height, valueMin, valueMax, valueAdd);
-            break;
-        case SURFACE_FORMAT_YV12:
-            YUVGenerateYV12(yuv, width, height, valueMin, valueMax, valueAdd);
-            break;
-        default:
-            log_error("YUVGenerate(): Invalid surface type\n");
-            return false;
-            break;
-    }
-
-    return true;
-}
-
-bool YUVSurfaceSetNV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       const std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height)
-{
-#if defined(_WIN32)
-    CD3D9SurfaceWrapper *d3dSurface =
-        static_cast<CD3D9SurfaceWrapper *>(surface.get());
-    D3DLOCKED_RECT rect;
-    if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
-    {
-        log_error("YUVSurfaceSetNV12(): Surface lock failed\n");
-        return false;
-    }
-
-    size_t pitch = rect.Pitch / sizeof(cl_uchar);
-    size_t lineSize = width * sizeof(cl_uchar);
-    cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
-    for (size_t y = 0; y < height; ++y)
-        memcpy(ptr + y * pitch, &yuv.at(y * width), lineSize);
-
-    for (size_t y = 0; y < height / 2; ++y)
-        memcpy(ptr + height * pitch + y * pitch,
-               &yuv.at(width * height + y * width), lineSize);
-
-    (*d3dSurface)->UnlockRect();
-
-    return true;
-
-#else
-    return false;
-#endif
-}
-
-bool YUVSurfaceSetYV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       const std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height)
-{
-#if defined(_WIN32)
-    CD3D9SurfaceWrapper *d3dSurface =
-        static_cast<CD3D9SurfaceWrapper *>(surface.get());
-    D3DLOCKED_RECT rect;
-    if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
-    {
-        log_error("YUVSurfaceSetYV12(): Surface lock failed!\n");
-        return false;
-    }
-
-    size_t pitch = rect.Pitch / sizeof(cl_uchar);
-    size_t pitchHalf = pitch / 2;
-    size_t lineSize = width * sizeof(cl_uchar);
-    size_t lineHalfSize = lineSize / 2;
-    size_t surfaceOffset = 0;
-    size_t yuvOffset = 0;
-    cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
-
-    for (size_t y = 0; y < height; ++y)
-        memcpy(ptr + surfaceOffset + y * pitch, &yuv.at(yuvOffset + y * width),
-               lineSize);
-
-    surfaceOffset += height * pitch;
-    yuvOffset += width * height;
-    for (size_t y = 0; y < height / 2; ++y)
-        memcpy(ptr + surfaceOffset + y * pitchHalf,
-               &yuv.at(yuvOffset + y * lineHalfSize), lineHalfSize);
-
-    surfaceOffset += pitchHalf * height / 2;
-    yuvOffset += width * height / 4;
-    for (size_t y = 0; y < height / 2; ++y)
-        memcpy(ptr + surfaceOffset + y * pitchHalf,
-               &yuv.at(yuvOffset + y * lineHalfSize), lineHalfSize);
-
-    (*d3dSurface)->UnlockRect();
-
-    return true;
-
-#else
-    return false;
-#endif
-}
-
-bool YUVSurfaceSet(TSurfaceFormat surfaceFormat,
-                   std::auto_ptr<CSurfaceWrapper> &surface,
-                   const std::vector<cl_uchar> &yuv, unsigned int width,
-                   unsigned int height)
-{
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_NV12:
-            if (!YUVSurfaceSetNV12(surface, yuv, width, height)) return false;
-            break;
-        case SURFACE_FORMAT_YV12:
-            if (!YUVSurfaceSetYV12(surface, yuv, width, height)) return false;
-            break;
-        default:
-            log_error("YUVSurfaceSet(): Invalid surface type!\n");
-            return false;
-            break;
-    }
-
-    return true;
-}
-
-bool YUVSurfaceGetNV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height)
-{
-#if defined(_WIN32)
-    CD3D9SurfaceWrapper *d3dSurface =
-        static_cast<CD3D9SurfaceWrapper *>(surface.get());
-    D3DLOCKED_RECT rect;
-    if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
-    {
-        log_error("YUVSurfaceGetNV12(): Surface lock failed!\n");
-        return false;
-    }
-
-    size_t pitch = rect.Pitch / sizeof(cl_uchar);
-    size_t lineSize = width * sizeof(cl_uchar);
-    cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
-    size_t yuvOffset = 0;
-    size_t surfaceOffset = 0;
-    for (size_t y = 0; y < height; ++y)
-        memcpy(&yuv.at(yuvOffset + y * width), ptr + y * pitch, lineSize);
-
-    yuvOffset += width * height;
-    surfaceOffset += pitch * height;
-    for (size_t y = 0; y < height / 2; ++y)
-        memcpy(&yuv.at(yuvOffset + y * width), ptr + surfaceOffset + y * pitch,
-               lineSize);
-
-    (*d3dSurface)->UnlockRect();
-
-    return true;
-
-#else
-    return false;
-#endif
-}
-
-bool YUVSurfaceGetYV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height)
-{
-#if defined(_WIN32)
-    CD3D9SurfaceWrapper *d3dSurface =
-        static_cast<CD3D9SurfaceWrapper *>(surface.get());
-    D3DLOCKED_RECT rect;
-    if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
-    {
-        log_error("YUVSurfaceGetYV12(): Surface lock failed!\n");
-        return false;
-    }
-
-    size_t pitch = rect.Pitch / sizeof(cl_uchar);
-    size_t pitchHalf = pitch / 2;
-    size_t lineSize = width * sizeof(cl_uchar);
-    size_t lineHalfSize = lineSize / 2;
-    size_t surfaceOffset = 0;
-    size_t yuvOffset = 0;
-    cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
-
-    for (size_t y = 0; y < height; ++y)
-        memcpy(&yuv.at(yuvOffset + y * width), ptr + surfaceOffset + y * pitch,
-               lineSize);
-
-    surfaceOffset += pitch * height;
-    yuvOffset += width * height;
-    for (size_t y = 0; y < height / 2; ++y)
-        memcpy(&yuv.at(yuvOffset + y * lineHalfSize),
-               ptr + surfaceOffset + y * pitchHalf, lineHalfSize);
-
-    surfaceOffset += pitchHalf * height / 2;
-    yuvOffset += width * height / 4;
-    for (size_t y = 0; y < height / 2; ++y)
-        memcpy(&yuv.at(yuvOffset + y * lineHalfSize),
-               ptr + surfaceOffset + y * pitchHalf, lineHalfSize);
-
-    (*d3dSurface)->UnlockRect();
-
-    return true;
-
-#else
-    return false;
-#endif
-}
-
-bool YUVSurfaceGet(TSurfaceFormat surfaceFormat,
-                   std::auto_ptr<CSurfaceWrapper> &surface,
-                   std::vector<cl_uchar> &yuv, unsigned int width,
-                   unsigned int height)
-{
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_NV12:
-            if (!YUVSurfaceGetNV12(surface, yuv, width, height)) return false;
-            break;
-        case SURFACE_FORMAT_YV12:
-            if (!YUVSurfaceGetYV12(surface, yuv, width, height)) return false;
-            break;
-        default:
-            log_error("YUVSurfaceGet(): Invalid surface type!\n");
-            return false;
-            break;
-    }
-
-    return true;
-}
-
-bool YUVCompareNV12(const std::vector<cl_uchar> &yuvTest,
-                    const std::vector<cl_uchar> &yuvRef, unsigned int width,
-                    unsigned int height)
-{
-    // plane 0 verification
-    size_t offset = 0;
-    for (size_t y = 0; y < height; ++y)
-    {
-        size_t plane0Offset = offset + width * y;
-        for (size_t x = 0; x < width; ++x)
-        {
-            if (yuvTest[plane0Offset + x] != yuvRef[plane0Offset + x])
-            {
-                log_error("Plane 0 (Y) is different than expected, reference "
-                          "value: %i, test value: %i, x: %i, y: %i\n",
-                          yuvRef[plane0Offset + x], yuvTest[plane0Offset + x],
-                          x, y);
-                return false;
-            }
-        }
-    }
-
-    // plane 1 and 2 verification
-    offset += width * height;
-    for (size_t y = 0; y < height / 2; ++y)
-    {
-        size_t plane12Offset = offset + width * y;
-        for (size_t x = 0; x < width / 2; ++x)
-        {
-            if (yuvTest.at(plane12Offset + 2 * x)
-                != yuvRef.at(plane12Offset + 2 * x))
-            {
-                log_error("Plane 1 (U) is different than expected, reference "
-                          "value: %i, test value: %i, x: %i, y: %i\n",
-                          yuvRef[plane12Offset + 2 * x],
-                          yuvTest[plane12Offset + 2 * x], x, y);
-                return false;
-            }
-
-            if (yuvTest.at(plane12Offset + 2 * x + 1)
-                != yuvRef.at(plane12Offset + 2 * x + 1))
-            {
-                log_error("Plane 2 (V) is different than expected, reference "
-                          "value: %i, test value: %i, x: %i, y: %i\n",
-                          yuvRef[plane12Offset + 2 * x + 1],
-                          yuvTest[plane12Offset + 2 * x + 1], x, y);
-                return false;
-            }
-        }
-    }
-
-    return true;
-}
-
-bool YUVCompareYV12(const std::vector<cl_uchar> &yuvTest,
-                    const std::vector<cl_uchar> &yuvRef, unsigned int width,
-                    unsigned int height)
-{
-    // plane 0 verification
-    size_t offset = 0;
-    for (size_t y = 0; y < height; ++y)
-    {
-        size_t plane0Offset = width * y;
-        for (size_t x = 0; x < width; ++x)
-        {
-            if (yuvTest.at(plane0Offset + x) != yuvRef.at(plane0Offset + x))
-            {
-                log_error("Plane 0 (Y) is different than expected, reference "
-                          "value: %i, test value: %i, x: %i, y: %i\n",
-                          yuvRef[plane0Offset + x], yuvTest[plane0Offset + x],
-                          x, y);
-                return false;
-            }
-        }
-    }
-
-    // plane 1 verification
-    offset += width * height;
-    for (size_t y = 0; y < height / 2; ++y)
-    {
-        size_t plane1Offset = offset + width * y / 2;
-        for (size_t x = 0; x < width / 2; ++x)
-        {
-            if (yuvTest.at(plane1Offset + x) != yuvRef.at(plane1Offset + x))
-            {
-                log_error("Plane 1 (V) is different than expected, reference "
-                          "value: %i, test value: %i, x: %i, y: %i\n",
-                          yuvRef[plane1Offset + x], yuvTest[plane1Offset + x],
-                          x, y);
-                return false;
-            }
-        }
-    }
-
-    // plane 2 verification
-    offset += width * height / 4;
-    for (size_t y = 0; y < height / 2; ++y)
-    {
-        size_t plane2Offset = offset + width * y / 2;
-        for (size_t x = 0; x < width / 2; ++x)
-        {
-            if (yuvTest.at(plane2Offset + x) != yuvRef.at(plane2Offset + x))
-            {
-                log_error("Plane 2 (U) is different than expected, reference "
-                          "value: %i, test value: %i, x: %i, y: %i\n",
-                          yuvRef[plane2Offset + x], yuvTest[plane2Offset + x],
-                          x, y);
-                return false;
-            }
-        }
-    }
-
-    return true;
-}
-
-bool YUVCompare(TSurfaceFormat surfaceFormat,
-                const std::vector<cl_uchar> &yuvTest,
-                const std::vector<cl_uchar> &yuvRef, unsigned int width,
-                unsigned int height)
-{
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_NV12:
-            if (!YUVCompareNV12(yuvTest, yuvRef, width, height))
-            {
-                log_error("OCL object is different than expected!\n");
-                return false;
-            }
-            break;
-        case SURFACE_FORMAT_YV12:
-            if (!YUVCompareYV12(yuvTest, yuvRef, width, height))
-            {
-                log_error("OCL object is different than expected!\n");
-                return false;
-            }
-            break;
-        default:
-            log_error("YUVCompare(): Invalid surface type!\n");
-            return false;
-            break;
-    }
-
-    return true;
-}
-
-void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                  std::vector<float> &data, unsigned int width,
-                  unsigned int height, unsigned int channelNum,
-                  float cmin /*= 0.0f*/, float cmax /*= 1.0f*/,
-                  float add /*= 0.0f*/)
-{
-    data.clear();
-    data.reserve(width * height * channelNum);
-
-    double valueMin = static_cast<double>(cmin);
-    double valueMax = static_cast<double>(cmax);
-    double stepX = (valueMax - valueMin) / static_cast<double>(width);
-    double stepY = (valueMax - valueMin) / static_cast<double>(height);
-    double valueAdd = static_cast<double>(add);
-    for (unsigned int i = 0; i < height; ++i)
-    {
-        double valueY = static_cast<double>(stepY * i);
-        for (unsigned int j = 0; j < width; ++j)
-        {
-            double valueX = static_cast<double>(stepX * j);
-            switch (channelNum)
-            {
-                case 1:
-                    data.push_back(static_cast<float>(valueMin + valueX / 2
-                                                      + valueY / 2 + valueAdd));
-                    break;
-                case 2:
-                    data.push_back(
-                        static_cast<float>(valueMin + valueX + valueAdd));
-                    data.push_back(
-                        static_cast<float>(valueMin + valueY + valueAdd));
-                    break;
-                case 4:
-                    data.push_back(
-                        static_cast<float>(valueMin + valueX + valueAdd));
-                    data.push_back(
-                        static_cast<float>(valueMin + valueY + valueAdd));
-                    data.push_back(
-                        static_cast<float>(valueMin + valueX / 2 + valueAdd));
-                    data.push_back(
-                        static_cast<float>(valueMin + valueY / 2 + valueAdd));
-                    break;
-                default:
-                    log_error("DataGenerate(): invalid channel number!");
-                    return;
-                    break;
-            }
-        }
-    }
-}
-
-void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                  std::vector<cl_half> &data, unsigned int width,
-                  unsigned int height, unsigned int channelNum,
-                  float cmin /*= 0.0f*/, float cmax /*= 1.0f*/,
-                  float add /*= 0.0f*/)
-{
-    data.clear();
-    data.reserve(width * height * channelNum);
-
-    double valueMin = static_cast<double>(cmin);
-    double valueMax = static_cast<double>(cmax);
-    double stepX = (valueMax - valueMin) / static_cast<double>(width);
-    double stepY = (valueMax - valueMin) / static_cast<double>(height);
-
-    switch (type)
-    {
-        case CL_HALF_FLOAT: {
-            double valueAdd = static_cast<double>(add);
-
-            for (unsigned int i = 0; i < height; ++i)
-            {
-                double valueY = static_cast<double>(stepY * i);
-                for (unsigned int j = 0; j < width; ++j)
-                {
-                    double valueX = static_cast<double>(stepX * j);
-                    switch (channelNum)
-                    {
-                        case 1:
-                            data.push_back(convert_float_to_half(
-                                static_cast<float>(valueMin + valueX / 2
-                                                   + valueY / 2 + valueAdd)));
-                            break;
-                        case 2:
-                            data.push_back(
-                                convert_float_to_half(static_cast<float>(
-                                    valueMin + valueX + valueAdd)));
-                            data.push_back(
-                                convert_float_to_half(static_cast<float>(
-                                    valueMin + valueY + valueAdd)));
-                            break;
-                        case 4:
-                            data.push_back(
-                                convert_float_to_half(static_cast<float>(
-                                    valueMin + valueX + valueAdd)));
-                            data.push_back(
-                                convert_float_to_half(static_cast<float>(
-                                    valueMin + valueY + valueAdd)));
-                            data.push_back(
-                                convert_float_to_half(static_cast<float>(
-                                    valueMin + valueX / 2 + valueAdd)));
-                            data.push_back(
-                                convert_float_to_half(static_cast<float>(
-                                    valueMin + valueY / 2 + valueAdd)));
-                            break;
-                        default:
-                            log_error(
-                                "DataGenerate(): invalid channel number!");
-                            return;
-                            break;
-                    }
-                }
-            }
-            break;
-        }
-        case CL_UNORM_INT16: {
-            double range = 65535;
-            double valueAdd = static_cast<double>(add * range);
-
-            for (unsigned int i = 0; i < height; ++i)
-            {
-                double valueY = static_cast<double>(stepY * i * range);
-                for (unsigned int j = 0; j < width; ++j)
-                {
-                    double valueX = static_cast<double>(stepX * j * range);
-                    switch (channelNum)
-                    {
-                        case 1:
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueX / 2 + valueY / 2 + valueAdd));
-                            break;
-                        case 2:
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueX + valueAdd));
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueY + valueAdd));
-                            break;
-                        case 4:
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueX + valueAdd));
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueY + valueAdd));
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueX / 2 + valueAdd));
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueY / 2 + valueAdd));
-                            break;
-                        default:
-                            log_error(
-                                "DataGenerate(): invalid channel number!");
-                            return;
-                            break;
-                    }
-                }
-            }
-        }
-        break;
-        default:
-            log_error("DataGenerate(): unknown data type!");
-            return;
-            break;
-    }
-}
-
-void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                  std::vector<cl_uchar> &data, unsigned int width,
-                  unsigned int height, unsigned int channelNum,
-                  float cmin /*= 0.0f*/, float cmax /*= 1.0f*/,
-                  float add /*= 0.0f*/)
-{
-    data.clear();
-    data.reserve(width * height * channelNum);
-
-    double valueMin = static_cast<double>(cmin);
-    double valueMax = static_cast<double>(cmax);
-    double stepX = (valueMax - valueMin) / static_cast<double>(width);
-    double stepY = (valueMax - valueMin) / static_cast<double>(height);
-
-    double range = 255;
-    double valueAdd = static_cast<double>(add * range);
-
-    for (unsigned int i = 0; i < height; ++i)
-    {
-        double valueY = static_cast<double>(stepY * i * range);
-        for (unsigned int j = 0; j < width; ++j)
-        {
-            double valueX = static_cast<double>(stepX * j * range);
-            switch (channelNum)
-            {
-                case 1:
-                    data.push_back(static_cast<cl_uchar>(
-                        valueMin + valueX / 2 + valueY / 2 + valueAdd));
-                    break;
-                case 2:
-                    data.push_back(
-                        static_cast<cl_uchar>(valueMin + valueX + valueAdd));
-                    data.push_back(
-                        static_cast<cl_uchar>(valueMin + valueY + valueAdd));
-                    break;
-                case 4:
-                    data.push_back(
-                        static_cast<cl_uchar>(valueMin + valueX + valueAdd));
-                    data.push_back(
-                        static_cast<cl_uchar>(valueMin + valueY + valueAdd));
-                    data.push_back(static_cast<cl_uchar>(valueMin + valueX / 2
-                                                         + valueAdd));
-                    if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8)
-                        data.push_back(static_cast<cl_uchar>(0xff));
-                    else
-                        data.push_back(static_cast<cl_uchar>(
-                            valueMin + valueY / 2 + valueAdd));
-                    break;
-                default:
-                    log_error("DataGenerate(): invalid channel number!");
-                    return;
-                    break;
-            }
-        }
-    }
-}
-
-bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                 const std::vector<float> &dataTest,
-                 const std::vector<float> &dataExp, unsigned int width,
-                 unsigned int height, unsigned int channelNum)
-{
-    float epsilon = 0.000001f;
-    for (unsigned int i = 0; i < height; ++i)
-    {
-        unsigned int offset = i * width * channelNum;
-        for (unsigned int j = 0; j < width; ++j)
-        {
-            for (unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
-            {
-                if (abs(dataTest.at(offset + j * channelNum + planeIdx)
-                        - dataExp.at(offset + j * channelNum + planeIdx))
-                    > epsilon)
-                {
-                    log_error(
-                        "Tested image is different than reference (x,y,plane) "
-                        "= (%i,%i,%i), test value = %f, expected value = %f\n",
-                        j, i, planeIdx,
-                        dataTest[offset + j * channelNum + planeIdx],
-                        dataExp[offset + j * channelNum + planeIdx]);
-                    return false;
-                }
-            }
-        }
-    }
-
-    return true;
-}
-
-bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                 const std::vector<cl_half> &dataTest,
-                 const std::vector<cl_half> &dataExp, unsigned int width,
-                 unsigned int height, unsigned int channelNum)
-{
-    switch (type)
-    {
-        case CL_HALF_FLOAT: {
-            float epsilon = 0.001f;
-            for (unsigned int i = 0; i < height; ++i)
-            {
-                unsigned int offset = i * width * channelNum;
-                for (unsigned int j = 0; j < width; ++j)
-                {
-                    for (unsigned planeIdx = 0; planeIdx < channelNum;
-                         ++planeIdx)
-                    {
-                        float test = cl_half_to_float(
-                            dataTest.at(offset + j * channelNum + planeIdx));
-                        float ref = cl_half_to_float(
-                            dataExp.at(offset + j * channelNum + planeIdx));
-                        if (abs(test - ref) > epsilon)
-                        {
-                            log_error("Tested image is different than "
-                                      "reference (x,y,plane) = "
-                                      "(%i,%i,%i), test value = %f, expected "
-                                      "value = %f\n",
-                                      j, i, planeIdx, test, ref);
-                            return false;
-                        }
-                    }
-                }
-            }
-        }
-        break;
-        case CL_UNORM_INT16: {
-            cl_ushort epsilon = 1;
-            for (unsigned int i = 0; i < height; ++i)
-            {
-                unsigned int offset = i * width * channelNum;
-                for (unsigned int j = 0; j < width; ++j)
-                {
-                    for (unsigned planeIdx = 0; planeIdx < channelNum;
-                         ++planeIdx)
-                    {
-                        cl_ushort test =
-                            dataTest.at(offset + j * channelNum + planeIdx);
-                        cl_ushort ref =
-                            dataExp.at(offset + j * channelNum + planeIdx);
-                        if (abs(test - ref) > epsilon)
-                        {
-                            log_error("Tested image is different than "
-                                      "reference (x,y,plane) = (%i,%i,%i), "
-                                      "test value = %i, expected value = %i\n",
-                                      j, i, planeIdx, test, ref);
-                            return false;
-                        }
-                    }
-                }
-            }
-        }
-        break;
-        default:
-            log_error("DataCompare(): Invalid data format!");
-            return false;
-            break;
-    }
-
-    return true;
-}
-
-bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                 const std::vector<cl_uchar> &dataTest,
-                 const std::vector<cl_uchar> &dataExp, unsigned int width,
-                 unsigned int height, unsigned int planeNum)
-{
-    for (unsigned int i = 0; i < height; ++i)
-    {
-        unsigned int offset = i * width * planeNum;
-        for (unsigned int j = 0; j < width; ++j)
-        {
-            for (unsigned planeIdx = 0; planeIdx < planeNum; ++planeIdx)
-            {
-                if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8 && planeIdx == 3)
-                    continue;
-
-                cl_uchar test = dataTest.at(offset + j * planeNum + planeIdx);
-                cl_uchar ref = dataExp.at(offset + j * planeNum + planeIdx);
-                if (test != ref)
-                {
-                    log_error(
-                        "Tested image is different than reference (x,y,plane) "
-                        "= (%i,%i,%i), test value = %i, expected value = %i\n",
-                        j, i, planeIdx, test, ref);
-                    return false;
-                }
-            }
-        }
-    }
-
-    return true;
-}
-
-bool GetImageInfo(cl_mem object, cl_image_format formatExp,
-                  size_t elementSizeExp, size_t rowPitchExp,
-                  size_t slicePitchExp, size_t widthExp, size_t heightExp,
-                  size_t depthExp, unsigned int planeExp)
-{
-    bool result = true;
-
-    cl_image_format format;
-    if (clGetImageInfo(object, CL_IMAGE_FORMAT, sizeof(cl_image_format),
-                       &format, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_FORMAT) failed\n");
-        result = false;
-    }
-
-    if (formatExp.image_channel_order != format.image_channel_order
-        || formatExp.image_channel_data_type != format.image_channel_data_type)
-    {
-        log_error("Value of CL_IMAGE_FORMAT is different than expected\n");
-        result = false;
-    }
-
-    size_t elementSize = 0;
-    if (clGetImageInfo(object, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t),
-                       &elementSize, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_ELEMENT_SIZE) failed\n");
-        result = false;
-    }
-
-    if (elementSizeExp != elementSize)
-    {
-        log_error("Value of CL_IMAGE_ELEMENT_SIZE is different than expected "
-                  "(size: %i, exp size: %i)\n",
-                  elementSize, elementSizeExp);
-        result = false;
-    }
-
-    size_t rowPitch = 0;
-    if (clGetImageInfo(object, CL_IMAGE_ROW_PITCH, sizeof(size_t), &rowPitch, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_ROW_PITCH) failed\n");
-        result = false;
-    }
-
-    if ((rowPitchExp == 0 && rowPitchExp != rowPitch)
-        || (rowPitchExp > 0 && rowPitchExp > rowPitch))
-    {
-        log_error("Value of CL_IMAGE_ROW_PITCH is different than expected "
-                  "(size: %i, exp size: %i)\n",
-                  rowPitch, rowPitchExp);
-        result = false;
-    }
-
-    size_t slicePitch = 0;
-    if (clGetImageInfo(object, CL_IMAGE_SLICE_PITCH, sizeof(size_t),
-                       &slicePitch, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_SLICE_PITCH) failed\n");
-        result = false;
-    }
-
-    if ((slicePitchExp == 0 && slicePitchExp != slicePitch)
-        || (slicePitchExp > 0 && slicePitchExp > slicePitch))
-    {
-        log_error("Value of CL_IMAGE_SLICE_PITCH is different than expected "
-                  "(size: %i, exp size: %i)\n",
-                  slicePitch, slicePitchExp);
-        result = false;
-    }
-
-    size_t width = 0;
-    if (clGetImageInfo(object, CL_IMAGE_WIDTH, sizeof(size_t), &width, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_WIDTH) failed\n");
-        result = false;
-    }
-
-    if (widthExp != width)
-    {
-        log_error("Value of CL_IMAGE_WIDTH is different than expected (size: "
-                  "%i, exp size: %i)\n",
-                  width, widthExp);
-        result = false;
-    }
-
-    size_t height = 0;
-    if (clGetImageInfo(object, CL_IMAGE_HEIGHT, sizeof(size_t), &height, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_HEIGHT) failed\n");
-        result = false;
-    }
-
-    if (heightExp != height)
-    {
-        log_error("Value of CL_IMAGE_HEIGHT is different than expected (size: "
-                  "%i, exp size: %i)\n",
-                  height, heightExp);
-        result = false;
-    }
-
-    size_t depth = 0;
-    if (clGetImageInfo(object, CL_IMAGE_DEPTH, sizeof(size_t), &depth, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_DEPTH) failed\n");
-        result = false;
-    }
-
-    if (depthExp != depth)
-    {
-        log_error("Value of CL_IMAGE_DEPTH is different than expected (size: "
-                  "%i, exp size: %i)\n",
-                  depth, depthExp);
-        result = false;
-    }
-
-    unsigned int plane = 99;
-    size_t paramSize = 0;
-    if (clGetImageInfo(object, CL_IMAGE_DX9_MEDIA_PLANE_KHR,
-                       sizeof(unsigned int), &plane, &paramSize)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_MEDIA_SURFACE_PLANE_KHR) failed\n");
-        result = false;
-    }
-
-    if (planeExp != plane)
-    {
-        log_error("Value of CL_IMAGE_MEDIA_SURFACE_PLANE_KHR is different than "
-                  "expected (plane: %i, exp plane: %i)\n",
-                  plane, planeExp);
-        result = false;
-    }
-
-    return result;
-}
-
-bool GetMemObjInfo(cl_mem object, cl_dx9_media_adapter_type_khr adapterType,
-                   std::auto_ptr<CSurfaceWrapper> &surface,
-                   void *shareHandleExp)
-{
-    bool result = true;
-    switch (adapterType)
-    {
-        case CL_ADAPTER_D3D9_KHR:
-        case CL_ADAPTER_D3D9EX_KHR:
-        case CL_ADAPTER_DXVA_KHR: {
-#if defined(_WIN32)
-            cl_dx9_surface_info_khr surfaceInfo;
-#else
-            void *surfaceInfo = 0;
-            return false;
-#endif
-            size_t paramSize = 0;
-            if (clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR,
-                                   sizeof(surfaceInfo), &surfaceInfo,
-                                   &paramSize)
-                != CL_SUCCESS)
-            {
-                log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR) "
-                          "failed\n");
-                result = false;
-            }
-
-#if defined(_WIN32)
-            CD3D9SurfaceWrapper *d3d9Surface =
-                static_cast<CD3D9SurfaceWrapper *>(surface.get());
-            if (*d3d9Surface != surfaceInfo.resource)
-            {
-                log_error(
-                    "Invalid resource for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n");
-                result = false;
-            }
-
-            if (shareHandleExp != surfaceInfo.shared_handle)
-            {
-                log_error("Invalid shared handle for "
-                          "CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n");
-                result = false;
-            }
-#else
-            return false;
-#endif
-
-            if (paramSize != sizeof(surfaceInfo))
-            {
-                log_error("Invalid CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR parameter "
-                          "size: %i, expected: %i\n",
-                          paramSize, sizeof(surfaceInfo));
-                result = false;
-            }
-
-            paramSize = 0;
-            cl_dx9_media_adapter_type_khr mediaAdapterType;
-            if (clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR,
-                                   sizeof(mediaAdapterType), &mediaAdapterType,
-                                   &paramSize)
-                != CL_SUCCESS)
-            {
-                log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR) "
-                          "failed\n");
-                result = false;
-            }
-
-            if (adapterType != mediaAdapterType)
-            {
-                log_error("Invalid media adapter type for "
-                          "CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR\n");
-                result = false;
-            }
-
-            if (paramSize != sizeof(mediaAdapterType))
-            {
-                log_error("Invalid CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR parameter "
-                          "size: %i, expected: %i\n",
-                          paramSize, sizeof(mediaAdapterType));
-                result = false;
-            }
-        }
-        break;
-        default:
-            log_error("GetMemObjInfo(): Unknown adapter type!\n");
-            return false;
-            break;
-    }
-
-    return result;
-}
-
-bool ImageInfoVerify(cl_dx9_media_adapter_type_khr adapterType,
-                     const std::vector<cl_mem> &memObjList, unsigned int width,
-                     unsigned int height,
-                     std::auto_ptr<CSurfaceWrapper> &surface,
-                     void *sharedHandle)
-{
-    if (memObjList.size() != 2 && memObjList.size() != 3)
-    {
-        log_error("ImageInfoVerify(): Invalid object list parameter\n");
-        return false;
-    }
-
-    cl_image_format formatPlane;
-    formatPlane.image_channel_data_type = CL_UNORM_INT8;
-    formatPlane.image_channel_order = CL_R;
-
-    // plane 0 verification
-    if (!GetImageInfo(memObjList[0], formatPlane, sizeof(cl_uchar),
-                      width * sizeof(cl_uchar), 0, width, height, 0, 0))
-    {
-        log_error("clGetImageInfo failed\n");
-        return false;
-    }
-
-    switch (memObjList.size())
-    {
-        case 2: {
-            formatPlane.image_channel_data_type = CL_UNORM_INT8;
-            formatPlane.image_channel_order = CL_RG;
-            if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar) * 2,
-                              width * sizeof(cl_uchar), 0, width / 2,
-                              height / 2, 0, 1))
-            {
-                log_error("clGetImageInfo failed\n");
-                return false;
-            }
-        }
-        break;
-        case 3: {
-            if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar),
-                              width * sizeof(cl_uchar) / 2, 0, width / 2,
-                              height / 2, 0, 1))
-            {
-                log_error("clGetImageInfo failed\n");
-                return false;
-            }
-
-            if (!GetImageInfo(memObjList[2], formatPlane, sizeof(cl_uchar),
-                              width * sizeof(cl_uchar) / 2, 0, width / 2,
-                              height / 2, 0, 2))
-            {
-                log_error("clGetImageInfo failed\n");
-                return false;
-            }
-        }
-        break;
-        default:
-            log_error("ImageInfoVerify(): Invalid object list parameter\n");
-            return false;
-            break;
-    }
-
-    for (size_t i = 0; i < memObjList.size(); ++i)
-    {
-        if (!GetMemObjInfo(memObjList[i], adapterType, surface, sharedHandle))
-        {
-            log_error("clGetMemObjInfo(%i) failed\n", i);
-            return false;
-        }
-    }
-
-    return true;
-}
-
-bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType,
-                      const cl_image_format imageFormatCheck)
-{
-    cl_uint imageFormatsNum = 0;
-    cl_int error = clGetSupportedImageFormats(
-        context, CL_MEM_READ_WRITE, imageType, 0, 0, &imageFormatsNum);
-    if (error != CL_SUCCESS)
-    {
-        log_error("clGetSupportedImageFormats failed\n");
-        return false;
-    }
-
-    if (imageFormatsNum < 1)
-    {
-        log_error("Invalid image format number returned by "
-                  "clGetSupportedImageFormats\n");
-        return false;
-    }
-
-    std::vector<cl_image_format> imageFormats(imageFormatsNum);
-    error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType,
-                                       imageFormatsNum, &imageFormats[0], 0);
-    if (error != CL_SUCCESS)
-    {
-        log_error("clGetSupportedImageFormats failed\n");
-        return false;
-    }
-
-    for (cl_uint i = 0; i < imageFormatsNum; ++i)
-    {
-        if (imageFormats[i].image_channel_data_type
-                == imageFormatCheck.image_channel_data_type
-            && imageFormats[i].image_channel_order
-                == imageFormatCheck.image_channel_order)
-        {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-unsigned int ChannelNum(TSurfaceFormat surfaceFormat)
-{
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_R32F:
-        case SURFACE_FORMAT_R16F:
-        case SURFACE_FORMAT_L16:
-        case SURFACE_FORMAT_A8:
-        case SURFACE_FORMAT_L8: return 1; break;
-        case SURFACE_FORMAT_G32R32F:
-        case SURFACE_FORMAT_G16R16F:
-        case SURFACE_FORMAT_G16R16:
-        case SURFACE_FORMAT_A8L8: return 2; break;
-        case SURFACE_FORMAT_NV12:
-        case SURFACE_FORMAT_YV12: return 3; break;
-        case SURFACE_FORMAT_A32B32G32R32F:
-        case SURFACE_FORMAT_A16B16G16R16F:
-        case SURFACE_FORMAT_A16B16G16R16:
-        case SURFACE_FORMAT_A8B8G8R8:
-        case SURFACE_FORMAT_X8B8G8R8:
-        case SURFACE_FORMAT_A8R8G8B8:
-        case SURFACE_FORMAT_X8R8G8B8: return 4; break;
-        default:
-            log_error("ChannelNum(): unknown surface format!\n");
-            return 0;
-            break;
-    }
-}
-
-unsigned int PlanesNum(TSurfaceFormat surfaceFormat)
-{
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_R32F:
-        case SURFACE_FORMAT_R16F:
-        case SURFACE_FORMAT_L16:
-        case SURFACE_FORMAT_A8:
-        case SURFACE_FORMAT_L8:
-        case SURFACE_FORMAT_G32R32F:
-        case SURFACE_FORMAT_G16R16F:
-        case SURFACE_FORMAT_G16R16:
-        case SURFACE_FORMAT_A8L8:
-        case SURFACE_FORMAT_A32B32G32R32F:
-        case SURFACE_FORMAT_A16B16G16R16F:
-        case SURFACE_FORMAT_A16B16G16R16:
-        case SURFACE_FORMAT_A8B8G8R8:
-        case SURFACE_FORMAT_X8B8G8R8:
-        case SURFACE_FORMAT_A8R8G8B8:
-        case SURFACE_FORMAT_X8R8G8B8: return 1; break;
-        case SURFACE_FORMAT_NV12: return 2; break;
-        case SURFACE_FORMAT_YV12: return 3; break;
-        default:
-            log_error("PlanesNum(): unknown surface format!\n");
-            return 0;
-            break;
-    }
-}
-
-#if defined(_WIN32)
-D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat)
-{
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_R32F: return D3DFMT_R32F; break;
-        case SURFACE_FORMAT_R16F: return D3DFMT_R16F; break;
-        case SURFACE_FORMAT_L16: return D3DFMT_L16; break;
-        case SURFACE_FORMAT_A8: return D3DFMT_A8; break;
-        case SURFACE_FORMAT_L8: return D3DFMT_L8; break;
-        case SURFACE_FORMAT_G32R32F: return D3DFMT_G32R32F; break;
-        case SURFACE_FORMAT_G16R16F: return D3DFMT_G16R16F; break;
-        case SURFACE_FORMAT_G16R16: return D3DFMT_G16R16; break;
-        case SURFACE_FORMAT_A8L8: return D3DFMT_A8L8; break;
-        case SURFACE_FORMAT_A32B32G32R32F: return D3DFMT_A32B32G32R32F; break;
-        case SURFACE_FORMAT_A16B16G16R16F: return D3DFMT_A16B16G16R16F; break;
-        case SURFACE_FORMAT_A16B16G16R16: return D3DFMT_A16B16G16R16; break;
-        case SURFACE_FORMAT_A8B8G8R8: return D3DFMT_A8B8G8R8; break;
-        case SURFACE_FORMAT_X8B8G8R8: return D3DFMT_X8B8G8R8; break;
-        case SURFACE_FORMAT_A8R8G8B8: return D3DFMT_A8R8G8B8; break;
-        case SURFACE_FORMAT_X8R8G8B8: return D3DFMT_X8R8G8B8; break;
-        case SURFACE_FORMAT_NV12:
-            return static_cast<D3DFORMAT>(MAKEFOURCC('N', 'V', '1', '2'));
-            break;
-        case SURFACE_FORMAT_YV12:
-            return static_cast<D3DFORMAT>(MAKEFOURCC('Y', 'V', '1', '2'));
-            break;
-        default:
-            log_error("SurfaceFormatToD3D(): unknown surface format!\n");
-            return D3DFMT_R32F;
-            break;
-    }
-}
-#endif
-
-bool DeviceCreate(cl_dx9_media_adapter_type_khr adapterType,
-                  std::auto_ptr<CDeviceWrapper> &device)
-{
-    switch (adapterType)
-    {
-#if defined(_WIN32)
-        case CL_ADAPTER_D3D9_KHR:
-            device = std::auto_ptr<CDeviceWrapper>(new CD3D9Wrapper());
-            break;
-        case CL_ADAPTER_D3D9EX_KHR:
-            device = std::auto_ptr<CDeviceWrapper>(new CD3D9ExWrapper());
-            break;
-        case CL_ADAPTER_DXVA_KHR:
-            device = std::auto_ptr<CDeviceWrapper>(new CDXVAWrapper());
-            break;
-#endif
-        default:
-            log_error("DeviceCreate(): Unknown adapter type!\n");
-            return false;
-            break;
-    }
-
-    return device->Status();
-}
-
-bool SurfaceFormatCheck(cl_dx9_media_adapter_type_khr adapterType,
-                        const CDeviceWrapper &device,
-                        TSurfaceFormat surfaceFormat)
-{
-    switch (adapterType)
-    {
-#if defined(_WIN32)
-        case CL_ADAPTER_D3D9_KHR:
-        case CL_ADAPTER_D3D9EX_KHR:
-        case CL_ADAPTER_DXVA_KHR: {
-            D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
-            LPDIRECT3D9 d3d9 = static_cast<LPDIRECT3D9>(device.D3D());
-            D3DDISPLAYMODE d3ddm;
-            d3d9->GetAdapterDisplayMode(device.AdapterIdx(), &d3ddm);
-
-            if (FAILED(d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT,
-                                               D3DDEVTYPE_HAL, d3ddm.Format, 0,
-                                               D3DRTYPE_SURFACE, d3dFormat)))
-                return false;
-        }
-        break;
-#endif
-        default:
-            log_error("SurfaceFormatCheck(): Unknown adapter type!\n");
-            return false;
-            break;
-    }
-
-    return true;
-}
-
-bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format)
-{
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_R32F:
-            format.image_channel_order = CL_R;
-            format.image_channel_data_type = CL_FLOAT;
-            break;
-        case SURFACE_FORMAT_R16F:
-            format.image_channel_order = CL_R;
-            format.image_channel_data_type = CL_HALF_FLOAT;
-            break;
-        case SURFACE_FORMAT_L16:
-            format.image_channel_order = CL_R;
-            format.image_channel_data_type = CL_UNORM_INT16;
-            break;
-        case SURFACE_FORMAT_A8:
-            format.image_channel_order = CL_A;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_L8:
-            format.image_channel_order = CL_R;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_G32R32F:
-            format.image_channel_order = CL_RG;
-            format.image_channel_data_type = CL_FLOAT;
-            break;
-        case SURFACE_FORMAT_G16R16F:
-            format.image_channel_order = CL_RG;
-            format.image_channel_data_type = CL_HALF_FLOAT;
-            break;
-        case SURFACE_FORMAT_G16R16:
-            format.image_channel_order = CL_RG;
-            format.image_channel_data_type = CL_UNORM_INT16;
-            break;
-        case SURFACE_FORMAT_A8L8:
-            format.image_channel_order = CL_RG;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_A32B32G32R32F:
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_FLOAT;
-            break;
-        case SURFACE_FORMAT_A16B16G16R16F:
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_HALF_FLOAT;
-            break;
-        case SURFACE_FORMAT_A16B16G16R16:
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_UNORM_INT16;
-            break;
-        case SURFACE_FORMAT_A8B8G8R8:
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_X8B8G8R8:
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_A8R8G8B8:
-            format.image_channel_order = CL_BGRA;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_X8R8G8B8:
-            format.image_channel_order = CL_BGRA;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_NV12:
-            format.image_channel_order = CL_R;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_YV12:
-            format.image_channel_order = CL_R;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        default:
-            log_error("SurfaceFormatToOCL(): Unknown surface format!\n");
-            return false;
-            break;
-    }
-
-    return true;
-}
-
-void SurfaceFormatToString(TSurfaceFormat surfaceFormat, std::string &str)
-{
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_R32F: str = "R32F"; break;
-        case SURFACE_FORMAT_R16F: str = "R16F"; break;
-        case SURFACE_FORMAT_L16: str = "L16"; break;
-        case SURFACE_FORMAT_A8: str = "A8"; break;
-        case SURFACE_FORMAT_L8: str = "L8"; break;
-        case SURFACE_FORMAT_G32R32F: str = "G32R32F"; break;
-        case SURFACE_FORMAT_G16R16F: str = "G16R16F"; break;
-        case SURFACE_FORMAT_G16R16: str = "G16R16"; break;
-        case SURFACE_FORMAT_A8L8: str = "A8L8"; break;
-        case SURFACE_FORMAT_A32B32G32R32F: str = "A32B32G32R32F"; break;
-        case SURFACE_FORMAT_A16B16G16R16F: str = "A16B16G16R16F"; break;
-        case SURFACE_FORMAT_A16B16G16R16: str = "A16B16G16R16"; break;
-        case SURFACE_FORMAT_A8B8G8R8: str = "A8B8G8R8"; break;
-        case SURFACE_FORMAT_X8B8G8R8: str = "X8B8G8R8"; break;
-        case SURFACE_FORMAT_A8R8G8B8: str = "A8R8G8B8"; break;
-        case SURFACE_FORMAT_X8R8G8B8: str = "X8R8G8B8"; break;
-        case SURFACE_FORMAT_NV12: str = "NV12"; break;
-        case SURFACE_FORMAT_YV12: str = "YV12"; break;
-        default:
-            log_error("SurfaceFormatToString(): unknown surface format!\n");
-            str = "unknown";
-            break;
-    }
-}
-
-bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType,
-                        unsigned int width, unsigned int height,
-                        TSurfaceFormat surfaceFormat, CDeviceWrapper &device,
-                        std::auto_ptr<CSurfaceWrapper> &surface,
-                        bool sharedHandle, void **objectSharedHandle)
-{
-    switch (adapterType)
-    {
-#if defined(_WIN32)
-        case CL_ADAPTER_D3D9_KHR: {
-            surface =
-                std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
-            CD3D9SurfaceWrapper *d3dSurface =
-                static_cast<CD3D9SurfaceWrapper *>(surface.get());
-            HRESULT hr = 0;
-            D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
-            LPDIRECT3DDEVICE9 d3d9Device = (LPDIRECT3DDEVICE9)device.Device();
-            hr = d3d9Device->CreateOffscreenPlainSurface(
-                width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface),
-                sharedHandle ? objectSharedHandle : 0);
-
-            if (FAILED(hr))
-            {
-                log_error("CreateOffscreenPlainSurface failed\n");
-                return false;
-            }
-        }
-        break;
-        case CL_ADAPTER_D3D9EX_KHR: {
-            surface =
-                std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
-            CD3D9SurfaceWrapper *d3dSurface =
-                static_cast<CD3D9SurfaceWrapper *>(surface.get());
-            HRESULT hr = 0;
-            D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
-            LPDIRECT3DDEVICE9EX d3d9ExDevice =
-                (LPDIRECT3DDEVICE9EX)device.Device();
-            hr = d3d9ExDevice->CreateOffscreenPlainSurface(
-                width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface),
-                sharedHandle ? objectSharedHandle : 0);
-
-            if (FAILED(hr))
-            {
-                log_error("CreateOffscreenPlainSurface failed\n");
-                return false;
-            }
-        }
-        break;
-        case CL_ADAPTER_DXVA_KHR: {
-            surface =
-                std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
-            CD3D9SurfaceWrapper *d3dSurface =
-                static_cast<CD3D9SurfaceWrapper *>(surface.get());
-            HRESULT hr = 0;
-            D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
-            IDXVAHD_Device *dxvaDevice = (IDXVAHD_Device *)device.Device();
-            hr = dxvaDevice->CreateVideoSurface(
-                width, height, d3dFormat, D3DPOOL_DEFAULT, 0,
-                DXVAHD_SURFACE_TYPE_VIDEO_INPUT, 1, &(*d3dSurface),
-                sharedHandle ? objectSharedHandle : 0);
-
-            if (FAILED(hr))
-            {
-                log_error("CreateVideoSurface failed\n");
-                return false;
-            }
-        }
-        break;
-#endif
-        default:
-            log_error("MediaSurfaceCreate(): Unknown adapter type!\n");
-            return false;
-            break;
-    }
-
-    return true;
-}
-
-cl_int deviceExistForCLTest(
-    cl_platform_id platform, cl_dx9_media_adapter_type_khr media_adapters_type,
-    void *media_adapters, CResult &result,
-    TSharedHandleType sharedHandle /*default SHARED_HANDLE_ENABLED*/
-)
-{
-    cl_int _error;
-    cl_uint devicesAllNum = 0;
-    std::string sharedHandleStr =
-        (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-    std::string adapterStr;
-    AdapterToString(media_adapters_type, adapterStr);
-
-    _error = clGetDeviceIDsFromDX9MediaAdapterKHR(
-        platform, 1, &media_adapters_type, &media_adapters,
-        CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum);
-
-    if (_error != CL_SUCCESS)
-    {
-        if (_error != CL_DEVICE_NOT_FOUND)
-        {
-            log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n",
-                      IGetErrorString(_error));
-            result.ResultSub(CResult::TEST_ERROR);
-        }
-        else
-        {
-            log_info("Skipping test case, device type is not supported by a "
-                     "device (adapter type: %s, shared handle: %s)\n",
-                     adapterStr.c_str(), sharedHandleStr.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-
-    return _error;
-}

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.h b/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.h
deleted file mode 100644
index 56c0fc2..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.h
+++ /dev/null

@@ -1,215 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef __UTILS_KHR_MEDIA_H
-#define __UTILS_KHR_MEDIA_H
-
-#include <string>
-#include <iostream>
-#include <memory>
-#include <vector>
-#include "wrappers.h"
-#include "CL/cl_dx9_media_sharing.h"
-
-#include "harness/typeWrappers.h"
-
-
-extern clGetDeviceIDsFromDX9MediaAdapterKHR_fn
-    clGetDeviceIDsFromDX9MediaAdapterKHR;
-extern clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR;
-extern clEnqueueAcquireDX9MediaSurfacesKHR_fn
-    clEnqueueAcquireDX9MediaSurfacesKHR;
-extern clEnqueueReleaseDX9MediaSurfacesKHR_fn
-    clEnqueueReleaseDX9MediaSurfacesKHR;
-
-extern cl_platform_id gPlatformIDdetected;
-extern cl_device_id gDeviceIDdetected;
-extern cl_device_type gDeviceTypeSelected;
-
-#define NL "\n"
-#define TEST_NOT_IMPLEMENTED -1
-#define TEST_NOT_SUPPORTED -2
-
-enum TSurfaceFormat
-{
-    SURFACE_FORMAT_NV12,
-    SURFACE_FORMAT_YV12,
-    SURFACE_FORMAT_R32F,
-    SURFACE_FORMAT_R16F,
-    SURFACE_FORMAT_L16,
-    SURFACE_FORMAT_A8,
-    SURFACE_FORMAT_L8,
-    SURFACE_FORMAT_G32R32F,
-    SURFACE_FORMAT_G16R16F,
-    SURFACE_FORMAT_G16R16,
-    SURFACE_FORMAT_A8L8,
-    SURFACE_FORMAT_A32B32G32R32F,
-    SURFACE_FORMAT_A16B16G16R16F,
-    SURFACE_FORMAT_A16B16G16R16,
-    SURFACE_FORMAT_A8B8G8R8,
-    SURFACE_FORMAT_X8B8G8R8,
-    SURFACE_FORMAT_A8R8G8B8,
-    SURFACE_FORMAT_X8R8G8B8,
-};
-
-enum TContextFuncType
-{
-    CONTEXT_CREATE_DEFAULT,
-    CONTEXT_CREATE_FROM_TYPE,
-};
-
-enum TSharedHandleType
-{
-    SHARED_HANDLE_ENABLED,
-    SHARED_HANDLE_DISABLED,
-};
-
-class CResult {
-public:
-    enum TTestResult
-    {
-        TEST_NORESULT,
-        TEST_NOTSUPPORTED,
-        TEST_PASS,
-        TEST_FAIL,
-        TEST_ERROR,
-    };
-
-    CResult();
-    ~CResult();
-
-    void ResultSub(TTestResult result);
-    TTestResult ResultLast() const;
-    int Result() const;
-
-private:
-    TTestResult _result;
-    TTestResult _resultLast;
-};
-
-void FunctionContextCreateToString(TContextFuncType contextCreateFunction,
-                                   std::string &contextFunction);
-void AdapterToString(cl_dx9_media_adapter_type_khr adapterType,
-                     std::string &adapter);
-cl_context_info
-AdapterTypeToContextInfo(cl_dx9_media_adapter_type_khr adapterType);
-
-// YUV utils
-void YUVGenerateNV12(std::vector<cl_uchar> &yuv, unsigned int width,
-                     unsigned int height, cl_uchar valueMin, cl_uchar valueMax,
-                     double valueAdd = 0.0);
-void YUVGenerateYV12(std::vector<cl_uchar> &yuv, unsigned int width,
-                     unsigned int height, cl_uchar valueMin, cl_uchar valueMax,
-                     double valueAdd = 0.0);
-bool YUVGenerate(TSurfaceFormat surfaceFormat, std::vector<cl_uchar> &yuv,
-                 unsigned int width, unsigned int height, cl_uchar valueMin,
-                 cl_uchar valueMax, double valueAdd = 0.0);
-bool YUVSurfaceSetNV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       const std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height);
-bool YUVSurfaceSetYV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       const std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height);
-bool YUVSurfaceSet(TSurfaceFormat surfaceFormat,
-                   std::auto_ptr<CSurfaceWrapper> &surface,
-                   const std::vector<cl_uchar> &yuv, unsigned int width,
-                   unsigned int height);
-bool YUVSurfaceGetNV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height);
-bool YUVSurfaceGetYV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height);
-bool YUVSurfaceGet(TSurfaceFormat surfaceFormat,
-                   std::auto_ptr<CSurfaceWrapper> &surface,
-                   std::vector<cl_uchar> &yuv, unsigned int width,
-                   unsigned int height);
-bool YUVCompareNV12(const std::vector<cl_uchar> &yuvTest,
-                    const std::vector<cl_uchar> &yuvRef, unsigned int width,
-                    unsigned int height);
-bool YUVCompareYV12(const std::vector<cl_uchar> &yuvTest,
-                    const std::vector<cl_uchar> &yuvRef, unsigned int width,
-                    unsigned int height);
-bool YUVCompare(TSurfaceFormat surfaceFormat,
-                const std::vector<cl_uchar> &yuvTest,
-                const std::vector<cl_uchar> &yuvRef, unsigned int width,
-                unsigned int height);
-
-// other types utils
-void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                  std::vector<float> &data, unsigned int width,
-                  unsigned int height, unsigned int channelNum,
-                  float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f);
-void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                  std::vector<cl_half> &data, unsigned int width,
-                  unsigned int height, unsigned int channelNum,
-                  float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f);
-void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                  std::vector<cl_uchar> &data, unsigned int width,
-                  unsigned int height, unsigned int channelNum,
-                  float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f);
-bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                 const std::vector<cl_float> &dataTest,
-                 const std::vector<cl_float> &dataExp, unsigned int width,
-                 unsigned int height, unsigned int channelNum);
-bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                 const std::vector<cl_half> &dataTest,
-                 const std::vector<cl_half> &dataExp, unsigned int width,
-                 unsigned int height, unsigned int channelNum);
-bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                 const std::vector<cl_uchar> &dataTest,
-                 const std::vector<cl_uchar> &dataExp, unsigned int width,
-                 unsigned int height, unsigned int channelNum);
-
-bool GetImageInfo(cl_mem object, cl_image_format formatExp,
-                  size_t elementSizeExp, size_t rowPitchExp,
-                  size_t slicePitchExp, size_t widthExp, size_t heightExp,
-                  size_t depthExp, unsigned int planeExp);
-bool GetMemObjInfo(cl_mem object, cl_dx9_media_adapter_type_khr adapterType,
-                   std::auto_ptr<CSurfaceWrapper> &surface,
-                   void *shareHandleExp);
-bool ImageInfoVerify(cl_dx9_media_adapter_type_khr adapterType,
-                     const std::vector<cl_mem> &memObjList, unsigned int width,
-                     unsigned int height,
-                     std::auto_ptr<CSurfaceWrapper> &surface,
-                     void *sharedHandle);
-bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType,
-                      const cl_image_format imageFormatCheck);
-unsigned int ChannelNum(TSurfaceFormat surfaceFormat);
-unsigned int PlanesNum(TSurfaceFormat surfaceFormat);
-
-#if defined(_WIN32)
-D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat);
-#endif
-
-bool DeviceCreate(cl_dx9_media_adapter_type_khr adapterType,
-                  std::auto_ptr<CDeviceWrapper> &device);
-bool SurfaceFormatCheck(cl_dx9_media_adapter_type_khr adapterType,
-                        const CDeviceWrapper &device,
-                        TSurfaceFormat surfaceFormat);
-bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format);
-void SurfaceFormatToString(TSurfaceFormat surfaceFormat, std::string &str);
-bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType,
-                        unsigned int width, unsigned int height,
-                        TSurfaceFormat surfaceFormat, CDeviceWrapper &device,
-                        std::auto_ptr<CSurfaceWrapper> &surface,
-                        bool sharedHandle, void **objectSharedHandle);
-
-cl_int
-deviceExistForCLTest(cl_platform_id platform,
-                     cl_dx9_media_adapter_type_khr media_adapters_type,
-                     void *media_adapters, CResult &result,
-                     TSharedHandleType sharedHandle = SHARED_HANDLE_DISABLED);
-#endif // __UTILS_KHR_MEDIA_H

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.cpp
deleted file mode 100644
index e156584..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.cpp
+++ /dev/null

@@ -1,463 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "wrappers.h"
-#include "harness/errorHelpers.h"
-
-LPCTSTR CDeviceWrapper::WINDOW_TITLE = _T( "cl_khr_dx9_media_sharing" );
-const int CDeviceWrapper::WINDOW_WIDTH = 256;
-const int CDeviceWrapper::WINDOW_HEIGHT = 256;
-CDeviceWrapper::TAccelerationType CDeviceWrapper::accelerationType =
-    CDeviceWrapper::ACCELERATION_HW;
-
-#if defined(_WIN32)
-const D3DFORMAT CDXVAWrapper::RENDER_TARGET_FORMAT = D3DFMT_X8R8G8B8;
-const D3DFORMAT CDXVAWrapper::VIDEO_FORMAT = D3DFMT_X8R8G8B8;
-const unsigned int CDXVAWrapper::VIDEO_FPS = 60;
-#endif
-
-#if defined(_WIN32)
-static LRESULT WINAPI WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
-{
-    switch (msg)
-    {
-        case WM_DESTROY: PostQuitMessage(0); return 0;
-        case WM_PAINT: ValidateRect(hWnd, 0); return 0;
-        default: break;
-    }
-
-    return DefWindowProc(hWnd, msg, wParam, lParam);
-}
-#endif
-
-CDeviceWrapper::CDeviceWrapper()
-#if defined(_WIN32)
-    : _hInstance(NULL), _hWnd(NULL)
-#endif
-{}
-
-void CDeviceWrapper::WindowInit()
-{
-#if defined(_WIN32)
-    _hInstance = GetModuleHandle(NULL);
-    static WNDCLASSEX wc = {
-        sizeof(WNDCLASSEX), CS_CLASSDC, WndProc, 0L,   0L,
-        _hInstance,         NULL,       NULL,    NULL, NULL,
-        WINDOW_TITLE,       NULL
-    };
-
-    RegisterClassEx(&wc);
-
-    _hWnd = CreateWindow(WINDOW_TITLE, WINDOW_TITLE, WS_OVERLAPPEDWINDOW, 0, 0,
-                         WINDOW_WIDTH, WINDOW_HEIGHT, NULL, NULL, wc.hInstance,
-                         NULL);
-
-    if (!_hWnd)
-    {
-        log_error("Failed to create window");
-        return;
-    }
-
-    ShowWindow(_hWnd, SW_SHOWDEFAULT);
-    UpdateWindow(_hWnd);
-#endif
-}
-
-void CDeviceWrapper::WindowDestroy()
-{
-#if defined(_WIN32)
-    if (_hWnd) DestroyWindow(_hWnd);
-    _hWnd = NULL;
-#endif
-}
-
-#if defined(_WIN32)
-HWND CDeviceWrapper::WindowHandle() const { return _hWnd; }
-#endif
-
-int CDeviceWrapper::WindowWidth() const { return WINDOW_WIDTH; }
-
-int CDeviceWrapper::WindowHeight() const { return WINDOW_HEIGHT; }
-
-CDeviceWrapper::TAccelerationType CDeviceWrapper::AccelerationType()
-{
-    return accelerationType;
-}
-
-void CDeviceWrapper::AccelerationType(TAccelerationType accelerationTypeNew)
-{
-    accelerationType = accelerationTypeNew;
-}
-
-CDeviceWrapper::~CDeviceWrapper() { WindowDestroy(); }
-
-#if defined(_WIN32)
-CD3D9Wrapper::CD3D9Wrapper()
-    : _d3d9(NULL), _d3dDevice(NULL), _status(DEVICE_PASS), _adapterIdx(0),
-      _adapterFound(false)
-{
-    WindowInit();
-
-    _d3d9 = Direct3DCreate9(D3D_SDK_VERSION);
-    if (!_d3d9)
-    {
-        log_error("Direct3DCreate9 failed\n");
-        _status = DEVICE_FAIL;
-    }
-}
-
-CD3D9Wrapper::~CD3D9Wrapper()
-{
-    Destroy();
-
-    if (_d3d9) _d3d9->Release();
-    _d3d9 = 0;
-}
-
-void CD3D9Wrapper::Destroy()
-{
-    if (_d3dDevice) _d3dDevice->Release();
-    _d3dDevice = 0;
-}
-
-cl_int CD3D9Wrapper::Init()
-{
-    if (!WindowHandle())
-    {
-        log_error("D3D9: Window is not created\n");
-        _status = DEVICE_FAIL;
-        return DEVICE_FAIL;
-    }
-
-    if (!_d3d9 || DEVICE_PASS != _status || !_adapterFound) return false;
-
-    _d3d9->GetAdapterDisplayMode(_adapterIdx - 1, &_d3ddm);
-
-    D3DPRESENT_PARAMETERS d3dParams;
-    ZeroMemory(&d3dParams, sizeof(d3dParams));
-
-    d3dParams.Windowed = TRUE;
-    d3dParams.BackBufferCount = 1;
-    d3dParams.SwapEffect = D3DSWAPEFFECT_DISCARD;
-    d3dParams.hDeviceWindow = WindowHandle();
-    d3dParams.BackBufferWidth = WindowWidth();
-    d3dParams.BackBufferHeight = WindowHeight();
-    d3dParams.BackBufferFormat = _d3ddm.Format;
-
-    DWORD processingType = (AccelerationType() == ACCELERATION_HW)
-        ? D3DCREATE_HARDWARE_VERTEXPROCESSING
-        : D3DCREATE_SOFTWARE_VERTEXPROCESSING;
-
-    if (FAILED(_d3d9->CreateDevice(_adapterIdx - 1, D3DDEVTYPE_HAL,
-                                   WindowHandle(), processingType, &d3dParams,
-                                   &_d3dDevice)))
-    {
-        log_error("CreateDevice failed\n");
-        _status = DEVICE_FAIL;
-        return DEVICE_FAIL;
-    }
-
-    _d3dDevice->BeginScene();
-    _d3dDevice->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
-    _d3dDevice->EndScene();
-
-    return true;
-}
-
-void *CD3D9Wrapper::D3D() const { return _d3d9; }
-
-void *CD3D9Wrapper::Device() const { return _d3dDevice; }
-
-D3DFORMAT CD3D9Wrapper::Format() { return _d3ddm.Format; }
-
-D3DADAPTER_IDENTIFIER9 CD3D9Wrapper::Adapter() { return _adapter; }
-
-TDeviceStatus CD3D9Wrapper::Status() const { return _status; }
-
-bool CD3D9Wrapper::AdapterNext()
-{
-    if (DEVICE_PASS != _status) return false;
-
-    _adapterFound = false;
-    for (; _adapterIdx < _d3d9->GetAdapterCount();)
-    {
-        ++_adapterIdx;
-        D3DCAPS9 caps;
-        if (FAILED(
-                _d3d9->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps)))
-            continue;
-
-        if (FAILED(_d3d9->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter)))
-        {
-            log_error("D3D9: GetAdapterIdentifier failed\n");
-            _status = DEVICE_FAIL;
-            return false;
-        }
-
-        _adapterFound = true;
-
-        Destroy();
-        if (!Init())
-        {
-            _status = DEVICE_FAIL;
-            _adapterFound = false;
-        }
-        break;
-    }
-
-    return _adapterFound;
-}
-
-unsigned int CD3D9Wrapper::AdapterIdx() const { return _adapterIdx - 1; }
-
-
-CD3D9ExWrapper::CD3D9ExWrapper()
-    : _d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(DEVICE_PASS), _adapterIdx(0),
-      _adapterFound(false)
-{
-    WindowInit();
-
-    HRESULT result = Direct3DCreate9Ex(D3D_SDK_VERSION, &_d3d9Ex);
-    if (FAILED(result) || !_d3d9Ex)
-    {
-        log_error("Direct3DCreate9Ex failed\n");
-        _status = DEVICE_FAIL;
-    }
-}
-
-CD3D9ExWrapper::~CD3D9ExWrapper()
-{
-    Destroy();
-
-    if (_d3d9Ex) _d3d9Ex->Release();
-    _d3d9Ex = 0;
-}
-
-void *CD3D9ExWrapper::D3D() const { return _d3d9Ex; }
-
-void *CD3D9ExWrapper::Device() const { return _d3dDeviceEx; }
-
-D3DFORMAT CD3D9ExWrapper::Format() { return _d3ddmEx.Format; }
-
-D3DADAPTER_IDENTIFIER9 CD3D9ExWrapper::Adapter() { return _adapter; }
-
-cl_int CD3D9ExWrapper::Init()
-{
-    if (!WindowHandle())
-    {
-        log_error("D3D9EX: Window is not created\n");
-        _status = DEVICE_FAIL;
-        return DEVICE_FAIL;
-    }
-
-    if (!_d3d9Ex || DEVICE_FAIL == _status || !_adapterFound)
-        return DEVICE_FAIL;
-
-    RECT rect;
-    GetClientRect(WindowHandle(), &rect);
-
-    D3DPRESENT_PARAMETERS d3dParams;
-    ZeroMemory(&d3dParams, sizeof(d3dParams));
-
-    d3dParams.Windowed = TRUE;
-    d3dParams.SwapEffect = D3DSWAPEFFECT_FLIP;
-    d3dParams.BackBufferFormat = D3DFMT_X8R8G8B8;
-    d3dParams.BackBufferWidth = WindowWidth();
-    d3dParams.BackBufferHeight = WindowHeight();
-
-    d3dParams.BackBufferCount = 1;
-    d3dParams.hDeviceWindow = WindowHandle();
-
-    DWORD processingType = (AccelerationType() == ACCELERATION_HW)
-        ? D3DCREATE_HARDWARE_VERTEXPROCESSING
-        : D3DCREATE_SOFTWARE_VERTEXPROCESSING;
-
-    if (FAILED(_d3d9Ex->CreateDeviceEx(_adapterIdx - 1, D3DDEVTYPE_HAL,
-                                       WindowHandle(), processingType,
-                                       &d3dParams, NULL, &_d3dDeviceEx)))
-    {
-        log_error("CreateDeviceEx failed\n");
-        _status = DEVICE_FAIL;
-        return DEVICE_FAIL;
-    }
-
-    _d3dDeviceEx->BeginScene();
-    _d3dDeviceEx->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
-    _d3dDeviceEx->EndScene();
-
-    return DEVICE_PASS;
-}
-
-void CD3D9ExWrapper::Destroy()
-{
-    if (_d3dDeviceEx) _d3dDeviceEx->Release();
-    _d3dDeviceEx = 0;
-}
-
-TDeviceStatus CD3D9ExWrapper::Status() const { return _status; }
-
-bool CD3D9ExWrapper::AdapterNext()
-{
-    if (DEVICE_FAIL == _status) return false;
-
-    _adapterFound = false;
-    for (; _adapterIdx < _d3d9Ex->GetAdapterCount();)
-    {
-        ++_adapterIdx;
-        D3DCAPS9 caps;
-        if (FAILED(
-                _d3d9Ex->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps)))
-            continue;
-
-        if (FAILED(
-                _d3d9Ex->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter)))
-        {
-            log_error("D3D9EX: GetAdapterIdentifier failed\n");
-            _status = DEVICE_FAIL;
-            return false;
-        }
-
-        _adapterFound = true;
-        Destroy();
-        if (!Init())
-        {
-            _status = DEVICE_FAIL;
-            _adapterFound = _status;
-        }
-
-        break;
-    }
-
-    return _adapterFound;
-}
-
-unsigned int CD3D9ExWrapper::AdapterIdx() const { return _adapterIdx - 1; }
-
-CDXVAWrapper::CDXVAWrapper()
-    : _dxvaDevice(NULL), _status(DEVICE_PASS), _adapterFound(false)
-{
-    _status = _d3d9.Status();
-}
-
-CDXVAWrapper::~CDXVAWrapper() { DXVAHDDestroy(); }
-
-void *CDXVAWrapper::Device() const { return _dxvaDevice; }
-
-TDeviceStatus CDXVAWrapper::Status() const
-{
-    if (_status == DEVICE_FAIL || _d3d9.Status() == DEVICE_FAIL)
-        return DEVICE_FAIL;
-    else if (_status == DEVICE_NOTSUPPORTED
-             || _d3d9.Status() == DEVICE_NOTSUPPORTED)
-        return DEVICE_NOTSUPPORTED;
-    else
-        return DEVICE_PASS;
-}
-
-bool CDXVAWrapper::AdapterNext()
-{
-    if (DEVICE_PASS != _status) return false;
-
-    _adapterFound = _d3d9.AdapterNext();
-    _status = _d3d9.Status();
-    if (DEVICE_PASS != _status)
-    {
-        _adapterFound = false;
-        return false;
-    }
-
-    if (!_adapterFound) return false;
-
-    DXVAHDDestroy();
-    _status = DXVAHDInit();
-    if (DEVICE_PASS != _status)
-    {
-        _adapterFound = false;
-        return false;
-    }
-
-    return true;
-}
-
-TDeviceStatus CDXVAWrapper::DXVAHDInit()
-{
-    if ((_status == DEVICE_FAIL) || (_d3d9.Status() == DEVICE_FAIL)
-        || !_adapterFound)
-        return DEVICE_FAIL;
-
-    DXVAHD_RATIONAL fps = { VIDEO_FPS, 1 };
-
-    DXVAHD_CONTENT_DESC desc;
-    desc.InputFrameFormat = DXVAHD_FRAME_FORMAT_PROGRESSIVE;
-    desc.InputFrameRate = fps;
-    desc.InputWidth = WindowWidth();
-    desc.InputHeight = WindowHeight();
-    desc.OutputFrameRate = fps;
-    desc.OutputWidth = WindowWidth();
-    desc.OutputHeight = WindowHeight();
-
-#ifdef USE_SOFTWARE_PLUGIN
-    _status = DEVICE_FAIL;
-    return DEVICE_FAIL;
-#endif
-
-    HRESULT hr = DXVAHD_CreateDevice(
-        static_cast<IDirect3DDevice9Ex *>(_d3d9.Device()), &desc,
-        DXVAHD_DEVICE_USAGE_PLAYBACK_NORMAL, NULL, &_dxvaDevice);
-    if (FAILED(hr))
-    {
-        if (hr == E_NOINTERFACE)
-        {
-            log_error(
-                "DXVAHD_CreateDevice skipped due to no supported devices!\n");
-            _status = DEVICE_NOTSUPPORTED;
-        }
-        else
-        {
-            log_error("DXVAHD_CreateDevice failed\n");
-            _status = DEVICE_FAIL;
-        }
-    }
-
-    return _status;
-}
-
-void CDXVAWrapper::DXVAHDDestroy()
-{
-    if (_dxvaDevice) _dxvaDevice->Release();
-    _dxvaDevice = 0;
-}
-
-void *CDXVAWrapper::D3D() const { return _d3d9.D3D(); }
-
-unsigned int CDXVAWrapper::AdapterIdx() const { return _d3d9.AdapterIdx(); }
-
-const CD3D9ExWrapper &CDXVAWrapper::D3D9() const { return _d3d9; }
-
-CD3D9SurfaceWrapper::CD3D9SurfaceWrapper(): mMem(NULL) {}
-
-CD3D9SurfaceWrapper::CD3D9SurfaceWrapper(IDirect3DSurface9 *mem): mMem(mem) {}
-
-CD3D9SurfaceWrapper::~CD3D9SurfaceWrapper()
-{
-    if (mMem != NULL) mMem->Release();
-    mMem = NULL;
-}
-
-#endif
-
-CSurfaceWrapper::CSurfaceWrapper() {}
-
-CSurfaceWrapper::~CSurfaceWrapper() {}

diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.h b/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.h
deleted file mode 100644
index e3a7c6d..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.h
+++ /dev/null

@@ -1,195 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef __WRAPPERS_H
-#define __WRAPPERS_H
-
-#if defined(_WIN32)
-#include <d3d9.h>
-#if defined(__MINGW32__)
-#include <rpcsal.h>
-typedef unsigned char UINT8;
-#define __out
-#define __in
-#define __inout
-#define __out_bcount(size)
-#define __out_bcount_opt(size)
-#define __in_opt
-#define __in_ecount(size)
-#define __in_ecount_opt(size)
-#define __out_opt
-#define __out_ecount(size)
-#define __out_ecount_opt(size)
-#define __in_bcount_opt(size)
-#define __inout_opt
-#define __inout_bcount(size)
-#define __in_bcount(size)
-#define __deref_out
-#endif
-#include <dxvahd.h>
-#include <tchar.h>
-#endif
-
-enum TDeviceStatus
-{
-    DEVICE_NOTSUPPORTED,
-    DEVICE_PASS,
-    DEVICE_FAIL,
-};
-
-class CDeviceWrapper {
-public:
-    enum TAccelerationType
-    {
-        ACCELERATION_HW,
-        ACCELERATION_SW,
-    };
-
-    CDeviceWrapper();
-    virtual ~CDeviceWrapper();
-
-    virtual bool AdapterNext() = 0;
-    virtual unsigned int AdapterIdx() const = 0;
-    virtual void *Device() const = 0;
-    virtual TDeviceStatus Status() const = 0;
-    virtual void *D3D() const = 0;
-
-#if defined(_WIN32)
-    HWND WindowHandle() const;
-#endif
-    int WindowWidth() const;
-    int WindowHeight() const;
-    void WindowInit();
-
-
-    static TAccelerationType AccelerationType();
-    static void AccelerationType(TAccelerationType accelerationTypeNew);
-
-private:
-    static LPCTSTR WINDOW_TITLE;
-    static const int WINDOW_WIDTH;
-    static const int WINDOW_HEIGHT;
-    static TAccelerationType accelerationType;
-
-#if defined(_WIN32)
-    HMODULE _hInstance;
-    HWND _hWnd;
-#endif
-
-    void WindowDestroy();
-};
-
-class CSurfaceWrapper {
-public:
-    CSurfaceWrapper();
-    virtual ~CSurfaceWrapper();
-};
-
-#if defined(_WIN32)
-// windows specific wrappers
-class CD3D9Wrapper : public CDeviceWrapper {
-public:
-    CD3D9Wrapper();
-    ~CD3D9Wrapper();
-
-    virtual bool AdapterNext();
-    virtual unsigned int AdapterIdx() const;
-    virtual void *Device() const;
-    virtual TDeviceStatus Status() const;
-    virtual void *D3D() const;
-
-private:
-    LPDIRECT3D9 _d3d9;
-    LPDIRECT3DDEVICE9 _d3dDevice;
-    D3DDISPLAYMODE _d3ddm;
-    D3DADAPTER_IDENTIFIER9 _adapter;
-    TDeviceStatus _status;
-    unsigned int _adapterIdx;
-    bool _adapterFound;
-
-    D3DFORMAT Format();
-    D3DADAPTER_IDENTIFIER9 Adapter();
-    int Init();
-    void Destroy();
-};
-
-class CD3D9ExWrapper : public CDeviceWrapper {
-public:
-    CD3D9ExWrapper();
-    ~CD3D9ExWrapper();
-
-    virtual bool AdapterNext();
-    virtual unsigned int AdapterIdx() const;
-    virtual void *Device() const;
-    virtual TDeviceStatus Status() const;
-    virtual void *D3D() const;
-
-private:
-    LPDIRECT3D9EX _d3d9Ex;
-    LPDIRECT3DDEVICE9EX _d3dDeviceEx;
-    D3DDISPLAYMODEEX _d3ddmEx;
-    D3DADAPTER_IDENTIFIER9 _adapter;
-    TDeviceStatus _status;
-    unsigned int _adapterIdx;
-    bool _adapterFound;
-
-    D3DFORMAT Format();
-    D3DADAPTER_IDENTIFIER9 Adapter();
-    int Init();
-    void Destroy();
-};
-
-class CDXVAWrapper : public CDeviceWrapper {
-public:
-    CDXVAWrapper();
-    ~CDXVAWrapper();
-
-    virtual bool AdapterNext();
-    virtual unsigned int AdapterIdx() const;
-    virtual void *Device() const;
-    virtual TDeviceStatus Status() const;
-    virtual void *D3D() const;
-    const CD3D9ExWrapper &D3D9() const;
-
-private:
-    CD3D9ExWrapper _d3d9;
-    IDXVAHD_Device *_dxvaDevice;
-    TDeviceStatus _status;
-    bool _adapterFound;
-
-    static const D3DFORMAT RENDER_TARGET_FORMAT;
-    static const D3DFORMAT VIDEO_FORMAT;
-    static const unsigned int VIDEO_FPS;
-
-    TDeviceStatus DXVAHDInit();
-    void DXVAHDDestroy();
-};
-
-class CD3D9SurfaceWrapper : public CSurfaceWrapper {
-public:
-    CD3D9SurfaceWrapper();
-    CD3D9SurfaceWrapper(IDirect3DSurface9 *mem);
-    ~CD3D9SurfaceWrapper();
-
-    operator IDirect3DSurface9 *() { return mMem; }
-    IDirect3DSurface9 **operator&() { return &mMem; }
-    IDirect3DSurface9 *operator->() const { return mMem; }
-
-private:
-    IDirect3DSurface9 *mMem;
-};
-#endif
-
-#endif // __D3D_WRAPPERS

diff --git a/test_conformance/generic_address_space/basic_tests.cpp b/test_conformance/generic_address_space/basic_tests.cpp
index b2e745c..0b81564 100644
--- a/test_conformance/generic_address_space/basic_tests.cpp
+++ b/test_conformance/generic_address_space/basic_tests.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -39,9 +39,7 @@
 
         const char *srcPtr = src.c_str();
 
-        if (create_single_kernel_helper(context, &program, &kernel, 1, &srcPtr,
-                                        "testKernel"))
-        {
+        if (create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &srcPtr, "testKernel", "-cl-std=CL2.0")) {
             log_error("create_single_kernel_helper failed");
             return -1;
         }

diff --git a/test_conformance/generic_address_space/main.cpp b/test_conformance/generic_address_space/main.cpp
index 0114758..12fa4a6 100644
--- a/test_conformance/generic_address_space/main.cpp
+++ b/test_conformance/generic_address_space/main.cpp

@@ -75,9 +75,7 @@
 
     if (version < expected_min_version)
     {
-        version_expected_info("Test", "OpenCL",
-                              expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
+        version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
         return TEST_SKIP;
     }
 

diff --git a/test_conformance/generic_address_space/stress_tests.cpp b/test_conformance/generic_address_space/stress_tests.cpp
index 7193e69..4f94a5d 100644
--- a/test_conformance/generic_address_space/stress_tests.cpp
+++ b/test_conformance/generic_address_space/stress_tests.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -41,9 +41,7 @@
 
         const char *srcPtr = src.c_str();
 
-        if (create_single_kernel_helper(context, &program, &kernel, 1, &srcPtr,
-                                        "testKernel"))
-        {
+        if (create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &srcPtr, "testKernel", "-cl-std=CL2.0")) {
             log_error("create_single_kernel_helper failed");
             return -1;
         }

diff --git a/test_conformance/geometrics/main.cpp b/test_conformance/geometrics/main.cpp
index 45f2b06..038999d 100644
--- a/test_conformance/geometrics/main.cpp
+++ b/test_conformance/geometrics/main.cpp

@@ -38,6 +38,6 @@
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
 }
 

diff --git a/test_conformance/geometrics/test_geometrics.cpp b/test_conformance/geometrics/test_geometrics.cpp
index e305026..2fcf31c 100644
--- a/test_conformance/geometrics/test_geometrics.cpp
+++ b/test_conformance/geometrics/test_geometrics.cpp

@@ -188,25 +188,19 @@
         }
         fillWithTrickyNumbers( inDataA, inDataB, vecsize );
 
-        streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                    sizeof(cl_float) * vecsize * TEST_SIZE,
-                                    inDataA, NULL);
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecsize * TEST_SIZE, inDataA, NULL);
         if( streams[0] == NULL )
         {
             log_error("ERROR: Creating input array A failed!\n");
             return -1;
         }
-        streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                    sizeof(cl_float) * vecsize * TEST_SIZE,
-                                    inDataB, NULL);
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecsize * TEST_SIZE, inDataB, NULL);
         if( streams[1] == NULL )
         {
             log_error("ERROR: Creating input array B failed!\n");
             return -1;
         }
-        streams[2] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * vecsize * TEST_SIZE, NULL, NULL);
+        streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * vecsize * TEST_SIZE, NULL, NULL);
         if( streams[2] == NULL )
         {
             log_error("ERROR: Creating output array failed!\n");
@@ -359,24 +353,19 @@
     }
 
 
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_float) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecSize * TEST_SIZE, inDataA, NULL);
     if( streams[0] == NULL )
     {
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
     }
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_float) * vecSize * TEST_SIZE, inDataB, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecSize * TEST_SIZE, inDataB, NULL);
     if( streams[1] == NULL )
     {
         log_error("ERROR: Creating input array B failed!\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * TEST_SIZE, NULL, NULL);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * TEST_SIZE, NULL, NULL);
     if( streams[2] == NULL )
     {
         log_error("ERROR: Creating output array failed!\n");
@@ -671,15 +660,14 @@
         }
     }
 
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_float) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
+                                sizeof(cl_float) * vecSize * TEST_SIZE, inDataA, NULL);
     if( streams[0] == NULL )
     {
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_float) * TEST_SIZE, NULL, NULL);
     if( streams[1] == NULL )
     {
@@ -884,17 +872,13 @@
             inDataA[i] = any_float(d);
     }
 
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_float) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecSize* TEST_SIZE, inDataA, NULL);
     if( streams[0] == NULL )
     {
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
     }
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_float) * vecSize * TEST_SIZE, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * vecSize  * TEST_SIZE, NULL, NULL);
     if( streams[1] == NULL )
     {
         log_error("ERROR: Creating output array failed!\n");

diff --git a/test_conformance/geometrics/test_geometrics_double.cpp b/test_conformance/geometrics/test_geometrics_double.cpp
index 222017e..7dec751 100644
--- a/test_conformance/geometrics/test_geometrics_double.cpp
+++ b/test_conformance/geometrics/test_geometrics_double.cpp

@@ -210,22 +210,19 @@
         }
         fillWithTrickyNumbers_double( inDataA, inDataB, vecsize );
 
-        streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, bufSize,
-                                    inDataA, NULL);
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), bufSize, inDataA, NULL);
         if( streams[0] == NULL )
         {
             log_error("ERROR: Creating input array A failed!\n");
             return -1;
         }
-        streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, bufSize,
-                                    inDataB, NULL);
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), bufSize, inDataB, NULL);
         if( streams[1] == NULL )
         {
             log_error("ERROR: Creating input array B failed!\n");
             return -1;
         }
-        streams[2] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE, bufSize, NULL, NULL);
+        streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), bufSize, NULL, NULL);
         if( streams[2] == NULL )
         {
             log_error("ERROR: Creating output array failed!\n");
@@ -327,24 +324,19 @@
     fillWithTrickyNumbers_double( inDataA, inDataB, vecSize );
 
 
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL);
     if( streams[0] == NULL )
     {
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
     }
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_double) * vecSize * TEST_SIZE, inDataB, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_double) * vecSize * TEST_SIZE, inDataB, NULL);
     if( streams[1] == NULL )
     {
         log_error("ERROR: Creating input array B failed!\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * TEST_SIZE, NULL, NULL);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * TEST_SIZE, NULL, NULL);
     if( streams[2] == NULL )
     {
         log_error("ERROR: Creating output array failed!\n");
@@ -563,16 +555,13 @@
 
     fillWithTrickyNumbers_double( inDataA, NULL, vecSize );
 
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL);
     if( streams[0] == NULL )
     {
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * TEST_SIZE, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * TEST_SIZE, NULL, NULL);
     if( streams[1] == NULL )
     {
         log_error("ERROR: Creating output array failed!\n");
@@ -770,17 +759,13 @@
         inDataA[ i ] = any_double(d);
 
 
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL);
     if( streams[0] == NULL )
     {
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
     }
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_double) * vecSize * TEST_SIZE, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * vecSize * TEST_SIZE, NULL, NULL);
     if( streams[1] == NULL )
     {
         log_error("ERROR: Creating output array failed!\n");

diff --git a/test_conformance/gl/helpers.cpp b/test_conformance/gl/helpers.cpp
index 16441a4..864059b 100644
--- a/test_conformance/gl/helpers.cpp
+++ b/test_conformance/gl/helpers.cpp

@@ -368,7 +368,7 @@
                 cl_float *outData = new cl_float[ numPixels * channelNum ];
                 for( size_t i = 0; i < numPixels * channelNum; i++ )
                 {
-                    outData[i] = cl_half_to_float(src[i]);
+                    outData[ i ] = convert_half_to_float(src[ i ]);
                 }
                 return (char *)outData;
             }

diff --git a/test_conformance/gl/test_buffers.cpp b/test_conformance/gl/test_buffers.cpp
index 35f01ee..f11590f 100644
--- a/test_conformance/gl/test_buffers.cpp
+++ b/test_conformance/gl/test_buffers.cpp

@@ -184,7 +184,7 @@
     glBufferData( GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW );
 
     glBindBuffer( GL_ARRAY_BUFFER, 0 );
-    glFinish();
+    glFlush();
 
 
     /* Generate some streams. The first and last ones are GL, middle one just vanilla CL */

diff --git a/test_conformance/gl/test_fence_sync.cpp b/test_conformance/gl/test_fence_sync.cpp
index 00bf2cc..37202ce 100644
--- a/test_conformance/gl/test_fence_sync.cpp
+++ b/test_conformance/gl/test_fence_sync.cpp

@@ -74,7 +74,7 @@
     glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" );
     glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress( "glGetSynciv" );
 }
-#ifndef GL_ARB_sync
+
 #define GL_MAX_SERVER_WAIT_TIMEOUT        0x9111
 
 #define GL_OBJECT_TYPE            0x9112
@@ -97,7 +97,6 @@
 #define GL_TIMEOUT_EXPIRED            0x911B
 #define GL_CONDITION_SATISFIED        0x911C
 #define GL_WAIT_FAILED            0x911D
-#endif
 
 #define USING_ARB_sync 1
 #endif
@@ -286,7 +285,7 @@
     virtual void * IRun( void )
     {
         cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads );
-        return (void *)(uintptr_t)error;
+        return (void *)error;
     }
 };
 

diff --git a/test_conformance/gl/test_image_methods.cpp b/test_conformance/gl/test_image_methods.cpp
index 07f5b65..0d0e5c7 100644
--- a/test_conformance/gl/test_image_methods.cpp
+++ b/test_conformance/gl/test_image_methods.cpp

@@ -19,7 +19,7 @@
 
 using namespace std;
 
-struct image_kernel_data
+typedef struct image_kernel_data
 {
     cl_int width;
     cl_int height;
@@ -277,8 +277,7 @@
     test_error( error, "Unable to create kernel to test against" );
 
     // Create an output buffer
-    outDataBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                   sizeof(outKernelData), NULL, &error);
+    outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error );
     test_error( error, "Unable to create output buffer" );
 
     // Set up arguments and run
@@ -287,10 +286,10 @@
     error = clSetKernelArg( kernel, 1, sizeof( outDataBuffer ), &outDataBuffer );
     test_error( error, "Unable to set kernel argument" );
 
-    // Finish and Acquire.
-    glFinish();
-    error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &image, 0, NULL, NULL);
-    test_error(error, "Unable to acquire GL obejcts");
+  // Flush and Acquire.
+  glFlush();
+  error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &image, 0, NULL, NULL);
+  test_error( error, "Unable to acquire GL obejcts");
 
     size_t threads[1] = { 1 }, localThreads[1] = { 1 };
 

diff --git a/test_conformance/gles/CMakeLists.txt b/test_conformance/gles/CMakeLists.txt
index c76fe51..f367490 100644
--- a/test_conformance/gles/CMakeLists.txt
+++ b/test_conformance/gles/CMakeLists.txt

@@ -15,6 +15,11 @@
         ../../test_common/gles/helpers.cpp
     )
 
-list(APPEND CLConform_LIBRARIES EGL GLESv2)
+if(ANDROID)
+    list(APPEND CLConform_LIBRARIES GLESv2)
+elseif(WIN32)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGLES3")
+    list(APPEND CLConform_LIBRARIES libEGL libGLESv2 )
+endif(ANDROID)
 
 include(../CMakeCommon.txt)

diff --git a/test_conformance/gles/main.cpp b/test_conformance/gles/main.cpp
index 644fa63..d0c12c9 100644
--- a/test_conformance/gles/main.cpp
+++ b/test_conformance/gles/main.cpp

@@ -62,8 +62,8 @@
 TEST_FN_REDIRECTOR( renderbuffer_write )
 TEST_FN_REDIRECTOR( renderbuffer_getinfo )
 
-#ifdef GL_ES_VERSION_3_0
-TEST_FN_REDIRECTOR(fence_sync)
+#ifndef GL_ES_VERSION_2_0
+TEST_FN_REDIRECTOR( test_fence_sync )
 #endif
 
 test_definition test_list[] = {
@@ -82,17 +82,14 @@
     TEST_FN_REDIRECT( renderbuffer_getinfo )
 };
 
-#ifdef GL_ES_VERSION_3_0
+#ifndef GL_ES_VERSION_2_0
 test_definition test_list32[] = {
     TEST_FN_REDIRECT( fence_sync )
 };
 #endif
 
 const int test_num = ARRAY_SIZE( test_list );
-
-#ifdef GL_ES_VERSION_3_0
 const int test_num32 = ARRAY_SIZE( test_list32 );
-#endif
 
 
 int main(int argc, const char *argv[])
@@ -116,15 +113,12 @@
         for( int i = 0; i < test_num; i++ )
             log_info( "\t%s\n", test_list[i].name );
 
-#ifdef GL_ES_VERSION_3_0
         log_info( "Available 3.2 tests:\n" );
         for( int i = 0; i < test_num32; i++ )
             log_info( "\t%s\n", test_list32[i].name );
-#endif
 
-        log_info("Note: Any 3.2 test names must follow 2.1 test names on the "
-                 "command line.");
-        log_info("Use environment variables to specify desired device.");
+    log_info( "Note: Any 3.2 test names must follow 2.1 test names on the command line." );
+    log_info( "Use environment variables to specify desired device." );
 
         return 0;
     }
@@ -147,14 +141,12 @@
   // Check to see if any 2.x or 3.2 test names were specified on the command line.
   unsigned first_32_testname = 0;
 
-#ifdef GL_ES_VERSION_3_0
   for (int j=1; (j<argc) && (!first_32_testname); ++j)
     for (int i = 0; i < test_num32; ++i)
       if (strcmp(test_list32[i].name, argv[j]) == 0 ) {
         first_32_testname = j;
         break;
       }
-#endif
 
   // Create the environment for the test.
     GLEnvironment *glEnv = GLEnvironment::Instance();
@@ -330,7 +322,7 @@
           error = -1;
           goto cleanup;
         }
-#ifndef GLES3
+#ifdef GL_ES_VERSION_2_0
         log_info("Cannot test OpenGL 3.2! This test was built for OpenGL ES 2.0\n");
         error = -1;
         goto cleanup;

diff --git a/test_conformance/gles/setup_egl.cpp b/test_conformance/gles/setup_egl.cpp
index fe0f8ca..6bb53cf 100644
--- a/test_conformance/gles/setup_egl.cpp
+++ b/test_conformance/gles/setup_egl.cpp

@@ -112,17 +112,11 @@
         size_t dev_size;
         cl_int status;
 
-        clGetGLContextInfoKHR_fn GetGLContextInfo =
-            (clGetGLContextInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(
-                _platform, "clGetGLContextInfoKHR");
-        if (GetGLContextInfo == NULL)
-        {
-            print_error(status, "clGetGLContextInfoKHR failed");
-            return NULL;
-        }
-
-        status = GetGLContextInfo(properties, CL_DEVICES_FOR_GL_CONTEXT_KHR,
-                                  sizeof(devices), devices, &dev_size);
+        status = clGetGLContextInfoKHR(properties,
+                                       CL_DEVICES_FOR_GL_CONTEXT_KHR,
+                                       sizeof(devices),
+                                       devices,
+                                       &dev_size);
         if (status != CL_SUCCESS) {
             print_error(status, "clGetGLContextInfoKHR failed");
             return NULL;
@@ -130,9 +124,11 @@
         dev_size /= sizeof(cl_device_id);
         log_info("GL _context supports %d compute devices\n", dev_size);
 
-        status =
-            GetGLContextInfo(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
-                             sizeof(devices), devices, &dev_size);
+        status = clGetGLContextInfoKHR(properties,
+                                       CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
+                                       sizeof(devices),
+                                       devices,
+                                       &dev_size);
         if (status != CL_SUCCESS) {
             print_error(status, "clGetGLContextInfoKHR failed");
             return NULL;
@@ -168,12 +164,9 @@
 
         // Check all devices, search for one that supports cl_khr_gl_sharing
         for (int i=0; i<(int)num_of_devices; i++) {
-            if (!is_extension_available(devices[i], "cl_khr_gl_sharing"))
-            {
+            if (!is_extension_available(devices[i], "cl_khr_gl_sharing"){
                 log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
-            }
-            else
-            {
+            } else {
                 log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
                 interop_devices++;
             }

diff --git a/test_conformance/gles/test_fence_sync.cpp b/test_conformance/gles/test_fence_sync.cpp
index 0af91a4..7683b9c 100644
--- a/test_conformance/gles/test_fence_sync.cpp
+++ b/test_conformance/gles/test_fence_sync.cpp

@@ -160,7 +160,7 @@
     glAttachShader(program, vpShader);
 
     GLuint fpShader;
-    char *fpstr = (char *)malloc(sizeof(fragmentshader));
+    char* fpstr = (char*)malloc(strlen(fragmentshader));
     strcpy(fpstr, fragmentshader);
     fpShader = glCreateShader(GL_FRAGMENT_SHADER);
     glShaderSource(fpShader, 1, (const GLchar **)&fpstr, NULL);
@@ -297,7 +297,7 @@
     virtual void * IRun( void )
     {
         cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads );
-        return (void *)(intptr_t)error;
+        return (void *)error;
     }
 };
 

diff --git a/test_conformance/half/Test_vLoadHalf.cpp b/test_conformance/half/Test_vLoadHalf.cpp
index 52867c2..7bc756d 100644
--- a/test_conformance/half/Test_vLoadHalf.cpp
+++ b/test_conformance/half/Test_vLoadHalf.cpp

@@ -20,7 +20,44 @@
 #include "cl_utils.h"
 #include "tests.h"
 
-#include <CL/cl_half.h>
+static inline float half2float( cl_ushort us )
+{
+    uint32_t u = us;
+    uint32_t sign = (u << 16) & 0x80000000;
+    int32_t exponent = (u & 0x7c00) >> 10;
+    uint32_t mantissa = (u & 0x03ff) << 13;
+    union{ unsigned int u; float f;}uu;
+
+    if( exponent == 0 )
+    {
+        if( mantissa == 0 )
+            return sign ? -0.0f : 0.0f;
+
+        int shift = __builtin_clz( mantissa ) - 8;
+        exponent -= shift-1;
+        mantissa <<= shift;
+        mantissa &= 0x007fffff;
+    }
+    else
+        if( exponent == 31)
+        {
+            uu.u = mantissa | sign;
+            if( mantissa )
+                uu.u |= 0x7fc00000;
+            else
+                uu.u |= 0x7f800000;
+
+            return uu.f;
+        }
+
+    exponent += 127 - 15;
+    exponent <<= 23;
+
+    exponent |= mantissa;
+    uu.u = exponent | sign;
+
+    return uu.f;
+}
 
 int Test_vLoadHalf_private( cl_device_id device, bool aligned )
 {
@@ -170,38 +207,17 @@
         };
 
         const char *source_local2[] = {
-            "#define VECTOR_LEN (",
-            vector_size_names[vectorSize],
-            "/",
-            align_divisors[vectorSize],
-            ")\n"
-            "#define ALIGN_TYPE ",
-            align_types[vectorSize],
-            "\n"
-            "__kernel void test( const __global half *p, __global float",
-            vector_size_name_extensions[vectorSize],
-            " *f )\n"
+            "__kernel void test( const __global half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n"
             "{\n"
-            "   __local uchar data[",
-            local_buf_size,
-            "/",
-            align_divisors[vectorSize],
-            "*sizeof(ALIGN_TYPE)] ",
-            "__attribute__((aligned(sizeof(ALIGN_TYPE))));\n"
+            "   __local ", align_types[vectorSize], " data[", local_buf_size, "/", align_divisors[vectorSize], "];\n"
             "   __local half* hdata_p = (__local half*) data;\n"
-            "   __global ALIGN_TYPE* i_p = (__global ALIGN_TYPE*)p;\n"
+            "   __global ", align_types[vectorSize], "* i_p = (__global ", align_types[vectorSize],"*)p;\n"
             "   size_t i = get_global_id(0);\n"
             "   size_t lid = get_local_id(0);\n"
             "   int k;\n"
-            "   for (k=0; k<VECTOR_LEN; k++)\n"
-            "     *(__local ",
-            "ALIGN_TYPE*)&(data[(lid*VECTOR_LEN+k)*sizeof(ALIGN_TYPE)]) = ",
-            "i_p[i*VECTOR_LEN+k];\n"
-            "   f[i] = vload",
-            aligned ? "a" : "",
-            "_half",
-            vector_size_name_extensions[vectorSize],
-            "( lid, hdata_p );\n"
+            "   for (k=0; k<",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"; k++)\n"
+            "     data[lid*",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"+k] = i_p[i*",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"+k];\n"
+            "   f[i] = vload", aligned ? "a" : "", "_half",vector_size_name_extensions[vectorSize],"( lid, hdata_p );\n"
             "}\n"
         };
 
@@ -466,7 +482,8 @@
         //create the reference result
         const unsigned short *s = (const unsigned short *)gIn_half;
         float *d = (float *)gOut_single_reference;
-        for (j = 0; j < count; j++) d[j] = cl_half_to_float(s[j]);
+        for( j = 0; j < count; j++ )
+            d[j] = half2float( s[j] );
 
         //Check the vector lengths
         for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
@@ -527,7 +544,7 @@
                                        (aligned?"aligned":"unaligned"));
                             gFailCount++;
                             error = -1;
-                            goto exit;
+                            break; // goto exit;
                         }
                     }
                 }

diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp
index c3a328a..2b24a68 100644
--- a/test_conformance/half/Test_vStoreHalf.cpp
+++ b/test_conformance/half/Test_vStoreHalf.cpp

@@ -21,8 +21,6 @@
 #include "cl_utils.h"
 #include "tests.h"
 
-#include <CL/cl_half.h>
-
 typedef struct ComputeReferenceInfoF_
 {
     float *x;
@@ -210,44 +208,406 @@
     return ret;
 }
 
-static cl_half float2half_rte(float f)
+static cl_ushort float2half_rte( float f );  // float -> half bits, round to nearest even
+static cl_ushort float2half_rtz( float f );  // float -> half bits, round toward zero
+static cl_ushort float2half_rtp( float f );  // float -> half bits, round toward +infinity
+static cl_ushort float2half_rtn( float f );  // float -> half bits, round toward -infinity
+static cl_ushort double2half_rte( double f );  // double -> half bits, round to nearest even
+static cl_ushort double2half_rtz( double f );  // double -> half bits, round toward zero
+static cl_ushort double2half_rtp( double f );  // double -> half bits, round toward +infinity
+static cl_ushort double2half_rtn( double f );  // double -> half bits, round toward -infinity
+
+static cl_ushort
+float2half_rte( float f )  // returns the 16-bit half encoding of f, rounded to nearest even
 {
-    return cl_half_from_float(f, CL_HALF_RTE);
+    union{ float f; cl_uint u; } u = {f};  // gives access to the raw bits of f
+    cl_uint sign = (u.u >> 16) & 0x8000;  // float sign (bit 31) moved to the half sign position (bit 15)
+    float x = fabsf(f);
+
+    // NaN: x != x is true only for NaN
+    if( x != x )
+    {
+        u.u >>= (24-11);  // shift out the 13 mantissa bits a half cannot hold
+        u.u &= 0x7fff;  // keep the low 5 exponent bits (all ones for NaN) and 10 mantissa bits
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+
+    // overflow: 65520 is halfway between the largest finite half (65504) and 2^16, so it and anything larger becomes infinity
+    if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
+        return 0x7c00 | sign;
+
+    // underflow: 2^-25 is half of the smallest half subnormal (2^-24)
+    if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+
+    // very small: between 2^-25 and 1.5 * 2^-24, which rounds to the smallest subnormal (0x0001)
+    if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
+        return sign | 1;
+
+    // half denormal: magnitudes below 2^-14, the smallest normal half
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);  // 2^-24 scales to 2^-149, so the float's low bits count half ulps; NOTE(review): rounding here is done by the host multiply -- assumes round-to-nearest and no subnormal flushing
+        return sign | u.u;
+    }
+
+    u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
+    u.u &= 0x7f800000;  // keep only the exponent: u.f is now 2^13 times the power of two at or below |f|
+    x += u.f;  // the add pushes the 13 excess low bits of x out of the float significand, rounding them
+    u.f = x - u.f;  // subtract the bias again, leaving x rounded to 11 significant bits
+    u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);  // rebias the exponent from float (127) to half (15)
+
+    return (u.u >> (24-11)) | sign;
 }
 
-static cl_half float2half_rtz(float f)
+static cl_ushort
+float2half_rtz( float f )  // returns the 16-bit half encoding of f, rounded toward zero
 {
-    return cl_half_from_float(f, CL_HALF_RTZ);
+    union{ float f; cl_uint u; } u = {f};  // gives access to the raw bits of f
+    cl_uint sign = (u.u >> 16) & 0x8000;  // float sign (bit 31) moved to the half sign position (bit 15)
+    float x = fabsf(f);
+
+    // NaN: x != x is true only for NaN
+    if( x != x )
+    {
+        u.u >>= (24-11);  // shift out the 13 mantissa bits a half cannot hold
+        u.u &= 0x7fff;  // keep the low 5 exponent bits (all ones for NaN) and 10 mantissa bits
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+
+    // overflow: magnitudes of 2^16 and above
+    if( x >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
+    {
+        if( x == INFINITY )
+            return 0x7c00 | sign;  // infinity stays infinity
+
+        return 0x7bff | sign;  // finite overflow truncates to the largest finite half
+    }
+
+    // underflow: magnitudes below the smallest half subnormal (2^-24)
+    if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
+        return sign;    // truncation toward zero yields a signed zero
+
+    // half denormal: magnitudes below 2^-14, the smallest normal half
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);  // express x in units of 2^-24, the half subnormal ulp
+        return (cl_ushort)((int) x | sign);  // the int cast truncates the fraction
+    }
+
+    u.u &= 0xFFFFE000U;  // clear the 13 low mantissa bits (truncate)
+    u.u -= 0x38000000U;  // subtract 112 << 23: rebias the exponent from float (127) to half (15)
+
+    return (u.u >> (24-11)) | sign;  // bits above 15 are dropped by the cl_ushort return type
 }
 
-static cl_half float2half_rtp(float f)
+static cl_ushort
+float2half_rtp( float f )  // returns the 16-bit half encoding of f, rounded toward +infinity
 {
-    return cl_half_from_float(f, CL_HALF_RTP);
+    union{ float f; cl_uint u; } u = {f};  // gives access to the raw bits of f
+    cl_uint sign = (u.u >> 16) & 0x8000;  // float sign (bit 31) moved to the half sign position (bit 15)
+    float x = fabsf(f);
+
+    // NaN: x != x is true only for NaN
+    if( x != x )
+    {
+        u.u >>= (24-11);  // shift out the 13 mantissa bits a half cannot hold
+        u.u &= 0x7fff;  // keep the low 5 exponent bits (all ones for NaN) and 10 mantissa bits
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+
+    // overflow: anything above the largest finite half (65504) rounds up to +infinity
+    if( f > MAKE_HEX_FLOAT(0x1.ffcp15f, 0x1ffcL, 3) )
+        return 0x7c00;
+
+    if( f <= MAKE_HEX_FLOAT(-0x1.0p16f, -0x1L, 16) )
+    {
+        if( f == -INFINITY )
+            return 0xfc00;  // -infinity stays -infinity
+
+        return 0xfbff;  // finite negative overflow rounds up to the most negative finite half
+    }
+
+    // underflow: magnitudes below the smallest half subnormal (2^-24)
+    if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
+    {
+        if( f > 0 )
+            return 1;  // tiny positive values round up to the smallest positive subnormal
+        return sign;  // zero and tiny negative values become a signed zero
+    }
+
+    // half denormal: magnitudes below 2^-14, the smallest normal half
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);  // express x in units of 2^-24, the half subnormal ulp
+        int r = (int) x;  // truncated magnitude
+        r += (float) r != x && f > 0.0f;  // add one ulp only when inexact and positive
+
+        return (cl_ushort)( r | sign);
+    }
+
+    float g = u.f;  // original value, kept to detect inexact truncation
+    u.u &= 0xFFFFE000U;  // clear the 13 low mantissa bits (truncate the magnitude)
+    if( g > u.f )  // only true for positive inputs that lost bits
+        u.u += 0x00002000U;  // bump by one half ulp (1 << 13)
+    u.u -= 0x38000000U;  // subtract 112 << 23: rebias the exponent from float (127) to half (15)
+
+    return (u.u >> (24-11)) | sign;  // bits above 15 are dropped by the cl_ushort return type
 }
 
-static cl_half float2half_rtn(float f)
+
+static cl_ushort
+float2half_rtn( float f )  // returns the 16-bit half encoding of f, rounded toward -infinity
 {
-    return cl_half_from_float(f, CL_HALF_RTN);
+    union{ float f; cl_uint u; } u = {f};  // gives access to the raw bits of f
+    cl_uint sign = (u.u >> 16) & 0x8000;  // float sign (bit 31) moved to the half sign position (bit 15)
+    float x = fabsf(f);
+
+    // NaN: x != x is true only for NaN
+    if( x != x )
+    {
+        u.u >>= (24-11);  // shift out the 13 mantissa bits a half cannot hold
+        u.u &= 0x7fff;  // keep the low 5 exponent bits (all ones for NaN) and 10 mantissa bits
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+
+    // overflow: positive values of 2^16 and above
+    if( f >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
+    {
+        if( f == INFINITY )
+            return 0x7c00;  // +infinity stays +infinity
+
+        return 0x7bff;  // finite positive overflow rounds down to the largest finite half
+    }
+
+    if( f < MAKE_HEX_FLOAT(-0x1.ffcp15f, -0x1ffcL, 3) )  // below -65504, the most negative finite half
+        return 0xfc00;
+
+    // underflow: magnitudes below the smallest half subnormal (2^-24)
+    if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
+    {
+        if( f < 0 )
+            return 0x8001;  // tiny negative values round down to the smallest negative subnormal
+        return sign;  // zero and tiny positive values become a signed zero
+    }
+
+    // half denormal: magnitudes below 2^-14, the smallest normal half
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);  // express x in units of 2^-24, the half subnormal ulp
+        int r = (int) x;  // truncated magnitude
+        r += (float) r != x && f < 0.0f;  // grow the magnitude by one ulp only when inexact and negative
+
+        return (cl_ushort)( r | sign);
+    }
+
+    u.u &= 0xFFFFE000U;  // clear the 13 low mantissa bits (truncate the magnitude)
+    if( u.f > f )  // only true for negative inputs that lost bits
+        u.u += 0x00002000U;  // bump the magnitude by one half ulp (1 << 13)
+    u.u -= 0x38000000U;  // subtract 112 << 23: rebias the exponent from float (127) to half (15)
+
+    return (u.u >> (24-11)) | sign;  // bits above 15 are dropped by the cl_ushort return type
 }
 
-static cl_half double2half_rte(double f)
+static cl_ushort
+double2half_rte( double f )  // returns the 16-bit half encoding of f, rounded to nearest even
 {
-    return cl_half_from_double(f, CL_HALF_RTE);
+    union{ double f; cl_ulong u; } u = {f};  // gives access to the raw bits of f
+    cl_ulong sign = (u.u >> 48) & 0x8000;  // double sign (bit 63) moved to the half sign position (bit 15)
+    double x = fabs(f);
+
+    // NaN: x != x is true only for NaN
+    if( x != x )
+    {
+        u.u >>= (53-11);  // shift out the 42 mantissa bits a half cannot hold
+        u.u &= 0x7fff;  // keep the low 5 exponent bits (all ones for NaN) and 10 mantissa bits
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+
+    // overflow: 65520 is halfway between the largest finite half (65504) and 2^16, so it and anything larger becomes infinity
+    if( x >= MAKE_HEX_DOUBLE(0x1.ffep15, 0x1ffeLL, 3) )
+        return 0x7c00 | sign;
+
+    // underflow: 2^-25 is half of the smallest half subnormal (2^-24)
+    if( x <= MAKE_HEX_DOUBLE(0x1.0p-25, 0x1LL, -25) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+
+    // very small: between 2^-25 and 1.5 * 2^-24, which rounds to the smallest subnormal (0x0001)
+    if( x < MAKE_HEX_DOUBLE(0x1.8p-24, 0x18LL, -28) )
+        return sign | 1;
+
+    // half denormal: magnitudes below 2^-14, the smallest normal half
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-14, 0x1LL, -14) )
+    {
+        u.f = x * MAKE_HEX_DOUBLE(0x1.0p-1050, 0x1LL, -1050);  // 2^-24 scales to 2^-1074, so the double's low bits count half ulps; NOTE(review): rounding here is done by the host multiply -- assumes round-to-nearest and no subnormal flushing
+        return sign | u.u;
+    }
+
+    u.f *= MAKE_HEX_DOUBLE(0x1.0p42, 0x1LL, 42);
+    u.u &= 0x7ff0000000000000ULL;  // keep only the exponent: u.f is now 2^42 times the power of two at or below |f|
+    x += u.f;  // the add pushes the 42 excess low bits of x out of the double significand, rounding them
+    u.f = x - u.f;  // subtract the bias again, leaving x rounded to 11 significant bits
+    u.f *= MAKE_HEX_DOUBLE(0x1.0p-1008, 0x1LL, -1008);  // rebias the exponent from double (1023) to half (15)
+
+    return (u.u >> (53-11)) | sign;
 }
 
-static cl_half double2half_rtz(double f)
+static cl_ushort
+double2half_rtz( double f )  // returns the 16-bit half encoding of f, rounded toward zero
 {
-    return cl_half_from_double(f, CL_HALF_RTZ);
+    union{ double f; cl_ulong u; } u = {f};  // gives access to the raw bits of f
+    cl_ulong sign = (u.u >> 48) & 0x8000;  // double sign (bit 63) moved to the half sign position (bit 15)
+    double x = fabs(f);
+
+    // NaN: x != x is true only for NaN
+    if( x != x )
+    {
+        u.u >>= (53-11);  // shift out the 42 mantissa bits a half cannot hold
+        u.u &= 0x7fff;  // keep the low 5 exponent bits (all ones for NaN) and 10 mantissa bits
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+
+    if( x == INFINITY )
+        return 0x7c00 | sign;  // infinity stays infinity
+
+    // overflow: finite magnitudes of 2^16 and above truncate to the largest finite half
+    if( x >= MAKE_HEX_DOUBLE(0x1.0p16, 0x1LL, 16) )
+        return 0x7bff | sign;
+
+    // underflow: magnitudes below the smallest half subnormal (2^-24)
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-24, 0x1LL, -24) )
+        return sign;    // truncation toward zero yields a signed zero
+
+    // half denormal: magnitudes below 2^-14, the smallest normal half
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-14, 0x1LL, -14) )
+    {
+        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);  // express x in units of 2^-24; the float-typed macro is used here although x is a double
+        return (cl_ushort)((int) x | sign);  // the int cast truncates the fraction
+    }
+
+    u.u &= 0xFFFFFC0000000000ULL;  // clear the 42 low mantissa bits (truncate)
+    u.u -= 0x3F00000000000000ULL;  // subtract 1008 << 52: rebias the exponent from double (1023) to half (15)
+
+    return (u.u >> (53-11)) | sign;  // bits above 15 are dropped by the cl_ushort return type
 }
 
-static cl_half double2half_rtp(double f)
+static cl_ushort
+double2half_rtp( double f )  // returns the 16-bit half encoding of f, rounded toward +infinity
 {
-    return cl_half_from_double(f, CL_HALF_RTP);
+    union{ double f; cl_ulong u; } u = {f};  // gives access to the raw bits of f
+    cl_ulong sign = (u.u >> 48) & 0x8000;  // double sign (bit 63) moved to the half sign position (bit 15)
+    double x = fabs(f);
+
+    // NaN: x != x is true only for NaN
+    if( x != x )
+    {
+        u.u >>= (53-11);  // shift out the 42 mantissa bits a half cannot hold
+        u.u &= 0x7fff;  // keep the low 5 exponent bits (all ones for NaN) and 10 mantissa bits
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+
+    // overflow: anything above the largest finite half (65504) rounds up to +infinity
+    if( f > MAKE_HEX_DOUBLE(0x1.ffcp15, 0x1ffcLL, 3) )
+        return 0x7c00;
+
+    if( f <= MAKE_HEX_DOUBLE(-0x1.0p16, -0x1LL, 16) )
+    {
+        if( f == -INFINITY )
+            return 0xfc00;  // -infinity stays -infinity
+
+        return 0xfbff;  // finite negative overflow rounds up to the most negative finite half
+    }
+
+    // underflow: magnitudes below the smallest half subnormal (2^-24)
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-24, 0x1LL, -24) )
+    {
+        if( f > 0 )
+            return 1;  // tiny positive values round up to the smallest positive subnormal
+        return sign;  // zero and tiny negative values become a signed zero
+    }
+
+    // half denormal: magnitudes below 2^-14, the smallest normal half
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-14, 0x1LL, -14) )
+    {
+        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);  // express x in units of 2^-24; the float-typed macro is used here although x is a double
+        int r = (int) x;  // truncated magnitude
+        if( 0 == sign )  // only positive inputs round up in magnitude
+            r += (double) r != x;  // add one ulp when the truncation was inexact
+
+        return (cl_ushort)( r | sign);
+    }
+
+    double g = u.f;  // original value, kept to detect inexact truncation
+    u.u &= 0xFFFFFC0000000000ULL;  // clear the 42 low mantissa bits (truncate the magnitude)
+    if( g != u.f && 0 == sign)  // positive input that lost bits
+        u.u += 0x0000040000000000ULL;  // bump by one half ulp (1 << 42)
+    u.u -= 0x3F00000000000000ULL;  // subtract 1008 << 52: rebias the exponent from double (1023) to half (15)
+
+    return (u.u >> (53-11)) | sign;  // bits above 15 are dropped by the cl_ushort return type
 }
 
-static cl_half double2half_rtn(double f)
+
+static cl_ushort
+double2half_rtn( double f )  // returns the 16-bit half encoding of f, rounded toward -infinity
 {
-    return cl_half_from_double(f, CL_HALF_RTN);
+    union{ double f; cl_ulong u; } u = {f};  // gives access to the raw bits of f
+    cl_ulong sign = (u.u >> 48) & 0x8000;  // double sign (bit 63) moved to the half sign position (bit 15)
+    double x = fabs(f);
+
+    // NaN: x != x is true only for NaN
+    if( x != x )
+    {
+        u.u >>= (53-11);  // shift out the 42 mantissa bits a half cannot hold
+        u.u &= 0x7fff;  // keep the low 5 exponent bits (all ones for NaN) and 10 mantissa bits
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+
+    // overflow: positive values of 2^16 and above
+    if( f >= MAKE_HEX_DOUBLE(0x1.0p16, 0x1LL, 16) )
+    {
+        if( f == INFINITY )
+            return 0x7c00;  // +infinity stays +infinity
+
+        return 0x7bff;  // finite positive overflow rounds down to the largest finite half
+    }
+
+    if( f < MAKE_HEX_DOUBLE(-0x1.ffcp15, -0x1ffcLL, 3) )  // below -65504, the most negative finite half
+        return 0xfc00;
+
+    // underflow: magnitudes below the smallest half subnormal (2^-24)
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-24, 0x1LL, -24) )
+    {
+        if( f < 0 )
+            return 0x8001;  // tiny negative values round down to the smallest negative subnormal
+        return sign;  // zero and tiny positive values become a signed zero
+    }
+
+    // half denormal: magnitudes below 2^-14, the smallest normal half
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-14, 0x1LL, -14) )
+    {
+        x *= MAKE_HEX_DOUBLE(0x1.0p24, 0x1LL, 24);  // express x in units of 2^-24, the half subnormal ulp
+        int r = (int) x;  // truncated magnitude
+        if( sign )  // only negative inputs grow in magnitude
+            r += (double) r != x;  // add one ulp when the truncation was inexact
+
+        return (cl_ushort)( r | sign);
+    }
+
+    double g = u.f;  // original value, kept to detect inexact truncation
+    u.u &= 0xFFFFFC0000000000ULL;  // clear the 42 low mantissa bits (truncate the magnitude)
+    if( g < u.f && sign)  // negative input that lost bits
+        u.u += 0x0000040000000000ULL;  // bump the magnitude by one half ulp (1 << 42)
+    u.u -= 0x3F00000000000000ULL;  // subtract 1008 << 52: rebias the exponent from double (1023) to half (15)
+
+    return (u.u >> (53-11)) | sign;  // bits above 15 are dropped by the cl_ushort return type
 }
 
 int test_vstore_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
@@ -696,30 +1056,30 @@
 
     ComputeReferenceInfoF fref;
     fref.x = (float *)gIn_single;
-    fref.r = (cl_half *)gOut_half_reference;
+    fref.r = (cl_ushort *)gOut_half_reference;
     fref.f = referenceFunc;
     fref.lim = blockCount;
     fref.count = (blockCount + threadCount - 1) / threadCount;
 
     CheckResultInfoF fchk;
     fchk.x = (const float *)gIn_single;
-    fchk.r = (const cl_half *)gOut_half_reference;
-    fchk.s = (const cl_half *)gOut_half;
+    fchk.r = (const cl_ushort *)gOut_half_reference;
+    fchk.s = (const cl_ushort *)gOut_half;
     fchk.f = referenceFunc;
     fchk.lim = blockCount;
     fchk.count = (blockCount + threadCount - 1) / threadCount;
 
     ComputeReferenceInfoD dref;
     dref.x = (double *)gIn_double;
-    dref.r = (cl_half *)gOut_half_reference_double;
+    dref.r = (cl_ushort *)gOut_half_reference_double;
     dref.f = doubleReferenceFunc;
     dref.lim = blockCount;
     dref.count = (blockCount + threadCount - 1) / threadCount;
 
     CheckResultInfoD dchk;
     dchk.x = (const double *)gIn_double;
-    dchk.r = (const cl_half *)gOut_half_reference_double;
-    dchk.s = (const cl_half *)gOut_half;
+    dchk.r = (const cl_ushort *)gOut_half_reference_double;
+    dchk.s = (const cl_ushort *)gOut_half;
     dchk.f = doubleReferenceFunc;
     dchk.lim = blockCount;
     dchk.count = (blockCount + threadCount - 1) / threadCount;
@@ -764,9 +1124,7 @@
                 cl_uint pattern = 0xdeaddead;
                 memset_pattern4( gOut_half, &pattern, BUFFER_SIZE/2);
 
-                error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE,
-                                             0, count * sizeof(cl_half),
-                                             gOut_half, 0, NULL, NULL);
+                error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                 if (error) {
                     vlog_error( "Failure in clWriteArray\n" );
                     gFailCount++;
@@ -781,9 +1139,7 @@
                     goto exit;
                 }
 
-                error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0,
-                                            count * sizeof(cl_half), gOut_half,
-                                            0, NULL, NULL);
+                error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                 if (error) {
                     vlog_error( "Failure in clReadArray\n" );
                     gFailCount++;
@@ -799,9 +1155,7 @@
                 if (gTestDouble) {
                     memset_pattern4( gOut_half, &pattern, BUFFER_SIZE/2);
 
-                    error = clEnqueueWriteBuffer(
-                        gQueue, gOutBuffer_half, CL_FALSE, 0,
-                        count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
+                    error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                     if (error) {
                         vlog_error( "Failure in clWriteArray\n" );
                         gFailCount++;
@@ -816,9 +1170,7 @@
                         goto exit;
                     }
 
-                    error = clEnqueueReadBuffer(
-                        gQueue, gOutBuffer_half, CL_TRUE, 0,
-                        count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
+                    error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                     if (error) {
                         vlog_error( "Failure in clReadArray\n" );
                         gFailCount++;
@@ -1293,30 +1645,30 @@
 
     ComputeReferenceInfoF fref;
     fref.x = (float *)gIn_single;
-    fref.r = (cl_half *)gOut_half_reference;
+    fref.r = (cl_ushort *)gOut_half_reference;
     fref.f = referenceFunc;
     fref.lim = blockCount;
     fref.count = (blockCount + threadCount - 1) / threadCount;
 
     CheckResultInfoF fchk;
     fchk.x = (const float *)gIn_single;
-    fchk.r = (const cl_half *)gOut_half_reference;
-    fchk.s = (const cl_half *)gOut_half;
+    fchk.r = (const cl_ushort *)gOut_half_reference;
+    fchk.s = (const cl_ushort *)gOut_half;
     fchk.f = referenceFunc;
     fchk.lim = blockCount;
     fchk.count = (blockCount + threadCount - 1) / threadCount;
 
     ComputeReferenceInfoD dref;
     dref.x = (double *)gIn_double;
-    dref.r = (cl_half *)gOut_half_reference_double;
+    dref.r = (cl_ushort *)gOut_half_reference_double;
     dref.f = doubleReferenceFunc;
     dref.lim = blockCount;
     dref.count = (blockCount + threadCount - 1) / threadCount;
 
     CheckResultInfoD dchk;
     dchk.x = (const double *)gIn_double;
-    dchk.r = (const cl_half *)gOut_half_reference_double;
-    dchk.s = (const cl_half *)gOut_half;
+    dchk.r = (const cl_ushort *)gOut_half_reference_double;
+    dchk.s = (const cl_ushort *)gOut_half;
     dchk.f = doubleReferenceFunc;
     dchk.lim = blockCount;
     dchk.count = (blockCount + threadCount - 1) / threadCount;
@@ -1361,9 +1713,7 @@
                 cl_uint pattern = 0xdeaddead;
                 memset_pattern4(gOut_half, &pattern, BUFFER_SIZE/2);
 
-                error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE,
-                                             0, count * sizeof(cl_half),
-                                             gOut_half, 0, NULL, NULL);
+                error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                 if (error) {
                     vlog_error( "Failure in clWriteArray\n" );
                     gFailCount++;
@@ -1378,9 +1728,7 @@
                     goto exit;
                 }
 
-                error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0,
-                                            count * sizeof(cl_half), gOut_half,
-                                            0, NULL, NULL);
+                error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                 if (error) {
                     vlog_error( "Failure in clReadArray\n" );
                     gFailCount++;
@@ -1396,9 +1744,7 @@
                 if (gTestDouble) {
                     memset_pattern4(gOut_half, &pattern, BUFFER_SIZE/2);
 
-                    error = clEnqueueWriteBuffer(
-                        gQueue, gOutBuffer_half, CL_FALSE, 0,
-                        count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
+                    error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                     if (error) {
                         vlog_error( "Failure in clWriteArray\n" );
                         gFailCount++;
@@ -1413,9 +1759,7 @@
                         goto exit;
                     }
 
-                    error = clEnqueueReadBuffer(
-                        gQueue, gOutBuffer_half, CL_TRUE, 0,
-                        count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
+                    error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                     if (error) {
                         vlog_error( "Failure in clReadArray\n" );
                         gFailCount++;

diff --git a/test_conformance/half/cl_utils.h b/test_conformance/half/cl_utils.h
index 50d8af3..82a6311 100644
--- a/test_conformance/half/cl_utils.h
+++ b/test_conformance/half/cl_utils.h

@@ -18,7 +18,6 @@
 
 #include "harness/testHarness.h"
 #include "harness/compat.h"
-#include "harness/conversions.h"
 
 #include <stdio.h>
 
@@ -110,6 +109,43 @@
     return u;
 }
 
+static inline int IsHalfSubnormal( uint16_t x )  // nonzero when the raw half bits x encode a subnormal
+{
+    // this relies on unsigned integer wraparound to exclude 0 as a subnormal
+    return ( ( x & 0x7fffU ) - 1U ) < 0x03ffU;  // true when the sign-stripped bits lie in [0x0001, 0x03ff]
+}
+
+// prevent silent failures due to missing FLT_RADIX
+#ifndef FLT_RADIX
+    #error FLT_RADIX is not defined by float.h
+#endif
+
+static inline int IsFloatSubnormal( double x )  // nonzero when x, converted to float, is a nonzero subnormal
+{
+#if 2 == FLT_RADIX
+    // Do this in integer to avoid problems with FTZ behavior
+    union{ float d; uint32_t u;}u;
+    u.d = fabsf((float) x);  // clear the sign so only exponent and mantissa bits remain
+    return (u.u-1) < 0x007fffffU;  // true when the bits lie in [1, 0x007fffff]; 0 wraps around and is excluded
+#else
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) FLT_MIN && x != 0.0;
+#endif
+}
+
+static inline int IsDoubleSubnormal( long double x )  // nonzero when x, converted to double, is a nonzero subnormal
+{
+#if 2 == FLT_RADIX
+    // Do this in integer to avoid problems with FTZ behavior
+    union{ double d; uint64_t u;}u;
+    u.d = fabs((double)x);  // clear the sign so only exponent and mantissa bits remain
+    return (u.u-1) < 0x000fffffffffffffULL;  // true when the bits lie in [1, 0x000fffffffffffff]; 0 wraps around and is excluded
+#else
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) DBL_MIN && x != 0.0;
+#endif
+}
+
 #endif /* CL_UTILS_H */
 
 

diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp
index 6600cc5..1af138c 100644
--- a/test_conformance/half/main.cpp
+++ b/test_conformance/half/main.cpp

@@ -19,7 +19,7 @@
 
 #if !defined (_WIN32)
 #include <sys/resource.h>
-#if defined(__APPLE__)
+#if !defined(__ANDROID__)
 #include <sys/sysctl.h>
 #endif
 #include <libgen.h>

diff --git a/test_conformance/images/clCopyImage/main.cpp b/test_conformance/images/clCopyImage/main.cpp
index c2cad01..04a8f51 100644
--- a/test_conformance/images/clCopyImage/main.cpp
+++ b/test_conformance/images/clCopyImage/main.cpp

@@ -13,16 +13,23 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
+#include "../harness/compat.h"
 
 #include <stdio.h>
 #include <string.h>
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
+#endif
+
 #include "../testBase.h"
-#include "../harness/compat.h"
 #include "../harness/testHarness.h"
 
 bool gDebugTrace;
 bool gTestSmallImages;
 bool gTestMaxImages;
+bool gUseRamp;
 bool gEnablePitch;
 bool gTestMipmaps;
 int gTypesToTest;
@@ -31,6 +38,8 @@
 
 extern int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, MethodsToTest testMethod );
 
+#define MAX_ALLOWED_STD_DEVIATION_IN_MB        8.0
+
 static void printUsage( const char *execName );
 
 int test_1D(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
@@ -126,6 +135,8 @@
             gTestSmallImages = true;
         else if( strcmp( argv[i], "max_images" ) == 0 )
             gTestMaxImages = true;
+        else if( strcmp( argv[i], "use_ramps" ) == 0 )
+            gUseRamp = true;
 
         else if( strcmp( argv[i], "use_pitches" ) == 0 )
             gEnablePitch = true;
@@ -151,8 +162,7 @@
     if( gTestSmallImages )
         log_info( "Note: Using small test images\n" );
 
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
 
     free(argList);
     return ret;
@@ -172,6 +182,7 @@
     log_info( "\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" );
     log_info( "\trandomize - Use random seed\n" );
     log_info( "\tuse_pitches - Enables row and slice pitches\n" );
+    log_info( "\tuse_ramps - Instead of random data, uses images filled with ramps (and 0xff on any padding pixels) to ease debugging\n" );  // option name must match the "use_ramps" string compared against argv in main()
     log_info( "\n" );
     log_info( "Test names:\n" );
     for( int i = 0; i < test_num; i++ )

diff --git a/test_conformance/images/clCopyImage/test_copy_1D.cpp b/test_conformance/images/clCopyImage/test_copy_1D.cpp
index 2c996c7..ab22320 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D.cpp

@@ -15,6 +15,14 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
 

diff --git a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
index 0b61693..62eed3f 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp

@@ -15,6 +15,14 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
 

diff --git a/test_conformance/images/clCopyImage/test_copy_2D.cpp b/test_conformance/images/clCopyImage/test_copy_2D.cpp
index 1a69a1f..7af2fe3 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D.cpp

@@ -15,6 +15,14 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
 

diff --git a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
index eb6dd55..f784230 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp

@@ -16,6 +16,14 @@
 #include "../testBase.h"
 #include "../common.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
 

diff --git a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
index 8a56c95..d341455 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp

@@ -16,6 +16,14 @@
 #include "../testBase.h"
 #include "../common.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
 

diff --git a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
index 6327ba5..5624245 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp

@@ -15,6 +15,13 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+
 // Defined in test_copy_generic.cpp
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );

diff --git a/test_conformance/images/clCopyImage/test_copy_3D.cpp b/test_conformance/images/clCopyImage/test_copy_3D.cpp
index da6731d..fb17623 100644
--- a/test_conformance/images/clCopyImage/test_copy_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_3D.cpp

@@ -15,6 +15,13 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+
 // Defined in test_copy_generic.cpp
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );

diff --git a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
index c098f64..b3b3223 100644
--- a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp

@@ -16,6 +16,14 @@
 #include "../testBase.h"
 #include "../common.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
 

diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp
index 026916e..5a8f3d3 100644
--- a/test_conformance/images/clCopyImage/test_copy_generic.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp

@@ -15,6 +15,14 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+
 static void CL_CALLBACK free_pitch_buffer( cl_mem image, void *buf )
 {
     free( buf );
@@ -105,41 +113,23 @@
 
     if ( *error != CL_SUCCESS )
     {
-        long long unsigned imageSize = get_image_size_mb(imageInfo);
         switch (imageInfo->type)
         {
             case CL_MEM_OBJECT_IMAGE1D:
-                log_error("ERROR: Unable to create 1D image of size %d (%llu "
-                          "MB):(%s)",
-                          (int)imageInfo->width, imageSize,
-                          IGetErrorString(*error));
+                log_error( "ERROR: Unable to create 1D image of size %d (%s)", (int)imageInfo->width, IGetErrorString( *error ) );
                 break;
             case CL_MEM_OBJECT_IMAGE2D:
-                log_error("ERROR: Unable to create 2D image of size %d x %d "
-                          "(%llu MB):(%s)",
-                          (int)imageInfo->width, (int)imageInfo->height,
-                          imageSize, IGetErrorString(*error));
+                log_error( "ERROR: Unable to create 2D image of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, IGetErrorString( *error ) );
                 break;
             case CL_MEM_OBJECT_IMAGE3D:
-                log_error("ERROR: Unable to create 3D image of size %d x %d x "
-                          "%d (%llu MB):(%s)",
-                          (int)imageInfo->width, (int)imageInfo->height,
-                          (int)imageInfo->depth, imageSize,
-                          IGetErrorString(*error));
+                log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, IGetErrorString( *error ) );
                 break;
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                log_error("ERROR: Unable to create 1D image array of size %d x "
-                          "%d (%llu MB):(%s)",
-                          (int)imageInfo->width, (int)imageInfo->arraySize,
-                          imageSize, IGetErrorString(*error));
+                log_error( "ERROR: Unable to create 1D image array of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->arraySize, IGetErrorString( *error ) );
                 break;
                 break;
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                log_error("ERROR: Unable to create 2D image array of size %d x "
-                          "%d x %d (%llu MB):(%s)",
-                          (int)imageInfo->width, (int)imageInfo->height,
-                          (int)imageInfo->arraySize, imageSize,
-                          IGetErrorString(*error));
+                log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, IGetErrorString( *error ) );
                 break;
         }
         log_error("ERROR: and %llu mip levels\n", (unsigned long long) imageInfo->num_mip_levels);
@@ -284,6 +274,7 @@
     return img;
 }
 
+
 // WARNING -- not thread safe
 BufferOwningPtr<char> srcData;
 BufferOwningPtr<char> dstData;
@@ -308,7 +299,24 @@
     }
     else
     {
-        srcBytes = get_image_size(srcImageInfo);
+        switch (srcImageInfo->type)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                srcBytes = srcImageInfo->rowPitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                srcBytes = srcImageInfo->height * srcImageInfo->rowPitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                srcBytes = srcImageInfo->depth * srcImageInfo->slicePitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                srcBytes = srcImageInfo->arraySize * srcImageInfo->slicePitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                srcBytes = srcImageInfo->arraySize * srcImageInfo->slicePitch;
+                break;
+        }
     }
 
     if (srcBytes > srcData.getSize())
@@ -344,7 +352,24 @@
     }
     else
     {
-        destImageSize = get_image_size(dstImageInfo);
+        switch (dstImageInfo->type)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                destImageSize = dstImageInfo->rowPitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                destImageSize = dstImageInfo->height * dstImageInfo->rowPitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                destImageSize = dstImageInfo->depth * dstImageInfo->slicePitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                destImageSize = dstImageInfo->arraySize * dstImageInfo->slicePitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                destImageSize = dstImageInfo->arraySize * dstImageInfo->slicePitch;
+                break;
+        }
     }
 
     if (destImageSize > dstData.getSize())
@@ -356,11 +381,7 @@
             log_error( "ERROR: Unable to malloc %lu bytes for dstData\n", destImageSize );
             return -1;
         }
-    }
 
-    if (destImageSize > dstHost.getSize())
-    {
-        dstHost.reset(NULL);
         dstHost.reset(malloc(destImageSize),NULL,0,destImageSize);
         if (dstHost == NULL) {
             dstData.reset(NULL);
@@ -547,17 +568,58 @@
         {
             if( memcmp( sourcePtr, destPtr, scanlineSize ) != 0 )
             {
+                log_error( "ERROR: Scanline %d did not verify for image size %d,%d,%d pitch %d (extra %d bytes)\n", (int)y, (int)dstImageInfo->width, (int)dstImageInfo->height, (int)dstImageInfo->depth, (int)dstImageInfo->rowPitch, (int)dstImageInfo->rowPitch - (int)dstImageInfo->width * (int)get_pixel_size( dstImageInfo->format ) );
+
                 // Find the first missing pixel
                 size_t pixel_size = get_pixel_size( dstImageInfo->format );
                 size_t where = 0;
                 for( where = 0; where < dstImageInfo->width; where++ )
                     if( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) )
                         break;
+                log_error( "Failed at column: %ld   ", where );
+                switch( pixel_size )
+                {
+                    case 1:
+                        log_error( "*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[0] );
+                        break;
+                    case 2:
+                        log_error( "*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[0] );
+                        break;
+                    case 3:
+                        log_error( "*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. {0x%2.2x, 0x%2.2x, 0x%2.2x}\n",
+                                  ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(sourcePtr + pixel_size * where))[1], ((cl_uchar*)(sourcePtr + pixel_size * where))[2],
+                                  ((cl_uchar*)(destPtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[1], ((cl_uchar*)(destPtr + pixel_size * where))[2]
+                                  );
+                        break;
+                    case 4:
+                        log_error( "*0x%8.8x vs. 0x%8.8x\n", ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[0] );
+                        break;
+                    case 6:
+                        log_error( "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. {0x%4.4x, 0x%4.4x, 0x%4.4x}\n",
+                                  ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(sourcePtr + pixel_size * where))[1], ((cl_ushort*)(sourcePtr + pixel_size * where))[2],
+                                  ((cl_ushort*)(destPtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[1], ((cl_ushort*)(destPtr + pixel_size * where))[2]
+                                  );
+                        break;
+                    case 8:
+                        log_error( "*0x%16.16llx vs. 0x%16.16llx\n", ((cl_ulong*)(sourcePtr + pixel_size * where))[0], ((cl_ulong*)(destPtr + pixel_size * where))[0] );
+                        break;
+                    case 12:
+                        log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
+                                  ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2],
+                                  ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2]
+                                  );
+                        break;
+                    case 16:
+                        log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
+                                  ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2], ((cl_uint*)(sourcePtr + pixel_size * where))[3],
+                                  ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2], ((cl_uint*)(destPtr + pixel_size * where))[3]
+                                  );
+                        break;
+                    default:
+                        log_error( "Don't know how to print pixel size of %ld\n", pixel_size );
+                        break;
+                }
 
-                print_first_pixel_difference_error(
-                    where, sourcePtr + pixel_size * where,
-                    destPtr + pixel_size * where, dstImageInfo, y,
-                    dstImageInfo->depth);
                 return -1;
             }
             sourcePtr += rowPitch;
@@ -578,14 +640,113 @@
         return error;
     }
 
-    // Ensure the unmap call completes.
-    error = clFinish(queue);
-    if (error != CL_SUCCESS)
-    {
-        log_error("ERROR: clFinish() failed to return CL_SUCCESS: %s\n",
-                  IGetErrorString(error));
-        return error;
-    }
-
     return 0;
 }
+
+int test_copy_image_size_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, MTdata d )
+{
+    size_t sourcePos[ 3 ], destPos[ 3 ], regionSize[ 3 ];
+    int ret = 0, retCode;
+
+    for (int i = 0; i < 8; i++)
+    {
+        switch (srcImageInfo->type)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = 1;
+                sourcePos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = random_in_range( 0, (int)(srcImageInfo->height - 4), d );
+                sourcePos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = random_in_range( 0, (int)(srcImageInfo->height - 4), d );
+                sourcePos[ 2 ] = random_in_range( 0, (int)(srcImageInfo->depth - 4), d );
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = random_in_range( 0, (int)(srcImageInfo->arraySize - 4), d );
+                sourcePos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = random_in_range( 0, (int)(srcImageInfo->height - 4), d );
+                sourcePos[ 2 ] = random_in_range( 0, (int)(srcImageInfo->arraySize - 4), d );
+                break;
+        }
+
+        switch (dstImageInfo->type)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d );
+                destPos[ 1 ] = 1;
+                destPos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d );
+                destPos[ 1 ] = random_in_range( 0, (int)(dstImageInfo->height - 4), d );
+                destPos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d );
+                destPos[ 1 ] = random_in_range( 0, (int)(dstImageInfo->height - 4), d );
+                destPos[ 2 ] = random_in_range( 0, (int)(dstImageInfo->depth - 4), d );
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d );
+                destPos[ 1 ] = random_in_range( 0, (int)(dstImageInfo->arraySize - 4), d );
+                destPos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d );
+                destPos[ 1 ] = random_in_range( 0, (int)(dstImageInfo->height - 4), d );
+                destPos[ 2 ] = random_in_range( 0, (int)(dstImageInfo->arraySize - 4), d );
+                break;
+        }
+
+        if ( (dstImageInfo->width - destPos[0]) < (srcImageInfo->width - sourcePos[0]) )
+            regionSize[0] = random_in_range(1, (dstImageInfo->width - destPos[0]), d);
+        else
+            regionSize[0] = random_in_range(1, (srcImageInfo->width - sourcePos[0]), d);
+
+        if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE1D || dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D)
+            regionSize[1] = 0;
+        else
+        {
+            if ( (dstImageInfo->height - destPos[1]) < (srcImageInfo->height - sourcePos[1]) )
+                regionSize[1] = random_in_range(1, (dstImageInfo->height - destPos[1]), d);
+            else
+                regionSize[1] = random_in_range(1, (srcImageInfo->height - sourcePos[1]), d);
+        }
+
+        regionSize[2] = 0;
+        if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D && srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
+        {
+            if ( (dstImageInfo->depth - destPos[2]) < (srcImageInfo->depth - sourcePos[2]) )
+                regionSize[2] = random_in_range(1, (dstImageInfo->depth - destPos[2]), d);
+            else
+                regionSize[2] = random_in_range(1, (srcImageInfo->depth - sourcePos[2]), d);
+        }
+        else if ( (dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && srcImageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) )
+        {
+            if ( (dstImageInfo->arraySize - destPos[2]) < (srcImageInfo->arraySize - sourcePos[2]) )
+                regionSize[2] = random_in_range(1, (dstImageInfo->arraySize - destPos[2]), d);
+            else
+                regionSize[2] = random_in_range(1, (srcImageInfo->arraySize - sourcePos[2]), d);
+        }
+
+        // Go for it!
+        retCode = test_copy_image_generic( context, queue, srcImageInfo, dstImageInfo, sourcePos, destPos, regionSize, d );
+        if( retCode < 0 )
+            return retCode;
+        else
+            ret += retCode;
+    }
+
+    return ret;
+}
+

diff --git a/test_conformance/images/clCopyImage/test_loops.cpp b/test_conformance/images/clCopyImage/test_loops.cpp
index 6ee1e53..1cb79e9 100644
--- a/test_conformance/images/clCopyImage/test_loops.cpp
+++ b/test_conformance/images/clCopyImage/test_loops.cpp

@@ -16,6 +16,18 @@
 #include "../testBase.h"
 #include "../common.h"
 
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern int                gTypesToTest;
+extern int                gNormalizedModeToUse;
+extern bool               gTestMipmaps;
+extern cl_channel_type      gChannelTypeToUse;
+extern cl_channel_type      gChannelTypeToUse;
+extern cl_channel_order      gChannelOrderToUse;
+
+
+extern bool gDebugTrace;
+
 extern int test_copy_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
 extern int test_copy_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
 extern int test_copy_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
@@ -105,14 +117,25 @@
     int ret = 0;
 
     // Grab the list of supported image formats for integer reads
-    std::vector<cl_image_format> formatList;
-    if (get_format_list(context, imageType, formatList, flags)) return -1;
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
 
-    std::vector<bool> filterFlags(formatList.size(), false);
-    filter_formats(formatList, filterFlags, nullptr);
+    if( get_format_list( context, imageType, formatList, numFormats, flags ) )
+        return -1;
+
+    filterFlags = new bool[ numFormats ];
+    if( filterFlags == NULL )
+    {
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
+    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
+
+    filter_formats(formatList, filterFlags, numFormats, NULL);
 
     // Run the format list
-    for (unsigned int i = 0; i < formatList.size(); i++)
+    for( unsigned int i = 0; i < numFormats; i++ )
     {
         int test_return = 0;
         if( filterFlags[i] )
@@ -157,6 +180,9 @@
         ret += test_return;
     }
 
+    delete filterFlags;
+    delete formatList;
+
     return ret;
 }
 

diff --git a/test_conformance/images/clFillImage/main.cpp b/test_conformance/images/clFillImage/main.cpp
index b19d85a..23d9e4c 100644
--- a/test_conformance/images/clFillImage/main.cpp
+++ b/test_conformance/images/clFillImage/main.cpp

@@ -13,11 +13,17 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
+#include "../harness/compat.h"
 
 #include <stdio.h>
 #include <string.h>
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
+#endif
+
 #include "../testBase.h"
-#include "../harness/compat.h"
 #include "../harness/testHarness.h"
 
 bool gDebugTrace;
@@ -31,6 +37,7 @@
 extern int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, MethodsToTest testMethod );
 static void printUsage( const char *execName );
 
+#define MAX_ALLOWED_STD_DEVIATION_IN_MB        8.0
 
 int test_1D(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
 {
@@ -124,8 +131,7 @@
     if ( gTestSmallImages )
         log_info( "Note: Using small test images\n" );
 
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
 
     free(argList);
     return ret;

diff --git a/test_conformance/images/clFillImage/test_fill_1D.cpp b/test_conformance/images/clFillImage/test_fill_1D.cpp
index c3f2318..51eb822 100644
--- a/test_conformance/images/clFillImage/test_fill_1D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_1D.cpp

@@ -15,6 +15,14 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool               gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern uint64_t           gRoundingStartValue;
+
 // Defined in test_fill_2D_3D.cpp
 extern int test_fill_image_generic( cl_context context, cl_command_queue queue, image_descriptor *imageInfo,
                                     const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d );

diff --git a/test_conformance/images/clFillImage/test_fill_1D_array.cpp b/test_conformance/images/clFillImage/test_fill_1D_array.cpp
index b4347a4..edbcacd 100644
--- a/test_conformance/images/clFillImage/test_fill_1D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_1D_array.cpp

@@ -15,6 +15,14 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool               gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern uint64_t           gRoundingStartValue;
+
 // Defined in test_fill_2D_3D.cpp
 extern int test_fill_image_generic( cl_context context, cl_command_queue queue, image_descriptor *imageInfo,
                                     const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d );

diff --git a/test_conformance/images/clFillImage/test_fill_2D.cpp b/test_conformance/images/clFillImage/test_fill_2D.cpp
index bb66fc2..8e76e86 100644
--- a/test_conformance/images/clFillImage/test_fill_2D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D.cpp

@@ -15,6 +15,14 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool               gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern uint64_t           gRoundingStartValue;
+
 // Defined in test_fill_2D_3D.cpp
 extern int test_fill_image_generic( cl_context context, cl_command_queue queue, image_descriptor *imageInfo,
                                     const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d );

diff --git a/test_conformance/images/clFillImage/test_fill_2D_array.cpp b/test_conformance/images/clFillImage/test_fill_2D_array.cpp
index 3265aab..260b869 100644
--- a/test_conformance/images/clFillImage/test_fill_2D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D_array.cpp

@@ -15,6 +15,13 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool               gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+
 // Defined in test_fill_2D_3D.cpp
 extern int test_fill_image_generic( cl_context context, cl_command_queue queue, image_descriptor *imageInfo,
                                    const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d );

diff --git a/test_conformance/images/clFillImage/test_fill_3D.cpp b/test_conformance/images/clFillImage/test_fill_3D.cpp
index 9db0ac7..298db0e 100644
--- a/test_conformance/images/clFillImage/test_fill_3D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_3D.cpp

@@ -15,6 +15,13 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool               gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+
 // Defined in test_fill_2D_3D.cpp
 extern int test_fill_image_generic( cl_context context, cl_command_queue queue, image_descriptor *imageInfo,
                                    const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d );

diff --git a/test_conformance/images/clFillImage/test_fill_generic.cpp b/test_conformance/images/clFillImage/test_fill_generic.cpp
index 59bf24a..6b59bad 100644
--- a/test_conformance/images/clFillImage/test_fill_generic.cpp
+++ b/test_conformance/images/clFillImage/test_fill_generic.cpp

@@ -15,13 +15,23 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool               gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern uint64_t           gRoundingStartValue;
+
 extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData );
 
+
 static void CL_CALLBACK free_pitch_buffer( cl_mem image, void *buf )
 {
     free( buf );
 }
 
+
 cl_mem create_image( cl_context context, cl_command_queue queue, BufferOwningPtr<char>& data, image_descriptor *imageInfo, int *error )
 {
     cl_mem img;
@@ -478,16 +488,58 @@
 
             if (memcmp( sourcePtr, destPtr, scanlineSize ) != 0)
             {
+                log_error( "ERROR: Scanline %d did not verify for image size %d,%d,%d pitch %d (extra %d bytes)\n", (int)y, (int)imageInfo->width, (int)imageInfo->height, (int)thirdDim, (int)imageInfo->rowPitch, (int)imageInfo->rowPitch - (int)imageInfo->width * (int)get_pixel_size( imageInfo->format ) );
+
                 // Find the first missing pixel
                 size_t pixel_size = get_pixel_size( imageInfo->format );
                 size_t where = 0;
                 for ( where = 0; where < imageInfo->width; where++ )
                     if ( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) )
                         break;
+                log_error( "Failed at column: %ld   ", where );
+                switch ( pixel_size )
+                {
+                case 1:
+                    log_error( "*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[0] );
+                    break;
+                case 2:
+                    log_error( "*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[0] );
+                    break;
+                case 3:
+                    log_error( "*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. {0x%2.2x, 0x%2.2x, 0x%2.2x}\n",
+                               ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(sourcePtr + pixel_size * where))[1], ((cl_uchar*)(sourcePtr + pixel_size * where))[2],
+                               ((cl_uchar*)(destPtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[1], ((cl_uchar*)(destPtr + pixel_size * where))[2]
+                             );
+                    break;
+                case 4:
+                    log_error( "*0x%8.8x vs. 0x%8.8x\n", ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[0] );
+                    break;
+                case 6:
+                    log_error( "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. {0x%4.4x, 0x%4.4x, 0x%4.4x}\n",
+                               ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(sourcePtr + pixel_size * where))[1], ((cl_ushort*)(sourcePtr + pixel_size * where))[2],
+                               ((cl_ushort*)(destPtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[1], ((cl_ushort*)(destPtr + pixel_size * where))[2]
+                             );
+                    break;
+                case 8:
+                    log_error( "*0x%16.16llx vs. 0x%16.16llx\n", ((cl_ulong*)(sourcePtr + pixel_size * where))[0], ((cl_ulong*)(destPtr + pixel_size * where))[0] );
+                    break;
+                case 12:
+                    log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
+                               ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2],
+                               ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2]
+                             );
+                    break;
+                case 16:
+                    log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
+                               ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2], ((cl_uint*)(sourcePtr + pixel_size * where))[3],
+                               ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2], ((cl_uint*)(destPtr + pixel_size * where))[3]
+                             );
+                    break;
+                default:
+                    log_error( "Don't know how to print pixel size of %ld\n", pixel_size );
+                    break;
+                }
 
-                print_first_pixel_difference_error(
-                    where, sourcePtr + pixel_size * where,
-                    destPtr + pixel_size * where, imageInfo, y, thirdDim);
                 return -1;
             }
 

diff --git a/test_conformance/images/clFillImage/test_loops.cpp b/test_conformance/images/clFillImage/test_loops.cpp
index 759f48d..0a4c571 100644
--- a/test_conformance/images/clFillImage/test_loops.cpp
+++ b/test_conformance/images/clFillImage/test_loops.cpp

@@ -16,7 +16,14 @@
 #include "../testBase.h"
 #include "../common.h"
 
-extern int gTypesToTest;
+extern bool               gDebugTrace;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern int                gTypesToTest;
+extern int                gNormalizedModeToUse;
+extern cl_channel_type    gChannelTypeToUse;
+extern cl_channel_order   gChannelOrderToUse;
+
 
 extern int test_fill_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType outputType );
 extern int test_fill_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType outputType );
@@ -69,22 +76,35 @@
     int ret = 0;
 
     // Grab the list of supported image formats
-    std::vector<cl_image_format> formatList;
-    if (get_format_list(context, imageType, formatList, flags)) return -1;
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
+
+    if ( get_format_list( context, imageType, formatList, numFormats, flags ) )
+        return -1;
+
+    filterFlags = new bool[ numFormats ];
+    if ( filterFlags == NULL )
+    {
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
+    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
 
     for (auto test : imageTestTypes)
     {
         if (gTypesToTest & test.type)
         {
-            std::vector<bool> filterFlags(formatList.size(), false);
-            if (filter_formats(formatList, filterFlags, test.channelTypes) == 0)
+            if (filter_formats(formatList, filterFlags, numFormats,
+                               test.channelTypes)
+                == 0)
             {
                 log_info("No formats supported for %s type\n", test.name);
             }
             else
             {
                 // Run the format list
-                for (unsigned int i = 0; i < formatList.size(); i++)
+                for (unsigned int i = 0; i < numFormats; i++)
                 {
                     if (filterFlags[i])
                     {
@@ -112,6 +132,9 @@
         }
     }
 
+    delete[] filterFlags;
+    delete[] formatList;
+
     return ret;
 }
 

diff --git a/test_conformance/images/clGetInfo/main.cpp b/test_conformance/images/clGetInfo/main.cpp
index 80b3cbb..cea2ad6 100644
--- a/test_conformance/images/clGetInfo/main.cpp
+++ b/test_conformance/images/clGetInfo/main.cpp

@@ -13,21 +13,30 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
+#include "../harness/compat.h"
 
 #include <stdio.h>
 #include <string.h>
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
+#endif
+
 #include "../testBase.h"
-#include "../harness/compat.h"
 
 bool gDebugTrace;
 bool gTestSmallImages;
 bool gTestMaxImages;
+int  gTypesToTest;
 cl_channel_type gChannelTypeToUse = (cl_channel_type)-1;
 cl_channel_order gChannelOrderToUse = (cl_channel_order)-1;
 
 extern int test_image_set( cl_device_id device, cl_context context, cl_mem_object_type image_type );
 static void printUsage( const char *execName );
 
+#define MAX_ALLOWED_STD_DEVIATION_IN_MB        8.0
+
 int test_1D(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
 {
     return test_image_set( device, context, CL_MEM_OBJECT_IMAGE1D );
@@ -108,8 +117,7 @@
     if( gTestSmallImages )
         log_info( "Note: Using small test images\n" );
 
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
 
     free(argList);
     return ret;

diff --git a/test_conformance/images/clGetInfo/test_1D.cpp b/test_conformance/images/clGetInfo/test_1D.cpp
index 0d704b8..e1d9206 100644
--- a/test_conformance/images/clGetInfo/test_1D.cpp
+++ b/test_conformance/images/clGetInfo/test_1D.cpp

@@ -15,6 +15,11 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages;
+
 extern int test_get_image_info_single( cl_context context, image_descriptor *imageInfo, MTdata d, cl_mem_flags flags, size_t row_pitch, size_t slice_pitch );
 
 

diff --git a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
index 447fc7c..c250e09 100644
--- a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
+++ b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp

@@ -15,6 +15,11 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages;
+
 extern int test_get_image_info_single( cl_context context, image_descriptor *imageInfo, MTdata d, cl_mem_flags flags, size_t row_pitch, size_t slice_pitch );
 
 int test_get_image_info_1D_array( cl_device_id device, cl_context context, cl_image_format *format, cl_mem_flags flags )

diff --git a/test_conformance/images/clGetInfo/test_2D.cpp b/test_conformance/images/clGetInfo/test_2D.cpp
index 74a6012..4953707 100644
--- a/test_conformance/images/clGetInfo/test_2D.cpp
+++ b/test_conformance/images/clGetInfo/test_2D.cpp

@@ -15,6 +15,12 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages;
+
+
 int test_get_image_info_single( cl_context context, image_descriptor *imageInfo, MTdata d, cl_mem_flags flags, size_t row_pitch, size_t slice_pitch )
 {
     int error;

diff --git a/test_conformance/images/clGetInfo/test_3D.cpp b/test_conformance/images/clGetInfo/test_3D.cpp
index af5062e..4bc189a 100644
--- a/test_conformance/images/clGetInfo/test_3D.cpp
+++ b/test_conformance/images/clGetInfo/test_3D.cpp

@@ -15,6 +15,11 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages;
+
 extern int test_get_image_info_single( cl_context context, image_descriptor *imageInfo, MTdata d, cl_mem_flags flags, size_t row_pitch, size_t slice_pitch );
 
 int test_get_image_info_3D( cl_device_id device, cl_context context, cl_image_format *format, cl_mem_flags flags )

diff --git a/test_conformance/images/clGetInfo/test_loops.cpp b/test_conformance/images/clGetInfo/test_loops.cpp
index 17f02d8..e64ec3b 100644
--- a/test_conformance/images/clGetInfo/test_loops.cpp
+++ b/test_conformance/images/clGetInfo/test_loops.cpp

@@ -15,6 +15,18 @@
 //
 #include "../testBase.h"
 #include "../common.h"
+#include "harness/imageHelpers.h"
+#include <algorithm>
+#include <iterator>
+
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern int                gTypesToTest;
+extern int                gNormalizedModeToUse;
+extern cl_channel_type      gChannelTypeToUse;
+
+
+extern bool gDebugTrace;
 
 extern int test_get_image_info_1D( cl_device_id device, cl_context context, cl_image_format *format, cl_mem_flags flags );
 extern int test_get_image_info_2D( cl_device_id device, cl_context context, cl_image_format *format, cl_mem_flags flags );
@@ -22,6 +34,30 @@
 extern int test_get_image_info_1D_array( cl_device_id device, cl_context context, cl_image_format *format, cl_mem_flags flags );
 extern int test_get_image_info_2D_array( cl_device_id device, cl_context context, cl_image_format *format, cl_mem_flags flags );
 
+static bool check_minimum_supported(cl_image_format *formatList, // Returns true only if no required format is reported missing from formatList.
+                                    unsigned int numFormats, // presumably the entry count of formatList; forwarded to find_format
+                                    cl_mem_flags flags,
+                                    cl_mem_object_type image_type,
+                                    cl_device_id device)
+{
+	bool passed = true;
+	Version version = get_device_cl_version(device); // used only to name the OpenCL version in the error message
+	std::vector<cl_image_format> formatsToSupport;
+	build_required_image_formats(flags, image_type, device, formatsToSupport); // fills formatsToSupport for this flags/image_type/device combination
+
+	for (auto &format: formatsToSupport)
+	{
+		if( !find_format( formatList, numFormats, &format ) ) // a false result is treated as "required format missing"
+		{
+			log_error( "ERROR: Format required by OpenCL %s is not supported: ", version.to_string().c_str() );
+			print_header( &format, true );
+			passed = false; // no early exit: keep looping so every missing format gets logged
+		}
+	}
+
+	return passed;
+}
+
 int test_image_type( cl_device_id device, cl_context context, cl_mem_object_type image_type, cl_mem_flags flags )
 {
     log_info( "Running %s %s-only tests...\n", convert_image_type_to_string(image_type), flags == CL_MEM_READ_ONLY ? "read" : "write" );
@@ -29,14 +65,39 @@
     int ret = 0;
 
     // Grab the list of supported image formats for integer reads
-    std::vector<cl_image_format> formatList;
-    if (get_format_list(context, image_type, formatList, flags)) return -1;
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
 
-    std::vector<bool> filterFlags(formatList.size(), false);
-    filter_formats(formatList, filterFlags, nullptr);
+    if ( get_format_list( context, image_type, formatList, numFormats, flags ) )
+        return -1;
+
+    BufferOwningPtr<cl_image_format> formatListBuf(formatList);
+
+    if ((image_type == CL_MEM_OBJECT_IMAGE3D) && (flags != CL_MEM_READ_ONLY)) {
+        log_info("No requirement for 3D write in OpenCL 1.2. Not checking formats.\n");
+    } else {
+        log_info("Checking for required OpenCL 1.2 formats.\n");
+        if (check_minimum_supported( formatList, numFormats, flags, image_type, device ) == false) {
+            ret++;
+        } else {
+            log_info("All required formats present.\n");
+        }
+    }
+
+    filterFlags = new bool[ numFormats ];
+    BufferOwningPtr<bool> filterFlagsBuf(filterFlags);
+
+    if( filterFlags == NULL )
+    {
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
+    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
+    filter_formats( formatList, filterFlags, numFormats, 0 );
 
     // Run the format list
-    for (unsigned int i = 0; i < formatList.size(); i++)
+    for( unsigned int i = 0; i < numFormats; i++ )
     {
         int test_return = 0;
         if( filterFlags[i] )

diff --git a/test_conformance/images/clReadWriteImage/main.cpp b/test_conformance/images/clReadWriteImage/main.cpp
index 18c7e23..d8d096e 100644
--- a/test_conformance/images/clReadWriteImage/main.cpp
+++ b/test_conformance/images/clReadWriteImage/main.cpp

@@ -13,20 +13,30 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
+#include "../harness/compat.h"
 
 #include <stdio.h>
 #include <string.h>
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
+#endif
+
 #include "../testBase.h"
-#include "../harness/compat.h"
 
 bool gDebugTrace;
 bool gTestSmallImages;
 bool gTestMaxImages;
+bool gUseRamp;
 bool gTestMipmaps;
+int  gTypesToTest;
 cl_channel_type gChannelTypeToUse = (cl_channel_type)-1;
 cl_channel_order gChannelOrderToUse = (cl_channel_order)-1;
 bool            gEnablePitch = false;
 
+#define MAX_ALLOWED_STD_DEVIATION_IN_MB        8.0
+
 static void printUsage( const char *execName );
 
 extern int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_mem_object_type image_type );
@@ -89,6 +99,8 @@
             gTestMaxImages = true;
         else if( strcmp( argv[i], "use_pitches" ) == 0 )
             gEnablePitch = true;
+        else if( strcmp( argv[i], "use_ramps" ) == 0 )
+            gUseRamp = true;
         else if( strcmp( argv[i], "test_mipmaps") == 0 ) {
             gTestMipmaps = true;
             // Don't test pitches with mipmaps right now.
@@ -112,8 +124,7 @@
     if( gTestSmallImages )
         log_info( "Note: Using small test images\n" );
 
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
 
     free(argList);
     return ret;
@@ -131,6 +142,7 @@
     log_info( "\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" );
     log_info( "\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" );
     log_info( "\tuse_pitches - Enables row and slice pitches\n" );
+    log_info( "\tuse_ramp - Instead of random data, uses images filled with ramps (and 0xff on any padding pixels) to ease debugging\n" );
     log_info( "\ttest_mipmaps - Test mipmapped images\n" );
     log_info( "\trandomize - Uses random seed\n" );
     log_info( "\n" );

diff --git a/test_conformance/images/clReadWriteImage/test_loops.cpp b/test_conformance/images/clReadWriteImage/test_loops.cpp
index 782e4b3..e8ca8c8 100644
--- a/test_conformance/images/clReadWriteImage/test_loops.cpp
+++ b/test_conformance/images/clReadWriteImage/test_loops.cpp

@@ -16,23 +16,21 @@
 #include "../testBase.h"
 #include "../common.h"
 
-extern int test_read_image_set_1D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  cl_image_format *format, cl_mem_flags flags);
-extern int test_read_image_set_2D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  cl_image_format *format, cl_mem_flags flags);
-extern int test_read_image_set_3D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  cl_image_format *format, cl_mem_flags flags);
-extern int test_read_image_set_1D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        cl_image_format *format,
-                                        cl_mem_flags flags);
-extern int test_read_image_set_2D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        cl_image_format *format,
-                                        cl_mem_flags flags);
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern int                gTypesToTest;
+extern int                gNormalizedModeToUse;
+extern cl_channel_type      gChannelTypeToUse;
+
+
+extern bool gDebugTrace;
+extern bool gTestMipmaps;
+
+extern int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
 
 int test_image_type( cl_device_id device, cl_context context, cl_command_queue queue, cl_mem_object_type imageType, cl_mem_flags flags )
 {
@@ -40,76 +38,80 @@
 
     int ret = 0;
 
-    if (gTestMipmaps)
-    {
-        if (0 == is_extension_available(device, "cl_khr_mipmap_image"))
-        {
-            log_info("-----------------------------------------------------\n");
-            log_info("This device does not support "
-                     "cl_khr_mipmap_image.\nSkipping mipmapped image test. \n");
-            log_info(
-                "-----------------------------------------------------\n\n");
-            return 0;
-        }
-    }
-
     // Grab the list of supported image formats for integer reads
-    std::vector<cl_image_format> formatList;
-    if (get_format_list(context, imageType, formatList, flags)) return -1;
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
 
-    std::vector<bool> filterFlags(formatList.size(), false);
-    filter_formats(formatList, filterFlags, nullptr);
+  if ( gTestMipmaps )
+  {
+    if ( 0 == is_extension_available( device, "cl_khr_mipmap_image" ))
+    {
+      log_info( "-----------------------------------------------------\n" );
+      log_info( "This device does not support cl_khr_mipmap_image.\nSkipping mipmapped image test. \n" );
+      log_info( "-----------------------------------------------------\n\n" );
+      return 0;
+    }
+  }
+
+    if( get_format_list( context, imageType, formatList, numFormats, flags ) )
+        return -1;
+
+    filterFlags = new bool[ numFormats ];
+    if( filterFlags == NULL )
+    {
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
+    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
+    filter_formats( formatList, filterFlags, numFormats, 0 );
 
     // Run the format list
-    for (unsigned int i = 0; i < formatList.size(); i++)
+    for( unsigned int i = 0; i < numFormats; i++ )
     {
         int test_return = 0;
-        if (filterFlags[i])
+        if( filterFlags[i] )
         {
-            log_info("NOT RUNNING: ");
-            print_header(&formatList[i], false);
+            log_info( "NOT RUNNING: " );
+            print_header( &formatList[ i ], false );
             continue;
         }
 
-        print_header(&formatList[i], false);
+        print_header( &formatList[ i ], false );
 
         gTestCount++;
 
-        switch (imageType)
-        {
+        switch (imageType) {
             case CL_MEM_OBJECT_IMAGE1D:
-                test_return = test_read_image_set_1D(device, context, queue,
-                                                     &formatList[i], flags);
+                test_return = test_read_image_set_1D( device, context, queue, &formatList[ i ] );
                 break;
             case CL_MEM_OBJECT_IMAGE2D:
-                test_return = test_read_image_set_2D(device, context, queue,
-                                                     &formatList[i], flags);
+                test_return = test_read_image_set_2D( device, context, queue, &formatList[ i ] );
                 break;
             case CL_MEM_OBJECT_IMAGE3D:
-                test_return = test_read_image_set_3D(device, context, queue,
-                                                     &formatList[i], flags);
+                test_return = test_read_image_set_3D( device,context, queue,  &formatList[ i ] );
                 break;
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                test_return = test_read_image_set_1D_array(
-                    device, context, queue, &formatList[i], flags);
+                test_return = test_read_image_set_1D_array( device, context, queue, &formatList[ i ] );
                 break;
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                test_return = test_read_image_set_2D_array(
-                    device, context, queue, &formatList[i], flags);
+                test_return = test_read_image_set_2D_array( device, context, queue, &formatList[ i ] );
                 break;
         }
 
-        if (test_return)
-        {
+        if (test_return) {
             gFailCount++;
-            log_error("FAILED: ");
-            print_header(&formatList[i], true);
-            log_info("\n");
+            log_error( "FAILED: " );
+            print_header( &formatList[ i ], true );
+            log_info( "\n" );
         }
 
         ret += test_return;
     }
 
+    delete[] filterFlags;
+    delete[] formatList;
+
     return ret;
 }
 

diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp
index eef5bf4..7d9eb84 100644
--- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp

@@ -15,9 +15,16 @@
 //
 #include "../testBase.h"
 
-int test_read_image_1D(cl_context context, cl_command_queue queue,
-                       image_descriptor *imageInfo, MTdata d,
-                       cl_mem_flags flags)
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+
+
+int test_read_image_1D( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
 {
     int error;
 
@@ -36,14 +43,12 @@
     // Construct testing sources
   if(!gTestMipmaps)
   {
-      image = create_image_1d(context, flags, imageInfo->format,
-                              imageInfo->width, 0, NULL, NULL, &error);
-      if (image == NULL)
-      {
-          log_error("ERROR: Unable to create 1D image of size %d (%s)",
-                    (int)imageInfo->width, IGetErrorString(error));
-          return -1;
-      }
+    image = create_image_1d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, 0, NULL, NULL, &error );
+    if( image == NULL )
+    {
+      log_error( "ERROR: Unable to create 1D image of size %d (%s)", (int)imageInfo->width, IGetErrorString( error ) );
+      return -1;
+    }
   }
   else
   {
@@ -52,8 +57,7 @@
     image_desc.image_width = imageInfo->width;
     image_desc.num_mip_levels = imageInfo->num_mip_levels;
 
-    image = clCreateImage(context, flags, imageInfo->format, &image_desc, NULL,
-                          &error);
+    image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
     if( error != CL_SUCCESS )
     {
       log_error( "ERROR: Unable to create %d level mipmapped 1D image of size %d x %d (pitch %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
@@ -163,9 +167,7 @@
     return 0;
 }
 
-int test_read_image_set_1D(cl_device_id device, cl_context context,
-                           cl_command_queue queue, cl_image_format *format,
-                           cl_mem_flags flags)
+int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
 {
     size_t maxWidth;
     cl_ulong maxAllocSize, memSize;
@@ -199,8 +201,7 @@
             if( gDebugTrace )
                 log_info( "   at size %d\n", (int)imageInfo.width );
 
-            int ret =
-                test_read_image_1D(context, queue, &imageInfo, seed, flags);
+            int ret = test_read_image_1D( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
         }
@@ -224,7 +225,7 @@
             log_info("Testing %d\n", (int)imageInfo.width);
             if( gDebugTrace )
                 log_info( "   at max size %d\n", (int)maxWidth );
-            if (test_read_image_1D(context, queue, &imageInfo, seed, flags))
+            if( test_read_image_1D( context, queue, &imageInfo, seed ) )
                 return -1;
         }
     }
@@ -260,8 +261,7 @@
 
             if( gDebugTrace )
                 log_info( "   at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth );
-            int ret =
-                test_read_image_1D(context, queue, &imageInfo, seed, flags);
+            int ret = test_read_image_1D( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
         }

diff --git a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
index 5d5c288..3f84556 100644
--- a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp

@@ -15,9 +15,16 @@
 //
 #include "../testBase.h"
 
-int test_read_image_1D_array(cl_context context, cl_command_queue queue,
-                             image_descriptor *imageInfo, MTdata d,
-                             cl_mem_flags flags)
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+
+
+int test_read_image_1D_array( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
 {
     int error;
 
@@ -37,9 +44,7 @@
     // Construct testing sources
     if(!gTestMipmaps)
     {
-        image = create_image_1d_array(context, flags, imageInfo->format,
-                                      imageInfo->width, imageInfo->arraySize, 0,
-                                      0, NULL, &error);
+        image = create_image_1d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->arraySize, 0, 0, NULL, &error );
         if( image == NULL )
         {
             log_error( "ERROR: Unable to create 1D image array of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->arraySize, IGetErrorString( error ) );
@@ -54,8 +59,7 @@
         image_desc.image_array_size = imageInfo->arraySize;
         image_desc.num_mip_levels = imageInfo->num_mip_levels;
 
-        image = clCreateImage(context, flags, imageInfo->format, &image_desc,
-                              NULL, &error);
+        image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
         if( error != CL_SUCCESS )
         {
             log_error( "ERROR: Unable to create %d level mipmapped 1D image of width %d and array size %d (pitch %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, IGetErrorString( error ) );
@@ -169,9 +173,7 @@
     return 0;
 }
 
-int test_read_image_set_1D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 cl_image_format *format, cl_mem_flags flags)
+int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
 {
     size_t maxWidth, maxArraySize;
     cl_ulong maxAllocSize, memSize;
@@ -208,8 +210,7 @@
                 if( gDebugTrace )
                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize );
 
-                int ret = test_read_image_1D_array(context, queue, &imageInfo,
-                                                   seed, flags);
+                int ret = test_read_image_1D_array( context, queue, &imageInfo, seed );
                 if( ret )
                     return -1;
             }
@@ -236,8 +237,7 @@
             log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.arraySize);
             if( gDebugTrace )
                 log_info( "   at max size %d,%d\n", (int)maxWidth, (int)maxArraySize );
-            if (test_read_image_1D_array(context, queue, &imageInfo, seed,
-                                         flags))
+            if( test_read_image_1D_array( context, queue, &imageInfo, seed ) )
                 return -1;
         }
     }
@@ -275,8 +275,7 @@
 
             if( gDebugTrace )
                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize );
-            int ret = test_read_image_1D_array(context, queue, &imageInfo, seed,
-                                               flags);
+            int ret = test_read_image_1D_array( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
         }

diff --git a/test_conformance/images/clReadWriteImage/test_read_2D.cpp b/test_conformance/images/clReadWriteImage/test_read_2D.cpp
index fb2e794..0eae51b 100644
--- a/test_conformance/images/clReadWriteImage/test_read_2D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_2D.cpp

@@ -15,9 +15,16 @@
 //
 #include "../testBase.h"
 
-int test_read_image_2D(cl_context context, cl_command_queue queue,
-                       image_descriptor *imageInfo, MTdata d,
-                       cl_mem_flags flags)
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+
+
+int test_read_image_2D( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
 {
     int error;
 
@@ -37,9 +44,7 @@
     // Construct testing sources
     if(!gTestMipmaps)
     {
-        image =
-            create_image_2d(context, flags, imageInfo->format, imageInfo->width,
-                            imageInfo->height, 0, NULL, &error);
+        image = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, 0, NULL, &error );
         if( image == NULL )
         {
             log_error( "ERROR: Unable to create 2D image of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, IGetErrorString( error ) );
@@ -54,8 +59,7 @@
         image_desc.image_height = imageInfo->height;
         image_desc.num_mip_levels = imageInfo->num_mip_levels;
 
-        image = clCreateImage(context, flags, imageInfo->format, &image_desc,
-                              NULL, &error);
+        image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
         if( error != CL_SUCCESS )
         {
             log_error( "ERROR: Unable to create %d level mipmapped 2D image of size %d x %d (pitch %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
@@ -172,9 +176,7 @@
     return 0;
 }
 
-int test_read_image_set_2D(cl_device_id device, cl_context context,
-                           cl_command_queue queue, cl_image_format *format,
-                           cl_mem_flags flags)
+int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
 {
     size_t maxWidth, maxHeight;
     cl_ulong maxAllocSize, memSize;
@@ -210,8 +212,7 @@
                 if( gDebugTrace )
                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
 
-                int ret =
-                    test_read_image_2D(context, queue, &imageInfo, seed, flags);
+                int ret = test_read_image_2D( context, queue, &imageInfo, seed );
                 if( ret )
                     return -1;
             }
@@ -237,7 +238,7 @@
             log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.height);
             if( gDebugTrace )
                 log_info( "   at max size %d,%d\n", (int)maxWidth, (int)maxHeight );
-            if (test_read_image_2D(context, queue, &imageInfo, seed, flags))
+            if( test_read_image_2D( context, queue, &imageInfo, seed ) )
                 return -1;
         }
     }
@@ -273,8 +274,7 @@
 
             if( gDebugTrace )
                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
-            int ret =
-                test_read_image_2D(context, queue, &imageInfo, seed, flags);
+            int ret = test_read_image_2D( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
         }

diff --git a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
index d0113bb..547e5eb 100644
--- a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp

@@ -15,9 +15,14 @@
 //
 #include "../testBase.h"
 
-int test_read_image_2D_array(cl_context context, cl_command_queue queue,
-                             image_descriptor *imageInfo, MTdata d,
-                             cl_mem_flags flags)
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+
+int test_read_image_2D_array( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
 {
     int error;
 
@@ -37,9 +42,7 @@
     // Construct testing sources
     if(!gTestMipmaps)
     {
-        image = create_image_2d_array(context, flags, imageInfo->format,
-                                      imageInfo->width, imageInfo->height,
-                                      imageInfo->arraySize, 0, 0, NULL, &error);
+        image = create_image_2d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, NULL, &error );
         if( image == NULL )
         {
             log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, IGetErrorString( error ) );
@@ -55,8 +58,7 @@
         image_desc.image_array_size = imageInfo->arraySize;
         image_desc.num_mip_levels = imageInfo->num_mip_levels;
 
-        image = clCreateImage(context, flags, imageInfo->format, &image_desc,
-                              NULL, &error);
+        image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
         if( error != CL_SUCCESS )
         {
             log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %d x %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
@@ -147,9 +149,7 @@
     return 0;
 }
 
-int test_read_image_set_2D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 cl_image_format *format, cl_mem_flags flags)
+int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
 {
     size_t maxWidth, maxHeight, maxArraySize;
     cl_ulong maxAllocSize, memSize;
@@ -188,8 +188,7 @@
 
                     if( gDebugTrace )
                         log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize );
-                    int ret = test_read_image_2D_array(context, queue,
-                                                       &imageInfo, seed, flags);
+                    int ret = test_read_image_2D_array( context, queue, &imageInfo, seed );
                     if( ret )
                         return -1;
                 }
@@ -217,8 +216,7 @@
                 imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed);
 
             log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize);
-            if (test_read_image_2D_array(context, queue, &imageInfo, seed,
-                                         flags))
+            if( test_read_image_2D_array( context, queue, &imageInfo, seed ) )
                 return -1;
         }
     }
@@ -262,8 +260,7 @@
 
             if( gDebugTrace )
                 log_info( "   at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize );
-            int ret = test_read_image_2D_array(context, queue, &imageInfo, seed,
-                                               flags);
+            int ret = test_read_image_2D_array( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
         }

diff --git a/test_conformance/images/clReadWriteImage/test_read_3D.cpp b/test_conformance/images/clReadWriteImage/test_read_3D.cpp
index 2dcd243..16baeeb 100644
--- a/test_conformance/images/clReadWriteImage/test_read_3D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_3D.cpp

@@ -15,9 +15,14 @@
 //
 #include "../testBase.h"
 
-int test_read_image_3D(cl_context context, cl_command_queue queue,
-                       image_descriptor *imageInfo, MTdata d,
-                       cl_mem_flags flags)
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+
+int test_read_image_3D( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
 {
     int error;
 
@@ -36,9 +41,7 @@
     // Construct testing sources
     if(!gTestMipmaps)
     {
-        image = create_image_3d(context, flags, imageInfo->format,
-                                imageInfo->width, imageInfo->height,
-                                imageInfo->depth, 0, 0, NULL, &error);
+        image = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, NULL, &error );
         if( image == NULL )
         {
             log_error( "ERROR: Unable to create 2D image of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, IGetErrorString( error ) );
@@ -54,8 +57,7 @@
         image_desc.image_depth = imageInfo->depth;
         image_desc.num_mip_levels = imageInfo->num_mip_levels;
 
-        image = clCreateImage(context, flags, imageInfo->format, &image_desc,
-                              NULL, &error);
+        image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
         if( error != CL_SUCCESS )
         {
             log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %d x %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
@@ -152,9 +154,7 @@
     return 0;
 }
 
-int test_read_image_set_3D(cl_device_id device, cl_context context,
-                           cl_command_queue queue, cl_image_format *format,
-                           cl_mem_flags flags)
+int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
 {
     size_t maxWidth, maxHeight, maxDepth;
     cl_ulong maxAllocSize, memSize;
@@ -193,8 +193,7 @@
 
                     if( gDebugTrace )
                         log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );
-                    int ret = test_read_image_3D(context, queue, &imageInfo,
-                                                 seed, flags);
+                    int ret = test_read_image_3D( context, queue, &imageInfo, seed );
                     if( ret )
                         return -1;
                 }
@@ -222,8 +221,8 @@
         imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth), seed);
 
       log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth);
-      if (test_read_image_3D(context, queue, &imageInfo, seed, flags))
-          return -1;
+      if( test_read_image_3D( context, queue, &imageInfo, seed ) )
+        return -1;
     }
   }
     else
@@ -265,8 +264,7 @@
 
             if( gDebugTrace )
                 log_info( "   at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth );
-            int ret =
-                test_read_image_3D(context, queue, &imageInfo, seed, flags);
+            int ret = test_read_image_3D( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
         }

diff --git a/test_conformance/images/common.cpp b/test_conformance/images/common.cpp
index 7323f11..9e54261 100644
--- a/test_conformance/images/common.cpp
+++ b/test_conformance/images/common.cpp

@@ -58,13 +58,27 @@
     { kTestFloat, kFloat, floatFormats, "float" },
 } };
 
-int filter_formats(const std::vector<cl_image_format> &formatList,
-                   std::vector<bool> &filterFlags,
+const char *convert_image_type_to_string(cl_mem_object_type image_type)
+{
+    switch (image_type)
+    {
+        case CL_MEM_OBJECT_IMAGE1D: return "1D";
+        case CL_MEM_OBJECT_IMAGE2D: return "2D";
+        case CL_MEM_OBJECT_IMAGE3D: return "3D";
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY: return "1D array";
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY: return "2D array";
+        case CL_MEM_OBJECT_IMAGE1D_BUFFER: return "1D image buffer";
+        default: return "unrecognized object type";
+    }
+}
+
+int filter_formats(cl_image_format *formatList, bool *filterFlags,
+                   unsigned int formatCount,
                    cl_channel_type *channelDataTypesToFilter,
                    bool testMipmaps /*=false*/)
 {
     int numSupported = 0;
-    for (unsigned int j = 0; j < formatList.size(); j++)
+    for (unsigned int j = 0; j < formatCount; j++)
     {
         // If this format has been previously filtered, remove the filter
         if (filterFlags[j]) filterFlags[j] = false;
@@ -129,18 +143,18 @@
 }
 
 int get_format_list(cl_context context, cl_mem_object_type imageType,
-                    std::vector<cl_image_format> &outFormatList,
-                    cl_mem_flags flags)
+                    cl_image_format *&outFormatList,
+                    unsigned int &outFormatCount, cl_mem_flags flags)
 {
-    cl_uint formatCount;
     int error = clGetSupportedImageFormats(context, flags, imageType, 0, NULL,
-                                           &formatCount);
+                                           &outFormatCount);
     test_error(error, "Unable to get count of supported image formats");
 
-    outFormatList.resize(formatCount);
+    outFormatList =
+        (outFormatCount > 0) ? new cl_image_format[outFormatCount] : NULL;
 
-    error = clGetSupportedImageFormats(context, flags, imageType, formatCount,
-                                       outFormatList.data(), NULL);
+    error = clGetSupportedImageFormats(context, flags, imageType,
+                                       outFormatCount, outFormatList, NULL);
     test_error(error, "Unable to get list of supported image formats");
     return 0;
 }

diff --git a/test_conformance/images/common.h b/test_conformance/images/common.h
index 27e8679..114623e 100644
--- a/test_conformance/images/common.h
+++ b/test_conformance/images/common.h

@@ -22,7 +22,6 @@
 #include "harness/conversions.h"
 
 #include <array>
-#include <vector>
 
 extern cl_channel_type gChannelTypeToUse;
 extern cl_channel_order gChannelOrderToUse;
@@ -41,13 +40,14 @@
 
 extern std::array<ImageTestTypes, 3> imageTestTypes;
 
-int filter_formats(const std::vector<cl_image_format> &formatList,
-                   std::vector<bool> &filterFlags,
+const char *convert_image_type_to_string(cl_mem_object_type imageType);
+int filter_formats(cl_image_format *formatList, bool *filterFlags,
+                   unsigned int formatCount,
                    cl_channel_type *channelDataTypesToFilter,
                    bool testMipmaps = false);
 int get_format_list(cl_context context, cl_mem_object_type imageType,
-                    std::vector<cl_image_format> &outFormatList,
-                    cl_mem_flags flags);
+                    cl_image_format *&outFormatList,
+                    unsigned int &outFormatCount, cl_mem_flags flags);
 size_t random_in_ranges(size_t minimum, size_t rangeA, size_t rangeB, MTdata d);
 
 #endif // IMAGES_COMMON_H

diff --git a/test_conformance/images/kernel_image_methods/main.cpp b/test_conformance/images/kernel_image_methods/main.cpp
index 50653ef..ef6bd2c 100644
--- a/test_conformance/images/kernel_image_methods/main.cpp
+++ b/test_conformance/images/kernel_image_methods/main.cpp

@@ -13,16 +13,24 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
+#include "../harness/compat.h"
+#include "../harness/parseParameters.h"
 
 #include <stdio.h>
 #include <string.h>
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
+#endif
+
 #include "../testBase.h"
-#include "../harness/compat.h"
-#include "../harness/parseParameters.h"
 
 bool gDebugTrace;
 bool gTestSmallImages;
 bool gTestMaxImages;
+int  gTypesToTest;
+bool gDeviceLt20 = false;
 
 cl_channel_type gChannelTypeToUse = (cl_channel_type)-1;
 cl_channel_order gChannelOrderToUse = (cl_channel_order)-1;
@@ -31,6 +39,8 @@
 
 static void printUsage( const char *execName );
 
+#define MAX_ALLOWED_STD_DEVIATION_IN_MB        8.0
+
 int test_1D(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
 {
     return test_image_set( device, context, queue, CL_MEM_OBJECT_IMAGE1D );
@@ -111,8 +121,7 @@
     if( gTestSmallImages )
         log_info( "Note: Using small test images\n" );
 
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
 
     free(argList);
     return ret;

diff --git a/test_conformance/images/kernel_image_methods/test_1D.cpp b/test_conformance/images/kernel_image_methods/test_1D.cpp
index 0059d4c..757a4a0 100644
--- a/test_conformance/images/kernel_image_methods/test_1D.cpp
+++ b/test_conformance/images/kernel_image_methods/test_1D.cpp

@@ -15,6 +15,10 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages, gDeviceLt20;
 
 struct image_kernel_data
 {
@@ -26,28 +30,24 @@
 };
 
 static const char *methodTest1DImageKernelPattern =
-    "typedef struct {\n"
-    "    int width;\n"
-    "    int channelType;\n"
-    "    int channelOrder;\n"
-    "    int expectedChannelType;\n"
-    "    int expectedChannelOrder;\n"
-    " } image_kernel_data;\n"
-    "__kernel void sample_kernel( %s image1d_t input, __global "
-    "image_kernel_data *outData )\n"
-    "{\n"
-    "   outData->width = get_image_width( input );\n"
-    "   outData->channelType = get_image_channel_data_type( input );\n"
-    "   outData->channelOrder = get_image_channel_order( input );\n"
-    "\n"
-    "   outData->expectedChannelType = %s;\n"
-    "   outData->expectedChannelOrder = %s;\n"
-    "}";
+"typedef struct {\n"
+"    int width;\n"
+"    int channelType;\n"
+"    int channelOrder;\n"
+"    int expectedChannelType;\n"
+"    int expectedChannelOrder;\n"
+" } image_kernel_data;\n"
+"__kernel void sample_kernel( read_only image1d_t input, __global image_kernel_data *outData )\n"
+"{\n"
+"   outData->width = get_image_width( input );\n"
+"   outData->channelType = get_image_channel_data_type( input );\n"
+"   outData->channelOrder = get_image_channel_order( input );\n"
+"\n"
+"   outData->expectedChannelType = %s;\n"
+"   outData->expectedChannelOrder = %s;\n"
+"}";
 
-static int test_get_1Dimage_info_single(cl_context context,
-                                        cl_command_queue queue,
-                                        image_descriptor *imageInfo, MTdata d,
-                                        cl_mem_flags flags)
+static int test_get_1Dimage_info_single( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
 {
     int error = 0;
 
@@ -65,9 +65,7 @@
     // Construct testing source
     if( gDebugTrace )
         log_info( " - Creating 1D image %d ...\n", (int)imageInfo->width );
-
-    image = create_image_1d(context, flags, imageInfo->format, imageInfo->width,
-                            0, NULL, NULL, &error);
+    image = create_image_1d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, 0, NULL, NULL, &error );
     if( image == NULL )
     {
         log_error( "ERROR: Unable to create 1D image of size %d (%s)", (int)imageInfo->width, IGetErrorString( error ) );
@@ -79,8 +77,6 @@
 
     const char* channelTypeName = GetChannelTypeName( imageInfo->format->image_channel_data_type );
     const char* channelOrderName = GetChannelOrderName( imageInfo->format->image_channel_order );
-    const char *image_access_qualifier =
-        (flags == CL_MEM_READ_ONLY) ? "read_only" : "write_only";
 
     if(channelTypeName && strlen(channelTypeName))
         sprintf(channelTypeConstantString, "CLK_%s", &channelTypeName[3]);  // replace CL_* with CLK_*
@@ -89,7 +85,7 @@
         sprintf(channelOrderConstantString, "CLK_%s", &channelOrderName[3]); // replace CL_* with CLK_*
 
     // Create a program to run against
-    sprintf(programSrc, methodTest1DImageKernelPattern, image_access_qualifier,
+    sprintf( programSrc, methodTest1DImageKernelPattern,
             channelTypeConstantString, channelOrderConstantString);
 
     //log_info("-----------------------------------\n%s\n", programSrc);
@@ -97,13 +93,11 @@
     if (error)
         print_error(error, "clFinish failed.\n");
     const char *ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create kernel to test against" );
 
     // Create an output buffer
-    outDataBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                   sizeof(outKernelData), NULL, &error);
+    outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error );
     test_error( error, "Unable to create output buffer" );
 
     // Set up arguments and run
@@ -149,9 +143,7 @@
     return error;
 }
 
-int test_get_image_info_1D(cl_device_id device, cl_context context,
-                           cl_command_queue queue, cl_image_format *format,
-                           cl_mem_flags flags)
+int test_get_image_info_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
 {
     size_t maxWidth;
     cl_ulong maxAllocSize, memSize;
@@ -181,8 +173,7 @@
             if( gDebugTrace )
                 log_info( "   at size %d\n", (int)imageInfo.width );
 
-            int ret = test_get_1Dimage_info_single(context, queue, &imageInfo,
-                                                   seed, flags);
+            int ret = test_get_1Dimage_info_single( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
         }
@@ -203,8 +194,7 @@
             log_info( "Testing %d\n", (int)sizes[ idx ][ 0 ]);
             if( gDebugTrace )
                 log_info( "   at max size %d\n", (int)sizes[ idx ][ 0 ] );
-            if (test_get_1Dimage_info_single(context, queue, &imageInfo, seed,
-                                             flags))
+            if( test_get_1Dimage_info_single( context, queue, &imageInfo, seed ) )
                 return -1;
         }
     }
@@ -233,8 +223,7 @@
 
             if( gDebugTrace )
                 log_info( "   at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth );
-            int ret = test_get_1Dimage_info_single(context, queue, &imageInfo,
-                                                   seed, flags);
+            int ret = test_get_1Dimage_info_single( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
         }

diff --git a/test_conformance/images/kernel_image_methods/test_1D_array.cpp b/test_conformance/images/kernel_image_methods/test_1D_array.cpp
index 797161c..f5e778b 100644
--- a/test_conformance/images/kernel_image_methods/test_1D_array.cpp
+++ b/test_conformance/images/kernel_image_methods/test_1D_array.cpp

@@ -15,6 +15,10 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages, gDeviceLt20;
 
 struct image_kernel_data
 {
@@ -27,30 +31,26 @@
 };
 
 static const char *methodTestKernelPattern =
-    "typedef struct {\n"
-    "    int width;\n"
-    "    int arraySize;\n"
-    "    int channelType;\n"
-    "    int channelOrder;\n"
-    "    int expectedChannelType;\n"
-    "    int expectedChannelOrder;\n"
-    " } image_kernel_data;\n"
-    "__kernel void sample_kernel( %s image1d_array_t input, __global "
-    "image_kernel_data *outData )\n"
-    "{\n"
-    "   outData->width = get_image_width( input );\n"
-    "   outData->arraySize = get_image_array_size( input );\n"
-    "   outData->channelType = get_image_channel_data_type( input );\n"
-    "   outData->channelOrder = get_image_channel_order( input );\n"
-    "\n"
-    "   outData->expectedChannelType = %s;\n"
-    "   outData->expectedChannelOrder = %s;\n"
-    "}";
+"typedef struct {\n"
+"    int width;\n"
+"    int arraySize;\n"
+"    int channelType;\n"
+"    int channelOrder;\n"
+"    int expectedChannelType;\n"
+"    int expectedChannelOrder;\n"
+" } image_kernel_data;\n"
+"__kernel void sample_kernel( read_only image1d_array_t input, __global image_kernel_data *outData )\n"
+"{\n"
+"   outData->width = get_image_width( input );\n"
+"   outData->arraySize = get_image_array_size( input );\n"
+"   outData->channelType = get_image_channel_data_type( input );\n"
+"   outData->channelOrder = get_image_channel_order( input );\n"
+"\n"
+"   outData->expectedChannelType = %s;\n"
+"   outData->expectedChannelOrder = %s;\n"
+"}";
 
-int test_get_1Dimage_array_info_single(cl_context context,
-                                       cl_command_queue queue,
-                                       image_descriptor *imageInfo, MTdata d,
-                                       cl_mem_flags flags)
+int test_get_1Dimage_array_info_single( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
 {
     int error = 0;
 
@@ -69,9 +69,7 @@
     if( gDebugTrace )
         log_info( " - Creating 1D image array %d by %d...\n", (int)imageInfo->width, (int)imageInfo->arraySize );
 
-    image = create_image_1d_array(context, flags, imageInfo->format,
-                                  imageInfo->width, imageInfo->arraySize, 0, 0,
-                                  NULL, &error);
+    image = create_image_1d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->arraySize, 0, 0, NULL, &error );
     if( image == NULL )
     {
         log_error( "ERROR: Unable to create 1D image array of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->arraySize, IGetErrorString( error ) );
@@ -83,8 +81,6 @@
 
     const char* channelTypeName = GetChannelTypeName( imageInfo->format->image_channel_data_type );
     const char* channelOrderName = GetChannelOrderName( imageInfo->format->image_channel_order );
-    const char *image_access_qualifier =
-        (flags == CL_MEM_READ_ONLY) ? "read_only" : "write_only";
 
     if(channelTypeName && strlen(channelTypeName))
         sprintf(channelTypeConstantString, "CLK_%s", &channelTypeName[3]);  // replace CL_* with CLK_*
@@ -93,7 +89,7 @@
         sprintf(channelOrderConstantString, "CLK_%s", &channelOrderName[3]); // replace CL_* with CLK_*
 
     // Create a program to run against
-    sprintf(programSrc, methodTestKernelPattern, image_access_qualifier,
+    sprintf( programSrc, methodTestKernelPattern,
             channelTypeConstantString, channelOrderConstantString);
 
     //log_info("-----------------------------------\n%s\n", programSrc);
@@ -101,13 +97,11 @@
     if (error)
         print_error(error, "clFinish failed.\n");
     const char *ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create kernel to test against" );
 
     // Create an output buffer
-    outDataBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                   sizeof(outKernelData), NULL, &error);
+    outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error );
     test_error( error, "Unable to create output buffer" );
 
     // Set up arguments and run
@@ -158,9 +152,7 @@
     return error;
 }
 
-int test_get_image_info_1D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 cl_image_format *format, cl_mem_flags flags)
+int test_get_image_info_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
 {
     size_t maxWidth, maxArraySize;
     cl_ulong maxAllocSize, memSize;
@@ -194,8 +186,7 @@
                 if( gDebugTrace )
                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize );
 
-                int ret = test_get_1Dimage_array_info_single(
-                    context, queue, &imageInfo, seed, flags);
+                int ret = test_get_1Dimage_array_info_single( context, queue, &imageInfo, seed );
                 if( ret )
                     return -1;
             }
@@ -219,8 +210,7 @@
             log_info( "Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ]);
             if( gDebugTrace )
                 log_info( "   at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ] );
-            if (test_get_1Dimage_array_info_single(context, queue, &imageInfo,
-                                                   seed, flags))
+            if( test_get_1Dimage_array_info_single( context, queue, &imageInfo, seed ) )
                 return -1;
         }
     }
@@ -252,8 +242,7 @@
 
             if( gDebugTrace )
                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize );
-            int ret = test_get_1Dimage_array_info_single(
-                context, queue, &imageInfo, seed, flags);
+            int ret = test_get_1Dimage_array_info_single( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
         }

diff --git a/test_conformance/images/kernel_image_methods/test_2D.cpp b/test_conformance/images/kernel_image_methods/test_2D.cpp
index b0d4a70..64b9f26 100644
--- a/test_conformance/images/kernel_image_methods/test_2D.cpp
+++ b/test_conformance/images/kernel_image_methods/test_2D.cpp

@@ -15,6 +15,10 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages, gDeviceLt20;
 
 struct image_kernel_data
 {
@@ -31,42 +35,38 @@
 };
 
 static const char *methodTestKernelPattern =
-    "typedef struct {\n"
-    "    int width;\n"
-    "    int height;\n"
-    "    int depth;\n"
-    "    int widthDim;\n"
-    "    int heightDim;\n"
-    "    int depthDim;\n"
-    "    int channelType;\n"
-    "    int channelOrder;\n"
-    "    int expectedChannelType;\n"
-    "    int expectedChannelOrder;\n"
-    " } image_kernel_data;\n"
-    " %s\n"
-    "__kernel void sample_kernel( %s image%dd%s_t input, __global "
-    "image_kernel_data *outData )\n"
-    "{\n"
-    "   outData->width = get_image_width( input );\n"
-    "   outData->height = get_image_height( input );\n"
-    "%s\n"
-    "   int%d dim = get_image_dim( input );\n"
-    "   outData->widthDim = dim.x;\n"
-    "   outData->heightDim = dim.y;\n"
-    "%s\n"
-    "   outData->channelType = get_image_channel_data_type( input );\n"
-    "   outData->channelOrder = get_image_channel_order( input );\n"
-    "\n"
-    "   outData->expectedChannelType = %s;\n"
-    "   outData->expectedChannelOrder = %s;\n"
-    "}";
+"typedef struct {\n"
+"    int width;\n"
+"    int height;\n"
+"    int depth;\n"
+"    int widthDim;\n"
+"    int heightDim;\n"
+"    int depthDim;\n"
+"    int channelType;\n"
+"    int channelOrder;\n"
+"    int expectedChannelType;\n"
+"    int expectedChannelOrder;\n"
+" } image_kernel_data;\n"
+"__kernel void sample_kernel( read_only image%dd%s_t input, __global image_kernel_data *outData )\n"
+"{\n"
+"   outData->width = get_image_width( input );\n"
+"   outData->height = get_image_height( input );\n"
+"%s\n"
+"   int%d dim = get_image_dim( input );\n"
+"   outData->widthDim = dim.x;\n"
+"   outData->heightDim = dim.y;\n"
+"%s\n"
+"   outData->channelType = get_image_channel_data_type( input );\n"
+"   outData->channelOrder = get_image_channel_order( input );\n"
+"\n"
+"   outData->expectedChannelType = %s;\n"
+"   outData->expectedChannelOrder = %s;\n"
+"}";
 
 static const char *depthKernelLine = "   outData->depth = get_image_depth( input );\n";
 static const char *depthDimKernelLine = "   outData->depthDim = dim.z;\n";
 
-int test_get_image_info_single(cl_context context, cl_command_queue queue,
-                               image_descriptor *imageInfo, MTdata d,
-                               cl_mem_flags flags)
+int test_get_image_info_single( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
 {
     int error = 0;
 
@@ -86,13 +86,9 @@
         log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height );
 
     if( imageInfo->depth != 0 )
-        image = create_image_3d(context, flags, imageInfo->format,
-                                imageInfo->width, imageInfo->height,
-                                imageInfo->depth, 0, 0, NULL, &error);
+        image = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, NULL, &error );
     else
-        image =
-            create_image_2d(context, flags, imageInfo->format, imageInfo->width,
-                            imageInfo->height, 0, NULL, &error);
+        image = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, 0, NULL, &error );
     if( image == NULL )
     {
         log_error( "ERROR: Unable to create image of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, IGetErrorString( error ) );
@@ -104,12 +100,6 @@
 
     const char* channelTypeName = GetChannelTypeName( imageInfo->format->image_channel_data_type );
     const char* channelOrderName = GetChannelOrderName( imageInfo->format->image_channel_order );
-    const char *image_access_qualifier =
-        (flags == CL_MEM_READ_ONLY) ? "read_only" : "write_only";
-    const char *cl_khr_3d_image_writes_enabler = "";
-    if ((flags != CL_MEM_READ_ONLY) && (imageInfo->depth != 0))
-        cl_khr_3d_image_writes_enabler =
-            "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable";
 
     if(channelTypeName && strlen(channelTypeName))
         sprintf(channelTypeConstantString, "CLK_%s", &channelTypeName[3]);  // replace CL_* with CLK_*
@@ -118,13 +108,12 @@
         sprintf(channelOrderConstantString, "CLK_%s", &channelOrderName[3]); // replace CL_* with CLK_*
 
     // Create a program to run against
-    sprintf(programSrc, methodTestKernelPattern, cl_khr_3d_image_writes_enabler,
-            image_access_qualifier, (imageInfo->depth != 0) ? 3 : 2,
-            (imageInfo->format->image_channel_order == CL_DEPTH) ? "_depth"
-                                                                 : "",
-            (imageInfo->depth != 0) ? depthKernelLine : "",
-            (imageInfo->depth != 0) ? 4 : 2,
-            (imageInfo->depth != 0) ? depthDimKernelLine : "",
+    sprintf( programSrc, methodTestKernelPattern,
+            ( imageInfo->depth != 0 ) ? 3 : 2,
+            (imageInfo->format->image_channel_order == CL_DEPTH) ? "_depth" : "",
+            ( imageInfo->depth != 0 ) ? depthKernelLine : "",
+            ( imageInfo->depth != 0 ) ? 4 : 2,
+            ( imageInfo->depth != 0 ) ? depthDimKernelLine : "",
             channelTypeConstantString, channelOrderConstantString);
 
     //log_info("-----------------------------------\n%s\n", programSrc);
@@ -132,13 +121,11 @@
     if (error)
         print_error(error, "clFinish failed.\n");
     const char *ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create kernel to test against" );
 
     // Create an output buffer
-    outDataBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                   sizeof(outKernelData), NULL, &error);
+    outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error );
     test_error( error, "Unable to create output buffer" );
 
     // Set up arguments and run
@@ -209,9 +196,7 @@
     return error;
 }
 
-int test_get_image_info_2D(cl_device_id device, cl_context context,
-                           cl_command_queue queue, cl_image_format *format,
-                           cl_mem_flags flags)
+int test_get_image_info_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
 {
     size_t maxWidth, maxHeight;
     cl_ulong maxAllocSize, memSize;
@@ -244,8 +229,7 @@
                 if( gDebugTrace )
                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
 
-                int ret = test_get_image_info_single(context, queue, &imageInfo,
-                                                     seed, flags);
+                int ret = test_get_image_info_single( context, queue, &imageInfo, seed );
                 if( ret )
                     return -1;
             }
@@ -268,8 +252,7 @@
             log_info( "Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]);
             if( gDebugTrace )
                 log_info( "   at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] );
-            if (test_get_image_info_single(context, queue, &imageInfo, seed,
-                                           flags))
+            if( test_get_image_info_single( context, queue, &imageInfo, seed ) )
                 return -1;
         }
     }
@@ -299,8 +282,7 @@
 
             if( gDebugTrace )
                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
-            int ret = test_get_image_info_single(context, queue, &imageInfo,
-                                                 seed, flags);
+            int ret = test_get_image_info_single( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
         }

diff --git a/test_conformance/images/kernel_image_methods/test_2D_array.cpp b/test_conformance/images/kernel_image_methods/test_2D_array.cpp
index 21a6b04..85b8a7a 100644
--- a/test_conformance/images/kernel_image_methods/test_2D_array.cpp
+++ b/test_conformance/images/kernel_image_methods/test_2D_array.cpp

@@ -15,6 +15,10 @@
 //
 #include "../testBase.h"
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages, gDeviceLt20;
 
 struct image_kernel_data
 {
@@ -28,32 +32,28 @@
 };
 
 static const char *methodTestKernelPattern =
-    "typedef struct {\n"
-    "    int width;\n"
-    "    int height;\n"
-    "    int arraySize;\n"
-    "    int channelType;\n"
-    "    int channelOrder;\n"
-    "    int expectedChannelType;\n"
-    "    int expectedChannelOrder;\n"
-    " } image_kernel_data;\n"
-    "__kernel void sample_kernel( %s %s input, __global image_kernel_data "
-    "*outData )\n"
-    "{\n"
-    "   outData->width = get_image_width( input );\n"
-    "   outData->height = get_image_height( input );\n"
-    "   outData->arraySize = get_image_array_size( input );\n"
-    "   outData->channelType = get_image_channel_data_type( input );\n"
-    "   outData->channelOrder = get_image_channel_order( input );\n"
-    "\n"
-    "   outData->expectedChannelType = %s;\n"
-    "   outData->expectedChannelOrder = %s;\n"
-    "}";
+"typedef struct {\n"
+"    int width;\n"
+"    int height;\n"
+"    int arraySize;\n"
+"    int channelType;\n"
+"    int channelOrder;\n"
+"    int expectedChannelType;\n"
+"    int expectedChannelOrder;\n"
+" } image_kernel_data;\n"
+"__kernel void sample_kernel( read_only %s input, __global image_kernel_data *outData )\n"
+"{\n"
+"   outData->width = get_image_width( input );\n"
+"   outData->height = get_image_height( input );\n"
+"   outData->arraySize = get_image_array_size( input );\n"
+"   outData->channelType = get_image_channel_data_type( input );\n"
+"   outData->channelOrder = get_image_channel_order( input );\n"
+"\n"
+"   outData->expectedChannelType = %s;\n"
+"   outData->expectedChannelOrder = %s;\n"
+"}";
 
-int test_get_2Dimage_array_info_single(cl_context context,
-                                       cl_command_queue queue,
-                                       image_descriptor *imageInfo, MTdata d,
-                                       cl_mem_flags flags)
+int test_get_2Dimage_array_info_single( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
 {
     int error = 0;
 
@@ -72,9 +72,7 @@
     if( gDebugTrace )
         log_info( " - Creating 2D image array %d by %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize );
 
-    image = create_image_2d_array(context, flags, imageInfo->format,
-                                  imageInfo->width, imageInfo->height,
-                                  imageInfo->arraySize, 0, 0, NULL, &error);
+    image = create_image_2d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, NULL, &error );
     if( image == NULL )
     {
         log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, IGetErrorString( error ) );
@@ -86,8 +84,6 @@
 
     const char* channelTypeName = GetChannelTypeName( imageInfo->format->image_channel_data_type );
     const char* channelOrderName = GetChannelOrderName( imageInfo->format->image_channel_order );
-    const char *image_access_qualifier =
-        (flags == CL_MEM_READ_ONLY) ? "read_only" : "write_only";
 
     if(channelTypeName && strlen(channelTypeName))
         sprintf(channelTypeConstantString, "CLK_%s", &channelTypeName[3]);  // replace CL_* with CLK_*
@@ -96,10 +92,8 @@
         sprintf(channelOrderConstantString, "CLK_%s", &channelOrderName[3]); // replace CL_* with CLK_*
 
     // Create a program to run against
-    sprintf(programSrc, methodTestKernelPattern, image_access_qualifier,
-            (imageInfo->format->image_channel_order == CL_DEPTH)
-                ? "image2d_array_depth_t"
-                : "image2d_array_t",
+    sprintf( programSrc, methodTestKernelPattern,
+            (imageInfo->format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t" ,
             channelTypeConstantString, channelOrderConstantString);
 
     //log_info("-----------------------------------\n%s\n", programSrc);
@@ -107,13 +101,11 @@
     if (error)
         print_error(error, "clFinish failed.\n");
     const char *ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create kernel to test against" );
 
     // Create an output buffer
-    outDataBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                   sizeof(outKernelData), NULL, &error);
+    outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error );
     test_error( error, "Unable to create output buffer" );
 
     // Set up arguments and run
@@ -169,9 +161,7 @@
     return error;
 }
 
-int test_get_image_info_2D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 cl_image_format *format, cl_mem_flags flags)
+int test_get_image_info_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
 {
     size_t maxWidth, maxHeight, maxArraySize;
     cl_ulong maxAllocSize, memSize;
@@ -207,8 +197,7 @@
                 {
                     if( gDebugTrace )
                         log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize );
-                    int ret = test_get_2Dimage_array_info_single(
-                        context, queue, &imageInfo, seed, flags);
+                    int ret = test_get_2Dimage_array_info_single( context, queue, &imageInfo, seed );
                     if( ret )
                         return -1;
                 }
@@ -234,8 +223,7 @@
             log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
             if( gDebugTrace )
                 log_info( "   at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
-            if (test_get_2Dimage_array_info_single(context, queue, &imageInfo,
-                                                   seed, flags))
+            if( test_get_2Dimage_array_info_single( context, queue, &imageInfo, seed ) )
                 return -1;
         }
     }
@@ -244,9 +232,6 @@
         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
         {
             cl_ulong size;
-            cl_ulong slicePitch;
-            cl_ulong rowPitch;
-
             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
             // image, the result array, plus offset arrays, will fit in the global ram space
             do
@@ -255,30 +240,26 @@
                 imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed );
                 imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed );
 
-                rowPitch = imageInfo.width * pixelSize;
-                slicePitch = rowPitch * imageInfo.height;
+                imageInfo.rowPitch = imageInfo.width * pixelSize;
+                imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
 
                 size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
-                rowPitch += extraWidth;
+                imageInfo.rowPitch += extraWidth;
 
                 do {
                     extraWidth++;
-                    rowPitch += extraWidth;
-                } while ((rowPitch % pixelSize) != 0);
+                    imageInfo.rowPitch += extraWidth;
+                } while ((imageInfo.rowPitch % pixelSize) != 0);
 
                 size_t extraHeight = (int)random_log_in_range( 0, 8, seed );
-                slicePitch = rowPitch * (imageInfo.height + extraHeight);
+                imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight);
 
-                size = slicePitch * imageInfo.arraySize * 4 * 4;
+                size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
             } while(  size > maxAllocSize || ( size * 3 ) > memSize );
 
-            imageInfo.slicePitch = slicePitch;
-            imageInfo.rowPitch = rowPitch;
-
             if( gDebugTrace )
                 log_info( "   at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize );
-            int ret = test_get_2Dimage_array_info_single(
-                context, queue, &imageInfo, seed, flags);
+            int ret = test_get_2Dimage_array_info_single( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
         }

diff --git a/test_conformance/images/kernel_image_methods/test_3D.cpp b/test_conformance/images/kernel_image_methods/test_3D.cpp
index aae433b..390bf8f 100644
--- a/test_conformance/images/kernel_image_methods/test_3D.cpp
+++ b/test_conformance/images/kernel_image_methods/test_3D.cpp

@@ -15,14 +15,14 @@
 //
 #include "../testBase.h"
 
-extern int test_get_image_info_single(cl_context context,
-                                      cl_command_queue queue,
-                                      image_descriptor *imageInfo, MTdata d,
-                                      cl_mem_flags flags);
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
 
-int test_get_image_info_3D(cl_device_id device, cl_context context,
-                           cl_command_queue queue, cl_image_format *format,
-                           cl_mem_flags flags)
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages, gDeviceLt20;
+
+extern int test_get_image_info_single( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d );
+
+int test_get_image_info_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
 {
     size_t maxWidth, maxHeight, maxDepth;
     cl_ulong maxAllocSize, memSize;
@@ -30,16 +30,6 @@
     RandomSeed seed( gRandomSeed );
     size_t pixelSize;
 
-    if ((flags != CL_MEM_READ_ONLY)
-        && !is_extension_available(device, "cl_khr_3d_image_writes"))
-    {
-        log_info("-----------------------------------------------------\n");
-        log_info("This device does not support cl_khr_3d_image_writes.\n"
-                 "Skipping 3d image write test.\n");
-        log_info("-----------------------------------------------------\n\n");
-        return 0;
-    }
-
     imageInfo.type = CL_MEM_OBJECT_IMAGE3D;
     imageInfo.format = format;
     pixelSize = get_pixel_size( imageInfo.format );
@@ -68,8 +58,7 @@
                 {
                     if( gDebugTrace )
                         log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );
-                    int ret = test_get_image_info_single(
-                        context, queue, &imageInfo, seed, flags);
+                    int ret = test_get_image_info_single( context, queue, &imageInfo, seed );
                     if( ret )
                         return -1;
                 }
@@ -95,8 +84,7 @@
             log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
             if( gDebugTrace )
                 log_info( "   at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
-            if (test_get_image_info_single(context, queue, &imageInfo, seed,
-                                           flags))
+            if( test_get_image_info_single( context, queue, &imageInfo, seed ) )
                 return -1;
         }
     }
@@ -105,9 +93,6 @@
         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
         {
             cl_ulong size;
-            cl_ulong slicePitch;
-            cl_ulong rowPitch;
-
             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
             // image, the result array, plus offset arrays, will fit in the global ram space
             do
@@ -116,30 +101,26 @@
                 imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed );
                 imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, seed );
 
-                rowPitch = imageInfo.width * pixelSize;
-                slicePitch = imageInfo.rowPitch * imageInfo.height;
+                imageInfo.rowPitch = imageInfo.width * pixelSize;
+                imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
 
                 size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
-                rowPitch += extraWidth;
+                imageInfo.rowPitch += extraWidth;
 
                 do {
                     extraWidth++;
-                    rowPitch += extraWidth;
-                } while ((rowPitch % pixelSize) != 0);
+                    imageInfo.rowPitch += extraWidth;
+                } while ((imageInfo.rowPitch % pixelSize) != 0);
 
                 size_t extraHeight = (int)random_log_in_range( 0, 8, seed );
-                slicePitch = rowPitch * (imageInfo.height + extraHeight);
+                imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight);
 
-                size = slicePitch * imageInfo.depth * 4 * 4;
+                size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4 * 4;
             } while(  size > maxAllocSize || ( size * 3 ) > memSize );
 
-            imageInfo.slicePitch = slicePitch;
-            imageInfo.rowPitch = rowPitch;
-
             if( gDebugTrace )
                 log_info( "   at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth );
-            int ret = test_get_image_info_single(context, queue, &imageInfo,
-                                                 seed, flags);
+            int ret = test_get_image_info_single( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
         }

diff --git a/test_conformance/images/kernel_image_methods/test_loops.cpp b/test_conformance/images/kernel_image_methods/test_loops.cpp
index 1d892a9..3b56d3e 100644
--- a/test_conformance/images/kernel_image_methods/test_loops.cpp
+++ b/test_conformance/images/kernel_image_methods/test_loops.cpp

@@ -16,24 +16,20 @@
 #include "../testBase.h"
 #include "../common.h"
 
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern int                gTypesToTest;
+extern int                gNormalizedModeToUse;
+extern cl_channel_type      gChannelTypeToUse;
+extern bool gDeviceLt20;
 
-extern int test_get_image_info_1D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  cl_image_format *format, cl_mem_flags flags);
-extern int test_get_image_info_2D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  cl_image_format *format, cl_mem_flags flags);
-extern int test_get_image_info_3D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  cl_image_format *format, cl_mem_flags flags);
-extern int test_get_image_info_1D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        cl_image_format *format,
-                                        cl_mem_flags flags);
-extern int test_get_image_info_2D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        cl_image_format *format,
-                                        cl_mem_flags flags);
+extern bool gDebugTrace;
+
+extern int test_get_image_info_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_get_image_info_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_get_image_info_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_get_image_info_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_get_image_info_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
 
 int test_image_type( cl_device_id device, cl_context context, cl_command_queue queue, cl_mem_object_type imageType, cl_mem_flags flags )
 {
@@ -42,14 +38,24 @@
     int ret = 0;
 
     // Grab the list of supported image formats for integer reads
-    std::vector<cl_image_format> formatList;
-    if (get_format_list(context, imageType, formatList, flags)) return -1;
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
 
-    std::vector<bool> filterFlags(formatList.size(), false);
-    filter_formats(formatList, filterFlags, nullptr);
+    if( get_format_list( context, imageType, formatList, numFormats, flags ) )
+        return -1;
+
+    filterFlags = new bool[ numFormats ];
+    if( filterFlags == NULL )
+    {
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
+    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
+    filter_formats( formatList, filterFlags, numFormats, 0 );
 
     // Run the format list
-    for (unsigned int i = 0; i < formatList.size(); i++)
+    for( unsigned int i = 0; i < numFormats; i++ )
     {
         int test_return = 0;
         if( filterFlags[i] )
@@ -65,24 +71,19 @@
 
         switch (imageType) {
             case CL_MEM_OBJECT_IMAGE1D:
-                test_return = test_get_image_info_1D(device, context, queue,
-                                                     &formatList[i], flags);
+                test_return = test_get_image_info_1D( device, context, queue, &formatList[ i ] );
                 break;
             case CL_MEM_OBJECT_IMAGE2D:
-                test_return = test_get_image_info_2D(device, context, queue,
-                                                     &formatList[i], flags);
+                test_return = test_get_image_info_2D( device, context, queue, &formatList[ i ] );
                 break;
             case CL_MEM_OBJECT_IMAGE3D:
-                test_return = test_get_image_info_3D(device, context, queue,
-                                                     &formatList[i], flags);
+                test_return = test_get_image_info_3D( device, context, queue, &formatList[ i ] );
                 break;
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                test_return = test_get_image_info_1D_array(
-                    device, context, queue, &formatList[i], flags);
+                test_return = test_get_image_info_1D_array( device, context, queue, &formatList[ i ] );
                 break;
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                test_return = test_get_image_info_2D_array(
-                    device, context, queue, &formatList[i], flags);
+                test_return = test_get_image_info_2D_array( device, context, queue, &formatList[ i ] );
                 break;
         }
 
@@ -96,6 +97,9 @@
         ret += test_return;
     }
 
+    delete filterFlags;
+    delete formatList;
+
     return ret;
 }
 
@@ -103,6 +107,9 @@
 {
     int version_check;
     auto version = get_device_cl_version(device);
+    if (version < Version(2, 0)) {
+        gDeviceLt20 = true;
+    }
 
     if ((version_check = (version < Version(1, 2))))
     {

diff --git a/test_conformance/images/kernel_read_write/main.cpp b/test_conformance/images/kernel_read_write/main.cpp
index 31dceb3..51d5c07 100644
--- a/test_conformance/images/kernel_read_write/main.cpp
+++ b/test_conformance/images/kernel_read_write/main.cpp

@@ -13,14 +13,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
+#include "../harness/compat.h"
 
 #include <stdio.h>
 #include <string.h>
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
+#endif
+
 #include "../testBase.h"
-#include "../harness/compat.h"
 #include "../harness/fpcontrol.h"
 #include "../harness/parseParameters.h"
 
+#include <vector>
+
 #if defined(__PPC__)
 // Global varaiable used to hold the FPU control register state. The FPSCR register can not
 // be used because not all Power implementations retain or observed the NI (non-IEEE
@@ -35,6 +43,7 @@
 bool gTestMaxImages;
 bool gTestImage2DFromBuffer;
 bool gTestMipmaps;
+bool gDeviceLt20 = false;
 cl_filter_mode    gFilterModeToUse = (cl_filter_mode)-1;
 // Default is CL_MEM_USE_HOST_PTR for the test
 cl_mem_flags    gMemFlagsToUse = CL_MEM_USE_HOST_PTR;
@@ -49,6 +58,8 @@
 int             gtestTypesToRun = 0;
 static int testTypesToRun;
 
+#define MAX_ALLOWED_STD_DEVIATION_IN_MB        8.0
+
 static void printUsage( const char *execName );
 
 extern int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, test_format_set_fn formatTestFn, cl_mem_object_type imageType );
@@ -106,6 +117,10 @@
     bool            tDisableOffsets = false;
     bool            tNormalizedModeToUse = false;
     cl_filter_mode  tFilterModeToUse = (cl_filter_mode)-1;
+    auto version = get_device_cl_version(device);
+    if (version < Version(2, 0)) {
+        gDeviceLt20 = true;
+    }
 
     if( testTypesToRun & kReadTests )
     {
@@ -162,10 +177,11 @@
         }
     }
 
-    if ((testTypesToRun & kReadWriteTests)
-        && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
+    if (testTypesToRun & kReadWriteTests) {
+        if (gDeviceLt20)  {
+            log_info("TEST skipped, Opencl 2.0 + requried for this test");
+            return ret;
+        }
     }
 
     if( ( testTypesToRun & kReadWriteTests ) && !gTestMipmaps )
@@ -390,8 +406,7 @@
     FPU_mode_type oldMode;
     DisableFTZ(&oldMode);
 
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
 
     // Restore FP state before leaving
     RestoreFPState(&oldMode);

diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp
index e76710b..5182601 100644
--- a/test_conformance/images/kernel_read_write/test_common.cpp
+++ b/test_conformance/images/kernel_read_write/test_common.cpp

@@ -1,22 +1,6 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
 
 #include "test_common.h"
 
-
 cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error) {
     cl_sampler sampler = nullptr;
     if (test_mipmaps) {
@@ -33,1514 +17,3 @@
     return sampler;
 }
 
-void InitFloatCoordsCommon(image_descriptor *imageInfo,
-                           image_sampler_data *imageSampler, float *xOffsets,
-                           float *yOffsets, float *zOffsets, float xfract,
-                           float yfract, float zfract, int normalized_coords,
-                           MTdata d, int lod)
-{
-    size_t i = 0;
-    if (gDisableOffsets)
-    {
-        for (size_t z = 0; z < imageInfo->depth; z++)
-        {
-            for (size_t y = 0; y < imageInfo->height; y++)
-            {
-                for (size_t x = 0; x < imageInfo->width; x++, i++)
-                {
-                    xOffsets[i] = (float)(xfract + (double)x);
-                    yOffsets[i] = (float)(yfract + (double)y);
-                    zOffsets[i] = (float)(zfract + (double)z);
-                }
-            }
-        }
-    }
-    else
-    {
-        for (size_t z = 0; z < imageInfo->depth; z++)
-        {
-            for (size_t y = 0; y < imageInfo->height; y++)
-            {
-                for (size_t x = 0; x < imageInfo->width; x++, i++)
-                {
-                    xOffsets[i] =
-                        (float)(xfract
-                                + (double)((int)x
-                                           + random_in_range(-10, 10, d)));
-                    yOffsets[i] =
-                        (float)(yfract
-                                + (double)((int)y
-                                           + random_in_range(-10, 10, d)));
-                    zOffsets[i] =
-                        (float)(zfract
-                                + (double)((int)z
-                                           + random_in_range(-10, 10, d)));
-                }
-            }
-        }
-    }
-
-    if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
-    {
-        i = 0;
-        for (size_t z = 0; z < imageInfo->depth; z++)
-        {
-            for (size_t y = 0; y < imageInfo->height; y++)
-            {
-                for (size_t x = 0; x < imageInfo->width; x++, i++)
-                {
-                    xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
-                                               (double)imageInfo->width - 1.0);
-                    yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
-                                               (double)imageInfo->height - 1.0);
-                    zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
-                                               (double)imageInfo->depth - 1.0);
-                }
-            }
-        }
-    }
-
-    if (normalized_coords || gTestMipmaps)
-    {
-        i = 0;
-        if (lod == 0)
-        {
-            for (size_t z = 0; z < imageInfo->depth; z++)
-            {
-                for (size_t y = 0; y < imageInfo->height; y++)
-                {
-                    for (size_t x = 0; x < imageInfo->width; x++, i++)
-                    {
-                        xOffsets[i] = (float)((double)xOffsets[i]
-                                              / (double)imageInfo->width);
-                        yOffsets[i] = (float)((double)yOffsets[i]
-                                              / (double)imageInfo->height);
-                        zOffsets[i] = (float)((double)zOffsets[i]
-                                              / (double)imageInfo->depth);
-                    }
-                }
-            }
-        }
-        else if (gTestMipmaps)
-        {
-            size_t width_lod, height_lod, depth_lod;
-
-            width_lod =
-                (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height_lod =
-                (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            depth_lod =
-                (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-
-            for (size_t z = 0; z < depth_lod; z++)
-            {
-                for (size_t y = 0; y < height_lod; y++)
-                {
-                    for (size_t x = 0; x < width_lod; x++, i++)
-                    {
-                        xOffsets[i] =
-                            (float)((double)xOffsets[i] / (double)width_lod);
-                        yOffsets[i] =
-                            (float)((double)yOffsets[i] / (double)height_lod);
-                        zOffsets[i] =
-                            (float)((double)zOffsets[i] / (double)depth_lod);
-                    }
-                }
-            }
-        }
-    }
-}
-
-int test_read_image(cl_context context, cl_command_queue queue,
-                    cl_kernel kernel, image_descriptor *imageInfo,
-                    image_sampler_data *imageSampler, bool useFloatCoords,
-                    ExplicitType outputType, MTdata d)
-{
-    int error;
-    size_t threads[3];
-    static int initHalf = 0;
-
-    cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
-
-    clMemWrapper xOffsets, yOffsets, zOffsets, results;
-    clSamplerWrapper actualSampler;
-    BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
-
-    // Create offset data
-    BufferOwningPtr<cl_float> xOffsetValues(
-        malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
-               * imageInfo->depth));
-    BufferOwningPtr<cl_float> yOffsetValues(
-        malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
-               * imageInfo->depth));
-    BufferOwningPtr<cl_float> zOffsetValues(
-        malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
-               * imageInfo->depth));
-
-    if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
-        if (DetectFloatToHalfRoundingMode(queue)) return 1;
-
-    BufferOwningPtr<char> imageValues;
-    generate_random_image_data(imageInfo, imageValues, d);
-
-    // Construct testing sources
-    clProtectedImage protImage;
-    clMemWrapper unprotImage;
-    cl_mem image;
-
-    if (gtestTypesToRun & kReadTests)
-    {
-        image_read_write_flags = CL_MEM_READ_ONLY;
-    }
-    else
-    {
-        image_read_write_flags = CL_MEM_READ_WRITE;
-    }
-
-    if (gMemFlagsToUse == CL_MEM_USE_HOST_PTR)
-    {
-        // clProtectedImage uses USE_HOST_PTR, so just rely on that for the
-        // testing (via Ian) Do not use protected images for max image size test
-        // since it rounds the row size to a page size
-        if (gTestMaxImages)
-        {
-            generate_random_image_data(imageInfo,
-                                       maxImageUseHostPtrBackingStore, d);
-            unprotImage = create_image_3d(
-                context, image_read_write_flags | CL_MEM_USE_HOST_PTR,
-                imageInfo->format, imageInfo->width, imageInfo->height,
-                imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
-                (gEnablePitch ? imageInfo->slicePitch : 0),
-                maxImageUseHostPtrBackingStore, &error);
-        }
-        else
-        {
-            error = protImage.Create(context, image_read_write_flags,
-                                     imageInfo->format, imageInfo->width,
-                                     imageInfo->height, imageInfo->depth);
-        }
-        if (error != CL_SUCCESS)
-        {
-            log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
-                      "(pitch %d, %d ) (%s)",
-                      (int)imageInfo->width, (int)imageInfo->height,
-                      (int)imageInfo->depth, (int)imageInfo->rowPitch,
-                      (int)imageInfo->slicePitch, IGetErrorString(error));
-            return error;
-        }
-        if (gTestMaxImages)
-            image = (cl_mem)unprotImage;
-        else
-            image = (cl_mem)protImage;
-    }
-    else if (gMemFlagsToUse == CL_MEM_COPY_HOST_PTR)
-    {
-        // Don't use clEnqueueWriteImage; just use copy host ptr to get the data
-        // in
-        unprotImage = create_image_3d(
-            context, image_read_write_flags | CL_MEM_COPY_HOST_PTR,
-            imageInfo->format, imageInfo->width, imageInfo->height,
-            imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
-            (gEnablePitch ? imageInfo->slicePitch : 0), imageValues, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
-                      "(pitch %d, %d ) (%s)",
-                      (int)imageInfo->width, (int)imageInfo->height,
-                      (int)imageInfo->depth, (int)imageInfo->rowPitch,
-                      (int)imageInfo->slicePitch, IGetErrorString(error));
-            return error;
-        }
-        image = unprotImage;
-    }
-    else // Either CL_MEM_ALLOC_HOST_PTR or none
-    {
-        // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can
-        // be accessed by the host, but otherwise it works just as if no flag is
-        // specified, so we just do the same thing either way
-        if (!gTestMipmaps)
-        {
-            unprotImage = create_image_3d(
-                context, image_read_write_flags | gMemFlagsToUse,
-                imageInfo->format, imageInfo->width, imageInfo->height,
-                imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
-                (gEnablePitch ? imageInfo->slicePitch : 0), imageValues,
-                &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error("ERROR: Unable to create 3D image of size %d x %d x "
-                          "%d (pitch %d, %d ) (%s)",
-                          (int)imageInfo->width, (int)imageInfo->height,
-                          (int)imageInfo->depth, (int)imageInfo->rowPitch,
-                          (int)imageInfo->slicePitch, IGetErrorString(error));
-                return error;
-            }
-            image = unprotImage;
-        }
-        else
-        {
-            cl_image_desc image_desc = { 0 };
-            image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
-            image_desc.image_width = imageInfo->width;
-            image_desc.image_height = imageInfo->height;
-            image_desc.image_depth = imageInfo->depth;
-            image_desc.num_mip_levels = imageInfo->num_mip_levels;
-
-
-            unprotImage =
-                clCreateImage(context, image_read_write_flags,
-                              imageInfo->format, &image_desc, NULL, &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error("ERROR: Unable to create %d level mipmapped 3D image "
-                          "of size %d x %d x %d (pitch %d, %d ) (%s)",
-                          (int)imageInfo->num_mip_levels, (int)imageInfo->width,
-                          (int)imageInfo->height, (int)imageInfo->depth,
-                          (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
-                          IGetErrorString(error));
-                return error;
-            }
-            image = unprotImage;
-        }
-    }
-
-    if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR)
-    {
-        size_t origin[4] = { 0, 0, 0, 0 };
-        size_t region[3] = { imageInfo->width, imageInfo->height,
-                             imageInfo->depth };
-
-        if (gDebugTrace) log_info(" - Writing image...\n");
-
-        if (!gTestMipmaps)
-        {
-
-            error =
-                clEnqueueWriteImage(queue, image, CL_TRUE, origin, region,
-                                    gEnablePitch ? imageInfo->rowPitch : 0,
-                                    gEnablePitch ? imageInfo->slicePitch : 0,
-                                    imageValues, 0, NULL, NULL);
-
-            if (error != CL_SUCCESS)
-            {
-                log_error("ERROR: Unable to write to 3D image of size %d x %d "
-                          "x %d \n",
-                          (int)imageInfo->width, (int)imageInfo->height,
-                          (int)imageInfo->depth);
-                return error;
-            }
-        }
-        else
-        {
-            int nextLevelOffset = 0;
-
-            for (int i = 0; i < imageInfo->num_mip_levels; i++)
-            {
-                origin[3] = i;
-                error = clEnqueueWriteImage(
-                    queue, image, CL_TRUE, origin, region,
-                    /*gEnablePitch ? imageInfo->rowPitch :*/ 0,
-                    /*gEnablePitch ? imageInfo->slicePitch :*/ 0,
-                    ((char *)imageValues + nextLevelOffset), 0, NULL, NULL);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("ERROR: Unable to write to %d level mipmapped 3D "
-                              "image of size %d x %d x %d\n",
-                              (int)imageInfo->num_mip_levels,
-                              (int)imageInfo->width, (int)imageInfo->height,
-                              (int)imageInfo->depth);
-                    return error;
-                }
-                nextLevelOffset += region[0] * region[1] * region[2]
-                    * get_pixel_size(imageInfo->format);
-                // Subsequent mip level dimensions keep halving
-                region[0] = region[0] >> 1 ? region[0] >> 1 : 1;
-                region[1] = region[1] >> 1 ? region[1] >> 1 : 1;
-                region[2] = region[2] >> 1 ? region[2] >> 1 : 1;
-            }
-        }
-    }
-
-    xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->depth,
-                              xOffsetValues, &error);
-    test_error(error, "Unable to create x offset buffer");
-    yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->depth,
-                              yOffsetValues, &error);
-    test_error(error, "Unable to create y offset buffer");
-    zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->depth,
-                              zOffsetValues, &error);
-    test_error(error, "Unable to create y offset buffer");
-    results =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       get_explicit_type_size(outputType) * 4 * imageInfo->width
-                           * imageInfo->height * imageInfo->depth,
-                       NULL, &error);
-    test_error(error, "Unable to create result buffer");
-
-    // Create sampler to use
-    actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
-    test_error(error, "Unable to create image sampler");
-
-    // Set arguments
-    int idx = 0;
-    error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &image);
-    test_error(error, "Unable to set kernel arguments");
-    if (!gUseKernelSamplers)
-    {
-        error =
-            clSetKernelArg(kernel, idx++, sizeof(cl_sampler), &actualSampler);
-        test_error(error, "Unable to set kernel arguments");
-    }
-    error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &xOffsets);
-    test_error(error, "Unable to set kernel arguments");
-    error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &yOffsets);
-    test_error(error, "Unable to set kernel arguments");
-    error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &zOffsets);
-    test_error(error, "Unable to set kernel arguments");
-    error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &results);
-    test_error(error, "Unable to set kernel arguments");
-
-    const float float_offsets[] = { 0.0f,
-                                    MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30),
-                                    0.25f,
-                                    0.3f,
-                                    0.5f - FLT_EPSILON / 4.0f,
-                                    0.5f,
-                                    0.9f,
-                                    1.0f - FLT_EPSILON / 2 };
-    int float_offset_count = sizeof(float_offsets) / sizeof(float_offsets[0]);
-    int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
-    int loopCount = 2 * float_offset_count;
-    if (!useFloatCoords) loopCount = 1;
-    if (gTestMaxImages)
-    {
-        loopCount = 1;
-        log_info("Testing each size only once with pixel offsets of %g for max "
-                 "sized images.\n",
-                 float_offsets[0]);
-    }
-
-    // Get the maximum absolute error for this format
-    double formatAbsoluteError =
-        get_max_absolute_error(imageInfo->format, imageSampler);
-    if (gDebugTrace)
-        log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
-
-    if (0 == initHalf
-        && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
-    {
-        initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode(queue);
-        if (initHalf)
-        {
-            log_info("Half rounding mode successfully detected.\n");
-        }
-    }
-
-    int nextLevelOffset = 0;
-    size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
-           depth_lod = imageInfo->depth;
-
-    // Loop over all mipmap levels, if we are testing mipmapped images.
-    for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels)
-         || (!gTestMipmaps && lod < 1);
-         lod++)
-    {
-        size_t resultValuesSize = width_lod * height_lod * depth_lod
-            * get_explicit_type_size(outputType) * 4;
-        BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
-        float lod_float = (float)lod;
-        if (gTestMipmaps)
-        {
-            // Set the lod kernel arg
-            if (gDebugTrace) log_info(" - Working at mip level %d\n", lod);
-            error = clSetKernelArg(kernel, idx, sizeof(float), &lod_float);
-            test_error(error, "Unable to set kernel arguments");
-        }
-
-        for (int q = 0; q < loopCount; q++)
-        {
-            float offset = float_offsets[q % float_offset_count];
-
-            // Init the coordinates
-            InitFloatCoordsCommon(imageInfo, imageSampler, xOffsetValues,
-                                  yOffsetValues, zOffsetValues,
-                                  q >= float_offset_count ? -offset : offset,
-                                  q >= float_offset_count ? offset : -offset,
-                                  q >= float_offset_count ? -offset : offset,
-                                  imageSampler->normalized_coords, d, lod);
-
-            error =
-                clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
-                                     sizeof(cl_float) * imageInfo->height
-                                         * imageInfo->width * imageInfo->depth,
-                                     xOffsetValues, 0, NULL, NULL);
-            test_error(error, "Unable to write x offsets");
-            error =
-                clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
-                                     sizeof(cl_float) * imageInfo->height
-                                         * imageInfo->width * imageInfo->depth,
-                                     yOffsetValues, 0, NULL, NULL);
-            test_error(error, "Unable to write y offsets");
-            error =
-                clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
-                                     sizeof(cl_float) * imageInfo->height
-                                         * imageInfo->width * imageInfo->depth,
-                                     zOffsetValues, 0, NULL, NULL);
-            test_error(error, "Unable to write z offsets");
-
-
-            memset(resultValues, 0xff, resultValuesSize);
-            clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, resultValuesSize,
-                                 resultValues, 0, NULL, NULL);
-
-            // Figure out thread dimensions
-            threads[0] = (size_t)width_lod;
-            threads[1] = (size_t)height_lod;
-            threads[2] = (size_t)depth_lod;
-
-            // Run the kernel
-            error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads,
-                                           NULL, 0, NULL, NULL);
-            test_error(error, "Unable to run kernel");
-
-            // Get results
-            error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0,
-                                        width_lod * height_lod * depth_lod
-                                            * get_explicit_type_size(outputType)
-                                            * 4,
-                                        resultValues, 0, NULL, NULL);
-            test_error(error, "Unable to read results from kernel");
-            if (gDebugTrace) log_info("    results read\n");
-
-            // Validate results element by element
-            char *imagePtr = (char *)imageValues + nextLevelOffset;
-            /*
-             * FLOAT output type
-             */
-            if (is_sRGBA_order(imageInfo->format->image_channel_order)
-                && (outputType == kFloat))
-            {
-                // Validate float results
-                float *resultPtr = (float *)(char *)resultValues;
-                float expected[4], error = 0.0f;
-                float maxErr = get_max_relative_error(
-                    imageInfo->format, imageSampler, 1 /*3D*/,
-                    CL_FILTER_LINEAR == imageSampler->filter_mode);
-
-                for (size_t z = 0, j = 0; z < depth_lod; z++)
-                {
-                    for (size_t y = 0; y < height_lod; y++)
-                    {
-                        for (size_t x = 0; x < width_lod; x++, j++)
-                        {
-                            // Step 1: go through and see if the results verify
-                            // for the pixel For the normalized case on a GPU we
-                            // put in offsets to the X, Y and Z to see if we
-                            // land on the right pixel. This addresses the
-                            // significant inaccuracy in GPU normalization in
-                            // OpenCL 1.0.
-                            int checkOnlyOnePixel = 0;
-                            int found_pixel = 0;
-                            float offset = NORM_OFFSET;
-                            if (!imageSampler->normalized_coords
-                                || imageSampler->filter_mode
-                                    != CL_FILTER_NEAREST
-                                || NORM_OFFSET == 0
-#if defined(__APPLE__)
-                                // Apple requires its CPU implementation to do
-                                // correctly rounded address arithmetic in all
-                                // modes
-                                || gDeviceType != CL_DEVICE_TYPE_GPU
-#endif
-                            )
-                                offset = 0.0f; // Loop only once
-
-                            for (float norm_offset_x = -offset;
-                                 norm_offset_x <= offset && !found_pixel;
-                                 norm_offset_x += NORM_OFFSET)
-                            {
-                                for (float norm_offset_y = -offset;
-                                     norm_offset_y <= offset && !found_pixel;
-                                     norm_offset_y += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_z = -offset;
-                                         norm_offset_z <= NORM_OFFSET
-                                         && !found_pixel;
-                                         norm_offset_z += NORM_OFFSET)
-                                    {
-
-                                        int hasDenormals = 0;
-                                        FloatPixel maxPixel =
-                                            sample_image_pixel_float_offset(
-                                                imagePtr, imageInfo,
-                                                xOffsetValues[j],
-                                                yOffsetValues[j],
-                                                zOffsetValues[j], norm_offset_x,
-                                                norm_offset_y, norm_offset_z,
-                                                imageSampler, expected, 0,
-                                                &hasDenormals, lod);
-
-                                        float err1 =
-                                            ABS_ERROR(sRGBmap(resultPtr[0]),
-                                                      sRGBmap(expected[0]));
-                                        float err2 =
-                                            ABS_ERROR(sRGBmap(resultPtr[1]),
-                                                      sRGBmap(expected[1]));
-                                        float err3 =
-                                            ABS_ERROR(sRGBmap(resultPtr[2]),
-                                                      sRGBmap(expected[2]));
-                                        float err4 = ABS_ERROR(resultPtr[3],
-                                                               expected[3]);
-                                        // Clamp to the minimum absolute error
-                                        // for the format
-                                        if (err1 > 0
-                                            && err1 < formatAbsoluteError)
-                                        {
-                                            err1 = 0.0f;
-                                        }
-                                        if (err2 > 0
-                                            && err2 < formatAbsoluteError)
-                                        {
-                                            err2 = 0.0f;
-                                        }
-                                        if (err3 > 0
-                                            && err3 < formatAbsoluteError)
-                                        {
-                                            err3 = 0.0f;
-                                        }
-                                        if (err4 > 0
-                                            && err4 < formatAbsoluteError)
-                                        {
-                                            err4 = 0.0f;
-                                        }
-                                        float maxErr = 0.5;
-
-                                        if (!(err1 <= maxErr)
-                                            || !(err2 <= maxErr)
-                                            || !(err3 <= maxErr)
-                                            || !(err4 <= maxErr))
-                                        {
-                                            // Try flushing the denormals
-                                            if (hasDenormals)
-                                            {
-                                                // If implementation decide to
-                                                // flush subnormals to zero, max
-                                                // error needs to be adjusted
-                                                maxErr += 4 * FLT_MIN;
-
-                                                maxPixel =
-                                                    sample_image_pixel_float_offset(
-                                                        imagePtr, imageInfo,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z,
-                                                        imageSampler, expected,
-                                                        0, NULL, lod);
-
-                                                err1 = ABS_ERROR(
-                                                    sRGBmap(resultPtr[0]),
-                                                    sRGBmap(expected[0]));
-                                                err2 = ABS_ERROR(
-                                                    sRGBmap(resultPtr[1]),
-                                                    sRGBmap(expected[1]));
-                                                err3 = ABS_ERROR(
-                                                    sRGBmap(resultPtr[2]),
-                                                    sRGBmap(expected[2]));
-                                                err4 = ABS_ERROR(resultPtr[3],
-                                                                 expected[3]);
-                                            }
-                                        }
-
-                                        found_pixel = (err1 <= maxErr)
-                                            && (err2 <= maxErr)
-                                            && (err3 <= maxErr)
-                                            && (err4 <= maxErr);
-                                    } // norm_offset_z
-                                } // norm_offset_y
-                            } // norm_offset_x
-
-                            // Step 2: If we did not find a match, then print
-                            // out debugging info.
-                            if (!found_pixel)
-                            {
-                                // For the normalized case on a GPU we put in
-                                // offsets to the X and Y to see if we land on
-                                // the right pixel. This addresses the
-                                // significant inaccuracy in GPU normalization
-                                // in OpenCL 1.0.
-                                checkOnlyOnePixel = 0;
-                                int shouldReturn = 0;
-                                for (float norm_offset_x = -offset;
-                                     norm_offset_x <= offset
-                                     && !checkOnlyOnePixel;
-                                     norm_offset_x += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_y = -offset;
-                                         norm_offset_y <= offset
-                                         && !checkOnlyOnePixel;
-                                         norm_offset_y += NORM_OFFSET)
-                                    {
-                                        for (float norm_offset_z = -offset;
-                                             norm_offset_z <= offset
-                                             && !checkOnlyOnePixel;
-                                             norm_offset_z += NORM_OFFSET)
-                                        {
-
-                                            int hasDenormals = 0;
-                                            FloatPixel maxPixel =
-                                                sample_image_pixel_float_offset(
-                                                    imagePtr, imageInfo,
-                                                    xOffsetValues[j],
-                                                    yOffsetValues[j],
-                                                    zOffsetValues[j],
-                                                    norm_offset_x,
-                                                    norm_offset_y,
-                                                    norm_offset_z, imageSampler,
-                                                    expected, 0, &hasDenormals,
-                                                    lod);
-
-                                            float err1 =
-                                                ABS_ERROR(sRGBmap(resultPtr[0]),
-                                                          sRGBmap(expected[0]));
-                                            float err2 =
-                                                ABS_ERROR(sRGBmap(resultPtr[1]),
-                                                          sRGBmap(expected[1]));
-                                            float err3 =
-                                                ABS_ERROR(sRGBmap(resultPtr[2]),
-                                                          sRGBmap(expected[2]));
-                                            float err4 = ABS_ERROR(resultPtr[3],
-                                                                   expected[3]);
-                                            float maxErr = 0.6;
-
-                                            if (!(err1 <= maxErr)
-                                                || !(err2 <= maxErr)
-                                                || !(err3 <= maxErr)
-                                                || !(err4 <= maxErr))
-                                            {
-                                                // Try flushing the denormals
-                                                if (hasDenormals)
-                                                {
-                                                    // If implementation decide
-                                                    // to flush subnormals to
-                                                    // zero, max error needs to
-                                                    // be adjusted
-                                                    maxErr += 4 * FLT_MIN;
-
-                                                    maxPixel =
-                                                        sample_image_pixel_float(
-                                                            imagePtr, imageInfo,
-                                                            xOffsetValues[j],
-                                                            yOffsetValues[j],
-                                                            zOffsetValues[j],
-                                                            imageSampler,
-                                                            expected, 0, NULL,
-                                                            lod);
-
-                                                    err1 = ABS_ERROR(
-                                                        sRGBmap(resultPtr[0]),
-                                                        sRGBmap(expected[0]));
-                                                    err2 = ABS_ERROR(
-                                                        sRGBmap(resultPtr[1]),
-                                                        sRGBmap(expected[1]));
-                                                    err3 = ABS_ERROR(
-                                                        sRGBmap(resultPtr[2]),
-                                                        sRGBmap(expected[2]));
-                                                    err4 =
-                                                        ABS_ERROR(resultPtr[3],
-                                                                  expected[3]);
-                                                }
-                                            }
-
-                                            if (!(err1 <= maxErr)
-                                                || !(err2 <= maxErr)
-                                                || !(err3 <= maxErr)
-                                                || !(err4 <= maxErr))
-                                            {
-                                                log_error(
-                                                    "FAILED norm_offsets: %g , "
-                                                    "%g , %g:\n",
-                                                    norm_offset_x,
-                                                    norm_offset_y,
-                                                    norm_offset_z);
-
-                                                float tempOut[4];
-                                                shouldReturn |=
-                                                    determine_validation_error_offset<
-                                                        float>(
-                                                        imagePtr, imageInfo,
-                                                        imageSampler, resultPtr,
-                                                        expected, error,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z, j,
-                                                        numTries, numClamped,
-                                                        true, lod);
-                                                log_error("Step by step:\n");
-                                                FloatPixel temp =
-                                                    sample_image_pixel_float_offset(
-                                                        imagePtr, imageInfo,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z,
-                                                        imageSampler, tempOut,
-                                                        1 /*verbose*/,
-                                                        &hasDenormals, lod);
-                                                log_error(
-                                                    "\tulps: %2.2f, %2.2f, "
-                                                    "%2.2f, %2.2f  (max "
-                                                    "allowed: %2.2f)\n\n",
-                                                    Ulp_Error(resultPtr[0],
-                                                              expected[0]),
-                                                    Ulp_Error(resultPtr[1],
-                                                              expected[1]),
-                                                    Ulp_Error(resultPtr[2],
-                                                              expected[2]),
-                                                    Ulp_Error(resultPtr[3],
-                                                              expected[3]),
-                                                    Ulp_Error(
-                                                        MAKE_HEX_FLOAT(
-                                                            0x1.000002p0f,
-                                                            0x1000002L, -24)
-                                                            + maxErr,
-                                                        MAKE_HEX_FLOAT(
-                                                            0x1.000002p0f,
-                                                            0x1000002L, -24)));
-                                            }
-                                            else
-                                            {
-                                                log_error(
-                                                    "Test error: we should "
-                                                    "have detected this "
-                                                    "passing above.\n");
-                                            }
-                                        } // norm_offset_z
-                                    } // norm_offset_y
-                                } // norm_offset_x
-                                if (shouldReturn) return 1;
-                            } // if (!found_pixel)
-
-                            resultPtr += 4;
-                        }
-                    }
-                }
-            }
-            /*
-             * FLOAT output type
-             */
-            else if (outputType == kFloat)
-            {
-                // Validate float results
-                float *resultPtr = (float *)(char *)resultValues;
-                float expected[4], error = 0.0f;
-                float maxErr = get_max_relative_error(
-                    imageInfo->format, imageSampler, 1 /*3D*/,
-                    CL_FILTER_LINEAR == imageSampler->filter_mode);
-
-                for (size_t z = 0, j = 0; z < depth_lod; z++)
-                {
-                    for (size_t y = 0; y < height_lod; y++)
-                    {
-                        for (size_t x = 0; x < width_lod; x++, j++)
-                        {
-                            // Step 1: go through and see if the results verify
-                            // for the pixel For the normalized case on a GPU we
-                            // put in offsets to the X, Y and Z to see if we
-                            // land on the right pixel. This addresses the
-                            // significant inaccuracy in GPU normalization in
-                            // OpenCL 1.0.
-                            int checkOnlyOnePixel = 0;
-                            int found_pixel = 0;
-                            float offset = NORM_OFFSET;
-                            if (!imageSampler->normalized_coords
-                                || imageSampler->filter_mode
-                                    != CL_FILTER_NEAREST
-                                || NORM_OFFSET == 0
-#if defined(__APPLE__)
-                                // Apple requires its CPU implementation to do
-                                // correctly rounded address arithmetic in all
-                                // modes
-                                || gDeviceType != CL_DEVICE_TYPE_GPU
-#endif
-                            )
-                                offset = 0.0f; // Loop only once
-
-                            for (float norm_offset_x = -offset;
-                                 norm_offset_x <= offset && !found_pixel;
-                                 norm_offset_x += NORM_OFFSET)
-                            {
-                                for (float norm_offset_y = -offset;
-                                     norm_offset_y <= offset && !found_pixel;
-                                     norm_offset_y += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_z = -offset;
-                                         norm_offset_z <= NORM_OFFSET
-                                         && !found_pixel;
-                                         norm_offset_z += NORM_OFFSET)
-                                    {
-
-                                        int hasDenormals = 0;
-                                        FloatPixel maxPixel =
-                                            sample_image_pixel_float_offset(
-                                                imagePtr, imageInfo,
-                                                xOffsetValues[j],
-                                                yOffsetValues[j],
-                                                zOffsetValues[j], norm_offset_x,
-                                                norm_offset_y, norm_offset_z,
-                                                imageSampler, expected, 0,
-                                                &hasDenormals, lod);
-
-                                        float err1 = ABS_ERROR(resultPtr[0],
-                                                               expected[0]);
-                                        float err2 = ABS_ERROR(resultPtr[1],
-                                                               expected[1]);
-                                        float err3 = ABS_ERROR(resultPtr[2],
-                                                               expected[2]);
-                                        float err4 = ABS_ERROR(resultPtr[3],
-                                                               expected[3]);
-                                        // Clamp to the minimum absolute error
-                                        // for the format
-                                        if (err1 > 0
-                                            && err1 < formatAbsoluteError)
-                                        {
-                                            err1 = 0.0f;
-                                        }
-                                        if (err2 > 0
-                                            && err2 < formatAbsoluteError)
-                                        {
-                                            err2 = 0.0f;
-                                        }
-                                        if (err3 > 0
-                                            && err3 < formatAbsoluteError)
-                                        {
-                                            err3 = 0.0f;
-                                        }
-                                        if (err4 > 0
-                                            && err4 < formatAbsoluteError)
-                                        {
-                                            err4 = 0.0f;
-                                        }
-                                        float maxErr1 = MAX(
-                                            maxErr * maxPixel.p[0], FLT_MIN);
-                                        float maxErr2 = MAX(
-                                            maxErr * maxPixel.p[1], FLT_MIN);
-                                        float maxErr3 = MAX(
-                                            maxErr * maxPixel.p[2], FLT_MIN);
-                                        float maxErr4 = MAX(
-                                            maxErr * maxPixel.p[3], FLT_MIN);
-
-                                        if (!(err1 <= maxErr1)
-                                            || !(err2 <= maxErr2)
-                                            || !(err3 <= maxErr3)
-                                            || !(err4 <= maxErr4))
-                                        {
-                                            // Try flushing the denormals
-                                            if (hasDenormals)
-                                            {
-                                                // If implementation decide to
-                                                // flush subnormals to zero, max
-                                                // error needs to be adjusted
-                                                maxErr1 += 4 * FLT_MIN;
-                                                maxErr2 += 4 * FLT_MIN;
-                                                maxErr3 += 4 * FLT_MIN;
-                                                maxErr4 += 4 * FLT_MIN;
-
-                                                maxPixel =
-                                                    sample_image_pixel_float_offset(
-                                                        imagePtr, imageInfo,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z,
-                                                        imageSampler, expected,
-                                                        0, NULL, lod);
-
-                                                err1 = ABS_ERROR(resultPtr[0],
-                                                                 expected[0]);
-                                                err2 = ABS_ERROR(resultPtr[1],
-                                                                 expected[1]);
-                                                err3 = ABS_ERROR(resultPtr[2],
-                                                                 expected[2]);
-                                                err4 = ABS_ERROR(resultPtr[3],
-                                                                 expected[3]);
-                                            }
-                                        }
-
-                                        found_pixel = (err1 <= maxErr1)
-                                            && (err2 <= maxErr2)
-                                            && (err3 <= maxErr3)
-                                            && (err4 <= maxErr4);
-                                    } // norm_offset_z
-                                } // norm_offset_y
-                            } // norm_offset_x
-
-                            // Step 2: If we did not find a match, then print
-                            // out debugging info.
-                            if (!found_pixel)
-                            {
-                                // For the normalized case on a GPU we put in
-                                // offsets to the X and Y to see if we land on
-                                // the right pixel. This addresses the
-                                // significant inaccuracy in GPU normalization
-                                // in OpenCL 1.0.
-                                checkOnlyOnePixel = 0;
-                                int shouldReturn = 0;
-                                for (float norm_offset_x = -offset;
-                                     norm_offset_x <= offset
-                                     && !checkOnlyOnePixel;
-                                     norm_offset_x += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_y = -offset;
-                                         norm_offset_y <= offset
-                                         && !checkOnlyOnePixel;
-                                         norm_offset_y += NORM_OFFSET)
-                                    {
-                                        for (float norm_offset_z = -offset;
-                                             norm_offset_z <= offset
-                                             && !checkOnlyOnePixel;
-                                             norm_offset_z += NORM_OFFSET)
-                                        {
-
-                                            int hasDenormals = 0;
-                                            FloatPixel maxPixel =
-                                                sample_image_pixel_float_offset(
-                                                    imagePtr, imageInfo,
-                                                    xOffsetValues[j],
-                                                    yOffsetValues[j],
-                                                    zOffsetValues[j],
-                                                    norm_offset_x,
-                                                    norm_offset_y,
-                                                    norm_offset_z, imageSampler,
-                                                    expected, 0, &hasDenormals,
-                                                    lod);
-
-                                            float err1 = ABS_ERROR(resultPtr[0],
-                                                                   expected[0]);
-                                            float err2 = ABS_ERROR(resultPtr[1],
-                                                                   expected[1]);
-                                            float err3 = ABS_ERROR(resultPtr[2],
-                                                                   expected[2]);
-                                            float err4 = ABS_ERROR(resultPtr[3],
-                                                                   expected[3]);
-                                            float maxErr1 =
-                                                MAX(maxErr * maxPixel.p[0],
-                                                    FLT_MIN);
-                                            float maxErr2 =
-                                                MAX(maxErr * maxPixel.p[1],
-                                                    FLT_MIN);
-                                            float maxErr3 =
-                                                MAX(maxErr * maxPixel.p[2],
-                                                    FLT_MIN);
-                                            float maxErr4 =
-                                                MAX(maxErr * maxPixel.p[3],
-                                                    FLT_MIN);
-
-
-                                            if (!(err1 <= maxErr1)
-                                                || !(err2 <= maxErr2)
-                                                || !(err3 <= maxErr3)
-                                                || !(err4 <= maxErr4))
-                                            {
-                                                // Try flushing the denormals
-                                                if (hasDenormals)
-                                                {
-                                                    maxErr1 += 4 * FLT_MIN;
-                                                    maxErr2 += 4 * FLT_MIN;
-                                                    maxErr3 += 4 * FLT_MIN;
-                                                    maxErr4 += 4 * FLT_MIN;
-
-                                                    maxPixel =
-                                                        sample_image_pixel_float(
-                                                            imagePtr, imageInfo,
-                                                            xOffsetValues[j],
-                                                            yOffsetValues[j],
-                                                            zOffsetValues[j],
-                                                            imageSampler,
-                                                            expected, 0, NULL,
-                                                            lod);
-
-                                                    err1 =
-                                                        ABS_ERROR(resultPtr[0],
-                                                                  expected[0]);
-                                                    err2 =
-                                                        ABS_ERROR(resultPtr[1],
-                                                                  expected[1]);
-                                                    err3 =
-                                                        ABS_ERROR(resultPtr[2],
-                                                                  expected[2]);
-                                                    err4 =
-                                                        ABS_ERROR(resultPtr[3],
-                                                                  expected[3]);
-                                                }
-                                            }
-
-                                            if (!(err1 <= maxErr1)
-                                                || !(err2 <= maxErr2)
-                                                || !(err3 <= maxErr3)
-                                                || !(err4 <= maxErr4))
-                                            {
-                                                log_error(
-                                                    "FAILED norm_offsets: %g , "
-                                                    "%g , %g:\n",
-                                                    norm_offset_x,
-                                                    norm_offset_y,
-                                                    norm_offset_z);
-
-                                                float tempOut[4];
-                                                shouldReturn |=
-                                                    determine_validation_error_offset<
-                                                        float>(
-                                                        imagePtr, imageInfo,
-                                                        imageSampler, resultPtr,
-                                                        expected, error,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z, j,
-                                                        numTries, numClamped,
-                                                        true, lod);
-                                                log_error("Step by step:\n");
-                                                FloatPixel temp =
-                                                    sample_image_pixel_float_offset(
-                                                        imagePtr, imageInfo,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z,
-                                                        imageSampler, tempOut,
-                                                        1 /*verbose*/,
-                                                        &hasDenormals, lod);
-                                                log_error(
-                                                    "\tulps: %2.2f, %2.2f, "
-                                                    "%2.2f, %2.2f  (max "
-                                                    "allowed: %2.2f)\n\n",
-                                                    Ulp_Error(resultPtr[0],
-                                                              expected[0]),
-                                                    Ulp_Error(resultPtr[1],
-                                                              expected[1]),
-                                                    Ulp_Error(resultPtr[2],
-                                                              expected[2]),
-                                                    Ulp_Error(resultPtr[3],
-                                                              expected[3]),
-                                                    Ulp_Error(
-                                                        MAKE_HEX_FLOAT(
-                                                            0x1.000002p0f,
-                                                            0x1000002L, -24)
-                                                            + maxErr,
-                                                        MAKE_HEX_FLOAT(
-                                                            0x1.000002p0f,
-                                                            0x1000002L, -24)));
-                                            }
-                                            else
-                                            {
-                                                log_error(
-                                                    "Test error: we should "
-                                                    "have detected this "
-                                                    "passing above.\n");
-                                            }
-                                        } // norm_offset_z
-                                    } // norm_offset_y
-                                } // norm_offset_x
-                                if (shouldReturn) return 1;
-                            } // if (!found_pixel)
-
-                            resultPtr += 4;
-                        }
-                    }
-                }
-            }
-            /*
-             * UINT output type
-             */
-            else if (outputType == kUInt)
-            {
-                // Validate unsigned integer results
-                unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
-                unsigned int expected[4];
-                float error;
-                for (size_t z = 0, j = 0; z < depth_lod; z++)
-                {
-                    for (size_t y = 0; y < height_lod; y++)
-                    {
-                        for (size_t x = 0; x < width_lod; x++, j++)
-                        {
-                            // Step 1: go through and see if the results verify
-                            // for the pixel For the normalized case on a GPU we
-                            // put in offsets to the X, Y and Z to see if we
-                            // land on the right pixel. This addresses the
-                            // significant inaccuracy in GPU normalization in
-                            // OpenCL 1.0.
-                            int checkOnlyOnePixel = 0;
-                            int found_pixel = 0;
-                            for (float norm_offset_x = -NORM_OFFSET;
-                                 norm_offset_x <= NORM_OFFSET && !found_pixel
-                                 && !checkOnlyOnePixel;
-                                 norm_offset_x += NORM_OFFSET)
-                            {
-                                for (float norm_offset_y = -NORM_OFFSET;
-                                     norm_offset_y <= NORM_OFFSET
-                                     && !found_pixel && !checkOnlyOnePixel;
-                                     norm_offset_y += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_z = -NORM_OFFSET;
-                                         norm_offset_z <= NORM_OFFSET
-                                         && !found_pixel && !checkOnlyOnePixel;
-                                         norm_offset_z += NORM_OFFSET)
-                                    {
-
-                                        // If we are not on a GPU, or we are not
-                                        // normalized, then only test with
-                                        // offsets (0.0, 0.0) E.g., test one
-                                        // pixel.
-                                        if (!imageSampler->normalized_coords
-                                            || gDeviceType != CL_DEVICE_TYPE_GPU
-                                            || NORM_OFFSET == 0)
-                                        {
-                                            norm_offset_x = 0.0f;
-                                            norm_offset_y = 0.0f;
-                                            norm_offset_z = 0.0f;
-                                            checkOnlyOnePixel = 1;
-                                        }
-
-                                        sample_image_pixel_offset<unsigned int>(
-                                            imagePtr, imageInfo,
-                                            xOffsetValues[j], yOffsetValues[j],
-                                            zOffsetValues[j], norm_offset_x,
-                                            norm_offset_y, norm_offset_z,
-                                            imageSampler, expected, lod);
-
-                                        error = errMax(
-                                            errMax(abs_diff_uint(expected[0],
-                                                                 resultPtr[0]),
-                                                   abs_diff_uint(expected[1],
-                                                                 resultPtr[1])),
-                                            errMax(
-                                                abs_diff_uint(expected[2],
-                                                              resultPtr[2]),
-                                                abs_diff_uint(expected[3],
-                                                              resultPtr[3])));
-
-                                        if (error < MAX_ERR) found_pixel = 1;
-                                    } // norm_offset_z
-                                } // norm_offset_y
-                            } // norm_offset_x
-
-                            // Step 2: If we did not find a match, then print
-                            // out debugging info.
-                            if (!found_pixel)
-                            {
-                                // For the normalized case on a GPU we put in
-                                // offsets to the X and Y to see if we land on
-                                // the right pixel. This addresses the
-                                // significant inaccuracy in GPU normalization
-                                // in OpenCL 1.0.
-                                checkOnlyOnePixel = 0;
-                                int shouldReturn = 0;
-                                for (float norm_offset_x = -NORM_OFFSET;
-                                     norm_offset_x <= NORM_OFFSET
-                                     && !checkOnlyOnePixel;
-                                     norm_offset_x += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_y = -NORM_OFFSET;
-                                         norm_offset_y <= NORM_OFFSET
-                                         && !checkOnlyOnePixel;
-                                         norm_offset_y += NORM_OFFSET)
-                                    {
-                                        for (float norm_offset_z = -NORM_OFFSET;
-                                             norm_offset_z <= NORM_OFFSET
-                                             && !checkOnlyOnePixel;
-                                             norm_offset_z += NORM_OFFSET)
-                                        {
-
-                                            // If we are not on a GPU, or we are
-                                            // not normalized, then only test
-                                            // with offsets (0.0, 0.0) E.g.,
-                                            // test one pixel.
-                                            if (!imageSampler->normalized_coords
-                                                || gDeviceType
-                                                    != CL_DEVICE_TYPE_GPU
-                                                || NORM_OFFSET == 0)
-                                            {
-                                                norm_offset_x = 0.0f;
-                                                norm_offset_y = 0.0f;
-                                                norm_offset_z = 0.0f;
-                                                checkOnlyOnePixel = 1;
-                                            }
-
-                                            sample_image_pixel_offset<
-                                                unsigned int>(
-                                                imagePtr, imageInfo,
-                                                xOffsetValues[j],
-                                                yOffsetValues[j],
-                                                zOffsetValues[j], norm_offset_x,
-                                                norm_offset_y, norm_offset_z,
-                                                imageSampler, expected, lod);
-
-                                            error = errMax(
-                                                errMax(
-                                                    abs_diff_uint(expected[0],
-                                                                  resultPtr[0]),
-                                                    abs_diff_uint(
-                                                        expected[1],
-                                                        resultPtr[1])),
-                                                errMax(
-                                                    abs_diff_uint(expected[2],
-                                                                  resultPtr[2]),
-                                                    abs_diff_uint(
-                                                        expected[3],
-                                                        resultPtr[3])));
-
-                                            if (error > MAX_ERR)
-                                            {
-                                                log_error(
-                                                    "FAILED norm_offsets: %g , "
-                                                    "%g , %g:\n",
-                                                    norm_offset_x,
-                                                    norm_offset_y,
-                                                    norm_offset_z);
-                                                shouldReturn |=
-                                                    determine_validation_error_offset<
-                                                        unsigned int>(
-                                                        imagePtr, imageInfo,
-                                                        imageSampler, resultPtr,
-                                                        expected, error,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z, j,
-                                                        numTries, numClamped,
-                                                        false, lod);
-                                            }
-                                            else
-                                            {
-                                                log_error(
-                                                    "Test error: we should "
-                                                    "have detected this "
-                                                    "passing above.\n");
-                                            }
-                                        } // norm_offset_z
-                                    } // norm_offset_y
-                                } // norm_offset_x
-                                if (shouldReturn) return 1;
-                            } // if (!found_pixel)
-
-                            resultPtr += 4;
-                        }
-                    }
-                }
-            }
-            else
-            /*
-             * INT output type
-             */
-            {
-                // Validate integer results
-                int *resultPtr = (int *)(char *)resultValues;
-                int expected[4];
-                float error;
-                for (size_t z = 0, j = 0; z < depth_lod; z++)
-                {
-                    for (size_t y = 0; y < height_lod; y++)
-                    {
-                        for (size_t x = 0; x < width_lod; x++, j++)
-                        {
-                            // Step 1: go through and see if the results verify
-                            // for the pixel For the normalized case on a GPU we
-                            // put in offsets to the X, Y and Z to see if we
-                            // land on the right pixel. This addresses the
-                            // significant inaccuracy in GPU normalization in
-                            // OpenCL 1.0.
-                            int checkOnlyOnePixel = 0;
-                            int found_pixel = 0;
-                            for (float norm_offset_x = -NORM_OFFSET;
-                                 norm_offset_x <= NORM_OFFSET && !found_pixel
-                                 && !checkOnlyOnePixel;
-                                 norm_offset_x += NORM_OFFSET)
-                            {
-                                for (float norm_offset_y = -NORM_OFFSET;
-                                     norm_offset_y <= NORM_OFFSET
-                                     && !found_pixel && !checkOnlyOnePixel;
-                                     norm_offset_y += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_z = -NORM_OFFSET;
-                                         norm_offset_z <= NORM_OFFSET
-                                         && !found_pixel && !checkOnlyOnePixel;
-                                         norm_offset_z += NORM_OFFSET)
-                                    {
-
-                                        // If we are not on a GPU, or we are not
-                                        // normalized, then only test with
-                                        // offsets (0.0, 0.0) E.g., test one
-                                        // pixel.
-                                        if (!imageSampler->normalized_coords
-                                            || gDeviceType != CL_DEVICE_TYPE_GPU
-                                            || NORM_OFFSET == 0)
-                                        {
-                                            norm_offset_x = 0.0f;
-                                            norm_offset_y = 0.0f;
-                                            norm_offset_z = 0.0f;
-                                            checkOnlyOnePixel = 1;
-                                        }
-
-                                        sample_image_pixel_offset<int>(
-                                            imagePtr, imageInfo,
-                                            xOffsetValues[j], yOffsetValues[j],
-                                            zOffsetValues[j], norm_offset_x,
-                                            norm_offset_y, norm_offset_z,
-                                            imageSampler, expected, lod);
-
-                                        error = errMax(
-                                            errMax(abs_diff_int(expected[0],
-                                                                resultPtr[0]),
-                                                   abs_diff_int(expected[1],
-                                                                resultPtr[1])),
-                                            errMax(abs_diff_int(expected[2],
-                                                                resultPtr[2]),
-                                                   abs_diff_int(expected[3],
-                                                                resultPtr[3])));
-
-                                        if (error < MAX_ERR) found_pixel = 1;
-                                    } // norm_offset_z
-                                } // norm_offset_y
-                            } // norm_offset_x
-
-                            // Step 2: If we did not find a match, then print
-                            // out debugging info.
-                            if (!found_pixel)
-                            {
-                                // For the normalized case on a GPU we put in
-                                // offsets to the X and Y to see if we land on
-                                // the right pixel. This addresses the
-                                // significant inaccuracy in GPU normalization
-                                // in OpenCL 1.0.
-                                checkOnlyOnePixel = 0;
-                                int shouldReturn = 0;
-                                for (float norm_offset_x = -NORM_OFFSET;
-                                     norm_offset_x <= NORM_OFFSET
-                                     && !checkOnlyOnePixel;
-                                     norm_offset_x += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_y = -NORM_OFFSET;
-                                         norm_offset_y <= NORM_OFFSET
-                                         && !checkOnlyOnePixel;
-                                         norm_offset_y += NORM_OFFSET)
-                                    {
-                                        for (float norm_offset_z = -NORM_OFFSET;
-                                             norm_offset_z <= NORM_OFFSET
-                                             && !checkOnlyOnePixel;
-                                             norm_offset_z += NORM_OFFSET)
-                                        {
-
-                                            // If we are not on a GPU, or we are
-                                            // not normalized, then only test
-                                            // with offsets (0.0, 0.0) E.g.,
-                                            // test one pixel.
-                                            if (!imageSampler->normalized_coords
-                                                || gDeviceType
-                                                    != CL_DEVICE_TYPE_GPU
-                                                || NORM_OFFSET == 0
-                                                || NORM_OFFSET == 0
-                                                || NORM_OFFSET == 0)
-                                            {
-                                                norm_offset_x = 0.0f;
-                                                norm_offset_y = 0.0f;
-                                                norm_offset_z = 0.0f;
-                                                checkOnlyOnePixel = 1;
-                                            }
-
-                                            sample_image_pixel_offset<int>(
-                                                imagePtr, imageInfo,
-                                                xOffsetValues[j],
-                                                yOffsetValues[j],
-                                                zOffsetValues[j], norm_offset_x,
-                                                norm_offset_y, norm_offset_z,
-                                                imageSampler, expected, lod);
-
-                                            error = errMax(
-                                                errMax(
-                                                    abs_diff_int(expected[0],
-                                                                 resultPtr[0]),
-                                                    abs_diff_int(expected[1],
-                                                                 resultPtr[1])),
-                                                errMax(
-                                                    abs_diff_int(expected[2],
-                                                                 resultPtr[2]),
-                                                    abs_diff_int(
-                                                        expected[3],
-                                                        resultPtr[3])));
-
-                                            if (error > MAX_ERR)
-                                            {
-                                                log_error(
-                                                    "FAILED norm_offsets: %g , "
-                                                    "%g , %g:\n",
-                                                    norm_offset_x,
-                                                    norm_offset_y,
-                                                    norm_offset_z);
-                                                shouldReturn |=
-                                                    determine_validation_error_offset<
-                                                        int>(
-                                                        imagePtr, imageInfo,
-                                                        imageSampler, resultPtr,
-                                                        expected, error,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z, j,
-                                                        numTries, numClamped,
-                                                        false, lod);
-                                            }
-                                            else
-                                            {
-                                                log_error(
-                                                    "Test error: we should "
-                                                    "have detected this "
-                                                    "passing above.\n");
-                                            }
-                                        } // norm_offset_z
-                                    } // norm_offset_y
-                                } // norm_offset_x
-                                if (shouldReturn) return 1;
-                            } // if (!found_pixel)
-
-                            resultPtr += 4;
-                        }
-                    }
-                }
-            }
-        }
-        {
-            nextLevelOffset += width_lod * height_lod * depth_lod
-                * get_pixel_size(imageInfo->format);
-            width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
-            height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
-            depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1;
-        }
-    }
-
-    return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
-}
\ No newline at end of file

diff --git a/test_conformance/images/kernel_read_write/test_common.h b/test_conformance/images/kernel_read_write/test_common.h
index e7ecbe0..1a1a8a1 100644
--- a/test_conformance/images/kernel_read_write/test_common.h
+++ b/test_conformance/images/kernel_read_write/test_common.h

@@ -1,231 +1,7 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
 
 #include "../testBase.h"
 
 #define ABS_ERROR(result, expected) (fabs(expected - result))
-#define CLAMP(_val, _min, _max)                                                \
-    ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
-
-#define MAX_ERR 0.005f
-#define MAX_TRIES 1
-#define MAX_CLAMPED 1
 
 extern cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error);
-extern void read_image_pixel_float(void *imageData, image_descriptor *imageInfo,
-                                   int x, int y, int z, float *outData);
 
-extern bool gExtraValidateInfo;
-extern bool gDisableOffsets;
-extern bool gUseKernelSamplers;
-extern cl_mem_flags gMemFlagsToUse;
-extern int gtestTypesToRun;
-extern uint64_t gRoundingStartValue;
-extern bool gPrintOptions;
-
-extern int test_read_image(cl_context context, cl_command_queue queue,
-                           cl_kernel kernel, image_descriptor *imageInfo,
-                           image_sampler_data *imageSampler,
-                           bool useFloatCoords, ExplicitType outputType,
-                           MTdata d);
-
-extern void InitFloatCoordsCommon(image_descriptor *imageInfo,
-                                  image_sampler_data *imageSampler,
-                                  float *xOffsets, float *yOffsets,
-                                  float *zOffsets, float xfract, float yfract,
-                                  float zfract, int normalized_coords, MTdata d,
-                                  int lod);
-
-template <class T>
-int determine_validation_error_offset(
-    void *imagePtr, image_descriptor *imageInfo,
-    image_sampler_data *imageSampler, T *resultPtr, T *expected, float error,
-    float x, float y, float z, float xAddressOffset, float yAddressOffset,
-    float zAddressOffset, size_t j, int &numTries, int &numClamped,
-    bool printAsFloat, int lod)
-{
-    int actualX, actualY, actualZ;
-    int found = debug_find_pixel_in_image(imagePtr, imageInfo, resultPtr,
-                                          &actualX, &actualY, &actualZ, lod);
-    bool clampingErr = false, clamped = false, otherClampingBug = false;
-    int clampedX, clampedY, clampedZ;
-
-    size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height,
-           imageDepth = imageInfo->depth;
-
-    clamped = get_integer_coords_offset(x, y, z, xAddressOffset, yAddressOffset,
-                                        zAddressOffset, imageWidth, imageHeight,
-                                        imageDepth, imageSampler, imageInfo,
-                                        clampedX, clampedY, clampedZ);
-
-    if (found)
-    {
-        // Is it a clamping bug?
-        if (clamped && clampedX == actualX && clampedY == actualY
-            && clampedZ == actualZ)
-        {
-            if ((--numClamped) == 0)
-            {
-                if (printAsFloat)
-                {
-                    log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did "
-                              "not validate! Expected (%g,%g,%g,%g), got "
-                              "(%g,%g,%g,%g), error of %g\n",
-                              j, x, x, y, y, z, z, (float)expected[0],
-                              (float)expected[1], (float)expected[2],
-                              (float)expected[3], (float)resultPtr[0],
-                              (float)resultPtr[1], (float)resultPtr[2],
-                              (float)resultPtr[3], error);
-                }
-                else
-                {
-                    log_error(
-                        "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
-                        "validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n",
-                        j, x, x, y, y, z, z, (int)expected[0], (int)expected[1],
-                        (int)expected[2], (int)expected[3], (int)resultPtr[0],
-                        (int)resultPtr[1], (int)resultPtr[2],
-                        (int)resultPtr[3]);
-                }
-                log_error("ERROR: TEST FAILED: Read is erroneously clamping "
-                          "coordinates!\n");
-                return -1;
-            }
-            clampingErr = true;
-            otherClampingBug = true;
-        }
-    }
-    if (clamped && !otherClampingBug)
-    {
-        // If we are in clamp-to-edge mode and we're getting zeroes, it's
-        // possible we're getting border erroneously
-        if (resultPtr[0] == 0 && resultPtr[1] == 0 && resultPtr[2] == 0
-            && resultPtr[3] == 0)
-        {
-            if ((--numClamped) == 0)
-            {
-                if (printAsFloat)
-                {
-                    log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did "
-                              "not validate! Expected (%g,%g,%g,%g), got "
-                              "(%g,%g,%g,%g), error of %g\n",
-                              j, x, x, y, y, z, z, (float)expected[0],
-                              (float)expected[1], (float)expected[2],
-                              (float)expected[3], (float)resultPtr[0],
-                              (float)resultPtr[1], (float)resultPtr[2],
-                              (float)resultPtr[3], error);
-                }
-                else
-                {
-                    log_error(
-                        "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
-                        "validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n",
-                        j, x, x, y, y, z, z, (int)expected[0], (int)expected[1],
-                        (int)expected[2], (int)expected[3], (int)resultPtr[0],
-                        (int)resultPtr[1], (int)resultPtr[2],
-                        (int)resultPtr[3]);
-                }
-                log_error("ERROR: TEST FAILED: Clamping is erroneously "
-                          "returning border color!\n");
-                return -1;
-            }
-            clampingErr = true;
-        }
-    }
-    if (!clampingErr)
-    {
-        /*        if( clamped && ( (int)x + (int)xOffsetValues[ j ] < 0 ||
-         (int)y + (int)yOffsetValues[ j ] < 0 ) )
-         {
-         log_error( "NEGATIVE COORDINATE ERROR\n" );
-         return -1;
-         }
-         */
-        if (true) // gExtraValidateInfo )
-        {
-            if (printAsFloat)
-            {
-                log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
-                          "validate!\n\tExpected (%g,%g,%g,%g),\n\t     got "
-                          "(%g,%g,%g,%g), error of %g\n",
-                          j, x, x, y, y, z, z, (float)expected[0],
-                          (float)expected[1], (float)expected[2],
-                          (float)expected[3], (float)resultPtr[0],
-                          (float)resultPtr[1], (float)resultPtr[2],
-                          (float)resultPtr[3], error);
-            }
-            else
-            {
-                log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
-                          "validate!\n\tExpected (%x,%x,%x,%x),\n\t     got "
-                          "(%x,%x,%x,%x)\n",
-                          j, x, x, y, y, z, z, (int)expected[0],
-                          (int)expected[1], (int)expected[2], (int)expected[3],
-                          (int)resultPtr[0], (int)resultPtr[1],
-                          (int)resultPtr[2], (int)resultPtr[3]);
-            }
-            log_error(
-                "Integer coords resolve to %d,%d,%d   with img size %d,%d,%d\n",
-                clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight,
-                (int)imageDepth);
-
-            if (printAsFloat && gExtraValidateInfo)
-            {
-                log_error("\nNearby values:\n");
-                for (int zOff = -1; zOff <= 1; zOff++)
-                {
-                    for (int yOff = -1; yOff <= 1; yOff++)
-                    {
-                        float top[4], real[4], bot[4];
-                        read_image_pixel_float(imagePtr, imageInfo,
-                                               clampedX - 1, clampedY + yOff,
-                                               clampedZ + zOff, top);
-                        read_image_pixel_float(imagePtr, imageInfo, clampedX,
-                                               clampedY + yOff, clampedZ + zOff,
-                                               real);
-                        read_image_pixel_float(imagePtr, imageInfo,
-                                               clampedX + 1, clampedY + yOff,
-                                               clampedZ + zOff, bot);
-                        log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2],
-                                  top[3]);
-                        log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2],
-                                  real[3]);
-                        log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2],
-                                  bot[3]);
-                    }
-                }
-            }
-            //        }
-            //        else
-            //            log_error( "\n" );
-            if (imageSampler->filter_mode != CL_FILTER_LINEAR)
-            {
-                if (found)
-                    log_error(
-                        "\tValue really found in image at %d,%d,%d (%s)\n",
-                        actualX, actualY, actualZ,
-                        (found > 1) ? "NOT unique!!" : "unique");
-                else
-                    log_error("\tValue not actually found in image\n");
-            }
-            log_error("\n");
-        }
-
-        numClamped = -1; // We force the clamped counter to never work
-        if ((--numTries) == 0) return -1;
-    }
-    return 0;
-}

diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp
index 03ca959..0b7d424 100644
--- a/test_conformance/images/kernel_read_write/test_iterations.cpp
+++ b/test_conformance/images/kernel_read_write/test_iterations.cpp

@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017, 2021 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -22,7 +22,20 @@
     #include <setjmp.h>
 #endif
 
-extern bool gTestImage2DFromBuffer;
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestImage2DFromBuffer, gTestMipmaps;
+extern bool            gUseKernelSamplers;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+extern cl_mem_flags gMemFlagsToUse;
+extern int gtestTypesToRun;
+extern bool gDeviceLt20;
+
+#define MAX_TRIES               1
+#define MAX_CLAMPED             1
 
 // Utility function to clamp down image sizes for certain tests to avoid
 // using too much memory.
@@ -72,6 +85,8 @@
 static const char *offsetSource =
 "   int offset = tidY*get_image_width(input) + tidX;\n";
 
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                            int x, int y, int z, float *outData );
 template <class T> int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
                                                 T *resultPtr, T * expected, float error,
                                 float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod = 0 )
@@ -275,6 +290,8 @@
     return 0;
 }
 
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
+
 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d )
 {
     size_t i = 0;
@@ -387,6 +404,9 @@
         }
     }
 }
+#ifndef MAX
+    #define MAX( _a, _b )           ((_a) > (_b) ? (_a) : (_b))
+#endif
 
 int validate_image_2D_depth_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
@@ -1154,11 +1174,8 @@
                 continue;
             if ( IsFloatSubnormal( expected[j] ) && actual[j] == 0.0f )
                 continue;
-            if (expected[j] != actual[j])
-            {
-                pass = false;
-                break;
-            }
+            pass = false;
+            break;
         }
     }
     return pass;
@@ -1176,11 +1193,8 @@
                 continue;
             if ( is_half_denorm( expected[j] ) && is_half_zero( actual[j] ) )
                 continue;
-            if (expected[j] != actual[j])
-            {
-                pass = false;
-                break;
-            }
+            pass = false;
+            break;
         }
     }
     return pass;
@@ -1414,20 +1428,11 @@
     if( gDebugTrace )
         log_info( " - Creating kernel arguments...\n" );
 
-    xOffsets =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_float) * imageInfo->width * imageInfo->height,
-                       xOffsetValues, &error);
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height, xOffsetValues, &error );
     test_error( error, "Unable to create x offset buffer" );
-    yOffsets =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_float) * imageInfo->width * imageInfo->height,
-                       yOffsetValues, &error);
+    yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height, yOffsetValues, &error );
     test_error( error, "Unable to create y offset buffer" );
-    results = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                             get_explicit_type_size(outputType) * 4
-                                 * imageInfo->width * imageInfo->height,
-                             NULL, &error);
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height, NULL, &error );
     test_error( error, "Unable to create result buffer" );
 
     // Create sampler to use
@@ -1557,11 +1562,8 @@
     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
 }
 
-int test_read_image_set_2D(cl_device_id device, cl_context context,
-                           cl_command_queue queue,
-                           const cl_image_format *format,
-                           image_sampler_data *imageSampler, bool floatCoords,
-                           ExplicitType outputType)
+int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                        bool floatCoords, ExplicitType outputType )
 {
     char programSrc[10240];
     const char *ptr;
@@ -1661,8 +1663,7 @@
             gTestMipmaps?", lod":" ");
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
 
     if( gTestSmallImages )

diff --git a/test_conformance/images/kernel_read_write/test_loops.cpp b/test_conformance/images/kernel_read_write/test_loops.cpp
index 795a9ed..0cf70a8 100644
--- a/test_conformance/images/kernel_read_write/test_loops.cpp
+++ b/test_conformance/images/kernel_read_write/test_loops.cpp

@@ -16,61 +16,43 @@
 #include "../testBase.h"
 #include "../common.h"
 
-extern cl_filter_mode gFilterModeToUse;
+extern cl_filter_mode     gFilterModeToUse;
 extern cl_addressing_mode gAddressModeToUse;
-extern int gNormalizedModeToUse;
-extern int gTypesToTest;
-extern int gtestTypesToRun;
+extern int                gTypesToTest;
+extern int                gNormalizedModeToUse;
+extern cl_channel_type      gChannelTypeToUse;
+extern cl_channel_order      gChannelOrderToUse;
 
-extern int test_read_image_set_1D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  bool floatCoords, ExplicitType outputType);
-extern int test_read_image_set_2D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  bool floatCoords, ExplicitType outputType);
-extern int test_read_image_set_3D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  bool floatCoords, ExplicitType outputType);
-extern int test_read_image_set_1D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        const cl_image_format *format,
-                                        image_sampler_data *imageSampler,
-                                        bool floatCoords,
-                                        ExplicitType outputType);
-extern int test_read_image_set_2D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        const cl_image_format *format,
-                                        image_sampler_data *imageSampler,
-                                        bool floatCoords,
-                                        ExplicitType outputType);
+extern bool gDebugTrace;
+extern bool gTestMipmaps;
 
-int test_read_image_type(cl_device_id device, cl_context context,
-                         cl_command_queue queue, const cl_image_format *format,
-                         bool floatCoords, image_sampler_data *imageSampler,
-                         ExplicitType outputType, cl_mem_object_type imageType)
+extern int  gtestTypesToRun;
+extern bool gDeviceLt20;
+
+extern int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue,  cl_image_format *format, image_sampler_data *imageSampler,
+                                  bool floatCoords, ExplicitType outputType );
+extern int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                                  bool floatCoords, ExplicitType outputType );
+extern int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                                  bool floatCoords, ExplicitType outputType );
+extern int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                                        bool floatCoords, ExplicitType outputType );
+extern int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                                        bool floatCoords, ExplicitType outputType );
+
+int test_read_image_type( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, bool floatCoords,
+                         image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
 {
     int ret = 0;
     cl_addressing_mode *addressModes = NULL;
 
-    // The sampler-less read image functions behave exactly as the corresponding
-    // read image functions described in section 6.13.14.2 that take integer
-    // coordinates and a sampler with filter mode set to CLK_FILTER_NEAREST,
-    // normalized coordinates set to CLK_NORMALIZED_COORDS_FALSE and addressing
-    // mode to CLK_ADDRESS_NONE
-    cl_addressing_mode addressModes_rw[] = { CL_ADDRESS_NONE,
-                                             (cl_addressing_mode)-1 };
-    cl_addressing_mode addressModes_ro[] = {
-        /* CL_ADDRESS_CLAMP_NONE,*/ CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP,
-        CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT, (cl_addressing_mode)-1
-    };
+    // The sampler-less read image functions behave exactly as the corresponding read image functions
+    // described in section 6.13.14.2 that take integer coordinates and a sampler with filter mode set to
+    // CLK_FILTER_NEAREST, normalized coordinates set to CLK_NORMALIZED_COORDS_FALSE and addressing mode to CLK_ADDRESS_NONE
+    cl_addressing_mode addressModes_rw[] = { CL_ADDRESS_NONE, (cl_addressing_mode)-1 };
+    cl_addressing_mode addressModes_ro[] = { /* CL_ADDRESS_CLAMP_NONE,*/ CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT, (cl_addressing_mode)-1 };
 
-    if (gtestTypesToRun & kReadWriteTests)
+    if(gtestTypesToRun & kReadWriteTests)
     {
         addressModes = addressModes_rw;
     }
@@ -79,45 +61,39 @@
         addressModes = addressModes_ro;
     }
 
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
     // According to the OpenCL specification, we do not guarantee the precision
     // of operations for linear filtering on the GPU.  We do not test linear
     // filtering for the CL_RGB CL_UNORM_INT_101010 image format; however, we
     // test it internally for a set of other image formats.
-    if ((gDeviceType == CL_DEVICE_TYPE_GPU)
-        && (imageSampler->filter_mode == CL_FILTER_LINEAR)
-        && (format->image_channel_order == CL_RGB)
-        && (format->image_channel_data_type == CL_UNORM_INT_101010))
+    if ((gDeviceType == CL_DEVICE_TYPE_GPU) &&
+        (imageSampler->filter_mode == CL_FILTER_LINEAR) &&
+        (format->image_channel_order == CL_RGB) &&
+        (format->image_channel_data_type == CL_UNORM_INT_101010))
     {
-        log_info("--- Skipping CL_RGB CL_UNORM_INT_101010 format with "
-                 "CL_FILTER_LINEAR on GPU.\n");
+        log_info("--- Skipping CL_RGB CL_UNORM_INT_101010 format with CL_FILTER_LINEAR on GPU.\n");
         return 0;
     }
 #endif
 
-    for (int adMode = 0; addressModes[adMode] != (cl_addressing_mode)-1;
-         adMode++)
+    for( int adMode = 0; addressModes[ adMode ] != (cl_addressing_mode)-1; adMode++ )
     {
-        imageSampler->addressing_mode = addressModes[adMode];
+        imageSampler->addressing_mode = addressModes[ adMode ];
 
-        if ((addressModes[adMode] == CL_ADDRESS_REPEAT
-             || addressModes[adMode] == CL_ADDRESS_MIRRORED_REPEAT)
-            && !(imageSampler->normalized_coords))
+        if( (addressModes[ adMode ] == CL_ADDRESS_REPEAT || addressModes[ adMode ] == CL_ADDRESS_MIRRORED_REPEAT) && !( imageSampler->normalized_coords ) )
             continue; // Repeat doesn't make sense for non-normalized coords
 
         // Use this run if we were told to only run a certain filter mode
-        if (gAddressModeToUse != (cl_addressing_mode)-1
-            && imageSampler->addressing_mode != gAddressModeToUse)
+        if( gAddressModeToUse != (cl_addressing_mode)-1 && imageSampler->addressing_mode != gAddressModeToUse )
             continue;
 
         /*
          Remove redundant check to see if workaround still necessary
-         // Check added in because this case was leaking through causing a crash
-         on CPU if( ! imageSampler->normalized_coords &&
-         imageSampler->addressing_mode == CL_ADDRESS_REPEAT ) continue; //repeat
-         mode requires normalized coordinates
+         // Check added in because this case was leaking through causing a crash on CPU
+         if( ! imageSampler->normalized_coords && imageSampler->addressing_mode == CL_ADDRESS_REPEAT )
+         continue;       //repeat mode requires normalized coordinates
          */
-        print_read_header(format, imageSampler, false);
+        print_read_header( format, imageSampler, false );
 
         gTestCount++;
 
@@ -125,37 +101,27 @@
         switch (imageType)
         {
             case CL_MEM_OBJECT_IMAGE1D:
-                retCode = test_read_image_set_1D(device, context, queue, format,
-                                                 imageSampler, floatCoords,
-                                                 outputType);
+                retCode = test_read_image_set_1D( device, context, queue, format, imageSampler, floatCoords, outputType );
                 break;
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                retCode = test_read_image_set_1D_array(device, context, queue,
-                                                       format, imageSampler,
-                                                       floatCoords, outputType);
+                retCode = test_read_image_set_1D_array( device, context, queue, format, imageSampler, floatCoords, outputType );
                 break;
             case CL_MEM_OBJECT_IMAGE2D:
-                retCode = test_read_image_set_2D(device, context, queue, format,
-                                                 imageSampler, floatCoords,
-                                                 outputType);
+                retCode = test_read_image_set_2D( device, context, queue, format, imageSampler, floatCoords, outputType );
                 break;
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                retCode = test_read_image_set_2D_array(device, context, queue,
-                                                       format, imageSampler,
-                                                       floatCoords, outputType);
+                retCode = test_read_image_set_2D_array( device, context, queue, format, imageSampler, floatCoords, outputType );
                 break;
             case CL_MEM_OBJECT_IMAGE3D:
-                retCode = test_read_image_set_3D(device, context, queue, format,
-                                                 imageSampler, floatCoords,
-                                                 outputType);
+                retCode = test_read_image_set_3D( device, context, queue, format, imageSampler, floatCoords, outputType );
                 break;
         }
-        if (retCode != 0)
+        if( retCode != 0 )
         {
             gFailCount++;
-            log_error("FAILED: ");
-            print_read_header(format, imageSampler, true);
-            log_info("\n");
+            log_error( "FAILED: " );
+            print_read_header( format, imageSampler, true );
+            log_info( "\n" );
         }
         ret |= retCode;
     }
@@ -163,13 +129,8 @@
     return ret;
 }
 
-int test_read_image_formats(cl_device_id device, cl_context context,
-                            cl_command_queue queue,
-                            const std::vector<cl_image_format> &formatList,
-                            const std::vector<bool> &filterFlags,
-                            image_sampler_data *imageSampler,
-                            ExplicitType outputType,
-                            cl_mem_object_type imageType)
+int test_read_image_formats( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+                            image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
 {
     int ret = 0;
     bool flipFlop[2] = { false, true };
@@ -177,52 +138,41 @@
 
 
     // Use this run if we were told to only run a certain filter mode
-    if (gFilterModeToUse != (cl_filter_mode)-1
-        && imageSampler->filter_mode != gFilterModeToUse)
+    if( gFilterModeToUse != (cl_filter_mode)-1 && imageSampler->filter_mode != gFilterModeToUse )
         return 0;
 
     // Test normalized/non-normalized
-    for (normalizedIdx = 0; normalizedIdx < 2; normalizedIdx++)
+    for( normalizedIdx = 0; normalizedIdx < 2; normalizedIdx++ )
     {
-        imageSampler->normalized_coords = flipFlop[normalizedIdx];
-        if (gNormalizedModeToUse != 7
-            && gNormalizedModeToUse != (int)imageSampler->normalized_coords)
+        imageSampler->normalized_coords = flipFlop[ normalizedIdx ];
+        if( gNormalizedModeToUse != 7 && gNormalizedModeToUse != (int)imageSampler->normalized_coords )
             continue;
 
-        for (floatCoordIdx = 0; floatCoordIdx < 2; floatCoordIdx++)
+        for( floatCoordIdx = 0; floatCoordIdx < 2; floatCoordIdx++ )
         {
-            // Checks added in because this case was leaking through causing a
-            // crash on CPU
-            if (!flipFlop[floatCoordIdx])
-                if (imageSampler->filter_mode != CL_FILTER_NEAREST
-                    || // integer coords can only be used with nearest
-                    flipFlop[normalizedIdx]) // Normalized integer coords makes
-                                             // no sense (they'd all be zero)
+            // Checks added in because this case was leaking through causing a crash on CPU
+            if( !flipFlop[ floatCoordIdx ] )
+                if( imageSampler->filter_mode != CL_FILTER_NEAREST      ||  // integer coords can only be used with nearest
+                   flipFlop[ normalizedIdx ])                               // Normalized integer coords makes no sense (they'd all be zero)
                     continue;
 
-            if (flipFlop[floatCoordIdx] && (gtestTypesToRun & kReadWriteTests))
+            if( flipFlop[ floatCoordIdx ] && (gtestTypesToRun & kReadWriteTests))
                 // sampler-less read in read_write tests run only integer coord
                 continue;
 
 
-            log_info("read_image (%s coords, %s results) "
-                     "*****************************\n",
-                     flipFlop[floatCoordIdx] ? (imageSampler->normalized_coords
-                                                    ? "normalized float"
-                                                    : "unnormalized float")
-                                             : "integer",
-                     get_explicit_type_name(outputType));
+            log_info( "read_image (%s coords, %s results) *****************************\n",
+                     flipFlop[ floatCoordIdx ] ? ( imageSampler->normalized_coords ? "normalized float" : "unnormalized float" ) : "integer",
+                     get_explicit_type_name( outputType ) );
 
-            for (unsigned int i = 0; i < formatList.size(); i++)
+            for( unsigned int i = 0; i < numFormats; i++ )
             {
-                if (filterFlags[i]) continue;
+                if( filterFlags[i] )
+                    continue;
 
-                const cl_image_format &imageFormat = formatList[i];
+                cl_image_format &imageFormat = formatList[ i ];
 
-                ret |=
-                    test_read_image_type(device, context, queue, &imageFormat,
-                                         flipFlop[floatCoordIdx], imageSampler,
-                                         outputType, imageType);
+                ret |= test_read_image_type( device, context, queue, &imageFormat, flipFlop[ floatCoordIdx ], imageSampler, outputType, imageType );
             }
         }
     }
@@ -230,74 +180,64 @@
 }
 
 
-int test_image_set(cl_device_id device, cl_context context,
-                   cl_command_queue queue, test_format_set_fn formatTestFn,
-                   cl_mem_object_type imageType)
+int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, test_format_set_fn formatTestFn, cl_mem_object_type imageType )
 {
     int ret = 0;
     static int printedFormatList = -1;
 
 
-    if ((imageType == CL_MEM_OBJECT_IMAGE3D)
-        && (formatTestFn == test_write_image_formats))
+    if ( ( 0 == is_extension_available( device, "cl_khr_3d_image_writes" )) && (imageType == CL_MEM_OBJECT_IMAGE3D) && (formatTestFn == test_write_image_formats) )
     {
-        if (0 == is_extension_available(device, "cl_khr_3d_image_writes"))
+        gFailCount++;
+        log_error( "-----------------------------------------------------\n" );
+        log_error( "FAILED: test writing CL_MEM_OBJECT_IMAGE3D images\n" );
+        log_error( "This device does not support the mandated extension cl_khr_3d_image_writes.\n");
+        log_error( "-----------------------------------------------------\n\n" );
+        return -1;
+    }
+
+    if ( gTestMipmaps )
+    {
+        if ( 0 == is_extension_available( device, "cl_khr_mipmap_image" ))
         {
-            log_info("-----------------------------------------------------\n");
-            log_info(
-                "This device does not support "
-                "cl_khr_3d_image_writes.\nSkipping 3d image write test. \n");
-            log_info(
-                "-----------------------------------------------------\n\n");
+            log_info( "-----------------------------------------------------\n" );
+            log_info( "This device does not support cl_khr_mipmap_image.\nSkipping mipmapped image test. \n" );
+            log_info( "-----------------------------------------------------\n\n" );
+            return 0;
+        }
+        if ( ( 0 == is_extension_available( device, "cl_khr_mipmap_image_writes" )) && (formatTestFn == test_write_image_formats))
+        {
+            log_info( "-----------------------------------------------------\n" );
+            log_info( "This device does not support cl_khr_mipmap_image_writes.\nSkipping mipmapped image write test. \n" );
+            log_info( "-----------------------------------------------------\n\n" );
             return 0;
         }
     }
 
-    if (gTestMipmaps)
-    {
-        if (0 == is_extension_available(device, "cl_khr_mipmap_image"))
-        {
-            log_info("-----------------------------------------------------\n");
-            log_info("This device does not support "
-                     "cl_khr_mipmap_image.\nSkipping mipmapped image test. \n");
-            log_info(
-                "-----------------------------------------------------\n\n");
-            return 0;
-        }
-        if ((0 == is_extension_available(device, "cl_khr_mipmap_image_writes"))
-            && (formatTestFn == test_write_image_formats))
-        {
-            log_info("-----------------------------------------------------\n");
-            log_info("This device does not support "
-                     "cl_khr_mipmap_image_writes.\nSkipping mipmapped image "
-                     "write test. \n");
-            log_info(
-                "-----------------------------------------------------\n\n");
-            return 0;
-        }
+    int version_check = (get_device_cl_version(device) < Version(1,2));
+    if (version_check != 0) {
+      switch (imageType) {
+        case CL_MEM_OBJECT_IMAGE1D:
+          test_missing_feature(version_check, "image_1D");
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+          test_missing_feature(version_check, "image_1D_array");
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+          test_missing_feature(version_check, "image_2D_array");
+      }
     }
 
-    int version_check = (get_device_cl_version(device) < Version(1, 2));
-    if (version_check != 0)
-    {
-        switch (imageType)
-        {
-            case CL_MEM_OBJECT_IMAGE1D:
-                test_missing_feature(version_check, "image_1D");
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                test_missing_feature(version_check, "image_1D_array");
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                test_missing_feature(version_check, "image_2D_array");
-        }
-    }
+    // Grab the list of supported image formats for integer reads
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
 
     // This flag is only for querying the list of supported formats
     // The flag for creating image will be set explicitly in test functions
     cl_mem_flags flags;
     const char *flagNames;
-    if (formatTestFn == test_read_image_formats)
+    if( formatTestFn == test_read_image_formats )
     {
-        if (gtestTypesToRun & kReadTests)
+        if(gtestTypesToRun & kReadTests)
         {
             flags = CL_MEM_READ_ONLY;
             flagNames = "read";
@@ -310,7 +250,7 @@
     }
     else
     {
-        if (gtestTypesToRun & kWriteTests)
+        if(gtestTypesToRun & kWriteTests)
         {
             flags = CL_MEM_WRITE_ONLY;
             flagNames = "write";
@@ -322,30 +262,33 @@
         }
     }
 
-    // Grab the list of supported image formats for integer reads
-    std::vector<cl_image_format> formatList;
-    if (get_format_list(context, imageType, formatList, flags)) return -1;
+    if( get_format_list( context, imageType, formatList, numFormats, flags ) )
+        return -1;
+    BufferOwningPtr<cl_image_format> formatListBuf(formatList);
 
-    // First time through, we'll go ahead and print the formats supported,
-    // regardless of type
-    int test = imageType
-        | (formatTestFn == test_read_image_formats ? (1 << 16) : (1 << 17));
-    if (printedFormatList != test)
+
+    filterFlags = new bool[ numFormats ];
+    if( filterFlags == NULL )
     {
-        log_info("---- Supported %s %s formats for this device ---- \n",
-                 convert_image_type_to_string(imageType), flagNames);
-        for (unsigned int f = 0; f < formatList.size(); f++)
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
+    }
+    BufferOwningPtr<bool> filterFlagsBuf(filterFlags);
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
+
+    // First time through, we'll go ahead and print the formats supported, regardless of type
+    int test = imageType | (formatTestFn == test_read_image_formats ? (1 << 16) : (1 << 17));
+    if( printedFormatList != test )
+    {
+        log_info( "---- Supported %s %s formats for this device ---- \n", convert_image_type_to_string(imageType), flagNames );
+        for( unsigned int f = 0; f < numFormats; f++ )
         {
-            if (IsChannelOrderSupported(formatList[f].image_channel_order)
-                && IsChannelTypeSupported(
-                    formatList[f].image_channel_data_type))
-                log_info(
-                    "  %-7s %-24s %d\n",
-                    GetChannelOrderName(formatList[f].image_channel_order),
-                    GetChannelTypeName(formatList[f].image_channel_data_type),
-                    (int)get_format_channel_count(&formatList[f]));
+            if ( IsChannelOrderSupported( formatList[ f ].image_channel_order ) && IsChannelTypeSupported( formatList[ f ].image_channel_data_type ) )
+                log_info( "  %-7s %-24s %d\n", GetChannelOrderName( formatList[ f ].image_channel_order ),
+                        GetChannelTypeName( formatList[ f ].image_channel_data_type ),
+                        (int)get_format_channel_count( &formatList[ f ] ) );
         }
-        log_info("------------------------------------------- \n");
+        log_info( "------------------------------------------- \n" );
         printedFormatList = test;
     }
 
@@ -355,9 +298,8 @@
     {
         if (gTypesToTest & test.type)
         {
-            std::vector<bool> filterFlags(formatList.size(), false);
-            if (filter_formats(formatList, filterFlags, test.channelTypes,
-                               gTestMipmaps)
+            if (filter_formats(formatList, filterFlags, numFormats,
+                               test.channelTypes, gTestMipmaps)
                 == 0)
             {
                 log_info("No formats supported for %s type\n", test.name);
@@ -366,7 +308,7 @@
             {
                 imageSampler.filter_mode = CL_FILTER_NEAREST;
                 ret += formatTestFn(device, context, queue, formatList,
-                                    filterFlags, &imageSampler,
+                                    filterFlags, numFormats, &imageSampler,
                                     test.explicitType, imageType);
 
                 // Linear filtering is only supported with floats
@@ -374,7 +316,7 @@
                 {
                     imageSampler.filter_mode = CL_FILTER_LINEAR;
                     ret += formatTestFn(device, context, queue, formatList,
-                                        filterFlags, &imageSampler,
+                                        filterFlags, numFormats, &imageSampler,
                                         test.explicitType, imageType);
                 }
             }

diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp
index c9ba4e8..e2e36a6 100644
--- a/test_conformance/images/kernel_read_write/test_read_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp

@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017, 2021 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -23,6 +23,21 @@
     #include <setjmp.h>
 #endif
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern bool            gUseKernelSamplers;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+extern cl_mem_flags gMemFlagsToUse;
+extern int gtestTypesToRun;
+extern bool gDeviceLt20;
+
+#define MAX_TRIES               1
+#define MAX_CLAMPED             1
+
 const char *read1DKernelSourcePattern =
 "__kernel void sample_kernel( read_only image1d_t input,%s __global float *xOffsets, __global %s4 *results %s)\n"
 "{\n"
@@ -51,6 +66,8 @@
 
 static const char *samplerKernelArg = " sampler_t imageSampler,";
 
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                            int x, int y, int z, float *outData );
 template <class T> int determine_validation_error_1D( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
                                                 T *resultPtr, T * expected, float error,
                                 float x, float xAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod )
@@ -167,6 +184,8 @@
     return 0;
 }
 
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
+
 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float xfract, int normalized_coords, MTdata d, int lod)
 {
     size_t i = 0;
@@ -209,6 +228,11 @@
     }
 }
 
+#ifndef MAX
+    #define MAX( _a, _b )           ((_a) > (_b) ? (_a) : (_b))
+#endif
+
+
 int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel kernel,
                         image_descriptor *imageInfo, image_sampler_data *imageSampler,
                        bool useFloatCoords, ExplicitType outputType, MTdata d )
@@ -370,14 +394,9 @@
     if( gDebugTrace )
         log_info( " - Creating kernel arguments...\n" );
 
-    xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width,
-                              xOffsetValues, &error);
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width, xOffsetValues, &error );
     test_error( error, "Unable to create x offset buffer" );
-    results = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                             get_explicit_type_size(outputType) * 4
-                                 * imageInfo->width,
-                             NULL, &error);
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width, NULL, &error );
     test_error( error, "Unable to create result buffer" );
 
     // Create sampler to use
@@ -979,11 +998,8 @@
     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
 }
 
-int test_read_image_set_1D(cl_device_id device, cl_context context,
-                           cl_command_queue queue,
-                           const cl_image_format *format,
-                           image_sampler_data *imageSampler, bool floatCoords,
-                           ExplicitType outputType)
+int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                        bool floatCoords, ExplicitType outputType )
 {
     char programSrc[10240];
     const char *ptr;
@@ -1052,8 +1068,7 @@
 
     ptr = programSrc;
 
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     if(error)
     {
         exit(1);

diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
index b3287de..eede817 100644
--- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp

@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017, 2021 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -22,6 +22,20 @@
 #include <setjmp.h>
 #endif
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool            gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern bool            gUseKernelSamplers;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+extern cl_mem_flags gMemFlagsToUse;
+extern int gtestTypesToRun;
+extern bool gDeviceLt20;
+
+#define MAX_TRIES               1
+#define MAX_CLAMPED             1
 
 const char *read1DArrayKernelSourcePattern =
 "__kernel void sample_kernel( read_only image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results %s)\n"
@@ -59,6 +73,12 @@
 
 static const char *samplerKernelArg = " sampler_t imageSampler,";
 
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                   int x, int y, int z, float *outData );
+
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                   int x, int y, int z, float *outData , int lod);
+
 template <class T> int determine_validation_error_1D_arr( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
                                                   T *resultPtr, T * expected, float error,
                                                   float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod )
@@ -198,6 +218,8 @@
     return 0;
 }
 
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
+
 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d , int lod)
 {
     size_t i = 0;
@@ -255,6 +277,11 @@
     }
 }
 
+#ifndef MAX
+#define MAX( _a, _b )           ((_a) > (_b) ? (_a) : (_b))
+#endif
+
+
 int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_kernel kernel,
                              image_descriptor *imageInfo, image_sampler_data *imageSampler,
                              bool useFloatCoords, ExplicitType outputType, MTdata d )
@@ -441,22 +468,16 @@
     if( gDebugTrace )
         log_info( " - Creating kernel arguments...\n" );
 
-    xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->arraySize,
-                              xOffsetValues, &error);
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+        sizeof( cl_float ) * imageInfo->width * imageInfo->arraySize, xOffsetValues, &error );
     test_error( error, "Unable to create x offset buffer" );
 
-    yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->arraySize,
-                              yOffsetValues, &error);
+    yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+        sizeof( cl_float ) * imageInfo->width * imageInfo->arraySize, yOffsetValues, &error );
     test_error( error, "Unable to create y offset buffer" );
 
-    results = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                             get_explicit_type_size(outputType) * 4
-                                 * imageInfo->width * imageInfo->arraySize,
-                             NULL, &error);
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+        get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->arraySize, NULL, &error );
     test_error( error, "Unable to create result buffer" );
 
     // Create sampler to use
@@ -1085,11 +1106,8 @@
     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
 }
 
-int test_read_image_set_1D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 const cl_image_format *format,
-                                 image_sampler_data *imageSampler,
-                                 bool floatCoords, ExplicitType outputType)
+int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                                 bool floatCoords, ExplicitType outputType )
 {
     char programSrc[10240];
     const char *ptr;
@@ -1158,8 +1176,7 @@
             gTestMipmaps ? ", lod" : "" );
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
 
     if( gTestSmallImages )

diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
index 7cb334b..79420b4 100644
--- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp

@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017, 2021 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -16,6 +16,19 @@
 #include "test_common.h"
 #include <float.h>
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool         gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern bool         gUseKernelSamplers;
+extern cl_filter_mode   gFilterModeToUse;
+extern cl_addressing_mode   gAddressModeToUse;
+extern cl_mem_flags gMemFlagsToUse;
+extern int gtestTypesToRun;
+extern bool gDeviceLt20;
+#define MAX_TRIES               1
+#define MAX_CLAMPED             1
+
 // Utility function to clamp down image sizes for certain tests to avoid
 // using too much memory.
 static size_t reduceImageSizeRange(size_t maxDimSize) {
@@ -75,6 +88,7 @@
 
 static const char *samplerKernelArg = " sampler_t imageSampler,";
 
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData );
 template <class T> int determine_validation_error_offset_2D_array( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
                                                          T *resultPtr, T * expected, float error,
                                                          float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod )
@@ -216,6 +230,8 @@
     return 0;
 }
 
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
+
 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float *zOffsets, float xfract, float yfract, float zfract, int normalized_coords, MTdata d , int lod)
 {
     size_t i = 0;
@@ -292,6 +308,10 @@
     }
 }
 
+#ifndef MAX // keep a MAX macro that was already defined before this point
+#define MAX(_a, _b)             ((_a) > (_b) ? (_a) : (_b)) // larger of the two; the selected argument is evaluated twice
+#endif // MAX
+
 int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_kernel kernel,
                        image_descriptor *imageInfo, image_sampler_data *imageSampler,
                        bool useFloatCoords, ExplicitType outputType, MTdata d )
@@ -458,26 +478,13 @@
         }
     }
 
-    xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->arraySize,
-                              xOffsetValues, &error);
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->arraySize, xOffsetValues, &error );
     test_error( error, "Unable to create x offset buffer" );
-    yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->arraySize,
-                              yOffsetValues, &error);
+    yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->arraySize, yOffsetValues, &error );
     test_error( error, "Unable to create y offset buffer" );
-    zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->arraySize,
-                              zOffsetValues, &error);
+    zOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->arraySize, zOffsetValues, &error );
     test_error( error, "Unable to create y offset buffer" );
-    results =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       get_explicit_type_size(outputType) * 4 * imageInfo->width
-                           * imageInfo->height * imageInfo->arraySize,
-                       NULL, &error);
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height * imageInfo->arraySize, NULL, &error );
     test_error( error, "Unable to create result buffer" );
 
     // Create sampler to use
@@ -1297,11 +1304,8 @@
     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
 }
 
-int test_read_image_set_2D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 const cl_image_format *format,
-                                 image_sampler_data *imageSampler,
-                                 bool floatCoords, ExplicitType outputType)
+int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                           bool floatCoords, ExplicitType outputType )
 {
     char programSrc[10240];
     const char *ptr;
@@ -1390,8 +1394,7 @@
             gTestMipmaps ? ", lod" : " " );
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
 
     // Run tests

diff --git a/test_conformance/images/kernel_read_write/test_read_3D.cpp b/test_conformance/images/kernel_read_write/test_read_3D.cpp
index 860114f..0b9e8de 100644
--- a/test_conformance/images/kernel_read_write/test_read_3D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_3D.cpp

@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017, 2021 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -16,6 +16,21 @@
 #include "test_common.h"
 #include <float.h>
 
+#define MAX_ERR 0.005f // NOTE(review): not referenced in the visible part of this file; presumably a float comparison tolerance - confirm
+#define MAX_HALF_LINEAR_ERR 0.3f // NOTE(review): not referenced in the visible part of this file; presumably the tolerance for linearly filtered half-float images - confirm
+// Harness-wide settings: only declared here, expected to be defined in another source file.
+extern bool            gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern bool            gUseKernelSamplers; // when false, test_read_image_3D passes the sampler as a kernel argument
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern cl_mem_flags gMemFlagsToUse; // selects how test_read_image_3D creates and fills the image (USE_HOST_PTR, COPY_HOST_PTR, anything else)
+// Bit mask of test kinds; the kReadTests bit makes test_read_image_3D create a read-only image.
+extern int gtestTypesToRun;
+extern bool gDeviceLt20;
+// Starting values of the two failure counters that determine_validation_error_offset counts down.
+#define MAX_TRIES               1 // generic mismatches reported before validation gives up
+#define MAX_CLAMPED             1 // clamping-related mismatches reported before validation gives up
+
 // Utility function to clamp down image sizes for certain tests to avoid
 // using too much memory.
 static size_t reduceImageSizeRange(size_t maxDimSize, RandomSeed& seed) {
@@ -73,12 +88,1083 @@
 
 static const char *samplerKernelArg = " sampler_t imageSampler,";
 
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData );
+template <class T> int determine_validation_error_offset( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
+                                                         T *resultPtr, T * expected, float error,
+                                                         float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod )
+{ // Diagnoses sample j whose result (resultPtr) did not match expected: logs the mismatch and returns -1 once numClamped or numTries has counted down to zero, otherwise returns 0.
+    int actualX, actualY, actualZ; // receives the position reported by debug_find_pixel_in_image
+    int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, &actualZ, lod ); // non-zero when the returned value exists in the image; > 1 is reported below as "NOT unique!!"
+    bool clampingErr = false, clamped = false, otherClampingBug = false;
+    int clampedX, clampedY, clampedZ; // integer coordinates filled in by get_integer_coords_offset
 
-int test_read_image_set_3D(cl_device_id device, cl_context context,
-                           cl_command_queue queue,
-                           const cl_image_format *format,
-                           image_sampler_data *imageSampler, bool floatCoords,
-                           ExplicitType outputType)
+    size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height, imageDepth = imageInfo->depth;
+    // NOTE(review): the messages below print the size_t index j with %ld; %zu would be the portable conversion - confirm before changing.
+    clamped = get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, imageWidth, imageHeight, imageDepth, imageSampler, imageInfo, clampedX, clampedY, clampedZ ); // presumably true when the coordinate had to be clamped - confirm
+
+    if( found )
+    {
+        // Is it a clamping bug? The returned value sits exactly at the clamped coordinate.
+        if( clamped && clampedX == actualX && clampedY == actualY && clampedZ == actualZ )
+        {
+            if( (--numClamped) == 0 ) // clamping-failure budget used up: report and fail
+            {
+                if( printAsFloat )
+                {
+                    log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%g,%g,%g,%g), got (%g,%g,%g,%g), error of %g\n",
+                              j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                              (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+                }
+                else
+                {
+                    log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n",
+                              j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                              (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+                }
+                log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates!\n" );
+                return -1;
+            }
+            clampingErr = true; // budget not exhausted yet: skips the generic report further down
+            otherClampingBug = true; // also skips the border-colour check that follows
+        }
+    }
+    if( clamped && !otherClampingBug )
+    {
+        // All four channels are zero while the coordinate was clamped: the border colour may have been returned erroneously (the addressing mode itself is not checked here)
+        if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
+        {
+            if( (--numClamped) == 0 ) // clamping-failure budget used up: report and fail
+            {
+                if( printAsFloat )
+                {
+                    log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%g,%g,%g,%g), got (%g,%g,%g,%g), error of %g\n",
+                              j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                              (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+                }
+                else
+                {
+                    log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n",
+                              j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                              (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+                }
+                log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color!\n" );
+                return -1;
+            }
+            clampingErr = true; // budget not exhausted yet: skips the generic report below
+        }
+    }
+    if( !clampingErr ) // neither clamping diagnosis applied: report a plain mismatch
+    {
+        /*        if( clamped && ( (int)x + (int)xOffsetValues[ j ] < 0 || (int)y + (int)yOffsetValues[ j ] < 0 ) )
+         {
+         log_error( "NEGATIVE COORDINATE ERROR\n" );
+         return -1;
+         }
+         */
+        if( true ) // gExtraValidateInfo )
+        {
+            if( printAsFloat )
+            {
+                log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\t     got (%g,%g,%g,%g), error of %g\n",
+                          j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                          (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+            }
+            else
+            {
+                log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\t     got (%x,%x,%x,%x)\n",
+                          j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                          (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+            }
+            log_error( "Integer coords resolve to %d,%d,%d   with img size %d,%d,%d\n", clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, (int)imageDepth );
+
+            if( printAsFloat && gExtraValidateInfo ) // dump the 3x3x3 neighbourhood around the resolved texel
+            {
+                log_error( "\nNearby values:\n" );
+                for( int zOff = -1; zOff <= 1; zOff++ )
+                {
+                    for( int yOff = -1; yOff <= 1; yOff++ )
+                    {
+                        float top[ 4 ], real[ 4 ], bot[ 4 ]; // texels at clampedX - 1, clampedX and clampedX + 1 of this row
+                        read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 , clampedY + yOff, clampedZ + zOff, top );
+                        read_image_pixel_float( imagePtr, imageInfo, clampedX ,clampedY + yOff, clampedZ + zOff, real );
+                        read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, clampedZ + zOff, bot );
+                        log_error( "\t(%g,%g,%g,%g)",top[0], top[1], top[2], top[3] );
+                        log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
+                        log_error( " (%g,%g,%g,%g)\n",bot[0], bot[1], bot[2], bot[3] );
+                    }
+                }
+            }
+            //        }
+            //        else
+            //            log_error( "\n" );
+            if( imageSampler->filter_mode != CL_FILTER_LINEAR ) // the lookup result is only reported when the sampler is not linearly filtering
+            {
+                if( found )
+                    log_error( "\tValue really found in image at %d,%d,%d (%s)\n", actualX, actualY, actualZ, ( found > 1 ) ? "NOT unique!!" : "unique" );
+                else
+                    log_error( "\tValue not actually found in image\n" );
+            }
+            log_error( "\n" );
+        }
+
+        numClamped = -1; // A negative counter cannot be decremented to 0, so the clamping checks above never fail the test from now on
+        if( ( --numTries ) == 0 ) // generic-failure budget used up
+            return -1;
+    }
+    return 0;
+}
+
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val)) // limits _val to [_min, _max]; each argument may be evaluated more than once
+
+static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float *zOffsets, float xfract, float yfract, float zfract, int normalized_coords, MTdata d , int lod)
+{ // Fills xOffsets/yOffsets/zOffsets with one sample coordinate per texel of the width x height x depth image: integer position (optionally jittered) plus xfract/yfract/zfract, then optionally clamped and normalized.
+    size_t i = 0; // flat output index; x advances fastest, then y, then z
+    if( gDisableOffsets ) // no jitter: each coordinate is the texel index plus the fractional part
+    {
+        for( size_t z = 0; z < imageInfo->depth; z++ )
+        {
+            for( size_t y = 0; y < imageInfo->height; y++ )
+            {
+                for( size_t x = 0; x < imageInfo->width; x++, i++ )
+                {
+                    xOffsets[ i ] = (float) (xfract + (double) x);
+                    yOffsets[ i ] = (float) (yfract + (double) y);
+                    zOffsets[ i ] = (float) (zfract + (double) z);
+                }
+            }
+        }
+    }
+    else // jitter every integer coordinate with random_in_range( -10, 10, d ); the draws happen in x, y, z order per texel
+    {
+        for( size_t z = 0; z < imageInfo->depth; z++ )
+        {
+            for( size_t y = 0; y < imageInfo->height; y++ )
+            {
+                for( size_t x = 0; x < imageInfo->width; x++, i++ )
+                {
+                    xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
+                    yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
+                    zOffsets[ i ] = (float) (zfract + (double) ((int) z + random_in_range( -10, 10, d )));
+                }
+            }
+        }
+    }
+    // With CL_ADDRESS_NONE every coordinate is pulled back into [0, dimension - 1]; presumably because out-of-range reads are not defined for that addressing mode - confirm.
+    if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
+    {
+        i = 0;
+        for( size_t z = 0; z < imageInfo->depth; z++ )
+        {
+            for( size_t y = 0; y < imageInfo->height; y++ )
+            {
+                for( size_t x = 0; x < imageInfo->width; x++, i++ )
+                {
+                    xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0);
+                    yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double) imageInfo->height - 1.0);
+                    zOffsets[ i ] = (float) CLAMP( (double) zOffsets[ i ], 0.0, (double) imageInfo->depth - 1.0);
+                }
+            }
+        }
+    }
+    // Normalization: applied when the caller asks for normalized coordinates, and also whenever mipmaps are being tested.
+    if( normalized_coords || gTestMipmaps)
+    {
+        i = 0;
+        if (lod == 0) // base level: divide by the full image dimensions
+        {
+            for( size_t z = 0; z < imageInfo->depth; z++ )
+            {
+                for( size_t y = 0; y < imageInfo->height; y++ )
+                {
+                    for( size_t x = 0; x < imageInfo->width; x++, i++ )
+                    {
+                        xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width);
+                        yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height);
+                        zOffsets[ i ] = (float) ((double) zOffsets[ i ] / (double) imageInfo->depth);
+                    }
+                }
+            }
+        }
+        else if (gTestMipmaps) // lod != 0 without gTestMipmaps leaves the values unnormalized
+        {
+            size_t width_lod, height_lod, depth_lod;
+            // Dimensions of this mip level: the base size shifted right by lod, but never less than 1.
+            width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1;
+            height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1;
+            depth_lod = (imageInfo->depth >> lod)?(imageInfo->depth >> lod):1;
+
+            for( size_t z = 0; z < depth_lod; z++ ) // only the first width_lod * height_lod * depth_lod entries are normalized
+            {
+                for( size_t y = 0; y < height_lod; y++ )
+                {
+                    for( size_t x = 0; x < width_lod; x++, i++ )
+                    {
+                        xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) width_lod);
+                        yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) height_lod);
+                        zOffsets[ i ] = (float) ((double) zOffsets[ i ] / (double) depth_lod);
+                    }
+                }
+            }
+        }
+    }
+}
+
+#ifndef MAX // keep a MAX macro that was already defined before this point
+#define MAX(_a, _b)             ((_a) > (_b) ? (_a) : (_b)) // larger of the two; the selected argument is evaluated twice
+#endif // MAX
+
+int test_read_image_3D( cl_context context, cl_command_queue queue, cl_kernel kernel,
+                       image_descriptor *imageInfo, image_sampler_data *imageSampler,
+                       bool useFloatCoords, ExplicitType outputType, MTdata d )
+{
+    int error;
+    size_t threads[3];
+    static int initHalf = 0;
+
+    cl_mem_flags    image_read_write_flags = CL_MEM_READ_ONLY;
+
+    clMemWrapper xOffsets, yOffsets, zOffsets, results;
+    clSamplerWrapper actualSampler;
+    BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
+
+    // Create offset data
+    BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth));
+    BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth));
+    BufferOwningPtr<cl_float> zOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth));
+
+    if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+        if( DetectFloatToHalfRoundingMode(queue) )
+            return 1;
+
+    BufferOwningPtr<char> imageValues;
+    generate_random_image_data( imageInfo, imageValues, d );
+
+    // Construct testing sources
+    clProtectedImage protImage;
+    clMemWrapper unprotImage;
+    cl_mem image;
+
+    if(gtestTypesToRun & kReadTests)
+    {
+        image_read_write_flags = CL_MEM_READ_ONLY;
+    }
+    else
+    {
+        image_read_write_flags = CL_MEM_READ_WRITE;
+    }
+
+    if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
+    {
+        // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
+        // Do not use protected images for max image size test since it rounds the row size to a page size
+        if (gTestMaxImages) {
+            generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
+            unprotImage = create_image_3d(  context,
+                                            image_read_write_flags | CL_MEM_USE_HOST_PTR,
+                                            imageInfo->format,
+                                            imageInfo->width,
+                                            imageInfo->height,
+                                            imageInfo->depth,
+                                            ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                            ( gEnablePitch ? imageInfo->slicePitch : 0 ), maxImageUseHostPtrBackingStore, &error );
+        } else {
+            error = protImage.Create( context,
+                                    (cl_mem_flags)(image_read_write_flags),
+                                    imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->depth );
+        }
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+            return error;
+        }
+        if (gTestMaxImages)
+            image = (cl_mem)unprotImage;
+        else
+            image = (cl_mem)protImage;
+    }
+    else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
+    {
+        // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
+        unprotImage = create_image_3d( context,
+                                      image_read_write_flags | CL_MEM_COPY_HOST_PTR,
+                                      imageInfo->format,
+                                      imageInfo->width,
+                                      imageInfo->height,
+                                      imageInfo->depth,
+                                      ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                      ( gEnablePitch ? imageInfo->slicePitch : 0 ),
+                                      imageValues, &error );
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+            return error;
+        }
+        image = unprotImage;
+    }
+    else // Either CL_MEM_ALLOC_HOST_PTR or none
+    {
+        // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
+        // it works just as if no flag is specified, so we just do the same thing either way
+        if ( !gTestMipmaps )
+        {
+            unprotImage = create_image_3d( context,
+                                          image_read_write_flags | gMemFlagsToUse,
+                                          imageInfo->format,
+                                          imageInfo->width, imageInfo->height, imageInfo->depth,
+                                          ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                          ( gEnablePitch ? imageInfo->slicePitch : 0 ),
+                                          imageValues, &error );
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+                return error;
+            }
+            image = unprotImage;
+        }
+        else
+        {
+            cl_image_desc image_desc = {0};
+            image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+            image_desc.image_width = imageInfo->width;
+            image_desc.image_height = imageInfo->height;
+            image_desc.image_depth = imageInfo->depth;
+            image_desc.num_mip_levels = imageInfo->num_mip_levels;
+
+
+            unprotImage = clCreateImage( context,
+                                        image_read_write_flags,
+                                        imageInfo->format, &image_desc, NULL, &error);
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %d x %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+                return error;
+            }
+            image = unprotImage;
+        }
+    }
+
+    if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
+    {
+        size_t origin[ 4 ] = { 0, 0, 0, 0};
+        size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->depth };
+
+        if( gDebugTrace )
+            log_info( " - Writing image...\n" );
+
+        if ( !gTestMipmaps )
+        {
+
+            error = clEnqueueWriteImage(queue, image, CL_TRUE,
+                                        origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0,
+                                        imageValues , 0, NULL, NULL);
+
+            if (error != CL_SUCCESS)
+            {
+                log_error( "ERROR: Unable to write to 3D image of size %d x %d x %d \n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth );
+                return error;
+            }
+        }
+        else
+        {
+            int nextLevelOffset = 0;
+
+            for (int i =0; i < imageInfo->num_mip_levels; i++)
+            {   origin[3] = i;
+                error = clEnqueueWriteImage(queue, image, CL_TRUE,
+                                            origin, region, /*gEnablePitch ? imageInfo->rowPitch :*/ 0, /*gEnablePitch ? imageInfo->slicePitch :*/ 0,
+                                            ((char*)imageValues + nextLevelOffset), 0, NULL, NULL);
+                if (error != CL_SUCCESS)
+                {
+                    log_error( "ERROR: Unable to write to %d level mipmapped 3D image of size %d x %d x %d\n", (int)imageInfo->num_mip_levels,(int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth );
+                    return error;
+                }
+                nextLevelOffset += region[0]*region[1]*region[2]*get_pixel_size(imageInfo->format);
+                //Subsequent mip level dimensions keep halving
+                region[0] = region[0] >> 1 ? region[0] >> 1 : 1;
+                region[1] = region[1] >> 1 ? region[1] >> 1 : 1;
+                region[2] = region[2] >> 1 ? region[2] >> 1 : 1;
+            }
+        }
+    }
+
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->depth, xOffsetValues, &error );
+    test_error( error, "Unable to create x offset buffer" );
+    yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->depth, yOffsetValues, &error );
+    test_error( error, "Unable to create y offset buffer" );
+    zOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->depth, zOffsetValues, &error );
+    test_error( error, "Unable to create y offset buffer" );
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height * imageInfo->depth, NULL, &error );
+    test_error( error, "Unable to create result buffer" );
+
+    // Create sampler to use
+    actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
+    test_error(error, "Unable to create image sampler");
+
+    // Set arguments
+    int idx = 0;
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
+    test_error( error, "Unable to set kernel arguments" );
+    if( !gUseKernelSamplers )
+    {
+        error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
+        test_error( error, "Unable to set kernel arguments" );
+    }
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &zOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
+    test_error( error, "Unable to set kernel arguments" );
+
+    const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
+    int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
+    int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
+    int loopCount = 2 * float_offset_count;
+    if( ! useFloatCoords )
+        loopCount = 1;
+    if (gTestMaxImages) {
+        loopCount = 1;
+        log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
+    }
+
+    // Get the maximum absolute error for this format
+    double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler);
+    if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
+
+    if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
+        initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
+        if (initHalf) {
+            log_info("Half rounding mode successfully detected.\n");
+        }
+    }
+
+    int nextLevelOffset = 0;
+    size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth;
+
+    //Loop over all mipmap levels, if we are testing mipmapped images.
+    for(int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++)
+    {
+        size_t resultValuesSize = width_lod * height_lod * depth_lod * get_explicit_type_size( outputType ) * 4;
+        BufferOwningPtr<char> resultValues(malloc( resultValuesSize ));
+        float lod_float = (float)lod;
+        if (gTestMipmaps) {
+            //Set the lod kernel arg
+            if(gDebugTrace)
+                log_info(" - Working at mip level %d\n", lod);
+            error = clSetKernelArg( kernel, idx, sizeof( float ), &lod_float);
+            test_error( error, "Unable to set kernel arguments" );
+        }
+
+    for( int q = 0; q < loopCount; q++ )
+    {
+        float offset = float_offsets[ q % float_offset_count ];
+
+        // Init the coordinates
+        InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues, zOffsetValues,
+                        q>=float_offset_count ? -offset: offset,
+                        q>=float_offset_count ? offset: -offset,
+                        q>=float_offset_count ? -offset: offset,
+                        imageSampler->normalized_coords, d, lod );
+
+        error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, xOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write x offsets" );
+        error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, yOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write y offsets" );
+        error = clEnqueueWriteBuffer( queue, zOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, zOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write z offsets" );
+
+
+        memset( resultValues, 0xff, resultValuesSize );
+        clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
+
+        // Figure out thread dimensions
+        threads[0] = (size_t)width_lod;
+        threads[1] = (size_t)height_lod;
+        threads[2] = (size_t)depth_lod;
+
+        // Run the kernel
+        error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
+        test_error( error, "Unable to run kernel" );
+
+        // Get results
+        error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * depth_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL );
+        test_error( error, "Unable to read results from kernel" );
+        if( gDebugTrace )
+            log_info( "    results read\n" );
+
+        // Validate results element by element
+        char *imagePtr = (char*)imageValues + nextLevelOffset;
+        /*
+         * FLOAT output type
+         */
+        if(is_sRGBA_order(imageInfo->format->image_channel_order) && (outputType == kFloat) )
+        {
+            // Validate float results
+            float *resultPtr = (float *)(char *)resultValues;
+            float expected[4], error=0.0f;
+            float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
+
+            for( size_t z = 0, j = 0; z < depth_lod; z++ )
+            {
+                for( size_t y = 0; y < height_lod; y++ )
+                {
+                    for( size_t x = 0; x < width_lod; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        float offset = NORM_OFFSET;
+                        if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+#if defined( __APPLE__ )
+                            // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
+                            || gDeviceType != CL_DEVICE_TYPE_GPU
+#endif
+                            )
+                            offset = 0.0f;          // Loop only once
+
+                        for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) {
+
+                                    int hasDenormals = 0;
+                                    FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                          xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                          norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                          imageSampler, expected, 0, &hasDenormals, lod );
+
+                                    float err1 =
+                                        ABS_ERROR(sRGBmap(resultPtr[0]),
+                                                  sRGBmap(expected[0]));
+                                    float err2 =
+                                        ABS_ERROR(sRGBmap(resultPtr[1]),
+                                                  sRGBmap(expected[1]));
+                                    float err3 =
+                                        ABS_ERROR(sRGBmap(resultPtr[2]),
+                                                  sRGBmap(expected[2]));
+                                    float err4 =
+                                        ABS_ERROR(resultPtr[3], expected[3]);
+                                    // Clamp to the minimum absolute error for the format
+                                    if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
+                                    if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
+                                    if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
+                                    if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
+                                    float maxErr = 0.5;
+
+                                    if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    || ! (err3 <= maxErr) || ! (err4 <= maxErr) )
+                                    {
+                                        // Try flushing the denormals
+                                        if( hasDenormals )
+                                        {
+                                            // If the implementation decides to flush subnormals to zero,
+                                            // the max error needs to be adjusted
+                                              maxErr += 4 * FLT_MIN;
+
+                                            maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                       xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                       norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                       imageSampler, expected, 0, NULL, lod );
+
+                                            err1 =
+                                                ABS_ERROR(sRGBmap(resultPtr[0]),
+                                                          sRGBmap(expected[0]));
+                                            err2 =
+                                                ABS_ERROR(sRGBmap(resultPtr[1]),
+                                                          sRGBmap(expected[1]));
+                                            err3 =
+                                                ABS_ERROR(sRGBmap(resultPtr[2]),
+                                                          sRGBmap(expected[2]));
+                                            err4 = ABS_ERROR(resultPtr[3],
+                                                             expected[3]);
+                                        }
+                                    }
+
+                                    found_pixel = (err1 <= maxErr) && (err2 <= maxErr)  && (err3 <= maxErr) && (err4 <= maxErr);
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+
+                                        int hasDenormals = 0;
+                                        FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                              norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                              imageSampler, expected, 0, &hasDenormals, lod );
+
+                                        float err1 =
+                                            ABS_ERROR(sRGBmap(resultPtr[0]),
+                                                      sRGBmap(expected[0]));
+                                        float err2 =
+                                            ABS_ERROR(sRGBmap(resultPtr[1]),
+                                                      sRGBmap(expected[1]));
+                                        float err3 =
+                                            ABS_ERROR(sRGBmap(resultPtr[2]),
+                                                      sRGBmap(expected[2]));
+                                        float err4 = ABS_ERROR(resultPtr[3],
+                                                               expected[3]);
+                                        float maxErr = 0.6;
+
+                                        if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    || ! (err3 <= maxErr) || ! (err4 <= maxErr) )
+                                        {
+                                            // Try flushing the denormals
+                                            if( hasDenormals )
+                                            {
+                                                // If the implementation decides to flush subnormals to zero,
+                                                // the max error needs to be adjusted
+                                                  maxErr += 4 * FLT_MIN;
+
+                                                maxPixel = sample_image_pixel_float( imagePtr, imageInfo,
+                                                                                    xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                    imageSampler, expected, 0, NULL, lod );
+
+                                                err1 = ABS_ERROR(
+                                                    sRGBmap(resultPtr[0]),
+                                                    sRGBmap(expected[0]));
+                                                err2 = ABS_ERROR(
+                                                    sRGBmap(resultPtr[1]),
+                                                    sRGBmap(expected[1]));
+                                                err3 = ABS_ERROR(
+                                                    sRGBmap(resultPtr[2]),
+                                                    sRGBmap(expected[2]));
+                                                err4 = ABS_ERROR(resultPtr[3],
+                                                                 expected[3]);
+                                            }
+                                        }
+
+                                        if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    || ! (err3 <= maxErr) || ! (err4 <= maxErr) )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+
+                                            float tempOut[4];
+                                            shouldReturn |= determine_validation_error_offset<float>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                     expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
+                                                                                                     norm_offset_x, norm_offset_y, norm_offset_z, j,
+                                                                                                     numTries, numClamped, true, lod );
+                                            log_error( "Step by step:\n" );
+                                            FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                              norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                              imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod);
+                                            log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",
+                                                      Ulp_Error( resultPtr[0], expected[0] ),
+                                                      Ulp_Error( resultPtr[1], expected[1] ),
+                                                      Ulp_Error( resultPtr[2], expected[2] ),
+                                                      Ulp_Error( resultPtr[3], expected[3] ),
+                                                      Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+
+                        resultPtr += 4;
+                    }
+                }
+            }
+        }
+        /*
+         * FLOAT output type
+         */
+        else if( outputType == kFloat )
+        {
+            // Validate float results
+            float *resultPtr = (float *)(char *)resultValues;
+            float expected[4], error=0.0f;
+            float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
+
+            for( size_t z = 0, j = 0; z < depth_lod; z++ )
+            {
+                for( size_t y = 0; y < height_lod; y++ )
+                {
+                    for( size_t x = 0; x < width_lod; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        float offset = NORM_OFFSET;
+                        if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+#if defined( __APPLE__ )
+                            // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
+                            || gDeviceType != CL_DEVICE_TYPE_GPU
+#endif
+                            )
+                            offset = 0.0f;          // Loop only once
+
+                        for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) {
+
+                                    int hasDenormals = 0;
+                                    FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                          xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                          norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                          imageSampler, expected, 0, &hasDenormals, lod );
+
+                                    float err1 =
+                                        ABS_ERROR(resultPtr[0], expected[0]);
+                                    float err2 =
+                                        ABS_ERROR(resultPtr[1], expected[1]);
+                                    float err3 =
+                                        ABS_ERROR(resultPtr[2], expected[2]);
+                                    float err4 =
+                                        ABS_ERROR(resultPtr[3], expected[3]);
+                                    // Clamp to the minimum absolute error for the format
+                                    if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
+                                    if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
+                                    if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
+                                    if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
+                                    float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                                    float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                                    float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                                    float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+
+                                    if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
+                                    {
+                                        // Try flushing the denormals
+                                        if( hasDenormals )
+                                        {
+                                            // If the implementation decides to flush subnormals to zero,
+                                            // the max error needs to be adjusted
+                                              maxErr1 += 4 * FLT_MIN;
+                                            maxErr2 += 4 * FLT_MIN;
+                                            maxErr3 += 4 * FLT_MIN;
+                                            maxErr4 += 4 * FLT_MIN;
+
+                                            maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                       xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                       norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                       imageSampler, expected, 0, NULL, lod );
+
+                                            err1 = ABS_ERROR(resultPtr[0],
+                                                             expected[0]);
+                                            err2 = ABS_ERROR(resultPtr[1],
+                                                             expected[1]);
+                                            err3 = ABS_ERROR(resultPtr[2],
+                                                             expected[2]);
+                                            err4 = ABS_ERROR(resultPtr[3],
+                                                             expected[3]);
+                                        }
+                                    }
+
+                                    found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2)  && (err3 <= maxErr3) && (err4 <= maxErr4);
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+
+                                        int hasDenormals = 0;
+                                        FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                              norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                              imageSampler, expected, 0, &hasDenormals, lod );
+
+                                        float err1 = ABS_ERROR(resultPtr[0],
+                                                               expected[0]);
+                                        float err2 = ABS_ERROR(resultPtr[1],
+                                                               expected[1]);
+                                        float err3 = ABS_ERROR(resultPtr[2],
+                                                               expected[2]);
+                                        float err4 = ABS_ERROR(resultPtr[3],
+                                                               expected[3]);
+                                        float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                                        float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                                        float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                                        float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+
+
+                                        if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
+                                        {
+                                            // Try flushing the denormals
+                                            if( hasDenormals )
+                                            {
+                                                  maxErr1 += 4 * FLT_MIN;
+                                                maxErr2 += 4 * FLT_MIN;
+                                                maxErr3 += 4 * FLT_MIN;
+                                                maxErr4 += 4 * FLT_MIN;
+
+                                                maxPixel = sample_image_pixel_float( imagePtr, imageInfo,
+                                                                                    xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                    imageSampler, expected, 0, NULL, lod );
+
+                                                err1 = ABS_ERROR(resultPtr[0],
+                                                                 expected[0]);
+                                                err2 = ABS_ERROR(resultPtr[1],
+                                                                 expected[1]);
+                                                err3 = ABS_ERROR(resultPtr[2],
+                                                                 expected[2]);
+                                                err4 = ABS_ERROR(resultPtr[3],
+                                                                 expected[3]);
+                                            }
+                                        }
+
+                                        if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+
+                                            float tempOut[4];
+                                            shouldReturn |= determine_validation_error_offset<float>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                     expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
+                                                                                                     norm_offset_x, norm_offset_y, norm_offset_z, j,
+                                                                                                     numTries, numClamped, true, lod );
+                                            log_error( "Step by step:\n" );
+                                            FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                              norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                              imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod);
+                                            log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",
+                                                      Ulp_Error( resultPtr[0], expected[0] ),
+                                                      Ulp_Error( resultPtr[1], expected[1] ),
+                                                      Ulp_Error( resultPtr[2], expected[2] ),
+                                                      Ulp_Error( resultPtr[3], expected[3] ),
+                                                      Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+
+                        resultPtr += 4;
+                    }
+                }
+            }
+        }
+        /*
+         * UINT output type
+         */
+        else if( outputType == kUInt )
+        {
+            // Validate unsigned integer results
+            unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
+            unsigned int expected[4];
+            float error;
+            for( size_t z = 0, j = 0; z < depth_lod; z++ )
+            {
+                for( size_t y = 0; y < height_lod; y++ )
+                {
+                    for( size_t x = 0; x < width_lod; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+
+                                    // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0, 0.0)
+                                    // I.e., test one pixel.
+                                    if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                        norm_offset_x = 0.0f;
+                                        norm_offset_y = 0.0f;
+                                        norm_offset_z = 0.0f;
+                                        checkOnlyOnePixel = 1;
+                                    }
+
+                                    sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
+                                                                            xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                            norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                            imageSampler, expected, lod );
+
+                                    error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                   errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+
+                                    if (error < MAX_ERR)
+                                        found_pixel = 1;
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+
+                                        // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0, 0.0)
+                                        // I.e., test one pixel.
+                                        if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                            norm_offset_x = 0.0f;
+                                            norm_offset_y = 0.0f;
+                                            norm_offset_z = 0.0f;
+                                            checkOnlyOnePixel = 1;
+                                        }
+
+                                        sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
+                                                                                xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                imageSampler, expected, lod );
+
+                                        error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                       errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+
+                                        if( error > MAX_ERR )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+                                            shouldReturn |=  determine_validation_error_offset<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                             expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
+                                                                                                             norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                                             j, numTries, numClamped, false, lod );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+
+                        resultPtr += 4;
+                    }
+                }
+            }
+        }
+        else
+        /*
+         * INT output type
+         */
+        {
+            // Validate integer results
+            int *resultPtr = (int *)(char *)resultValues;
+            int expected[4];
+            float error;
+            for( size_t z = 0, j = 0; z < depth_lod; z++ )
+            {
+                for( size_t y = 0; y < height_lod; y++ )
+                {
+                    for( size_t x = 0; x < width_lod; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+
+                                    // If we are not on a GPU, the coordinates are not normalized, or NORM_OFFSET is 0, then only test with offsets (0.0, 0.0, 0.0),
+                                    // i.e., test a single pixel.
+                                    if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                        norm_offset_x = 0.0f;
+                                        norm_offset_y = 0.0f;
+                                        norm_offset_z = 0.0f;
+                                        checkOnlyOnePixel = 1;
+                                    }
+
+                                    sample_image_pixel_offset<int>( imagePtr, imageInfo,
+                                                                   xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                   norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                   imageSampler, expected, lod );
+
+                                    error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                   errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+
+                                    if (error < MAX_ERR)
+                                        found_pixel = 1;
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+
+                                        // If we are not on a GPU, the coordinates are not normalized, or NORM_OFFSET is 0, then only test with offsets (0.0, 0.0, 0.0),
+                                        // i.e., test a single pixel.
+                                        if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0 || NORM_OFFSET == 0 || NORM_OFFSET == 0) {
+                                            norm_offset_x = 0.0f;
+                                            norm_offset_y = 0.0f;
+                                            norm_offset_z = 0.0f;
+                                            checkOnlyOnePixel = 1;
+                                        }
+
+                                        sample_image_pixel_offset<int>( imagePtr, imageInfo,
+                                                                       xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                       norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                       imageSampler, expected, lod );
+
+                                        error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                       errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+
+                                        if( error > MAX_ERR )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+                                            shouldReturn |=  determine_validation_error_offset<int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                    expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
+                                                                                                    norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                                    j, numTries, numClamped, false, lod );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+
+                        resultPtr += 4;
+                        }
+                    }
+                }
+            }
+        }
+        {
+            nextLevelOffset += width_lod * height_lod * depth_lod * get_pixel_size(imageInfo->format);
+            width_lod = ( width_lod >> 1) ?( width_lod >> 1) : 1;
+            height_lod = ( height_lod >> 1) ?( height_lod >> 1) : 1;
+            depth_lod = ( depth_lod >> 1) ?( depth_lod >> 1) : 1;
+        }
+    }
+
+    return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
+}
+
+int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                           bool floatCoords, ExplicitType outputType )
 {
     char programSrc[10240];
     const char *ptr;
@@ -148,8 +1234,7 @@
             gTestMipmaps? ",lod":" ");
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
 
     // Run tests
@@ -169,9 +1254,7 @@
 
                     if( gDebugTrace )
                         log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );
-                    int retCode = test_read_image(
-                        context, queue, kernel, &imageInfo, imageSampler,
-                        floatCoords, outputType, seed);
+                    int retCode = test_read_image_3D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
                     if( retCode )
                         return retCode;
                 }
@@ -198,9 +1281,7 @@
             log_info("Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ]);
             if( gDebugTrace )
                 log_info( "   at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
-            int retCode =
-                test_read_image(context, queue, kernel, &imageInfo,
-                                imageSampler, floatCoords, outputType, seed);
+            int retCode = test_read_image_3D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
             if( retCode )
                 return retCode;
         }
@@ -214,9 +1295,7 @@
 
         imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
         imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
-        int retCode =
-            test_read_image(context, queue, kernel, &imageInfo, imageSampler,
-                            floatCoords, outputType, seed);
+        int retCode = test_read_image_3D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
         if( retCode )
             return retCode;
     }
@@ -266,9 +1345,7 @@
                 if ( gTestMipmaps )
                     log_info( "   and number of mip levels :%d\n", (int)imageInfo.num_mip_levels );
             }
-            int retCode =
-                test_read_image(context, queue, kernel, &imageInfo,
-                                imageSampler, floatCoords, outputType, seed);
+            int retCode = test_read_image_3D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
             if( retCode )
                 return retCode;
         }

diff --git a/test_conformance/images/kernel_read_write/test_write_1D.cpp b/test_conformance/images/kernel_read_write/test_write_1D.cpp
index 41983ed..ca02262 100644
--- a/test_conformance/images/kernel_read_write/test_write_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp

@@ -19,8 +19,12 @@
 #include <sys/mman.h>
 #endif
 
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToSkip;
 extern cl_mem_flags gMemFlagsToUse;
+
 extern int gtestTypesToRun;
+extern bool gDeviceLt20;
 
 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor* imageInfo );
@@ -267,10 +271,8 @@
             clMemWrapper inputStream;
 
             char *imagePtrOffset = imageValues + nextLevelOffset;
-            inputStream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                         get_explicit_type_size(inputType) * 4
-                                             * width_lod,
-                                         imagePtrOffset, &error);
+            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+                                     get_explicit_type_size( inputType ) * 4 * width_lod, imagePtrOffset, &error );
             test_error( error, "Unable to create input buffer" );
 
             // Set arguments
@@ -470,18 +472,8 @@
                                         log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                         break;
                                     case CL_HALF_FLOAT:
-                                        log_error("    Expected: 0x%4.4x "
-                                                  "0x%4.4x 0x%4.4x 0x%4.4x\n",
-                                                  ((cl_half *)resultBuffer)[0],
-                                                  ((cl_half *)resultBuffer)[1],
-                                                  ((cl_half *)resultBuffer)[2],
-                                                  ((cl_half *)resultBuffer)[3]);
-                                        log_error("    Actual:   0x%4.4x "
-                                                  "0x%4.4x 0x%4.4x 0x%4.4x\n",
-                                                  ((cl_half *)resultPtr)[0],
-                                                  ((cl_half *)resultPtr)[1],
-                                                  ((cl_half *)resultPtr)[2],
-                                                  ((cl_half *)resultPtr)[3]);
+                                        log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                         log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                         break;
                                     case CL_UNSIGNED_INT32:
@@ -521,10 +513,7 @@
     return totalErrors;
 }
 
-int test_write_image_1D_set(cl_device_id device, cl_context context,
-                            cl_command_queue queue,
-                            const cl_image_format *format,
-                            ExplicitType inputType, MTdata d)
+int test_write_image_1D_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d )
 {
     char programSrc[10240];
     const char *ptr;
@@ -582,8 +571,7 @@
              gTestMipmaps ? ", lod" :"" );
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
 
     // Run tests

diff --git a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
index c771704..b91bf1c 100644
--- a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp

@@ -19,8 +19,11 @@
 #include <sys/mman.h>
 #endif
 
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToSkip;
 extern cl_mem_flags gMemFlagsToUse;
 extern int gtestTypesToRun;
+extern bool gDeviceLt20;
 
 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
@@ -282,10 +285,8 @@
             clMemWrapper inputStream;
 
             char *imagePtrOffset = imageValues + nextLevelOffset;
-            inputStream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                         get_explicit_type_size(inputType) * 4
-                                             * width_lod * imageInfo->arraySize,
-                                         imagePtrOffset, &error);
+            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+                                     get_explicit_type_size( inputType ) * 4 * width_lod * imageInfo->arraySize, imagePtrOffset, &error );
             test_error( error, "Unable to create input buffer" );
 
             // Set arguments
@@ -490,18 +491,8 @@
                                         log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                         break;
                                     case CL_HALF_FLOAT:
-                                        log_error("    Expected: 0x%4.4x "
-                                                  "0x%4.4x 0x%4.4x 0x%4.4x\n",
-                                                  ((cl_half *)resultBuffer)[0],
-                                                  ((cl_half *)resultBuffer)[1],
-                                                  ((cl_half *)resultBuffer)[2],
-                                                  ((cl_half *)resultBuffer)[3]);
-                                        log_error("    Actual:   0x%4.4x "
-                                                  "0x%4.4x 0x%4.4x 0x%4.4x\n",
-                                                  ((cl_half *)resultPtr)[0],
-                                                  ((cl_half *)resultPtr)[1],
-                                                  ((cl_half *)resultPtr)[2],
-                                                  ((cl_half *)resultPtr)[3]);
+                                        log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                         log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                         break;
                                     case CL_UNSIGNED_INT32:
@@ -542,10 +533,7 @@
 }
 
 
-int test_write_image_1D_array_set(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  ExplicitType inputType, MTdata d)
+int test_write_image_1D_array_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d )
 {
     char programSrc[10240];
     const char *ptr;
@@ -605,8 +593,7 @@
              gTestMipmaps ? ", lod" :"" );
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
 
     // Run tests

diff --git a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
index 08a7a80..4524c6c 100644
--- a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp

@@ -19,8 +19,12 @@
 #include <sys/mman.h>
 #endif
 
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToSkip;
 extern cl_mem_flags gMemFlagsToUse;
+
 extern int gtestTypesToRun;
+extern bool gDeviceLt20;
 
 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
@@ -301,11 +305,8 @@
             clMemWrapper inputStream;
 
             char *imagePtrOffset = imageValues + nextLevelOffset;
-            inputStream =
-                clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                               get_explicit_type_size(inputType) * 4 * width_lod
-                                   * height_lod * imageInfo->arraySize,
-                               imagePtrOffset, &error);
+            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+                                     get_explicit_type_size( inputType ) * 4 * width_lod * height_lod * imageInfo->arraySize, imagePtrOffset, &error );
             test_error( error, "Unable to create input buffer" );
 
             // Set arguments
@@ -513,20 +514,8 @@
                                             log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                             break;
                                         case CL_HALF_FLOAT:
-                                            log_error(
-                                                "    Expected: 0x%4.4x 0x%4.4x "
-                                                "0x%4.4x 0x%4.4x\n",
-                                                ((cl_half *)resultBuffer)[0],
-                                                ((cl_half *)resultBuffer)[1],
-                                                ((cl_half *)resultBuffer)[2],
-                                                ((cl_half *)resultBuffer)[3]);
-                                            log_error(
-                                                "    Actual:   0x%4.4x 0x%4.4x "
-                                                "0x%4.4x 0x%4.4x\n",
-                                                ((cl_half *)resultPtr)[0],
-                                                ((cl_half *)resultPtr)[1],
-                                                ((cl_half *)resultPtr)[2],
-                                                ((cl_half *)resultPtr)[3]);
+                                            log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                            log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                             log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                             break;
                                         case CL_UNSIGNED_INT32:
@@ -568,10 +557,7 @@
 }
 
 
-int test_write_image_2D_array_set(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  ExplicitType inputType, MTdata d)
+int test_write_image_2D_array_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d )
 {
     char programSrc[10240];
     const char *ptr;
@@ -632,8 +618,7 @@
              gTestMipmaps ? ", lod" : "" );
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
 
     // Run tests

diff --git a/test_conformance/images/kernel_read_write/test_write_3D.cpp b/test_conformance/images/kernel_read_write/test_write_3D.cpp
index 5cc96bb..7440bd6 100644
--- a/test_conformance/images/kernel_read_write/test_write_3D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp

@@ -19,8 +19,12 @@
 #include <sys/mman.h>
 #endif
 
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToSkip;
 extern cl_mem_flags gMemFlagsToUse;
+
 extern int gtestTypesToRun;
+extern bool gDeviceLt20;
 
 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
@@ -307,11 +311,8 @@
             clMemWrapper inputStream;
 
             char *imagePtrOffset = imageValues + nextLevelOffset;
-            inputStream =
-                clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                               get_explicit_type_size(inputType) * 4 * width_lod
-                                   * height_lod * depth_lod,
-                               imagePtrOffset, &error);
+            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+                                     get_explicit_type_size( inputType ) * 4 * width_lod * height_lod * depth_lod, imagePtrOffset, &error );
             test_error( error, "Unable to create input buffer" );
 
             // Set arguments
@@ -520,20 +521,8 @@
                                             log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                             break;
                                         case CL_HALF_FLOAT:
-                                            log_error(
-                                                "    Expected: 0x%4.4x 0x%4.4x "
-                                                "0x%4.4x 0x%4.4x\n",
-                                                ((cl_half *)resultBuffer)[0],
-                                                ((cl_half *)resultBuffer)[1],
-                                                ((cl_half *)resultBuffer)[2],
-                                                ((cl_half *)resultBuffer)[3]);
-                                            log_error(
-                                                "    Actual:   0x%4.4x 0x%4.4x "
-                                                "0x%4.4x 0x%4.4x\n",
-                                                ((cl_half *)resultPtr)[0],
-                                                ((cl_half *)resultPtr)[1],
-                                                ((cl_half *)resultPtr)[2],
-                                                ((cl_half *)resultPtr)[3]);
+                                            log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                            log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                             log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                             break;
                                         case CL_UNSIGNED_INT32:
@@ -576,10 +565,7 @@
 }
 
 
-int test_write_image_3D_set(cl_device_id device, cl_context context,
-                            cl_command_queue queue,
-                            const cl_image_format *format,
-                            ExplicitType inputType, MTdata d)
+int test_write_image_3D_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d )
 {
     char programSrc[10240];
     const char *ptr;
@@ -638,8 +624,7 @@
              gTestMipmaps ? ", lod" : "" );
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
 
     // Run tests

diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp
index e40e80d..f6d9235 100644
--- a/test_conformance/images/kernel_read_write/test_write_image.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_image.cpp

@@ -19,28 +19,16 @@
 #include <sys/mman.h>
 #endif
 
-extern bool gTestImage2DFromBuffer;
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestImage2DFromBuffer, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToSkip;
 extern cl_mem_flags gMemFlagsToUse;
 extern int gtestTypesToRun;
+extern bool gDeviceLt20;
 
-extern int test_write_image_1D_set(cl_device_id device, cl_context context,
-                                   cl_command_queue queue,
-                                   const cl_image_format *format,
-                                   ExplicitType inputType, MTdata d);
-extern int test_write_image_3D_set(cl_device_id device, cl_context context,
-                                   cl_command_queue queue,
-                                   const cl_image_format *format,
-                                   ExplicitType inputType, MTdata d);
-extern int test_write_image_1D_array_set(cl_device_id device,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         const cl_image_format *format,
-                                         ExplicitType inputType, MTdata d);
-extern int test_write_image_2D_array_set(cl_device_id device,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         const cl_image_format *format,
-                                         ExplicitType inputType, MTdata d);
+extern int test_write_image_1D_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d );
+extern int test_write_image_3D_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d );
+extern int test_write_image_1D_array_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d );
+extern int test_write_image_2D_array_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d );
 
 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
@@ -345,11 +333,8 @@
 
             char *imagePtrOffset = imageValues + nextLevelOffset;
 
-            inputStream =
-                clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                               get_explicit_type_size(inputType) * channel_scale
-                                   * width_lod * height_lod,
-                               imagePtrOffset, &error);
+            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+                                     get_explicit_type_size( inputType ) * channel_scale * width_lod * height_lod, imagePtrOffset, &error );
             test_error( error, "Unable to create input buffer" );
 
             // Set arguments
@@ -553,18 +538,8 @@
                                         log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                         break;
                                     case CL_HALF_FLOAT:
-                                        log_error("    Expected: 0x%4.4x "
-                                                  "0x%4.4x 0x%4.4x 0x%4.4x\n",
-                                                  ((cl_half *)resultBuffer)[0],
-                                                  ((cl_half *)resultBuffer)[1],
-                                                  ((cl_half *)resultBuffer)[2],
-                                                  ((cl_half *)resultBuffer)[3]);
-                                        log_error("    Actual:   0x%4.4x "
-                                                  "0x%4.4x 0x%4.4x 0x%4.4x\n",
-                                                  ((cl_half *)resultPtr)[0],
-                                                  ((cl_half *)resultPtr)[1],
-                                                  ((cl_half *)resultPtr)[2],
-                                                  ((cl_half *)resultPtr)[3]);
+                                        log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                         log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                         break;
                                     case CL_UNSIGNED_INT32:
@@ -609,9 +584,7 @@
 }
 
 
-int test_write_image_set(cl_device_id device, cl_context context,
-                         cl_command_queue queue, const cl_image_format *format,
-                         ExplicitType inputType, MTdata d)
+int test_write_image_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d )
 {
     char programSrc[10240];
     const char *ptr;
@@ -697,8 +670,7 @@
              gTestMipmaps ? ", lod" : "" );
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
 
     // Run tests
@@ -813,13 +785,8 @@
     return 0;
 }
 
-int test_write_image_formats(cl_device_id device, cl_context context,
-                             cl_command_queue queue,
-                             const std::vector<cl_image_format> &formatList,
-                             const std::vector<bool> &filterFlags,
-                             image_sampler_data *imageSampler,
-                             ExplicitType inputType,
-                             cl_mem_object_type imageType)
+int test_write_image_formats( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+                             image_sampler_data *imageSampler, ExplicitType inputType, cl_mem_object_type imageType )
 {
     if( imageSampler->filter_mode == CL_FILTER_LINEAR )
         // No need to run for linear filters
@@ -832,9 +799,9 @@
 
     RandomSeed seed( gRandomSeed );
 
-    for (unsigned int i = 0; i < formatList.size(); i++)
+    for( unsigned int i = 0; i < numFormats; i++ )
     {
-        const cl_image_format &imageFormat = formatList[i];
+        cl_image_format &imageFormat = formatList[ i ];
 
         if( filterFlags[ i ] )
             continue;

diff --git a/test_conformance/images/samplerlessReads/main.cpp b/test_conformance/images/samplerlessReads/main.cpp
index cd37779..cc882f3 100644
--- a/test_conformance/images/samplerlessReads/main.cpp
+++ b/test_conformance/images/samplerlessReads/main.cpp

@@ -13,11 +13,17 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
+#include "../harness/compat.h"
 
 #include <stdio.h>
 #include <string.h>
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
+#endif
+
 #include "../testBase.h"
-#include "../harness/compat.h"
 #include "../harness/fpcontrol.h"
 #include "../harness/parseParameters.h"
 
@@ -36,6 +42,9 @@
 cl_channel_type     gChannelTypeToUse = (cl_channel_type)-1;
 cl_channel_order    gChannelOrderToUse = (cl_channel_order)-1;
 bool                gEnablePitch = false;
+bool                gDeviceLt20 = false;
+
+#define MAX_ALLOWED_STD_DEVIATION_IN_MB        8.0
 
 static void printUsage( const char *execName );
 
@@ -151,8 +160,7 @@
     FPU_mode_type oldMode;
     DisableFTZ(&oldMode);
 
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
 
     // Restore FP state before leaving
     RestoreFPState(&oldMode);
@@ -178,7 +186,7 @@
     log_info( "You may also use appropriate CL_ channel type and ordering constants.\n" );
     log_info( "\n" );
     log_info( "\tThe following modify the types of images tested:\n" );
-    log_info( "\t\tread_write - Runs the tests with read_write images which allow a kernel do both read and write to the same image \n" );
+    log_info( "\t\read_write - Runs the tests with read_write images which allow a kernel do both read and write to the same image \n" );
     log_info( "\t\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" );
     log_info( "\t\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" );
     log_info( "\n" );

diff --git a/test_conformance/images/samplerlessReads/test_iterations.cpp b/test_conformance/images/samplerlessReads/test_iterations.cpp
index 55eaaf4..857fbc6 100644
--- a/test_conformance/images/samplerlessReads/test_iterations.cpp
+++ b/test_conformance/images/samplerlessReads/test_iterations.cpp

@@ -22,7 +22,14 @@
     #include <setjmp.h>
 #endif
 
-extern bool gTestReadWrite;
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool                 gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gDeviceLt20;
+extern bool                 gTestReadWrite;
+
+#define MAX_TRIES   1
+#define MAX_CLAMPED 1
 
 const char *read2DKernelSourcePattern =
 "__kernel void sample_kernel( read_only %s input, sampler_t sampler, __global int *results )\n"
@@ -176,11 +183,8 @@
     return 0;
 }
 
-int test_read_image_set_2D(cl_device_id device, cl_context context,
-                           cl_command_queue queue,
-                           const cl_image_format *format,
-                           image_sampler_data *imageSampler,
-                           ExplicitType outputType)
+int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                            ExplicitType outputType )
 {
     char programSrc[10240];
     const char *ptr;
@@ -197,11 +201,6 @@
     image_descriptor imageInfo = { 0 };
     size_t pixelSize;
 
-    if (gTestReadWrite && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
-
     imageInfo.format = format;
     imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0;
     imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
@@ -256,8 +255,7 @@
     }
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create testing kernel" );
 
     if ( gTestSmallImages )

diff --git a/test_conformance/images/samplerlessReads/test_loops.cpp b/test_conformance/images/samplerlessReads/test_loops.cpp
index db49a8f..e50d5d4 100644
--- a/test_conformance/images/samplerlessReads/test_loops.cpp
+++ b/test_conformance/images/samplerlessReads/test_loops.cpp

@@ -16,45 +16,24 @@
 #include "../testBase.h"
 #include "../common.h"
 
-extern int gTypesToTest;
-extern bool gTestReadWrite;
+extern int                  gTypesToTest;
+extern cl_channel_type      gChannelTypeToUse;
+extern cl_channel_order     gChannelOrderToUse;
 
-extern int test_read_image_set_1D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  ExplicitType outputType);
-extern int test_read_image_set_1D_buffer(cl_device_id device,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         const cl_image_format *format,
-                                         image_sampler_data *imageSampler,
-                                         ExplicitType outputType);
-extern int test_read_image_set_2D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  ExplicitType outputType);
-extern int test_read_image_set_3D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  ExplicitType outputType);
-extern int test_read_image_set_1D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        const cl_image_format *format,
-                                        image_sampler_data *imageSampler,
-                                        ExplicitType outputType);
-extern int test_read_image_set_2D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        const cl_image_format *format,
-                                        image_sampler_data *imageSampler,
-                                        ExplicitType outputType);
+extern bool                 gDebugTrace;
+extern bool                 gDeviceLt20;
 
-int test_read_image_type(cl_device_id device, cl_context context,
-                         cl_command_queue queue, const cl_image_format *format,
-                         image_sampler_data *imageSampler,
-                         ExplicitType outputType, cl_mem_object_type imageType)
+extern bool                 gTestReadWrite;
+
+extern int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType );
+extern int test_read_image_set_1D_buffer( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType );
+extern int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType );
+extern int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType );
+extern int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType );
+extern int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType );
+
+int test_read_image_type( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format,
+                          image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
 {
     int ret = 0;
     imageSampler->addressing_mode = CL_ADDRESS_NONE;
@@ -95,25 +74,20 @@
     return ret;
 }
 
-int test_read_image_formats(cl_device_id device, cl_context context,
-                            cl_command_queue queue,
-                            const std::vector<cl_image_format> &formatList,
-                            const std::vector<bool> &filterFlags,
-                            image_sampler_data *imageSampler,
-                            ExplicitType outputType,
-                            cl_mem_object_type imageType)
+int test_read_image_formats( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+                             image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
 {
     int ret = 0;
     imageSampler->normalized_coords = false;
     log_info( "read_image (%s coords, %s results) *****************************\n",
               "integer", get_explicit_type_name( outputType ) );
 
-    for (unsigned int i = 0; i < formatList.size(); i++)
+    for ( unsigned int i = 0; i < numFormats; i++ )
     {
         if ( filterFlags[i] )
             continue;
 
-        const cl_image_format &imageFormat = formatList[i];
+        cl_image_format &imageFormat = formatList[ i ];
 
         ret |= test_read_image_type( device, context, queue, &imageFormat, imageSampler, outputType, imageType );
     }
@@ -127,50 +101,39 @@
     static int printedFormatList = -1;
 
     // Grab the list of supported image formats
-    std::vector<cl_image_format> formatList;
-
-    if (gTestReadWrite && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
+    auto version = get_device_cl_version(device);
+    if (version < Version(2, 0)) {
+        gDeviceLt20 = true;
     }
 
-    std::vector<cl_image_format> readOnlyFormats;
-    if (get_format_list(context, imageType, readOnlyFormats, CL_MEM_READ_ONLY))
+    if (gDeviceLt20 && gTestReadWrite) {
+        log_info("TEST skipped, Opencl 2.0 + requried for this test");
+        return ret;
+    }
+
+    // This flag is only for querying the list of supported formats
+    // The flag for creating image will be set explicitly in test functions
+    cl_mem_flags flags = (gTestReadWrite)? CL_MEM_KERNEL_READ_AND_WRITE : CL_MEM_READ_ONLY;
+
+    if ( get_format_list( context, imageType, formatList, numFormats, flags ) )
         return -1;
 
-    if (gTestReadWrite)
+    filterFlags = new bool[ numFormats ];
+    if ( filterFlags == NULL )
     {
-        std::vector<cl_image_format> readWriteFormats;
-        if (get_format_list(context, imageType, readWriteFormats,
-                            CL_MEM_KERNEL_READ_AND_WRITE))
-            return -1;
-
-        // Keep only intersecting formats with read only and read write flags
-        for (unsigned int i = 0; i < readOnlyFormats.size(); i++)
-        {
-            for (unsigned int j = 0; j < readWriteFormats.size(); j++)
-            {
-                if (readOnlyFormats[i].image_channel_data_type
-                        == readWriteFormats[j].image_channel_data_type
-                    && readOnlyFormats[i].image_channel_order
-                        == readWriteFormats[j].image_channel_order)
-                {
-                    formatList.push_back(readOnlyFormats[i]);
-                    break;
-                }
-            }
-        }
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
     }
-    else
-    {
-        formatList = readOnlyFormats;
-    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
 
     // First time through, we'll go ahead and print the formats supported, regardless of type
     if ( printedFormatList != (int)imageType )
     {
         log_info( "---- Supported %s read formats for this device ---- \n", convert_image_type_to_string(imageType) );
-        for (unsigned int f = 0; f < formatList.size(); f++)
+        for ( unsigned int f = 0; f < numFormats; f++ )
             log_info( "  %-7s %-24s %d\n", GetChannelOrderName( formatList[ f ].image_channel_order ),
                       GetChannelTypeName( formatList[ f ].image_channel_data_type ),
                       (int)get_format_channel_count( &formatList[ f ] ) );
@@ -184,8 +147,9 @@
     {
         if (gTypesToTest & test.type)
         {
-            std::vector<bool> filterFlags(formatList.size(), false);
-            if (filter_formats(formatList, filterFlags, test.channelTypes) == 0)
+            if (filter_formats(formatList, filterFlags, numFormats,
+                               test.channelTypes)
+                == 0)
             {
                 log_info("No formats supported for %s type\n", test.name);
             }
@@ -193,11 +157,14 @@
             {
                 imageSampler.filter_mode = CL_FILTER_NEAREST;
                 ret += test_read_image_formats(
-                    device, context, queue, formatList, filterFlags,
+                    device, context, queue, formatList, filterFlags, numFormats,
                     &imageSampler, test.explicitType, imageType);
             }
         }
     }
 
+    delete[] filterFlags;
+    delete[] formatList;
+
     return ret;
 }

diff --git a/test_conformance/images/samplerlessReads/test_read_1D.cpp b/test_conformance/images/samplerlessReads/test_read_1D.cpp
index aa261b7..173bc6f 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D.cpp

@@ -22,7 +22,14 @@
     #include <setjmp.h>
 #endif
 
-extern bool gTestReadWrite;
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool                 gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gDeviceLt20;
+extern bool                 gTestReadWrite;
+
+#define MAX_TRIES   1
+#define MAX_CLAMPED 1
 
 const char *read1DKernelSourcePattern =
 "__kernel void sample_kernel( read_only image1d_t input, sampler_t sampler, __global int *results )\n"
@@ -177,11 +184,8 @@
     return 0;
 }
 
-int test_read_image_set_1D(cl_device_id device, cl_context context,
-                           cl_command_queue queue,
-                           const cl_image_format *format,
-                           image_sampler_data *imageSampler,
-                           ExplicitType outputType)
+int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                            ExplicitType outputType )
 {
     char programSrc[10240];
     const char *ptr;
@@ -192,11 +196,6 @@
     RandomSeed seed( gRandomSeed );
     int error;
 
-    if (gTestReadWrite && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
-
     // Get our operating params
     size_t maxWidth;
     cl_ulong maxAllocSize, memSize;
@@ -254,8 +253,7 @@
 
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create testing kernel" );
 
     if ( gTestSmallImages )

diff --git a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
index fb0c263..503a161 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp

@@ -22,7 +22,14 @@
     #include <setjmp.h>
 #endif
 
-extern bool gTestReadWrite;
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool                 gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gDeviceLt20;
+extern bool                 gTestReadWrite;
+
+#define MAX_TRIES   1
+#define MAX_CLAMPED 1
 
 const char *read1DArrayKernelSourcePattern =
 "__kernel void sample_kernel( read_only image1d_array_t input, sampler_t sampler, __global int *results )\n"
@@ -166,8 +173,6 @@
 
     clReleaseSampler(actualSampler);
     clReleaseMemObject(results);
-    clReleaseMemObject(read_only_image);
-
     if(gTestReadWrite)
     {
         clReleaseMemObject(read_write_image);
@@ -175,11 +180,8 @@
     return 0;
 }
 
-int test_read_image_set_1D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 const cl_image_format *format,
-                                 image_sampler_data *imageSampler,
-                                 ExplicitType outputType)
+int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                            ExplicitType outputType )
 {
     char programSrc[10240];
     const char *ptr;
@@ -196,11 +198,6 @@
     image_descriptor imageInfo = { 0 };
     size_t pixelSize;
 
-    if (gTestReadWrite && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
-
     imageInfo.format = format;
     imageInfo.height = imageInfo.depth = 0;
     imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
@@ -253,8 +250,7 @@
 
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create testing kernel" );
 
     if ( gTestSmallImages )

diff --git a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
index 7a3084d..4cd02f0 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp

@@ -22,6 +22,13 @@
     #include <setjmp.h>
 #endif
 
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool             gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gDeviceLt20;
+
+#define MAX_TRIES   1
+#define MAX_CLAMPED 1
 
 const char *read1DBufferKernelSourcePattern =
 "__kernel void sample_kernel( read_only image1d_buffer_t inputA, read_only image1d_t inputB, sampler_t sampler, __global int *results )\n"
@@ -160,11 +167,8 @@
     return 0;
 }
 
-int test_read_image_set_1D_buffer(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  ExplicitType outputType)
+int test_read_image_set_1D_buffer( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                            ExplicitType outputType )
 {
     char programSrc[10240];
     const char *ptr;
@@ -246,8 +250,7 @@
              readFormat );
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create testing kernel" );
 
     if ( gTestSmallImages )

diff --git a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
index 99f2426..22fcffc 100644
--- a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp

@@ -16,7 +16,11 @@
 #include "../testBase.h"
 #include <float.h>
 
-extern bool gTestReadWrite;
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool             gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gDeviceLt20;
+extern bool             gTestReadWrite;
 
 const char *read2DArrayKernelSourcePattern =
 "__kernel void sample_kernel( read_only %s input, sampler_t sampler, __global int *results )\n"
@@ -161,11 +165,8 @@
     return 0;
 }
 
-int test_read_image_set_2D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 const cl_image_format *format,
-                                 image_sampler_data *imageSampler,
-                                 ExplicitType outputType)
+int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format,
+                                  image_sampler_data *imageSampler, ExplicitType outputType )
 {
     char programSrc[10240];
     const char *ptr;
@@ -175,11 +176,6 @@
 
     int error;
 
-    if (gTestReadWrite && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
-
     clProgramWrapper program;
     clKernelWrapper kernel;
 
@@ -243,8 +239,7 @@
 
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create testing kernel" );
 
 

diff --git a/test_conformance/images/samplerlessReads/test_read_3D.cpp b/test_conformance/images/samplerlessReads/test_read_3D.cpp
index cf41140..142d722 100644
--- a/test_conformance/images/samplerlessReads/test_read_3D.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_3D.cpp

@@ -16,7 +16,11 @@
 #include "../testBase.h"
 #include <float.h>
 
-extern bool gTestReadWrite;
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool             gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gDeviceLt20;
+extern bool             gTestReadWrite;
 
 const char *read3DKernelSourcePattern =
 "__kernel void sample_kernel( read_only image3d_t input, sampler_t sampler, __global int *results )\n"
@@ -164,11 +168,8 @@
     return 0;
 }
 
-int test_read_image_set_3D(cl_device_id device, cl_context context,
-                           cl_command_queue queue,
-                           const cl_image_format *format,
-                           image_sampler_data *imageSampler,
-                           ExplicitType outputType)
+int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format,
+                            image_sampler_data *imageSampler, ExplicitType outputType )
 {
     char programSrc[10240];
     const char *ptr;
@@ -178,11 +179,6 @@
 
     int error;
 
-    if (gTestReadWrite && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
-
     clProgramWrapper program;
     clKernelWrapper kernel;
 
@@ -246,8 +242,7 @@
 
 
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create testing kernel" );
 
 

diff --git a/test_conformance/images/testBase.h b/test_conformance/images/testBase.h
index ad48f10..f50f6b5 100644
--- a/test_conformance/images/testBase.h
+++ b/test_conformance/images/testBase.h

@@ -22,12 +22,6 @@
 #include "harness/clImageHelper.h"
 #include "harness/imageHelpers.h"
 
-extern bool gDebugTrace;
-extern bool gTestSmallImages;
-extern bool gEnablePitch;
-extern bool gTestMaxImages;
-extern bool gTestMipmaps;
-
 // Amount to offset pixels for checking normalized reads
 #define NORM_OFFSET 0.1f
 
@@ -64,22 +58,19 @@
     kAllTests = ( kReadTests | kWriteTests | kReadWriteTests )
 };
 
-typedef int (*test_format_set_fn)(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    const std::vector<cl_image_format> &formatList,
-    const std::vector<bool> &filterFlags, image_sampler_data *imageSampler,
-    ExplicitType outputType, cl_mem_object_type imageType);
+typedef int (*test_format_set_fn)( cl_device_id device, cl_context context, cl_command_queue queue,
+  cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+  image_sampler_data *imageSampler, ExplicitType outputType,
+  cl_mem_object_type imageType );
 
-extern int test_read_image_formats(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    const std::vector<cl_image_format> &formatList,
-    const std::vector<bool> &filterFlags, image_sampler_data *imageSampler,
-    ExplicitType outputType, cl_mem_object_type imageType);
-extern int test_write_image_formats(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    const std::vector<cl_image_format> &formatList,
-    const std::vector<bool> &filterFlags, image_sampler_data *imageSampler,
-    ExplicitType outputType, cl_mem_object_type imageType);
+extern int test_read_image_formats( cl_device_id device, cl_context context, cl_command_queue queue,
+  cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+  image_sampler_data *imageSampler, ExplicitType outputType,
+  cl_mem_object_type imageType );
+extern int test_write_image_formats( cl_device_id device, cl_context context, cl_command_queue queue,
+  cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+  image_sampler_data *imageSampler, ExplicitType outputType,
+  cl_mem_object_type imageType );
 
 #endif // _testBase_h
 

diff --git a/test_conformance/integer_ops/main.cpp b/test_conformance/integer_ops/main.cpp
index 00e9166..1a8bad5 100644
--- a/test_conformance/integer_ops/main.cpp
+++ b/test_conformance/integer_ops/main.cpp

@@ -212,6 +212,6 @@
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
 }
 

diff --git a/test_conformance/integer_ops/test_int_basic_ops.cpp b/test_conformance/integer_ops/test_int_basic_ops.cpp
index 519e5be..2d628d4 100644
--- a/test_conformance/integer_ops/test_int_basic_ops.cpp
+++ b/test_conformance/integer_ops/test_int_basic_ops.cpp

@@ -244,21 +244,18 @@
     (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
     pThis->m_output_ptr =
     (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
-    pThis->m_streams[0] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        pThis->m_type_size * num_elements * inputAVecSize, NULL, &err);
+    pThis->m_streams[0] =
+    clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), pThis->m_type_size * num_elements * inputAVecSize, NULL, &err);
 
     test_error(err, "clCreateBuffer failed");
 
-    pThis->m_streams[1] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        pThis->m_type_size * num_elements * inputBVecSize, NULL, &err);
+    pThis->m_streams[1] =
+    clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), pThis->m_type_size * num_elements * inputBVecSize, NULL, &err );
 
     test_error(err, "clCreateBuffer failed");
 
-    pThis->m_streams[2] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        pThis->m_type_size * num_elements * vectorSize, NULL, &err);
+    pThis->m_streams[2] =
+    clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), pThis->m_type_size * num_elements * vectorSize, NULL, &err );
 
     test_error(err, "clCreateBuffer failed");
 
@@ -1448,21 +1445,13 @@
     generate_random_data( type, num_elements * inputBVecSize, s_randStates, input_ptr[ 1 ] );
     generate_random_bool_data( num_elements * inputCVecSize, s_randStates, (cl_char *)input_ptr[ 2 ], type_size );
 
-    streams[0] = clCreateBuffer(
-        context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-        type_size * num_elements * inputAVecSize, input_ptr[0], &err);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), type_size * num_elements * inputAVecSize, input_ptr[0], &err);
     test_error(err, "clCreateBuffer failed");
-    streams[1] = clCreateBuffer(
-        context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-        type_size * num_elements * inputBVecSize, input_ptr[1], &err);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), type_size * num_elements * inputBVecSize, input_ptr[1], &err );
     test_error(err, "clCreateBuffer failed");
-    streams[2] = clCreateBuffer(
-        context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-        type_size * num_elements * inputCVecSize, input_ptr[2], &err);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), type_size * num_elements * inputCVecSize, input_ptr[2], &err );
     test_error(err, "clCreateBuffer failed");
-    streams[3] =
-        clCreateBuffer(context, CL_MEM_WRITE_ONLY,
-                       type_size * num_elements * vectorSize, NULL, &err);
+    streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_WRITE_ONLY), type_size * num_elements * vectorSize, NULL, &err );
     test_error(err, "clCreateBuffer failed");
 
     const char *vectorString = sizeNames[ vectorSize ];

diff --git a/test_conformance/integer_ops/test_integers.cpp b/test_conformance/integer_ops/test_integers.cpp
index 8d77b24..620582c 100644
--- a/test_conformance/integer_ops/test_integers.cpp
+++ b/test_conformance/integer_ops/test_integers.cpp

@@ -84,10 +84,11 @@
                 useOpKernel ? fnName : "", useOpKernel ? "" : fnName, sizeName,
                 sizeName );
 
+    bool isOpenCL20Function = (strcmp(fnName,"ctz") == 0)? true: false;
+
     /* Create kernels */
     programPtr = kernelSource;
-    if (create_single_kernel_helper(context, &program, &kernel, 1,
-                                    (const char **)&programPtr, "sample_test"))
+    if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test", isOpenCL20Function ? "-cl-std=CL2.0": "" ) )
     {
         log_error("The program we attempted to compile was: \n%s\n", kernelSource);
         return -1;
@@ -96,9 +97,9 @@
     /* Generate some streams */
     generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataA );
 
-    streams[0] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecType) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
+                                get_explicit_type_size( vecType ) * vecSize * TEST_SIZE,
+                                inDataA, NULL);
     if( streams[0] == NULL )
     {
         log_error("ERROR: Creating input array A failed!\n");
@@ -110,10 +111,9 @@
         // Op kernels use an r/w buffer for the second param, so we need to init it with data
         generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataB );
     }
-    streams[1] = clCreateBuffer(
-        context, (CL_MEM_READ_WRITE | (useOpKernel ? CL_MEM_COPY_HOST_PTR : 0)),
-        get_explicit_type_size(vecType) * vecSize * TEST_SIZE,
-        (useOpKernel) ? &inDataB : NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | ( useOpKernel ? CL_MEM_COPY_HOST_PTR : 0 )),
+                                 get_explicit_type_size( vecType ) * vecSize * TEST_SIZE,
+                                ( useOpKernel ) ? &inDataB : NULL, NULL );
     if( streams[1] == NULL )
     {
         log_error("ERROR: Creating output array failed!\n");
@@ -668,25 +668,25 @@
     generate_random_data( vecAType, vecSize * TEST_SIZE, d, inDataA );
     generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB );
 
-    streams[0] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, &inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
+                                get_explicit_type_size( vecAType ) * vecSize * TEST_SIZE,
+                                &inDataA, NULL);
     if( streams[0] == NULL )
     {
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecBType) * vecSize * TEST_SIZE, &inDataB, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
+                                get_explicit_type_size( vecBType ) * vecSize * TEST_SIZE,
+                                &inDataB, NULL);
     if( streams[1] == NULL )
     {
         log_error("ERROR: Creating input array B failed!\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+                                 get_explicit_type_size( vecAType ) * vecSize * TEST_SIZE,
+                                 NULL, NULL );
     if( streams[2] == NULL )
     {
         log_error("ERROR: Creating output array failed!\n");
@@ -1325,33 +1325,25 @@
     generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB );
     generate_random_data( vecCType, vecSize * TEST_SIZE, d, inDataC );
 
-    streams[0] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, &inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecAType ) * vecSize * TEST_SIZE, &inDataA, NULL);
     if( streams[0] == NULL )
     {
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecBType) * vecSize * TEST_SIZE, &inDataB, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecBType ) * vecSize * TEST_SIZE, &inDataB, NULL);
     if( streams[1] == NULL )
     {
         log_error("ERROR: Creating input array B failed!\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecCType) * vecSize * TEST_SIZE, &inDataC, NULL);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecCType ) * vecSize * TEST_SIZE, &inDataC, NULL);
     if( streams[2] == NULL )
     {
         log_error("ERROR: Creating input array C failed!\n");
         return -1;
     }
-    streams[3] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        get_explicit_type_size(destType) * vecSize * TEST_SIZE, NULL, NULL);
+    streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( destType ) * vecSize * TEST_SIZE, NULL, NULL );
     if( streams[3] == NULL )
     {
         log_error("ERROR: Creating output array failed!\n");

diff --git a/test_conformance/integer_ops/test_unary_ops.cpp b/test_conformance/integer_ops/test_unary_ops.cpp
index 72940ea..0b4d0b8 100644
--- a/test_conformance/integer_ops/test_unary_ops.cpp
+++ b/test_conformance/integer_ops/test_unary_ops.cpp

@@ -90,9 +90,9 @@
 
     // Generate two streams. The first is our random data to test against, the second is our control stream
     generate_random_data( vecType, vecSize * TEST_SIZE, d, inData );
-    streams[0] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecType) * vecSize * TEST_SIZE, inData, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
+                                 get_explicit_type_size( vecType ) * vecSize * TEST_SIZE,
+                                 inData, &error );
     test_error( error, "Creating input data array failed" );
 
     cl_uint bits;
@@ -110,8 +110,8 @@
             // For addition ops, the MAX control value is 1. Otherwise, it's 3
             controlData[ i ] &= ~0x02;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(controlData), controlData, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
+                                sizeof( controlData ), controlData, &error );
     test_error( error, "Unable to create control stream" );
 
     // Assign streams and execute

diff --git a/test_conformance/integer_ops/test_upsample.cpp b/test_conformance/integer_ops/test_upsample.cpp
index 9ae3f0c..2fbbcc4 100644
--- a/test_conformance/integer_ops/test_upsample.cpp
+++ b/test_conformance/integer_ops/test_upsample.cpp

@@ -92,24 +92,19 @@
     }
 
     /* Set up parameters */
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sourceATypeSize * sourceAVecSize * count, sourceA, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sourceATypeSize * sourceAVecSize * count, sourceA, NULL );
     if (!streams[0])
     {
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
     }
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sourceBTypeSize * sourceBVecSize * count, sourceB, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sourceBTypeSize * sourceBVecSize * count, sourceB, NULL );
     if (!streams[1])
     {
         log_error("ERROR: Creating input array B failed!\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, outStride * count,
-                                NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), outStride * count, NULL, NULL );
     if (!streams[2])
     {
         log_error("ERROR: Creating output array failed!\n");

diff --git a/test_conformance/integer_ops/verification_and_generation_functions.cpp b/test_conformance/integer_ops/verification_and_generation_functions.cpp
index 25fbe71..23f3bdb 100644
--- a/test_conformance/integer_ops/verification_and_generation_functions.cpp
+++ b/test_conformance/integer_ops/verification_and_generation_functions.cpp

@@ -1481,8 +1481,8 @@
 verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, cl_uchar *outptr, size_t n)
 {
     cl_uchar r;
-    cl_uint shift_mask = vector_size == 1 ? (cl_uint)(sizeof(cl_uint) * 8) - 1
-                                          : (cl_uint)(sizeof(cl_uchar) * 8) - 1;
+    cl_uint  shift_mask = vector_size == 1 ? (cl_uint)(sizeof(cl_uint)*8)-1 // scalar case: mask from the bit width of cl_uint
+    : (cl_uint)(sizeof(cl_uchar)*8)-1; // vector case: mask from the bit width of cl_uchar
     size_t   i, j;
     int      count=0;
 

diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt
index d8dfc40..8818039 100644
--- a/test_conformance/math_brute_force/CMakeLists.txt
+++ b/test_conformance/math_brute_force/CMakeLists.txt

@@ -1,37 +1,35 @@
 set(MODULE_NAME BRUTEFORCE)
 
 set(${MODULE_NAME}_SOURCES
-    binary_double.cpp
-    binary_float.cpp
-    binary_i_double.cpp
-    binary_i_float.cpp
-    binary_operator_double.cpp
-    binary_operator_float.cpp
-    binary_two_results_i_double.cpp
-    binary_two_results_i_float.cpp
-    function_list.cpp
-    i_unary_double.cpp
-    i_unary_float.cpp
-    macro_binary_double.cpp
-    macro_binary_float.cpp
-    macro_unary_double.cpp
-    macro_unary_float.cpp
-    mad_double.cpp
-    mad_float.cpp
+    FunctionList.cpp
+    Sleep.cpp
+    binary.cpp
+    binaryOperator.cpp
+    Utility.cpp
+    binary_i.cpp
+    binary_two_results_i.cpp
+    i_unary.cpp
+    macro_binary.cpp
+    macro_unary.cpp
+    mad.cpp
     main.cpp
     reference_math.cpp
-    sleep.cpp
-    ternary_double.cpp
-    ternary_float.cpp
-    unary_double.cpp
-    unary_float.cpp
-    unary_two_results_double.cpp
-    unary_two_results_float.cpp
-    unary_two_results_i_double.cpp
-    unary_two_results_i_float.cpp
-    unary_u_double.cpp
-    unary_u_float.cpp
-    utility.cpp
+    ternary.cpp
+    unary.cpp
+    unary_two_results.cpp
+    unary_two_results_i.cpp
+    unary_u.cpp
 )
 
+if (NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID)  # per-source -march flag for builds that are not CMAKE_CL_64, MSVC or Android
+set_source_files_properties(
+${MODULE_NAME}_SOURCES  # NOTE(review): MODULE_NAME is BRUTEFORCE, so this is the literal name BRUTEFORCE_SOURCES, not the list contents (${${MODULE_NAME}_SOURCES}) - confirm intent
+    COMPILE_FLAGS -march=i686)
+endif (NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID)
+
+if(CMAKE_COMPILER_IS_GNUCC)  # GNU C compiler only
+set_source_files_properties(  # NOTE(review): no source files are listed before COMPILE_FLAGS - confirm which files were meant to get -O0
+        COMPILE_FLAGS -O0)
+endif(CMAKE_COMPILER_IS_GNUCC)
+
 include(../CMakeCommon.txt)

diff --git a/test_conformance/math_brute_force/FunctionList.cpp b/test_conformance/math_brute_force/FunctionList.cpp
new file mode 100644
index 0000000..30ee551
--- /dev/null
+++ b/test_conformance/math_brute_force/FunctionList.cpp

@@ -0,0 +1,205 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "FunctionList.h"
+#include "reference_math.h"
+
+#define FTZ_ON  1
+#define FTZ_OFF 0
+#define EXACT    0.0f
+#define RELAXED_ON 1
+#define RELAXED_OFF 0
+
+#define STRINGIFY( _s)                  #_s
+
+// Only use ulps information in spir test
+#ifdef FUNCTION_LIST_ULPS_ONLY
+
+#define ENTRY(      _name, _ulp, _embedded_ulp, _rmode, _type )                 { STRINGIFY(_name), STRINGIFY(_name),                 {NULL}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
+#define ENTRY_EXT(  _name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type )   { STRINGIFY(_name), STRINGIFY(_name),                 {NULL}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, RELAXED_ON,  _type }
+#define HALF_ENTRY( _name, _ulp, _embedded_ulp, _rmode, _type )                 { "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), {NULL}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
+#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type)    { STRINGIFY(_name), _operator,                        {NULL}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
+#define unaryF                NULL
+#define i_unaryF              NULL
+#define unaryF_u              NULL
+#define macro_unaryF          NULL
+#define binaryF               NULL
+#define binaryF_nextafter     NULL
+#define binaryOperatorF       NULL
+#define binaryF_i             NULL
+#define macro_binaryF         NULL
+#define ternaryF              NULL
+#define unaryF_two_results    NULL
+#define unaryF_two_results_i  NULL
+#define binaryF_two_results_i NULL
+#define mad_function          NULL
+
+#define reference_sqrt        NULL
+#define reference_sqrtl       NULL
+#define reference_divide      NULL
+#define reference_dividel     NULL
+#define reference_relaxed_divide NULL
+
+#else // FUNCTION_LIST_ULPS_ONLY
+
+#define ENTRY(      _name, _ulp, _embedded_ulp, _rmode, _type )                 { STRINGIFY(_name), STRINGIFY(_name),                 {(void*)reference_##_name}, {(void*)reference_##_name##l}, {(void*)reference_##_name},           _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
+#define ENTRY_EXT(  _name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type )   { STRINGIFY(_name), STRINGIFY(_name),                 {(void*)reference_##_name}, {(void*)reference_##_name##l}, {(void*)reference_##relaxed_##_name}, _ulp, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, RELAXED_ON,  _type }
+#define HALF_ENTRY( _name, _ulp, _embedded_ulp, _rmode, _type )                 { "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), {(void*)reference_##_name}, {NULL}, {NULL},                   _ulp, _ulp, _embedded_ulp, INFINITY, _rmode, RELAXED_OFF, _type }
+#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type)    { STRINGIFY(_name), _operator,                        {(void*)reference_##_name}, {(void*)reference_##_name##l}, {NULL},                               _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
+
+extern const vtbl _unary;               // float foo( float )
+extern const vtbl _unary_u;             // float foo( uint ),  double foo( ulong )
+extern const vtbl _i_unary;             // int foo( float )
+extern const vtbl _macro_unary;         // int foo( float ),  returns {0,1} for scalar, { 0, -1 } for vector
+extern const vtbl _binary;              // float foo( float, float )
+extern const vtbl _binary_nextafter;    // float foo( float, float ), special handling for nextafter
+extern const vtbl _binary_operator;     // float .op. float
+extern const vtbl _macro_binary;        // int foo( float, float ), returns {0,1} for scalar, { 0, -1 } for vector
+extern const vtbl _binary_i;            // float foo( float, int )
+extern const vtbl _ternary;             // float foo( float, float, float )
+extern const vtbl _unary_two_results;   // float foo( float, float * )
+extern const vtbl _unary_two_results_i; // float foo( float, int * )
+extern const vtbl _binary_two_results_i; // float foo( float, float, int * )
+extern const vtbl _mad_tbl;             // float mad( float, float, float )
+
+#define unaryF &_unary
+#define i_unaryF &_i_unary
+#define unaryF_u  &_unary_u
+#define macro_unaryF &_macro_unary
+#define binaryF &_binary
+#define binaryF_nextafter &_binary_nextafter
+#define binaryOperatorF &_binary_operator
+#define binaryF_i &_binary_i
+#define macro_binaryF &_macro_binary
+#define ternaryF &_ternary
+#define unaryF_two_results  &_unary_two_results
+#define unaryF_two_results_i  &_unary_two_results_i
+#define binaryF_two_results_i  &_binary_two_results_i
+#define mad_function        &_mad_tbl
+
+#endif // FUNCTION_LIST_ULPS_ONLY
+
+const Func  functionList[] = { // Row arguments follow the ENTRY / ENTRY_EXT / HALF_ENTRY / OPERATOR_ENTRY macro parameter lists defined above.
+                                    ENTRY( acos,                  4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( acosh,                 4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( acospi,                5.0f,         5.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( asin,                  4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( asinh,                 4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( asinpi,                5.0f,         5.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( atan,                  5.0f,         5.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( atanh,                 5.0f,         5.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( atanpi,                5.0f,         5.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( atan2,                 6.0f,         6.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( atan2pi,               6.0f,         6.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( cbrt,                  2.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( ceil,                  0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( copysign,              0.0f,         0.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY_EXT( cos,               4.0f,         4.0f,        0.00048828125f,        FTZ_OFF,     unaryF), //relaxed ulp 2^-11
+                                    ENTRY( cosh,                  4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( cospi,                 4.0f,         4.0f,         FTZ_OFF,     unaryF),
+//                                  ENTRY( erfc,                  16.0f,         16.0f,         FTZ_OFF,     unaryF), //disabled for 1.0 due to lack of reference implementation
+//                                  ENTRY( erf,                   16.0f,         16.0f,         FTZ_OFF,     unaryF), //disabled for 1.0 due to lack of reference implementation
+                                    ENTRY_EXT( exp,               3.0f,         4.0f,       3.0f,       FTZ_OFF,    unaryF), //relaxed error is actually overwritten in unary.cpp as it is 3+floor(fabs(2*x))
+                                    ENTRY_EXT( exp2,              3.0f,         4.0f,       3.0f,       FTZ_OFF,    unaryF), //relaxed error is actually overwritten in unary.cpp as it is 3+floor(fabs(2*x))
+                                    ENTRY_EXT( exp10,             3.0f,         4.0f,       8192.0f,    FTZ_OFF,    unaryF), //relaxed error is actually overwritten in unary.cpp as it is 3+floor(fabs(2*x)) in derived mode,
+                                    // in non-derived mode it uses the ulp error for half_exp10.
+                                    ENTRY( expm1,                 3.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( fabs,                  0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( fdim,                  0.0f,         0.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( floor,                 0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( fma,                   0.0f,         0.0f,         FTZ_OFF,     ternaryF),
+                                    ENTRY( fmax,                  0.0f,         0.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( fmin,                  0.0f,         0.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( fmod,                  0.0f,         0.0f,         FTZ_OFF,     binaryF ),
+                                    ENTRY( fract,                 0.0f,         0.0f,         FTZ_OFF,     unaryF_two_results),
+                                    ENTRY( frexp,                 0.0f,         0.0f,         FTZ_OFF,     unaryF_two_results_i),
+                                    ENTRY( hypot,                 4.0f,         4.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( ilogb,                 0.0f,         0.0f,         FTZ_OFF,     i_unaryF),
+                                    ENTRY( isequal,               0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( isfinite,              0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
+                                    ENTRY( isgreater,             0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( isgreaterequal,        0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( isinf,                 0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
+                                    ENTRY( isless,                0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( islessequal,           0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( islessgreater,         0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( isnan,                 0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
+                                    ENTRY( isnormal,              0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
+                                    ENTRY( isnotequal,            0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( isordered,             0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( isunordered,           0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( ldexp,                 0.0f,         0.0f,         FTZ_OFF,     binaryF_i),
+                                    ENTRY( lgamma,            INFINITY,     INFINITY,         FTZ_OFF,     unaryF),
+                                    ENTRY( lgamma_r,          INFINITY,     INFINITY,         FTZ_OFF,     unaryF_two_results_i),
+                                    ENTRY_EXT( log,               3.0f,         4.0f,       4.76837158203125e-7f,   FTZ_OFF,    unaryF), //relaxed ulp 2^-21
+                                    ENTRY_EXT( log2,              3.0f,         4.0f,       4.76837158203125e-7f,   FTZ_OFF,    unaryF), //relaxed ulp 2^-21
+                                    ENTRY( log10,                 3.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( log1p,                 2.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( logb,                  0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY_EXT( mad,           INFINITY,     INFINITY,        INFINITY,    FTZ_OFF,    mad_function), //in fast-relaxed-math mode it has to be either exactly rounded fma or exactly rounded a*b+c
+                                    ENTRY( maxmag,                0.0f,         0.0f,         FTZ_OFF,    binaryF ),
+                                    ENTRY( minmag,                0.0f,         0.0f,         FTZ_OFF,    binaryF ),
+                                    ENTRY( modf,                  0.0f,         0.0f,         FTZ_OFF,     unaryF_two_results ),
+                                    ENTRY( nan,                   0.0f,         0.0f,         FTZ_OFF,     unaryF_u),
+                                    ENTRY( nextafter,             0.0f,         0.0f,         FTZ_OFF,     binaryF_nextafter),
+                                    ENTRY_EXT( pow,              16.0f,        16.0f,         8192.0f,     FTZ_OFF,    binaryF), //in derived mode the ulp error is calculated as exp2(y*log2(x)) and in non-derived it is the same as half_pow
+                                    ENTRY( pown,                 16.0f,        16.0f,         FTZ_OFF,     binaryF_i),
+                                    ENTRY( powr,                 16.0f,        16.0f,         FTZ_OFF,     binaryF),
+//                                  ENTRY( reciprocal,            1.0f,         1.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( remainder,             0.0f,         0.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( remquo,                0.0f,         0.0f,         FTZ_OFF,     binaryF_two_results_i),
+                                    ENTRY( rint,                  0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( rootn,                16.0f,        16.0f,         FTZ_OFF,     binaryF_i),
+                                    ENTRY( round,                 0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( rsqrt,                 2.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( signbit,               0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
+                                    ENTRY_EXT( sin,               4.0f,         4.0f,  0.00048828125f,     FTZ_OFF,    unaryF), //relaxed ulp 2^-11
+                                    ENTRY_EXT( sincos,            4.0f,         4.0f,  0.00048828125f,     FTZ_OFF,    unaryF_two_results), //relaxed ulp 2^-11
+                                    ENTRY( sinh,                  4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( sinpi,                 4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    { "sqrt", "sqrt",     {(void*)reference_sqrt}, {(void*)reference_sqrtl}, {NULL}, 3.0f, 0.0f,    4.0f, INFINITY, FTZ_OFF, RELAXED_OFF, unaryF },
+                                    { "sqrt_cr", "sqrt",  {(void*)reference_sqrt}, {(void*)reference_sqrtl}, {NULL}, 0.0f, 0.0f,    0.0f, INFINITY, FTZ_OFF, RELAXED_OFF, unaryF },
+                                    ENTRY_EXT( tan,               5.0f,         5.0f,         8192.0f,    FTZ_OFF,     unaryF), //in derived mode the ulp error is calculated as sin/cos and in non-derived mode it is the same as half_tan.
+                                    ENTRY( tanh,                  5.0f,         5.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( tanpi,                 6.0f,         6.0f,         FTZ_OFF,     unaryF),
+//                                    ENTRY( tgamma,                 16.0f,         16.0f,         FTZ_OFF,     unaryF), // Commented this out until we can be sure this requirement is realistic
+                                    ENTRY( trunc,                 0.0f,         0.0f,         FTZ_OFF,     unaryF),
+
+                                    HALF_ENTRY( cos,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( divide,        8192.0f,      8192.0f,          FTZ_ON,     binaryF),
+                                    HALF_ENTRY( exp,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( exp2,          8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( exp10,         8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( log,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( log2,          8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( log10,         8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( powr,          8192.0f,      8192.0f,          FTZ_ON,     binaryF),
+                                    HALF_ENTRY( recip,         8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( rsqrt,         8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( sin,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( sqrt,          8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( tan,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+
+                                    // basic operations
+                                    OPERATOR_ENTRY( add, "+",         0.0f,         0.0f,     FTZ_OFF,     binaryOperatorF),
+                                    OPERATOR_ENTRY( subtract, "-",     0.0f,         0.0f,     FTZ_OFF,     binaryOperatorF),
+                                    { "divide", "/",  {(void*)reference_divide}, {(void*)reference_dividel}, {(void*)reference_relaxed_divide}, 2.5f, 0.0f,         3.0f, 2.5f, FTZ_OFF, RELAXED_ON, binaryOperatorF },
+                                    { "divide_cr", "/",  {(void*)reference_divide}, {(void*)reference_dividel}, {(void*)reference_relaxed_divide}, 0.0f, 0.0f,         0.0f, 0.f, FTZ_OFF, RELAXED_OFF, binaryOperatorF },
+                                    OPERATOR_ENTRY( multiply, "*",     0.0f,         0.0f,     FTZ_OFF,     binaryOperatorF),
+                                    OPERATOR_ENTRY( assignment, "", 0.0f,       0.0f,     FTZ_OFF,     unaryF),        // A simple copy operation
+                                    OPERATOR_ENTRY( not, "!",       0.0f,       0.0f,   FTZ_OFF,    macro_unaryF),
+                                };
+
+const size_t functionListCount = sizeof( functionList ) / sizeof( functionList[0] );

diff --git a/test_conformance/math_brute_force/FunctionList.h b/test_conformance/math_brute_force/FunctionList.h
new file mode 100644
index 0000000..b2b0ec0
--- /dev/null
+++ b/test_conformance/math_brute_force/FunctionList.h

@@ -0,0 +1,98 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef FUNCTIONLIST_H
+#define FUNCTIONLIST_H
+
+#include "harness/compat.h"
+
+#ifndef WIN32
+#include <unistd.h>
+#endif
+
+#if defined( __APPLE__ )
+    #include <OpenCL/opencl.h>
+#else
+    #include <CL/cl.h>
+#endif
+
+#include "harness/mt19937.h"
+
+// Union of the function-pointer shapes a table entry's reference function may
+// have (double/float/int based signatures). Only one member is meaningful for
+// a given entry.
+// `p` must stay the first member: brace initializers in the function table,
+// e.g. {(void*)reference_divide}, initialize the first member of the union.
+// NOTE(review): member names look like <return>_<arguments> abbreviations
+// (f = floating point, i = int, u = unsigned, p = pointer) -- confirm.
+typedef union fptr
+{
+    void    *p;
+    double  (*f_f)(double);
+    double  (*f_u)(cl_uint);
+    int     (*i_f)(double);
+    int     (*i_f_f)(float);
+    float   (*f_ff_f)(float, float);
+    double  (*f_ff)(double, double);
+    int     (*i_ff)(double, double);
+    double  (*f_fi)(double, int);
+    double  (*f_fpf)(double, double*);
+    double  (*f_fpI)(double, int*);
+    double  (*f_ffpI)(double, double, int*);
+    double  (*f_fff)(double, double, double );
+    float   (*f_fma)(float, float, float, int);
+}fptr;
+
+// Union of the function-pointer shapes whose floating-point arguments and
+// results are long double (with cl_ulong / int variants). Only one member is
+// meaningful for a given entry.
+// `p` must stay the first member: brace initializers in the function table,
+// e.g. {(void*)reference_dividel}, initialize the first member of the union.
+typedef union dptr
+{
+    void            *p;
+    long double     (*f_f)(long double);
+    long double     (*f_u)(cl_ulong);
+    int             (*i_f)(long double);
+    long double     (*f_ff)(long double, long double);
+    int             (*i_ff)(long double, long double);
+    long double     (*f_fi)(long double, int);
+    long double     (*f_fpf)(long double, long double*);
+    long double     (*f_fpI)(long double, int*);
+    long double     (*f_ffpI)(long double, long double, int*);
+    long double     (*f_fff)(long double, long double, long double);
+}dptr;
+
+// Forward declaration: vtbl's test callbacks take a pointer to the Func entry being tested.
+struct Func;
+
+// Dispatch record shared by table entries of one kind: a descriptive name plus
+// the routines that run the test for a given Func entry.
+typedef struct vtbl
+{
+    const char  *type_name;                                             // name of this entry kind (e.g. "binary")
+    int         (*TestFunc)( const struct Func *, MTdata );             // test routine; receives the entry and an MTdata handle
+    int         (*DoubleTestFunc)( const struct Func *, MTdata);        // may be NULL if function is single precision only
+}vtbl;
+
+// One entry of functionList. The field order is part of the contract: the
+// table is built with positional brace initializers, e.g.
+//   { "divide", "/", {reference_divide}, {reference_dividel}, {reference_relaxed_divide},
+//     2.5f, 0.0f, 3.0f, 2.5f, FTZ_OFF, RELAXED_ON, binaryOperatorF }
+// so fields must not be reordered.
+typedef struct Func
+{
+  const char      *name;              // common name, to be used as an argument in the shell
+  const char      *nameInCode;        // name as it appears in the __kernel, usually the same as name, but different for multiplication
+  fptr            func;               // reference function using the fptr signatures
+  dptr            dfunc;              // reference function using the long double (dptr) signatures
+  fptr            rfunc;              // reference function for the relaxed variant (e.g. reference_relaxed_divide)
+  float           float_ulps;         // presumably the allowed ULP error for float tests -- confirm against users
+  float           double_ulps;        // presumably the allowed ULP error for double tests -- confirm against users
+  float           float_embedded_ulps; // presumably the float ULP limit on embedded-profile devices -- confirm
+  float           relaxed_error;      // presumably the error limit used in relaxed mode -- confirm
+  int             ftz;                // FTZ_ON / FTZ_OFF flag from the table
+  int             relaxed;            // RELAXED_ON / RELAXED_OFF flag from the table
+  const vtbl      *vtbl_ptr;          // dispatch record providing the test routines for this entry
+}Func;
+
+
+extern const Func  functionList[];
+
+extern const size_t functionListCount;
+
+#endif
+
+

diff --git a/test_conformance/math_brute_force/Sleep.cpp b/test_conformance/math_brute_force/Sleep.cpp
new file mode 100644
index 0000000..4d3b2c6
--- /dev/null
+++ b/test_conformance/math_brute_force/Sleep.cpp

@@ -0,0 +1,118 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Sleep.h"
+#include "Utility.h"
+
+#if defined( __APPLE__ )
+    #include <IOKit/pwr_mgt/IOPMLib.h>
+    #include <IOKit/IOMessage.h>
+
+    // State shared between PreventSleep(), ResumeSleep() and sleepCallback():
+    // the values produced by IORegisterForSystemPower().
+    struct
+    {
+        io_connect_t            connection;   // return value of IORegisterForSystemPower(); 0 means registration failed
+        IONotificationPortRef    port;         // notification port filled in by IORegisterForSystemPower()
+        io_object_t                iterator;     // notifier object; passed to IODeregisterForSystemPower()
+    }sleepInfo;
+
+    // Power-notification callback registered by PreventSleep().
+    void sleepCallback(    void *            refcon,
+                        io_service_t        service,
+                        natural_t        messageType,
+                        void *            messageArgument );
+
+    // Handles system power messages: allows a forced sleep (and logs it),
+    // vetoes an idle sleep request, and ignores wake-up notifications.
+    void sleepCallback(    void *            refcon UNUSED,
+                        io_service_t        service UNUSED,
+                        natural_t        messageType,
+                        void *            messageArgument )
+    {
+
+        IOReturn result;
+    /*
+    service -- The IOService whose state has changed.
+    messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the IOService's family.
+    messageArgument -- An argument for the message, dependent on the messageType.
+    */
+        switch ( messageType )
+        {
+            case kIOMessageSystemWillSleep:
+                // Handle demand sleep (such as sleep caused by running out of
+                // batteries, closing the lid of a laptop, or selecting
+                // sleep from the Apple menu). This cannot be vetoed, so
+                // acknowledge it and log that it happened.
+                IOAllowPowerChange(sleepInfo.connection,(long)messageArgument);
+                vlog( "Hard sleep occurred.\n" );
+                break;
+            case kIOMessageCanSystemSleep:
+                // In this case, the computer has been idle for several minutes
+                // and will sleep soon so you must either allow or cancel
+                // this notification. Important: if you don't respond, there will
+                // be a 30-second timeout before the computer sleeps.
+                // Here the request is cancelled so the test run is not interrupted.
+                result = IOCancelPowerChange(sleepInfo.connection,(long)messageArgument);
+                if( kIOReturnSuccess != result )
+                    vlog( "sleep prevention failed. (%d)\n", result);
+            break;
+            case kIOMessageSystemHasPoweredOn:
+                // Handle wakeup.
+                break;
+        }
+    }
+#endif
+
+
+
+
+
+// Registers for system power notifications so that idle sleep requests can be
+// vetoed by sleepCallback(). On Apple platforms the notification port is added
+// to the current run loop; on other platforms this only logs a message.
+// If registration fails, the failure is logged and nothing is added to the
+// run loop.
+void PreventSleep( void )
+{
+#if defined( __APPLE__ )
+    vlog( "Disabling sleep... " );
+    sleepInfo.iterator = (io_object_t) 0;
+    sleepInfo.port = NULL;
+    sleepInfo.connection = IORegisterForSystemPower
+                            (
+                                &sleepInfo,                    //void * refcon,
+                                &sleepInfo.port,            //IONotificationPortRef * thePortRef,
+                                sleepCallback,                //IOServiceInterestCallback callback,
+                                &sleepInfo.iterator            //io_object_t * notifier
+                            );
+
+    if( (io_connect_t) 0 == sleepInfo.connection )
+    {
+        // Registration failed: sleepInfo.port may still be NULL, so it must
+        // not be handed to IONotificationPortGetRunLoopSource().
+        vlog( "failed.\n" );
+        return;
+    }
+
+    vlog( "done.\n" );
+
+    CFRunLoopAddSource(CFRunLoopGetCurrent(),
+                        IONotificationPortGetRunLoopSource(sleepInfo.port),
+                        kCFRunLoopDefaultMode);
+#else
+    vlog( "*** PreventSleep() is not implemented on this platform.\n" );
+#endif
+}
+
+// Deregisters the power notifier stored in sleepInfo.iterator and logs the
+// outcome. On non-Apple platforms this only logs a message.
+void ResumeSleep( void )
+{
+#if defined( __APPLE__ )
+    const IOReturn status = IODeregisterForSystemPower ( &sleepInfo.iterator );
+    if( status == 0 )
+        vlog( "Sleep restored.\n" );
+    else
+        vlog( "Got error %d restoring sleep \n", status );
+#else
+    vlog( "*** ResumeSleep() is not implemented on this platform.\n" );
+#endif
+}
+
+
+

diff --git a/test_conformance/math_brute_force/Sleep.h b/test_conformance/math_brute_force/Sleep.h
new file mode 100644
index 0000000..f983a32
--- /dev/null
+++ b/test_conformance/math_brute_force/Sleep.h

@@ -0,0 +1,24 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef SLEEP_H
+#define SLEEP_H
+
+// Ask the OS not to put the machine to sleep while tests run (implemented for Apple platforms only;
+// elsewhere it just logs that it is not implemented).
+void PreventSleep( void );
+// Undo PreventSleep() (implemented for Apple platforms only).
+void ResumeSleep( void );
+
+#endif /* SLEEP_H */
+
+

diff --git a/test_conformance/math_brute_force/Utility.cpp b/test_conformance/math_brute_force/Utility.cpp
new file mode 100644
index 0000000..3c706fa
--- /dev/null
+++ b/test_conformance/math_brute_force/Utility.cpp

@@ -0,0 +1,169 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#if defined(__PPC__)
+// Global variable used to hold the FPU control register state. The FPSCR register cannot
+// be used because not all Power implementations retain or observe the NI (non-IEEE
+// mode) bit.
+__thread fpu_control_t fpu_control = 0;
+#endif
+
+// Multiplies u by v and returns the result as a pair of doubles:
+//   *rhi = u*v as rounded by the hardware,
+//   *rlo = a correction term computed from split halves of u and v.
+// NOTE(review): this looks like the classic Veltkamp split / Dekker product,
+// in which case rhi + rlo represents the product exactly (barring
+// overflow/underflow) -- confirm. The order of operations below is
+// significant and must not be rearranged.
+void MulD(double *rhi, double *rlo, double u, double v)
+{
+    const double c = 134217729.0; // 1+2^27
+    double up, u1, u2, vp, v1, v2;
+
+    // split u into a high part u1 and a low part u2 (u == u1 + u2)
+    up = u*c;
+    u1 = (u - up) + up;
+    u2 = u - u1;
+
+    // split v the same way
+    vp = v*c;
+    v1 = (v - vp) + vp;
+    v2 = v - v1;
+
+    double rh = u*v;
+    // accumulate the partial products against the rounded product to obtain the low word
+    double rl = (((u1*v1 - rh) + (u1*v2)) + (u2*v1)) + (u2*v2);
+
+    *rhi = rh;
+    *rlo = rl;
+}
+
+// Adds a and b and returns the result as a pair of doubles:
+//   *rhi = a + b as rounded by the hardware,
+//   *rlo = other - (sum - dominant), where "dominant" is a when |a| > |b|
+//          and b otherwise, and "other" is the remaining operand.
+void AddD(double *rhi, double *rlo, double a, double b)
+{
+    const double sum = a + b;
+
+    // Pick the operand of larger magnitude; ties and NaN comparisons select b,
+    // exactly as the strict '>' test dictates.
+    const int aDominates = fabs(a) > fabs(b);
+    const double dominant = aDominates ? a : b;
+    const double other    = aDominates ? b : a;
+
+    const double recovered = sum - dominant;
+
+    *rhi = sum;
+    *rlo = other - recovered;
+}
+
+// Multiplies two double-double values (xh + xl) and (yh + yl) and returns the
+// product as a pair (*rhi, *rlo). The high words are multiplied with the same
+// split-and-correct scheme as MulD(); the cross terms xh*yl and xl*yh are
+// added to the low word, and the xl*yl term is not included.
+// The order of operations below is significant and must not be rearranged.
+void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
+{
+    double mh, ml;
+    double c = 134217729.0;    // 1+2^27, splitting constant
+    double up, u1, u2, vp, v1, v2;
+
+    // split xh into u1 + u2
+    up = xh*c;
+    u1 = (xh - up) + up;
+    u2 = xh - u1;
+
+    // split yh into v1 + v2
+    vp = yh*c;
+    v1 = (yh - vp) + vp;
+    v2 = yh - v1;
+
+    mh = xh*yh;
+    ml = (((u1*v1 - mh) + (u1*v2)) + (u2*v1)) + (u2*v2);
+    ml += xh*yl + xl*yh;    // cross terms
+
+    // renormalize so that the high word carries the rounded sum
+    *rhi = mh + ml;
+    *rlo = (mh - (*rhi)) + ml;
+}
+
+// Adds two double-double values (xh + xl) and (yh + yl) and returns the sum as
+// a pair (*rhi, *rlo). The high words are added first; the rounding residue
+// and both low words are then accumulated, starting from whichever high word
+// has the larger magnitude.
+void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
+{
+    const double highSum = xh + yh;
+    double tail;
+
+    if (fabs(xh) > fabs(yh))
+    {
+        tail = xh - highSum + yh + yl + xl;
+    }
+    else
+    {
+        tail = yh - highSum + xh + xl + yl;
+    }
+
+    *rhi = highSum + tail;
+    // read the stored high word back so the low word is relative to the value actually written
+    *rlo = (highSum - (*rhi)) + tail;
+}
+
+// Divides a by b and returns the quotient as a pair of doubles:
+//   *chi = a / b as rounded by the hardware,
+//   *clo = (a - (*chi) * b) / b, where the product is formed with MulD() and
+//          the subtraction with AddDD().
+void DivideDD(double *chi, double *clo, double a, double b)
+{
+    double residueHi, residueLo;
+
+    *chi = a / b;
+
+    // residue = (*chi) * b, as a double-double
+    MulD(&residueHi, &residueLo, *chi, b);
+    // residue = a - residue
+    AddDD(&residueHi, &residueLo, -residueHi, -residueLo, a, 0.0);
+
+    *clo = residueHi / b;
+}
+
+// These functions compare two floats/doubles. Since some platforms may choose to
+// flush denormals to zero before comparison, a comparison like a < b may give the wrong
+// result in certain cases where we do need a correct comparison result when the operands
+// are denormals. These functions compare floats/doubles using their signed integer/long int
+// representation. In other cases, when flushing to zero is fine, these should not be used.
+// Also, these functions don't check for NaNs; they assume NaNs are handled separately as a
+// special edge case by the caller.
+// Return 0 if both are equal, 1 if x > y and -1 if x < y.
+
+// Compares two floats through their 32-bit integer representation, so the
+// result does not depend on how the host treats denormals.
+// Returns 0 if x == y (including -0.0f vs +0.0f), 1 if x > y, -1 if x < y.
+// NaNs are not handled; callers must filter them out first.
+//
+// Deliberately NOT declared inline: Utility.h declares this function as an
+// ordinary external function for use from other translation units, so an
+// out-of-line definition must be emitted here.
+int compareFloats(float x, float y)
+{
+    int32f_t a, b;
+
+    a.f = x;
+    b.f = y;
+
+    // Negative values: remap sign-magnitude bits so that integer order matches
+    // floating-point order (0x80000000, i.e. -0.0f, maps to 0).
+    if( a.i & 0x80000000 )
+        a.i = 0x80000000 - a.i;
+    if( b.i & 0x80000000 )
+        b.i = 0x80000000 - b.i;
+
+    if( a.i == b.i )
+        return 0;
+
+    return a.i < b.i ? -1 : 1;
+}
+
+// Compares two doubles through their 64-bit integer representation, so the
+// result does not depend on how the host treats denormals.
+// Returns 0 if x == y (including -0.0 vs +0.0), 1 if x > y, -1 if x < y.
+// NaNs are not handled; callers must filter them out first.
+//
+// Deliberately NOT declared inline: Utility.h declares this function as an
+// ordinary external function for use from other translation units, so an
+// out-of-line definition must be emitted here.
+int compareDoubles(double x, double y)
+{
+    int64d_t a, b;
+
+    a.d = x;
+    b.d = y;
+
+    // Negative values: remap sign-magnitude bits so that integer order matches
+    // floating-point order (the bit pattern of -0.0 maps to 0).
+    if( a.l & 0x8000000000000000LL )
+        a.l = 0x8000000000000000LL - a.l;
+    if( b.l & 0x8000000000000000LL )
+        b.l = 0x8000000000000000LL - b.l;
+
+    if( a.l == b.l )
+        return 0;
+
+    return a.l < b.l ? -1 : 1;
+}
+
+// Logs a one-line summary for a function under test: its name, the
+// floating-point width ("fp64", "fp32" or "fp16", selected by float_size in
+// bytes) and "rlx" when isFastRelaxed is non-zero.
+//   fname         - function name to print
+//   float_size    - sizeof() of the element type being tested
+//   isFastRelaxed - non-zero when the relaxed variant is being tested
+// An unrecognized float_size is printed as "fp??" rather than passing a null
+// pointer to the %s conversion.
+void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed)
+{
+    char const *fpSizeStr = "fp??";
+    char const *fpFastRelaxedStr = "";
+    switch (float_size) {
+    case sizeof(cl_double):
+        fpSizeStr = "fp64";
+        break;
+    case sizeof(cl_float):
+        fpSizeStr = "fp32";
+        break;
+    case sizeof(cl_half):
+        fpSizeStr = "fp16";
+        break;
+    default:
+        // keep the placeholder
+        break;
+    }
+    if (isFastRelaxed) {
+        fpFastRelaxedStr = "rlx";
+    }
+    vlog("%15s %4s %4s",fname, fpSizeStr, fpFastRelaxedStr);
+}
+

diff --git a/test_conformance/math_brute_force/Utility.h b/test_conformance/math_brute_force/Utility.h
new file mode 100644
index 0000000..9c14910
--- /dev/null
+++ b/test_conformance/math_brute_force/Utility.h

@@ -0,0 +1,254 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef UTILITY_H
+#define UTILITY_H
+
+#include "harness/compat.h"
+
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/opencl.h>
+#endif
+#include <stdio.h>
+#include "harness/rounding_mode.h"
+#include "harness/fpcontrol.h"
+#include "harness/testHarness.h"
+#include "harness/ThreadPool.h"
+#define BUFFER_SIZE         (1024*1024*2)
+
+#if defined( __GNUC__ )
+    #define UNUSED  __attribute__ ((unused))
+#else
+    #define UNUSED
+#endif
+
+extern int gWimpyBufferSize;
+extern int gWimpyReductionFactor;
+
+#define VECTOR_SIZE_COUNT   6
+extern const char *sizeNames[VECTOR_SIZE_COUNT];
+extern const int   sizeValues[VECTOR_SIZE_COUNT];
+
+extern cl_device_id     gDevice;
+extern cl_context       gContext;
+extern cl_command_queue gQueue;
+extern void             *gIn;
+extern void             *gIn2;
+extern void             *gIn3;
+extern void             *gOut_Ref;
+extern void             *gOut_Ref2;
+extern void             *gOut[VECTOR_SIZE_COUNT];
+extern void             *gOut2[VECTOR_SIZE_COUNT];
+extern cl_mem           gInBuffer;
+extern cl_mem           gInBuffer2;
+extern cl_mem           gInBuffer3;
+extern cl_mem           gOutBuffer[VECTOR_SIZE_COUNT];
+extern cl_mem           gOutBuffer2[VECTOR_SIZE_COUNT];
+extern uint32_t         gComputeDevices;
+extern uint32_t         gSimdSize;
+extern int              gSkipCorrectnessTesting;
+extern int              gMeasureTimes;
+extern int              gReportAverageTimes;
+extern int              gForceFTZ;
+extern volatile int     gTestFastRelaxed;
+extern int              gFastRelaxedDerived;
+extern int              gWimpyMode;
+extern int              gHasDouble;
+extern int              gIsInRTZMode;
+extern int              gInfNanSupport;
+extern int              gIsEmbedded;
+extern int              gVerboseBruteForce;
+extern uint32_t         gMaxVectorSizeIndex;
+extern uint32_t         gMinVectorSizeIndex;
+extern uint32_t         gDeviceFrequency;
+extern cl_device_fp_config gFloatCapabilities;
+extern cl_device_fp_config gDoubleCapabilities;
+
+#define LOWER_IS_BETTER     0
+#define HIGHER_IS_BETTER    1
+
+#include "harness/errorHelpers.h"
+
+#if defined (_MSC_VER )
+    //Deal with missing scalbn on windows
+    #define scalbnf( _a, _i )       ldexpf( _a, _i )
+    #define scalbn( _a, _i )        ldexp( _a, _i )
+    #define scalbnl( _a, _i )       ldexpl( _a, _i )
+#endif
+
+float Abs_Error( float test, double reference );
+float Ulp_Error( float test, double reference );
+//float Ulp_Error_Half( float test, double reference );
+float Bruteforce_Ulp_Error_Double( double test, long double reference );
+
+uint64_t GetTime( void );
+double SubtractTime( uint64_t endTime, uint64_t startTime );
+int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p );
+int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+
+// Expands a 32-bit pattern into a 64-bit double bit pattern, used to turn a
+// bucket of bits into a search pattern through the double range.
+static inline double DoubleFromUInt32( uint32_t bits );
+static inline double DoubleFromUInt32( uint32_t bits )
+{
+    union{ uint64_t raw; double value;} pun;
+
+    // 0x89abcdef is spread out to 0x89abc00000000def: the top 20 bits land in
+    // the top of the 64-bit word, the bottom 12 bits stay at the bottom.
+    const uint64_t lowPart  = bits & 0xfffU;
+    const uint64_t highPart = (uint64_t) (bits & ~0xfffU) << 32;
+
+    // Subtracting 0x1000 when bit 11 is set sign-extends the low segment, so
+    // the middle region ends up as either all 1s or all 0s.
+    const uint64_t signExtend = (bits & 0x800U) << 1;
+
+    pun.raw = (lowPart | highPart) - signExtend;
+
+    return pun.value;
+}
+
+void _LogBuildError( cl_program p, int line, const char *file );
+#define LogBuildError( program )        _LogBuildError( program, __LINE__, __FILE__ )
+
+#define PERF_LOOP_COUNT 100
+
+// Returns non-zero when x, converted to float, is a non-zero subnormal.
+// Note: the parameter is a double, but this is meant for single precision tests.
+static inline int IsFloatSubnormal( double x )
+{
+#if 2 == FLT_RADIX
+    // Inspect the bit pattern instead of comparing floats, so flush-to-zero
+    // behavior of the host cannot affect the answer.
+    union{ float value; uint32_t bits;} pun;
+    pun.value = fabsf((float)x);
+    // bits - 1 wraps to 0xffffffff for zero, which excludes it from the range check.
+    const uint32_t shifted = pun.bits - 1;
+    return shifted < 0x007fffffU;
+#else
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return x != 0.0 && fabs(x) < (double) FLT_MIN;
+#endif
+}
+
+
+// Returns non-zero when x, converted to double, is a non-zero subnormal.
+static inline int IsDoubleSubnormal( long double x )
+{
+#if 2 == FLT_RADIX
+    // Inspect the bit pattern instead of comparing doubles, so flush-to-zero
+    // behavior of the host cannot affect the answer.
+    union{ double value; uint64_t bits;} pun;
+    pun.value = fabs((double) x);
+    // bits - 1 wraps to all ones for zero, which excludes it from the range check.
+    const uint64_t shifted = pun.bits - 1;
+    return shifted < 0x000fffffffffffffULL;
+#else
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return x != 0.0 && fabs(x) < (double) DBL_MIN;
+#endif
+}
+
+// The spec is fairly clear that we may enforce a hard cutoff to prevent premature flushing to zero.
+// However, to avoid conflict for 1.0, results up to FLT_MIN + ulp_limit are allowed to be flushed to zero.
+// Returns non-zero when |x|, reduced by `ulps` units of 2^-149, is below 2^-126.
+static inline int IsFloatResultSubnormal( double x, float ulps )
+{
+    const double reduced = fabs(x) - MAKE_HEX_DOUBLE( 0x1.0p-149, 0x1, -149) * (double) ulps;
+    const double smallestNormal = MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 );
+    return reduced < smallestNormal;
+}
+
+// Absolute-error variant: returns non-zero when x - abs_err is below 2^-126.
+// NOTE(review): unlike IsFloatResultSubnormal(), x is used as-is (no fabs),
+// so every negative x satisfies the test -- confirm this is intended.
+static inline int IsFloatResultSubnormalAbsError( double x , float abs_err)
+{
+  const double reduced = x - abs_err;
+  const double smallestNormal = MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 );
+  return reduced < smallestNormal;
+}
+
+// Double precision counterpart of IsFloatResultSubnormal(): returns non-zero
+// when |x|, reduced by `ulps` units of 2^-1074, is below 2^-1022.
+static inline int IsDoubleResultSubnormal( long double x, float ulps )
+{
+    const long double reduced = fabsl(x) - MAKE_HEX_LONG( 0x1.0p-1074, 0x1, -1074) * (long double) ulps;
+    const long double smallestNormal = MAKE_HEX_LONG( 0x1.0p-1022, 0x1, -1022 );
+    return reduced < smallestNormal;
+}
+
+// Returns non-zero when x, converted to cl_float, is +infinity or -infinity.
+static inline int IsFloatInfinity(double x)
+{
+  union { cl_float value; cl_uint bits; } pun;
+  pun.value = (cl_float) x;
+  // drop the sign bit, then compare with the infinity encoding
+  const cl_uint magnitude = pun.bits & 0x7fffffffU;
+  return (magnitude == 0x7F800000U);
+}
+
+// Returns non-zero when x, converted to cl_float, is the largest finite float
+// of either sign (bit pattern 0x7F7FFFFF after masking the sign).
+static inline int IsFloatMaxFloat(double x)
+{
+  union { cl_float value; cl_uint bits; } pun;
+  pun.value = (cl_float) x;
+  // drop the sign bit, then compare with the max-finite encoding
+  const cl_uint magnitude = pun.bits & 0x7fffffffU;
+  return (magnitude == 0x7F7FFFFFU);
+}
+
+// Returns non-zero when x, converted to cl_float, is a NaN (any bit pattern
+// above the infinity encoding once the sign is masked off).
+static inline int IsFloatNaN(double x)
+{
+  union { cl_float value; cl_uint bits; } pun;
+  pun.value = (cl_float) x;
+  // drop the sign bit, then compare against the infinity encoding
+  const cl_uint magnitude = pun.bits & 0x7fffffffU;
+  return (magnitude > 0x7F800000U);
+}
+
+extern cl_uint RoundUpToNextPowerOfTwo( cl_uint x );
+
+// Windows (since long double got deprecated) sets the x87 to 53-bit precision
+// (that's x87 default state).  This causes problems with the tests that
+// convert long and ulong to float and double or otherwise deal with values
+// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
+//
+// Only MinGW and the Intel compiler on Windows are handled; on every other
+// toolchain this function is a no-op.
+static inline void Force64BitFPUPrecision(void)
+{
+#if __MINGW32__
+    // The usual method is to use _controlfp as follows:
+    //     #include <float.h>
+    //     _controlfp(_PC_64, _MCW_PC);
+    //
+    // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
+    // divergent code just use inline assembly which works for both.
+    unsigned short int orig_cw = 0;
+    unsigned short int new_cw = 0;
+    // read the current x87 control word
+    __asm__ __volatile__ ("fstcw %0":"=m" (orig_cw));
+    new_cw = orig_cw | 0x0300;   // set precision to 64-bit
+    // write the modified control word back
+    __asm__ __volatile__ ("fldcw  %0"::"m" (new_cw));
+#elif defined( _WIN32 ) && defined( __INTEL_COMPILER )
+    // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not* work on win.x64:
+    // > On the x64 architecture, changing the floating point precision is not supported.
+    // (Taken from http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx)
+    int cw;
+    __asm { fnstcw cw };    // Get current value of FPU control word.
+    cw = cw & 0xfffffcff | ( 3 << 8 ); // Set Precision Control to Double Extended Precision.
+    __asm { fldcw cw };     // Set new value of FPU control word.
+#else
+    /* Implement for other platforms if needed */
+#endif
+}
+
+extern
+void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
+
+// Overlays a float with a signed 32-bit integer so the float's bit pattern can
+// be read and compared as an integer (used by compareFloats).
+typedef union
+{
+    int32_t i;
+    float   f;
+}int32f_t;
+
+// Overlays a double with a signed 64-bit integer so the double's bit pattern
+// can be read and compared as an integer (used by compareDoubles).
+typedef union
+{
+    int64_t l;
+    double  d;
+}int64d_t;
+
+void MulD(double *rhi, double *rlo, double u, double v);
+void AddD(double *rhi, double *rlo, double a, double b);
+void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
+void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
+void DivideDD(double *chi, double *clo, double a, double b);
+int compareFloats(float x, float y);
+int compareDoubles(double x, double y);
+
+void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed);
+
+#endif /* UTILITY_H */
+
+

diff --git a/test_conformance/math_brute_force/binary.cpp b/test_conformance/math_brute_force/binary.cpp
new file mode 100644
index 0000000..4155a41
--- /dev/null
+++ b/test_conformance/math_brute_force/binary.cpp

@@ -0,0 +1,1556 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <string.h>
+#include "FunctionList.h"
+
+int TestFunc_Float_Float_Float(const Func *f, MTdata);
+int TestFunc_Double_Double_Double(const Func *f, MTdata);
+int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata);
+int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata);
+int TestFunc_Float_Float_Float_common(const Func *f, MTdata, int isNextafter);
+int TestFunc_Double_Double_Double_common(const Func *f, MTdata, int isNextafter);
+
+// 2^-126 as a float and 2^-1022 as a double, built with the MAKE_HEX_* helpers
+// (hex-float literal form first, then the mantissa/exponent pair).
+const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
+const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
+
+// Dispatch record for "binary" entries: float test routine, then double test routine.
+extern const vtbl _binary = { "binary", TestFunc_Float_Float_Float,
+                              TestFunc_Double_Double_Double };
+
+// Dispatch record for "binary_nextafter" entries: same layout as _binary, but
+// pointing at the nextafter-specific float and double test routines.
+extern const vtbl _binary_nextafter = {
+    "binary_nextafter", TestFunc_Float_Float_Float_nextafter,
+    TestFunc_Double_Double_Double_nextafter
+};
+
+// Assembles the OpenCL C source of a single-precision kernel named
+// "math_kernel<suffix>" that applies `name` to two input buffers, then hands
+// it to MakeKernels().
+//   name         - function name spliced into the kernel source
+//   vectorSize   - index into sizeNames[] / sizeValues[]
+//   kernel_count - forwarded to MakeKernels()
+//   k, p         - forwarded to MakeKernels() (kernel and program outputs)
+// Returns whatever MakeKernels() returns.
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    // Generic source: one work-item processes one (possibly vector) element.
+    const char *c[] = {     "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in1[i], in2[i] );\n"
+                            "}\n"
+                        };
+
+    // 3-element vector source: buffers are accessed as scalar arrays through
+    // vload3/vstore3, and the last work-item handles the leftover elements
+    // individually, padding the unused lanes with NAN.
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
+                            "       f0 = ", name, "( f0, f1 );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       float3 f0, f1;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( f0, f1 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+
+    // Default to the generic source; switch to the 3-element source when the
+    // selected vector width is 3.
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+
+    // Kernel entry-point name must match the name used in the source above.
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+}
+
+// Double-precision counterpart of BuildKernel(): assembles the OpenCL C source
+// of a kernel named "math_kernel<suffix>" that applies `name` to two double
+// input buffers (enabling cl_khr_fp64 first), then hands it to MakeKernels().
+//   name         - function name spliced into the kernel source
+//   vectorSize   - index into sizeNames[] / sizeValues[]
+//   kernel_count - forwarded to MakeKernels()
+//   k, p         - forwarded to MakeKernels() (kernel and program outputs)
+// Returns whatever MakeKernels() returns.
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    // Generic source: one work-item processes one (possibly vector) element.
+    const char *c[] = {     "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in1[i], in2[i] );\n"
+                            "}\n"
+                        };
+
+    // 3-element vector source: buffers are accessed as scalar arrays through
+    // vload3/vstore3, and the last work-item handles the leftover elements
+    // individually, padding the unused lanes with NAN.
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       double3 d0 = vload3( 0, in + 3 * i );\n"
+                            "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
+                            "       d0 = ", name, "( d0, d1 );\n"
+                            "       vstore3( d0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       double3 d0, d1;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+                            "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       d0 = ", name, "( d0, d1 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = d0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = d0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+
+    // Default to the generic source; switch to the 3-element source when the
+    // selected vector width is 3.
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+
+    // Kernel entry-point name must match the name used in the source above.
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+}
+
+// A table of more difficult cases to get right: NaN, infinities, FLT_MAX, values at and adjacent to 2^64, 2^63, 2^32 and 2^31, a few ordinary magnitudes, small integers and half-integers with their neighbours, powers of two down to 0.25, FLT_MIN, subnormals and zero. The negative half comes first, followed by its positive mirror.
+static const float specialValuesFloat[] = {
+    -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),  MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f,  -4.0f, -3.5f,
+    -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),  MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
+    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
+
+    +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
+    +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
+    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
+};
+
+static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] );   // number of table entries; TestFloat pairs every entry with every entry (count * count edge cases)
+
+typedef struct BuildKernelInfo     // argument block handed (as void*) to the BuildKernel_*Fn thread-pool jobs
+{
+    cl_uint     offset;            // the first vector size to build
+    cl_uint     kernel_count;      // number of kernels to create per vector size (the caller passes its worker thread count)
+    cl_kernel   **kernels;         // kernels[vector_size] is the array handed to BuildKernel for that vector size
+    cl_program  *programs;         // programs + vector_size is the program slot handed to BuildKernel for that vector size
+    const char  *nameInCode;       // function name forwarded to BuildKernel / BuildKernelDouble
+}BuildKernelInfo;
+
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+// Thread-pool job: builds the float kernels for one vector size.
+// job_id selects the vector size relative to BuildKernelInfo::offset; p points at
+// the BuildKernelInfo.  Returns whatever BuildKernel returns.
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *buildInfo = (BuildKernelInfo*) p;
+    cl_uint vectorIndex = buildInfo->offset + job_id;
+    cl_kernel *kernelsForSize = buildInfo->kernels[vectorIndex];
+    cl_program *programSlot = buildInfo->programs + vectorIndex;
+
+    return BuildKernel( buildInfo->nameInCode, vectorIndex, buildInfo->kernel_count, kernelsForSize, programSlot );
+}
+
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+// Thread-pool job: builds the double kernels for one vector size.
+// job_id selects the vector size relative to BuildKernelInfo::offset; p points at
+// the BuildKernelInfo.  Returns whatever BuildKernelDouble returns.
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *buildInfo = (BuildKernelInfo*) p;
+    cl_uint vectorIndex = buildInfo->offset + job_id;
+    cl_kernel *kernelsForSize = buildInfo->kernels[vectorIndex];
+    cl_program *programSlot = buildInfo->programs + vectorIndex;
+
+    return BuildKernelDouble( buildInfo->nameInCode, vectorIndex, buildInfo->kernel_count, kernelsForSize, programSlot );
+}
+
+// Thread-specific data for a worker thread; TestInfo::tinfo holds one ThreadInfo per worker, indexed by thread_id
+typedef struct ThreadInfo
+{
+    cl_mem      inBuf;                              // first input buffer for the thread (sub-buffer of gInBuffer)
+    cl_mem      inBuf2;                             // second input buffer for the thread (sub-buffer of gInBuffer2)
+    cl_mem      outBuf[ VECTOR_SIZE_COUNT ];        // output buffers for the thread, one per vector size (sub-buffers of gOutBuffer[])
+    float       maxError;                           // max error value. Init to 0.
+    double      maxErrorValue;                      // position of the max error value (param 1).  Init to 0.
+    double      maxErrorValue2;                     // position of the max error value (param 2).  Init to 0.
+    MTdata      d;                                  // per-thread random number generator state, seeded from the caller's generator
+    cl_command_queue tQueue;                        // per thread command queue to improve performance
+}ThreadInfo;
+
+typedef struct TestInfo    // description of one test run; a pointer to it is passed to every TestFloat job
+{
+    size_t      subBufferSize;                      // Size of the sub-buffer in elements
+    const Func  *f;                                 // A pointer to the function info
+    cl_program  programs[ VECTOR_SIZE_COUNT ];      // programs for various vector sizes
+    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
+    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
+    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
+    cl_uint     step;                               // step between each chunk and the next.
+    cl_uint     scale;                              // stride between individual test values
+    float       ulps;                               // max_allowed ulps
+    int         ftz;                                // non-zero if running in flush to zero mode
+
+    int         isFDim;                             // non-zero when the function under test is "fdim"
+    int         skipNanInf;                         // non-zero when testing fdim and gInfNanSupport is not set
+    int         isNextafter;                        // caller-supplied flag; non-zero selects the nextafter-specific FTZ handling in TestFloat
+}TestInfo;
+
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );   // thread-pool job that tests one chunk of inputs; p is the TestInfo. Defined below.
+
+// Driver for the brute-force test of a two-argument float function,
+// f(float, float) -> float.
+//
+// Sets up the per-thread state (sub-buffers of the global input/output buffers,
+// a command queue, a random generator and one cl_kernel per vector size), builds
+// the kernels, runs the TestFloat jobs on the thread pool and, when gMeasureTimes
+// is set, times each vector size on the global queue.
+//
+// Parameters:
+//   f            the function under test (names, ulp limits, ftz flag)
+//   d            random generator used to seed the per-thread generators and to
+//                fill the timing inputs
+//   isNextafter  stored in TestInfo for TestFloat; non-zero selects the
+//                nextafter-specific FTZ handling there
+//
+// Returns CL_SUCCESS (0) when everything succeeded, otherwise the first error
+// code encountered.  Every failure path goes through the cleanup at `exit`, so
+// kernels, programs, sub-buffers, queues and host allocations are always released.
+int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
+{
+    TestInfo    test_info;
+    cl_int      error = CL_SUCCESS;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    double      maxErrorVal2 = 0.0;
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+
+    // Init test_info
+    // Zeroing first matters: the cleanup code at `exit` relies on NULL pointers /
+    // handles for everything that has not been created yet.
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = 1;
+
+    if (gWimpyMode){
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    // Dividing back detects whether the 32-bit multiplication above wrapped.
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        // Number of step-sized chunks in a 32-bit range
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+
+    test_info.isFDim = 0 == strcmp( "fdim", f->nameInCode );
+    test_info.skipNanInf = test_info.isFDim  && ! gInfNanSupport;
+    test_info.isNextafter = isNextafter;
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        // Each worker thread gets its own slice of the global input/output buffers.
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf2 )
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer[%d] for region {%zd, %zd}\n", (int) j, region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+
+        // Seed the per-thread generator from the caller's generator.
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+
+    // Run the kernels
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
+
+        // Accumulate the arithmetic errors
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            if( test_info.tinfo[i].maxError > maxError )
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+
+        if( error )
+            goto exit;
+
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+        {
+            p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000;
+            p2[j] = 0x3fc00000;
+        }
+
+        // On failure go through `exit` (not a bare return) so that everything
+        // created above is released.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report either the average or the best of the timed runs.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
+    vlog( "\n" );
+
+
+exit:
+    // Common cleanup for both the success path and every failure path.
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata( test_info.tinfo[i].d );
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data  )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_float );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    float       ulps = job->ulps;
+    fptr        func = job->f->func;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    cl_uchar    *overflow = (cl_uchar*)malloc(buffer_size);
+    const char  *name = job->f->name;
+    int         isFDim = job->isFDim;
+    int         skipNanInf = job->skipNanInf;
+    int         isNextafter = job->isNextafter;
+    cl_uint     *t = 0;
+    float       *r=0,*s=0,*s2=0;
+    cl_int copysign_test = 0;
+    RoundingMode oldRoundMode;
+    int skipVerification = 0;
+
+    if(gTestFastRelaxed)
+    {
+      if (strcmp(name,"pow")==0 && gFastRelaxedDerived)
+      {
+        func = job->f->rfunc;
+        ulps = INFINITY;
+        skipVerification = 1;
+      }else
+      {
+        func = job->f->rfunc;
+        ulps = job->f->relaxed_error;
+      }
+    }
+
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_uint  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_uint*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    //Init input array
+    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
+    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+
+    int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+
+    if (job_id <= (cl_uint)indx)
+    { // test edge cases
+        float *fp = (float *)p;
+        float *fp2 = (float *)p2;
+        uint32_t x, y;
+
+    x = (job_id * buffer_elements) % specialValuesFloatCount;
+    y = (job_id * buffer_elements) / specialValuesFloatCount;
+
+        for( ; j < buffer_elements; j++ )
+        {
+            fp[j] = specialValuesFloat[x];
+            fp2[j] = specialValuesFloat[y];
+            if( ++x >= specialValuesFloatCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesFloatCount )
+                    break;
+            }
+        }
+    }
+
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = genrand_int32(d);
+        p2[j] = genrand_int32(d);
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+    {
+        if( (error = clFinish(tinfo->tQueue)) )
+        {
+          vlog_error( "Error: clFinish failed! err: %d\n", error );
+          goto exit;
+        }
+        free(overflow);
+        return CL_SUCCESS;
+    }
+
+    FPU_mode_type oldMode;
+    oldRoundMode = kRoundToNearestEven;
+    if( isFDim )
+    {
+        //Calculate the correctly rounded reference result
+        memset( &oldMode, 0, sizeof( oldMode ) );
+        if( ftz )
+            ForceFTZ( &oldMode );
+
+        // Set the rounding mode to match the device
+        if (gIsInRTZMode)
+            oldRoundMode = set_round(kRoundTowardZero, kfloat);
+    }
+
+    if(!strcmp(name, "copysign"))
+        copysign_test = 1;
+
+#define ref_func(s, s2) (copysign_test ? func.f_ff_f( s, s2 ) : func.f_ff( s, s2 ))
+
+    //Calculate the correctly rounded reference result
+    r = (float *)gOut_Ref  + thread_id * buffer_elements;
+    s = (float *)gIn  + thread_id * buffer_elements;
+    s2 = (float *)gIn2  + thread_id * buffer_elements;
+    if( skipNanInf )
+    {
+        for( j = 0; j < buffer_elements; j++ )
+        {
+            feclearexcept(FE_OVERFLOW);
+            r[j] = (float) ref_func( s[j], s2[j] );
+            overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+        }
+    }
+    else
+    {
+        for( j = 0; j < buffer_elements; j++ )
+            r[j] = (float) ref_func( s[j], s2[j] );
+    }
+
+    if( isFDim && ftz )
+        RestoreFPState( &oldMode );
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_uint*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+
+    // Wait for the last buffer
+    out[j] = (cl_uint*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+
+    if (!skipVerification) {
+        //Verify data
+        t = (cl_uint *)r;
+        for( j = 0; j < buffer_elements; j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                cl_uint *q = out[k];
+
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    float test = ((float*) q)[j];
+                    double correct = ref_func( s[j], s2[j] );
+
+                    // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                    // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables
+                    // -cl-finite-math-only optimization. This optimization allows to assume that arguments and
+                    // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs.
+                    if ( gTestFastRelaxed || skipNanInf)
+                    {
+                        if( skipNanInf && overflow[j])
+                            continue;
+                        // Note: no double rounding here.  Reference functions calculate in single precision.
+                        if( IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                            IsFloatInfinity(s2[j])   || IsFloatNaN(s2[j])       ||
+                            IsFloatInfinity(s[j])    || IsFloatNaN(s[j])        )
+                            continue;
+                    }
+
+                    float err = Ulp_Error( test, correct );
+                    int fail = ! (fabsf(err) <= ulps);
+
+                    if( fail && ftz )
+                    {
+                        // retry per section 6.5.3.2
+                        if( IsFloatResultSubnormal(correct, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+
+                        // nextafter on FTZ platforms may return the smallest
+                        // normal float (2^-126) given a denormal or a zero
+                        // as the first argument. The rationale here is that
+                        // nextafter flushes the argument to zero and then
+                        // returns the next representable number in the
+                        // direction of the second argument, and since
+                        // denorms are considered as zero, the smallest
+                        // normal number is the next representable number.
+                        // In which case, it should have the same sign as the
+                        // second argument.
+                        if (isNextafter )
+                        {
+                            if(IsFloatSubnormal(s[j]) || s[j] == 0.0f)
+                            {
+                                float value = copysignf(twoToMinus126, s2[j]);
+                                fail = fail && (test != value);
+                                if (!fail)
+                                    err = 0.0f;
+                            }
+                        }
+                        else
+                        {
+                            // retry per section 6.5.3.3
+                            if( IsFloatSubnormal( s[j] ) )
+                            {
+                                double correct2, correct3;
+                                float err2, err3;
+
+                                if( skipNanInf )
+                                    feclearexcept(FE_OVERFLOW);
+
+                                correct2 = ref_func( 0.0, s2[j] );
+                                correct3 = ref_func( -0.0, s2[j] );
+
+                                // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                                // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables
+                                // -cl-finite-math-only optimization. This optimization allows to assume that arguments and
+                                // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs.
+                                if( gTestFastRelaxed || skipNanInf )
+                                {
+                                    if( fetestexcept(FE_OVERFLOW) && skipNanInf )
+                                        continue;
+
+                                    // Note: no double rounding here.  Reference functions calculate in single precision.
+                                    if( IsFloatInfinity(correct2) || IsFloatNaN(correct2)   ||
+                                        IsFloatInfinity(correct3) || IsFloatNaN(correct3)    )
+                                        continue;
+                                }
+
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+
+                                // retry per section 6.5.3.4
+                                if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+
+                                //try with both args as zero
+                                if( IsFloatSubnormal( s2[j] )  )
+                                {
+                                    double correct4, correct5;
+                                    float err4, err5;
+
+                                    if( skipNanInf )
+                                        feclearexcept(FE_OVERFLOW);
+
+                                    correct2 = ref_func( 0.0, 0.0 );
+                                    correct3 = ref_func( -0.0, 0.0 );
+                                    correct4 = ref_func( 0.0, -0.0 );
+                                    correct5 = ref_func( -0.0, -0.0 );
+
+                                    // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                                    // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables
+                                    // -cl-finite-math-only optimization. This optimization allows to assume that arguments and
+                                    // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs.
+                                    if( gTestFastRelaxed || skipNanInf )
+                                    {
+                                        if( fetestexcept(FE_OVERFLOW) && skipNanInf )
+                                            continue;
+
+                                        // Note: no double rounding here.  Reference functions calculate in single precision.
+                                        if( IsFloatInfinity(correct2) || IsFloatNaN(correct2)   ||
+                                            IsFloatInfinity(correct3) || IsFloatNaN(correct3)   ||
+                                            IsFloatInfinity(correct4) || IsFloatNaN(correct4)   ||
+                                            IsFloatInfinity(correct5) || IsFloatNaN(correct5)    )
+                                            continue;
+                                    }
+
+                                    err2 = Ulp_Error( test, correct2  );
+                                    err3 = Ulp_Error( test, correct3  );
+                                    err4 = Ulp_Error( test, correct4  );
+                                    err5 = Ulp_Error( test, correct5  );
+                                    fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
+                                                     (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
+                                    if( fabsf( err2 ) < fabsf(err ) )
+                                        err = err2;
+                                    if( fabsf( err3 ) < fabsf(err ) )
+                                        err = err3;
+                                    if( fabsf( err4 ) < fabsf(err ) )
+                                        err = err4;
+                                    if( fabsf( err5 ) < fabsf(err ) )
+                                        err = err5;
+
+                                    // retry per section 6.5.3.4
+                                    if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ||
+                                        IsFloatResultSubnormal( correct4, ulps ) || IsFloatResultSubnormal( correct5, ulps ) )
+                                    {
+                                        fail = fail && ( test != 0.0f);
+                                        if( ! fail )
+                                            err = 0.0f;
+                                    }
+                                }
+                            }
+                            else if(IsFloatSubnormal(s2[j]) )
+                            {
+                                double correct2, correct3;
+                                float err2, err3;
+
+                                if( skipNanInf )
+                                    feclearexcept(FE_OVERFLOW);
+
+                                correct2 = ref_func( s[j], 0.0 );
+                                correct3 = ref_func( s[j], -0.0 );
+
+                                // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                                // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables
+                                // -cl-finite-math-only optimization. This optimization allows to assume that arguments and
+                                // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs.
+                                if ( gTestFastRelaxed || skipNanInf )
+                                {
+                                    // Note: no double rounding here.  Reference functions calculate in single precision.
+                                    if( overflow[j] && skipNanInf)
+                                        continue;
+
+                                    if( IsFloatInfinity(correct2) || IsFloatNaN(correct2)   ||
+                                        IsFloatInfinity(correct3) || IsFloatNaN(correct3)    )
+                                        continue;
+                                }
+
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+
+                                // retry per section 6.5.3.4
+                                if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                    }
+
+                    if( fabsf(err ) > tinfo->maxError )
+                    {
+                        tinfo->maxError = fabsf(err);
+                        tinfo->maxErrorValue = s[j];
+                        tinfo->maxErrorValue2 = s2[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %s%s: %f ulp error at {%a (0x%x), %a (0x%x)}: *%a vs. %a (0x%8.8x) at index: %d\n", name, sizeNames[k], err, s[j], ((cl_uint*)s)[j], s2[j], ((cl_uint*)s2)[j], r[j], test, ((cl_uint*)&test)[0], j );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+    }
+
+    if (isFDim && gIsInRTZMode)
+        (void)set_round(oldRoundMode, kfloat);
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+
+
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements,  job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+
+
+exit:
+    if( overflow )
+        free( overflow );
+    return error;
+
+}
+
+
+// A table of more difficult cases to get right.
+// The first half lists negative values (NaN, infinity, values around powers of
+// two, values around small integers and halves, the normal/subnormal boundary,
+// the smallest subnormals and -0.0); the second half mirrors the same values
+// with a positive sign.
+static const double specialValuesDouble[] = {
+    -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100.,  -4.0, -3.5,
+    -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
+
+    +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100.,  +4.0, +3.5,
+    // The entry just above +1.0 must be positive here so that it mirrors the negative half.
+    +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
+};
+
+// Number of entries in specialValuesDouble.
+static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] );
+
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p );
+
+// Runs the brute-force test for a double-precision function of two double
+// arguments: sets up per-thread sub-buffers, queues and kernels, runs the
+// TestDouble jobs on the thread pool, optionally runs a timing pass, and
+// releases everything it created.
+//
+// Parameters:
+//   f           - function descriptor; f->name, f->nameInCode, f->double_ulps
+//                 and f->ftz are read here.
+//   d           - random number generator state, used to seed the per-thread
+//                 generators and to fill the inputs of the timing pass.
+//   isNextafter - stored in test_info.isNextafter for the TestDouble jobs.
+//
+// Returns the value of the last failing call (non-zero), or 0 when every
+// step succeeded.
+int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafter)
+{
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    double      maxErrorVal2 = 0.0;
+
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+
+    // Init test_info. Zeroing it first lets the cleanup code at "exit" run
+    // safely on a partially initialized structure.
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = 1;
+
+
+    if (gWimpyMode){
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = f->double_ulps;
+    test_info.ftz = f->ftz || gForceFTZ;
+
+    test_info.isFDim = 0 == strcmp( "fdim", f->nameInCode );
+    test_info.skipNanInf = 0;
+    test_info.isNextafter = isNextafter;
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        // Each thread gets its own, non-overlapping region of the global buffers.
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zu, %zu}\n", region.origin, region.size );
+            goto exit;
+        }
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        // Check the buffer that was just created (inBuf2), not inBuf again.
+        if( error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zu, %zu}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zu, %zu}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
+
+        // Accumulate the arithmetic errors (done even on failure so that the
+        // per-thread maxima are collected before bailing out)
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            if( test_info.tinfo[i].maxError > maxError )
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+
+        if( error )
+            goto exit;
+
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+
+        // On failure go through "exit" so that the kernels, programs,
+        // sub-buffers and queues created above are released.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        // Pads the output for vector sizes that were not timed. The loop above
+        // only ends normally once j has reached gMaxVectorSizeIndex, so this
+        // does not print anything as the code stands.
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+
+    if( ! gSkipCorrectnessTesting )
+       vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
+    vlog( "\n" );
+
+
+exit:
+    // Release
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata( test_info.tinfo[i].d );
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+
+// Thread-pool job that tests one sub-buffer worth of double-precision input
+// pairs for every enabled vector size.
+//
+// Parameters:
+//   job_id    - index of the job; the first jobs are filled with pairs taken
+//               from specialValuesDouble, later jobs use random 64-bit patterns.
+//   thread_id - index of the worker thread; selects the ThreadInfo entry, the
+//               per-thread kernel and the thread's slice of gIn/gIn2/gOut_Ref.
+//   data      - pointer to the shared, read-only TestInfo describing the run.
+//
+// Returns CL_SUCCESS (0) on success, the failing OpenCL error code, or -1 when
+// a result is outside the allowed ulp tolerance.
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_double );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    float       ulps = job->ulps;
+    dptr        func = job->f->dfunc;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    const char  *name = job->f->name;
+
+    int         isNextafter = job->isNextafter;
+    cl_ulong    *t;
+    cl_double   *r,*s,*s2;
+
+    Force64BitFPUPrecision();
+
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_ulong  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    //Init input array
+    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
+    cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    // Every pair (x, y) of special values is tested; indx is the last job that
+    // still contains at least one such pair.
+    size_t totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
+    size_t indx = (totalSpecialValueCount - 1) / buffer_elements;
+
+    if( job_id <= (cl_uint)indx )
+    { // test edge cases
+        cl_double *fp = (cl_double *)p;
+        cl_double *fp2 = (cl_double *)p2;
+        uint32_t x, y;
+
+        // Position of this job's first pair in the (y-major) pair enumeration.
+        x = (job_id * buffer_elements) % specialValuesDoubleCount;
+        y = (job_id * buffer_elements) / specialValuesDoubleCount;
+
+        for( ; j < buffer_elements; j++ )
+        {
+            fp[j] = specialValuesDouble[x];
+            fp2[j] = specialValuesDouble[y];
+            if( ++x >= specialValuesDoubleCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesDoubleCount )
+                    break;
+            }
+        }
+    }
+
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = genrand_int64(d);
+        p2[j] = genrand_int64(d);
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+
+    //Calculate the correctly rounded reference result
+    r = (cl_double *)gOut_Ref  + thread_id * buffer_elements;
+    s = (cl_double *)gIn  + thread_id * buffer_elements;
+    s2 = (cl_double *)gIn2  + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = (cl_double) func.f_ff( s[j], s2[j] );
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+
+    // Wait for the last buffer
+    out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+
+    //Verify data
+    t = (cl_ulong *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_ulong *q = out[k];
+
+            // If we aren't getting the correctly rounded result
+            // (bitwise comparison; only mismatches go through the ulp check)
+            if( t[j] != q[j] )
+            {
+                cl_double test = ((cl_double*) q)[j];
+                long double correct = func.f_ff( s[j], s2[j] );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
+                int fail = ! (fabsf(err) <= ulps);
+
+                if( fail && ftz )
+                {
+                    // retry per section 6.5.3.2
+                    if( IsDoubleResultSubnormal(correct, ulps ) )
+                    {
+                        fail = fail && ( test != 0.0f );
+                        if( ! fail )
+                            err = 0.0f;
+                    }
+
+                    // nextafter on FTZ platforms may return the smallest
+                    // normal double (2^-1022) given a denormal or a zero
+                    // as the first argument. The rationale here is that
+                    // nextafter flushes the argument to zero and then
+                    // returns the next representable number in the
+                    // direction of the second argument, and since
+                    // denorms are considered as zero, the smallest
+                    // normal number is the next representable number.
+                    // In which case, it should have the same sign as the
+                    // second argument.
+                    if (isNextafter )
+                    {
+                        if(IsDoubleSubnormal(s[j]) || s[j] == 0.0f)
+                        {
+                            cl_double value = copysign(twoToMinus1022, s2[j]);
+                            fail = fail && (test != value);
+                            if (!fail)
+                                err = 0.0f;
+                        }
+                    }
+                    else
+                    {
+                        // retry per section 6.5.3.3
+                        if( IsDoubleSubnormal( s[j] ) )
+                        {
+                            long double correct2 = func.f_ff( 0.0, s2[j] );
+                            long double correct3 = func.f_ff( -0.0, s2[j] );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+
+                            //try with both args as zero
+                            if( IsDoubleSubnormal( s2[j] )  )
+                            {
+                                correct2 = func.f_ff( 0.0, 0.0 );
+                                correct3 = func.f_ff( -0.0, 0.0 );
+                                long double correct4 = func.f_ff( 0.0, -0.0 );
+                                long double correct5 = func.f_ff( -0.0, -0.0 );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
+                                                 (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ||
+                                    IsDoubleResultSubnormal( correct4, ulps ) || IsDoubleResultSubnormal( correct5, ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if(IsDoubleSubnormal(s2[j]) )
+                        {
+                            long double correct2 = func.f_ff( s[j], 0.0 );
+                            long double correct3 = func.f_ff( s[j], -0.0 );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                }
+
+                // Track the worst error seen by this thread and the inputs that produced it.
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at {%.13la, %.13la}: *%.13la vs. %.13la\n", name, sizeNames[k], err, s[j], s2[j], r[j], test );
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+
+
+    // Periodic progress output.
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            // buffer_elements is a size_t and needs %zu; job->scale is cast so
+            // that its argument matches %zu whatever its declared type is.
+            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10zu ulps:%5.3f ThreadCount:%2u\n", base, job->step, (size_t) job->scale, buffer_elements,  job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+exit:
+    return error;
+
+}
+
+// Entry point for float functions of two float arguments: forwards to the
+// common implementation with 0 as the last argument (the _nextafter variant
+// passes 1 instead).
+int TestFunc_Float_Float_Float(const Func *f, MTdata d)
+{
+    const int nextafterFlag = 0;
+    return TestFunc_Float_Float_Float_common(f, d, nextafterFlag);
+}
+
+// Entry point for double functions of two double arguments: forwards to the
+// common implementation with isNextafter set to 0.
+int TestFunc_Double_Double_Double(const Func *f, MTdata d)
+{
+    const int nextafterFlag = 0;
+    return TestFunc_Double_Double_Double_common(f, d, nextafterFlag);
+}
+
+// Entry point for the float nextafter test: forwards to the common
+// implementation with 1 as the last argument (the plain variant passes 0).
+int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata d)
+{
+    const int nextafterFlag = 1;
+    return TestFunc_Float_Float_Float_common(f, d, nextafterFlag);
+}
+
+// Entry point for the double nextafter test: forwards to the common
+// implementation with isNextafter set to 1.
+int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata d)
+{
+    const int nextafterFlag = 1;
+    return TestFunc_Double_Double_Double_common(f, d, nextafterFlag);
+}
+

diff --git a/test_conformance/math_brute_force/binaryOperator.cpp b/test_conformance/math_brute_force/binaryOperator.cpp
new file mode 100644
index 0000000..7676625
--- /dev/null
+++ b/test_conformance/math_brute_force/binaryOperator.cpp

@@ -0,0 +1,1462 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <string.h>
+#include "FunctionList.h"
+
+// Entry points of the binary-operator tests. Both are referenced by the
+// _binary_operator table that follows.
+int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata);
+int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata);
+
+// Table entry labelled "binaryOperator" that bundles the float and the double
+// test entry points.
+// NOTE(review): vtbl is declared in a header; confirm there that the
+// initializer order (label, float test, double test) matches its fields.
+extern const vtbl _binary_operator = { "binaryOperator",
+                                       TestFunc_Float_Float_Float_Operator,
+                                       TestFunc_Double_Double_Double_Operator };
+
+// Kernel builders for float and double operands. Each one assembles OpenCL C
+// source around operator_symbol and passes it to MakeKernels.
+static int BuildKernel( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+
+// Assembles the OpenCL C source of a float kernel that applies operator_symbol
+// element-wise to two input buffers and hands that source to MakeKernels.
+//
+//   name             prefix of the generated kernel name
+//   operator_symbol  text spliced between the two operands in the kernel body
+//   vectorSize       index into sizeNames / sizeValues
+//   kernel_count     forwarded unchanged to MakeKernels
+//   k, p             forwarded unchanged to MakeKernels
+//
+// Returns the value returned by MakeKernels.
+static int BuildKernel( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    // "<name>_kernel<size suffix>": the same name that the source fragments below spell out.
+    char kernelName[32];
+    snprintf( kernelName, sizeof( kernelName ) - 1, "%s_kernel%s", name, sizeNames[vectorSize] );
+
+    // Source used for every vector size except 3: one vector element per work-item.
+    const char *genericSource[] = {
+        "__kernel void ", name, "_kernel", sizeNames[vectorSize],
+        "( __global float", sizeNames[vectorSize],
+        "* out, __global float", sizeNames[vectorSize],
+        "* in1, __global float", sizeNames[vectorSize],
+        "* in2 )\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   out[i] =  in1[i] ", operator_symbol, " in2[i];\n"
+        "}\n"
+    };
+
+    // Source used when the vector size is 3: the buffers are addressed as scalars through
+    // vload3/vstore3, and the last work-item handles the leftover one or two elements.
+    const char *vec3Source[] = {
+        "__kernel void ", name, "_kernel", sizeNames[vectorSize],
+        "( __global float* out, __global float* in, __global float* in2)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       float3 f0 = vload3( 0, in + 3 * i );\n"
+        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
+        "       f0 = f0 ", operator_symbol, " f1;\n"
+        "       vstore3( f0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       float3 f0, f1;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       f0 = f0 ", operator_symbol, " f1;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = f0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = f0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+
+    // Select the fragment list that matches the requested vector width.
+    const char **source;
+    size_t sourceCount;
+    if( sizeValues[vectorSize] != 3 )
+    {
+        source = genericSource;
+        sourceCount = sizeof( genericSource ) / sizeof( *genericSource );
+    }
+    else
+    {
+        source = vec3Source;
+        sourceCount = sizeof( vec3Source ) / sizeof( *vec3Source );
+    }
+
+    return MakeKernels( source, (cl_uint) sourceCount, kernelName, kernel_count, k, p );
+}
+
+// Assembles the OpenCL C source of a double kernel that applies operator_symbol
+// element-wise to two input buffers and hands that source to MakeKernels.
+// The source enables cl_khr_fp64 before declaring the kernel.
+//
+//   name             prefix of the generated kernel name
+//   operator_symbol  text spliced between the two operands in the kernel body
+//   vectorSize       index into sizeNames / sizeValues
+//   kernel_count     forwarded unchanged to MakeKernels
+//   k, p             forwarded unchanged to MakeKernels
+//
+// Returns the value returned by MakeKernels.
+static int BuildKernelDouble( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    // "<name>_kernel<size suffix>": the same name that the source fragments below spell out.
+    char kernelName[32];
+    snprintf( kernelName, sizeof( kernelName ) - 1, "%s_kernel%s", name, sizeNames[vectorSize] );
+
+    // Source used for every vector size except 3: one vector element per work-item.
+    const char *genericSource[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void ", name, "_kernel", sizeNames[vectorSize],
+        "( __global double", sizeNames[vectorSize],
+        "* out, __global double", sizeNames[vectorSize],
+        "* in1, __global double", sizeNames[vectorSize],
+        "* in2 )\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   out[i] =  in1[i] ", operator_symbol, " in2[i];\n"
+        "}\n"
+    };
+
+    // Source used when the vector size is 3: the buffers are addressed as scalars through
+    // vload3/vstore3, and the last work-item handles the leftover one or two elements.
+    const char *vec3Source[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+        "__kernel void ", name, "_kernel", sizeNames[vectorSize],
+        "( __global double* out, __global double* in, __global double* in2)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       double3 d0 = vload3( 0, in + 3 * i );\n"
+        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
+        "       d0 = d0 ", operator_symbol, " d1;\n"
+        "       vstore3( d0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       double3 d0, d1;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       d0 = d0 ", operator_symbol, " d1;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = d0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = d0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+
+    // Select the fragment list that matches the requested vector width.
+    const char **source;
+    size_t sourceCount;
+    if( sizeValues[vectorSize] != 3 )
+    {
+        source = genericSource;
+        sourceCount = sizeof( genericSource ) / sizeof( *genericSource );
+    }
+    else
+    {
+        source = vec3Source;
+        sourceCount = sizeof( vec3Source ) / sizeof( *vec3Source );
+    }
+
+    return MakeKernels( source, (cl_uint) sourceCount, kernelName, kernel_count, k, p );
+}
+
+// Arguments shared by the BuildKernel_FloatFn / BuildKernel_DoubleFn thread-pool jobs.
+// The struct is brace-initialised positionally by its user, so the field order matters.
+typedef struct BuildKernelInfo
+{
+    cl_uint     offset;            // the first vector size to build; each job adds its job_id to this to get its vector-size index
+    cl_uint     kernel_count;      // forwarded to BuildKernel / BuildKernelDouble as kernel_count
+    cl_kernel   **kernels;         // indexed by vector-size index; the selected entry is passed on as the kernel array
+    cl_program  *programs;         // indexed by vector-size index; the address of the selected entry is passed on
+    const char  *name;             // kernel name prefix passed to the builder
+    const char  *operator_symbol;  // operator text passed to the builder for splicing into the kernel source
+}BuildKernelInfo;
+
+// Thread-pool job that builds the float kernels for a single vector size.
+//   job_id     added to BuildKernelInfo::offset to obtain the vector-size index
+//   thread_id  unused
+//   p          points at the BuildKernelInfo describing what to build
+// Returns the result of BuildKernel.
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    const BuildKernelInfo *buildInfo = static_cast<const BuildKernelInfo *>( p );
+    const cl_uint sizeIndex = buildInfo->offset + job_id;
+    cl_kernel *kernelsForSize = buildInfo->kernels[sizeIndex];
+    cl_program *programForSize = &buildInfo->programs[sizeIndex];
+
+    return BuildKernel( buildInfo->name, buildInfo->operator_symbol, sizeIndex,
+                        buildInfo->kernel_count, kernelsForSize, programForSize );
+}
+
+// Thread-pool job that builds the double kernels for a single vector size.
+//   job_id     added to BuildKernelInfo::offset to obtain the vector-size index
+//   thread_id  unused
+//   p          points at the BuildKernelInfo describing what to build
+// Returns the result of BuildKernelDouble.
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    const BuildKernelInfo *buildInfo = static_cast<const BuildKernelInfo *>( p );
+    const cl_uint sizeIndex = buildInfo->offset + job_id;
+    cl_kernel *kernelsForSize = buildInfo->kernels[sizeIndex];
+    cl_program *programForSize = &buildInfo->programs[sizeIndex];
+
+    return BuildKernelDouble( buildInfo->name, buildInfo->operator_symbol, sizeIndex,
+                              buildInfo->kernel_count, kernelsForSize, programForSize );
+}
+
+// Thread specific data for a worker thread.
+// The test entry point zero-fills the whole array before populating it.
+typedef struct ThreadInfo
+{
+    cl_mem      inBuf;                              // first input buffer for the thread (sub-buffer of gInBuffer)
+    cl_mem      inBuf2;                             // second input buffer for the thread (sub-buffer of gInBuffer2)
+    cl_mem      outBuf[ VECTOR_SIZE_COUNT ];        // output buffers for the thread, one per vector size (sub-buffers of gOutBuffer[])
+    float       maxError;                           // max error value. Init to 0.
+    double      maxErrorValue;                      // position of the max error value (param 1).  Init to 0.
+    double      maxErrorValue2;                     // position of the max error value (param 2).  Init to 0.
+    MTdata      d;                                  // per thread random number generator state, created with init_genrand
+    cl_command_queue tQueue;                        // per thread command queue to improve performance
+}ThreadInfo;
+
+// Parameters and resources of one test run. A pointer to this struct is the
+// user-data argument given to ThreadPool_Do for the TestFloat jobs.
+typedef struct TestInfo
+{
+    size_t      subBufferSize;                      // Size of the sub-buffer in elements
+    const Func  *f;                                 // A pointer to the function info
+    cl_program  programs[ VECTOR_SIZE_COUNT ];      // programs for various vector sizes
+    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
+    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
+    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs handed to ThreadPool_Do
+    cl_uint     step;                               // step between each chunk and the next (subBufferSize * scale).
+    cl_uint     scale;                              // stride between individual test values (1 unless wimpy mode is on)
+    float       ulps;                               // max_allowed ulps
+    int         ftz;                                // non-zero if running in flush to zero mode
+
+    // no special fields
+}TestInfo;
+
+// Per-job worker; it is handed to ThreadPool_Do by the test entry point.
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
+
+// A table of more difficult cases to get right.
+// The first half holds negative values and the second half mirrors it with
+// positive values: NaN, infinity, FLT_MAX, values around 2^64, 2^63, 2^32 and
+// 2^31, a few ordinary magnitudes, values just above and below small
+// half-integers and powers of two, values around FLT_MIN, subnormals and zero.
+// NOTE(review): MAKE_HEX_FLOAT is defined elsewhere; its arguments appear to
+// spell the same value twice (hex float literal, then integer mantissa and
+// binary exponent) -- confirm against the macro definition.
+static const float specialValuesFloat[] = {
+    -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),  MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f,  -4.0f, -3.5f,
+    -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),  MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
+    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
+
+    +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
+    +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
+    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
+};
+
+// Number of entries in specialValuesFloat. TestFloat pairs every entry with
+// every entry, so count * count operand pairs are generated from the table.
+static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] );
+
+// Redundant repeat of the TestFloat declaration above; harmless.
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
+
+// Runs the single-precision test of the binary operator described by f.
+//
+// Steps:
+//   1. Size the per-thread sub-buffers and compute the job count (both are
+//      reduced when gWimpyMode is set).
+//   2. Allocate one kernel array per vector size, then per-thread sub-buffers,
+//      a command queue and a random number generator.
+//   3. Build the kernels through the thread pool.
+//   4. Unless gSkipCorrectnessTesting is set, run TestFloat for every job and
+//      collect the largest error reported by any thread.
+//   5. If gMeasureTimes is set, time every vector size on gQueue.
+//
+//   f  function table entry under test
+//   d  random number generator; seeds the per-thread generators and fills the
+//      timing inputs
+//
+// Returns the last error code recorded (zero when everything succeeded).
+// Every failure path goes through the cleanup code at the exit label, so the
+// kernels, programs, sub-buffers, queues and generators are released.
+int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
+{
+    TestInfo    test_info;
+    cl_int      error = CL_SUCCESS;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    double      maxErrorVal2 = 0.0;
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode) {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
+    }
+
+    test_info.step = test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        // Enough jobs to walk the whole 32-bit range in increments of step.
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    // Zero-fill so that the cleanup code sees NULL handles for anything not created yet.
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        // Thread i owns the i-th slice of each global buffer.
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zu, %zu}\n", region.origin, region.size );
+            goto exit;
+        }
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        // Check the buffer that was just created (inBuf2), not inBuf.
+        if( error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zu, %zu}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zu, %zu}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+
+        // Each thread gets its own generator, seeded from the caller's generator.
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->name, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
+
+        // Accumulate the arithmetic errors (done even when the run failed)
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            if( test_info.tinfo[i].maxError > maxError )
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+
+        if( error )
+            goto exit;
+
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+        {
+            p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000;
+            p2[j] = 0x3fc00000;
+        }
+
+        // On failure go through the exit label so that the resources created above are released.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            // Timing uses thread 0's kernel with the full-size global buffers.
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report the average instead of the best run when requested.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
+    vlog( "\n" );
+
+
+exit:
+    // Release everything that was created; entries never created are still
+    // zero from the memset calls above.
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata(test_info.tinfo[i].d);
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_float );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    float       ulps = job->ulps;
+    fptr        func = job->f->func;
+    if ( gTestFastRelaxed )
+    {
+      func = job->f->rfunc;
+    }
+
+
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    cl_uchar    *overflow = (cl_uchar*)malloc(buffer_size);
+    const char  *name = job->f->name;
+    cl_uint     *t;
+    cl_float    *r,*s,*s2;
+    RoundingMode oldRoundMode;
+
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_uint  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    //Init input array
+    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
+    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+
+    int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+
+
+    if( job_id <= (cl_uint)indx ) {
+        // Insert special values
+        uint32_t x, y;
+
+        x = (job_id * buffer_elements) % specialValuesFloatCount;
+        y = (job_id * buffer_elements) / specialValuesFloatCount;
+
+        for( ; j < buffer_elements; j++ ) {
+            p[j] = ((cl_uint *)specialValuesFloat)[x];
+            p2[j] = ((cl_uint *)specialValuesFloat)[y];
+            ++x;
+            if (x >= specialValuesFloatCount) {
+                x = 0;
+                y++;
+                if (y >= specialValuesFloatCount)
+                    break;
+            }
+            if (gTestFastRelaxed && strcmp(name,"divide") == 0) {
+                cl_uint pj = p[j] & 0x7fffffff;
+                cl_uint p2j = p2[j] & 0x7fffffff;
+                // Replace values outside [2^-62, 2^62] with QNaN
+                if (pj < 0x20800000 || pj > 0x5e800000)
+                    p[j] = 0x7fc00000;
+                if (p2j < 0x20800000 || p2j > 0x5e800000)
+                    p2[j] = 0x7fc00000;
+            }
+        }
+    }
+
+    // Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = genrand_int32(d);
+        p2[j] = genrand_int32(d);
+
+        if (gTestFastRelaxed && strcmp(name,"divide") == 0) {
+            cl_uint pj = p[j] & 0x7fffffff;
+            cl_uint p2j = p2[j] & 0x7fffffff;
+            // Replace values outside [2^-62, 2^62] with QNaN
+            if (pj < 0x20800000 || pj > 0x5e800000)
+                p[j] = 0x7fc00000;
+            if (p2j < 0x20800000 || p2j > 0x5e800000)
+                p2[j] = 0x7fc00000;
+        }
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+    {
+        free( overflow );
+        return CL_SUCCESS;
+    }
+
+    //Calculate the correctly rounded reference result
+    FPU_mode_type oldMode;
+    memset( &oldMode, 0, sizeof( oldMode ) );
+    if( ftz )
+        ForceFTZ( &oldMode );
+
+    // Set the rounding mode to match the device
+    oldRoundMode = kRoundToNearestEven;
+    if (gIsInRTZMode)
+        oldRoundMode = set_round(kRoundTowardZero, kfloat);
+
+    //Calculate the correctly rounded reference result
+    r = (float *)gOut_Ref  + thread_id * buffer_elements;
+    s = (float *)gIn  + thread_id * buffer_elements;
+    s2 = (float *)gIn2  + thread_id * buffer_elements;
+    if( gInfNanSupport )
+    {
+        for( j = 0; j < buffer_elements; j++ )
+            r[j] = (float) func.f_ff( s[j], s2[j] );
+    }
+    else
+    {
+        for( j = 0; j < buffer_elements; j++ )
+        {
+            feclearexcept(FE_OVERFLOW);
+            r[j] = (float) func.f_ff( s[j], s2[j] );
+            overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+        }
+    }
+
+    if (gIsInRTZMode)
+      (void)set_round(oldRoundMode, kfloat);
+
+    if( ftz )
+        RestoreFPState( &oldMode );
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+
+    // Wait for the last buffer
+    out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+
+    //Verify data
+    t = (cl_uint *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_uint *q = out[k];
+
+            // If we aren't getting the correctly rounded result
+            if( t[j] != q[j] )
+            {
+                float test = ((float*) q)[j];
+                double correct = func.f_ff( s[j], s2[j] );
+
+                // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                if ( !gInfNanSupport)
+                {
+                    // Note: no double rounding here.  Reference functions calculate in single precision.
+                    if( overflow[j]                                         ||
+                        IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                        IsFloatInfinity(s2[j])   || IsFloatNaN(s2[j])       ||
+                        IsFloatInfinity(s[j])    || IsFloatNaN(s[j])        )
+                        continue;
+                }
+
+        // Per section 10 paragraph 6, accept embedded devices always returning positive 0.0.
+        if (gIsEmbedded && (t[j] == 0x80000000) && (q[j] == 0x00000000)) continue;
+
+                float err = Ulp_Error( test, correct );
+                float errB = Ulp_Error( test, (float) correct  );
+
+                if( gTestFastRelaxed )
+                  ulps = job->f->relaxed_error;
+
+                int fail = ((!(fabsf(err) <= ulps)) && (!(fabsf(errB) <= ulps)));
+                if( fabsf( errB ) < fabsf(err ) )
+                  err = errB;
+
+                if( fail && ftz )
+                {
+                    // retry per section 6.5.3.2
+                    if( IsFloatResultSubnormal(correct, ulps ) )
+                    {
+                        fail = fail && ( test != 0.0f );
+                        if( ! fail )
+                            err = 0.0f;
+                    }
+
+                    // retry per section 6.5.3.3
+                    if( IsFloatSubnormal( s[j] ) )
+                    {
+                        double correct2, correct3;
+                        float err2, err3;
+
+                        if( !gInfNanSupport )
+                            feclearexcept(FE_OVERFLOW);
+
+                        correct2 = func.f_ff( 0.0, s2[j] );
+                        correct3 = func.f_ff( -0.0, s2[j] );
+
+                        // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                        if( !gInfNanSupport )
+                        {
+                            if( fetestexcept(FE_OVERFLOW) )
+                                continue;
+
+                            // Note: no double rounding here.  Reference functions calculate in single precision.
+                            if( IsFloatInfinity(correct2) || IsFloatNaN(correct2)   ||
+                                IsFloatInfinity(correct3) || IsFloatNaN(correct3)    )
+                                continue;
+                        }
+
+                        err2 = Ulp_Error( test, correct2  );
+                        err3 = Ulp_Error( test, correct3  );
+                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                        if( fabsf( err2 ) < fabsf(err ) )
+                            err = err2;
+                        if( fabsf( err3 ) < fabsf(err ) )
+                            err = err3;
+
+                        // retry per section 6.5.3.4
+                        if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f);
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+
+                        //try with both args as zero
+                        if( IsFloatSubnormal( s2[j] )  )
+                        {
+                            double correct4, correct5;
+                            float err4, err5;
+
+                            if( !gInfNanSupport )
+                                feclearexcept(FE_OVERFLOW);
+
+                            correct2 = func.f_ff( 0.0, 0.0 );
+                            correct3 = func.f_ff( -0.0, 0.0 );
+                            correct4 = func.f_ff( 0.0, -0.0 );
+                            correct5 = func.f_ff( -0.0, -0.0 );
+
+                            // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                            if( !gInfNanSupport )
+                            {
+                                if( fetestexcept(FE_OVERFLOW) )
+                                    continue;
+
+                                // Note: no double rounding here.  Reference functions calculate in single precision.
+                                if( IsFloatInfinity(correct2) || IsFloatNaN(correct2)   ||
+                                    IsFloatInfinity(correct3) || IsFloatNaN(correct3)   ||
+                                    IsFloatInfinity(correct4) || IsFloatNaN(correct4)   ||
+                                    IsFloatInfinity(correct5) || IsFloatNaN(correct5)    )
+                                    continue;
+                            }
+
+                            err2 = Ulp_Error( test, correct2  );
+                            err3 = Ulp_Error( test, correct3  );
+                            err4 = Ulp_Error( test, correct4  );
+                            err5 = Ulp_Error( test, correct5  );
+                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
+                                             (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            if( fabsf( err4 ) < fabsf(err ) )
+                                err = err4;
+                            if( fabsf( err5 ) < fabsf(err ) )
+                                err = err5;
+
+                            // retry per section 6.5.3.4
+                            if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ||
+                                IsFloatResultSubnormal( correct4, ulps ) || IsFloatResultSubnormal( correct5, ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    else if(IsFloatSubnormal(s2[j]) )
+                    {
+                        double correct2, correct3;
+                        float err2, err3;
+
+                        if( !gInfNanSupport )
+                            feclearexcept(FE_OVERFLOW);
+
+                        correct2 = func.f_ff( s[j], 0.0 );
+                        correct3 = func.f_ff( s[j], -0.0 );
+
+                        // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                        if ( !gInfNanSupport)
+                        {
+                            // Note: no double rounding here.  Reference functions calculate in single precision.
+                            if( overflow[j]                                         ||
+                                IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                                IsFloatInfinity(correct2)|| IsFloatNaN(correct2)    )
+                                continue;
+                        }
+
+                        err2 = Ulp_Error( test, correct2  );
+                        err3 = Ulp_Error( test, correct3  );
+                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                        if( fabsf( err2 ) < fabsf(err ) )
+                            err = err2;
+                        if( fabsf( err3 ) < fabsf(err ) )
+                            err = err3;
+
+                        // retry per section 6.5.3.4
+                        if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f);
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                    }
+                }
+
+
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a}: *%a vs. %a (0x%8.8x) at index: %d\n", name, sizeNames[k], err, s[j], s2[j], r[j], test, ((cl_uint*)&test)[0], j );
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+
+
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step,  job->scale, buffer_elements, job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+exit:
+    if( overflow )
+        free( overflow );
+    return error;
+
+}
+
+
+// A table of more difficult cases to get right: NaN, infinities, DBL_MAX,
+// values adjacent to powers of two and to half-integers, DBL_MIN, subnormals
+// and zero. The first half holds the negative values; the second half holds
+// the same magnitudes with a positive sign, entry for entry.
+static const double specialValuesDouble[] = {
+    -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100.,  -4.0, -3.5,
+    -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
+
+    +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100.,  +4.0, +3.5,
+    // The entry just before +1.0 is +(1 + 1ulp); it mirrors the negative entry before -1.0 in the first half.
+    +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
+};
+
+// Number of entries in the specialValuesDouble table.
+static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( *specialValuesDouble );
+
+// Worker handed to ThreadPool_Do by the driver below; its definition follows the driver.
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p );
+
+// Driver for the double-precision test of an operator taking two double
+// inputs and producing a double result.
+//
+// Sets up one sub-buffer set, command queue, random generator and kernel copy
+// per worker thread, runs TestDouble over all jobs through ThreadPool_Do
+// (unless gSkipCorrectnessTesting is set), optionally times the kernels when
+// gMeasureTimes is set, and logs the largest error recorded by the workers.
+//
+//   f  function descriptor; name, nameInCode, double_ulps and ftz are read here
+//   d  random number generator used to seed the per-thread generators and to
+//      fill the input buffers for the timing runs
+//
+// Returns 0 on success, otherwise the error code of the step that failed.
+// Every failure path leaves through the exit label so that the kernels,
+// programs, sub-buffers, queues and per-thread generators are released.
+int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
+{
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    double      maxErrorVal2 = 0.0;
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        // Wimpy mode uses a smaller buffer and a larger scale, which yields fewer jobs below.
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
+    }
+
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        // Number of jobs needed to walk the 32-bit range in increments of step
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = f->double_ulps;
+    test_info.ftz = f->ftz || gForceFTZ;
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        // Thread i works on its own slice of each global buffer.
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zu, %zu}\n", region.origin, region.size );
+            goto exit;
+        }
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        // Check the handle that was just created (inBuf2), not inBuf again.
+        if( error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zu, %zu}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zu, %zu}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+
+        // Each thread gets its own generator, seeded from the caller's generator.
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->name, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
+
+        // Accumulate the arithmetic errors
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            if( test_info.tinfo[i].maxError > maxError )
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+
+        if( error )
+            goto exit;
+
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+
+        // Failures here go through exit (not a bare return) so nothing allocated above leaks.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            // Timing runs use thread 0's kernel copy on the whole global buffers.
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report the fastest run unless averages were requested
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
+    vlog( "\n" );
+
+
+exit:
+    // Release
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata(test_info.tinfo[i].d);
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_double );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    float       ulps = job->ulps;
+    dptr        func = job->f->dfunc;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    const char  *name = job->f->name;
+    cl_ulong    *t;
+    cl_double   *r,*s,*s2;
+
+    Force64BitFPUPrecision();
+
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_ulong  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    //Init input array
+    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
+    cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+
+    if( job_id <= (cl_uint)indx )
+    { // test edge cases
+        cl_double *fp = (cl_double *)p;
+        cl_double *fp2 = (cl_double *)p2;
+        uint32_t x, y;
+
+    x = (job_id * buffer_elements) % specialValuesDoubleCount;
+    y = (job_id * buffer_elements) / specialValuesDoubleCount;
+
+        for( ; j < buffer_elements; j++ )
+        {
+            fp[j] = specialValuesDouble[x];
+            fp2[j] = specialValuesDouble[y];
+            if( ++x >= specialValuesDoubleCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesDoubleCount )
+                    break;
+            }
+        }
+    }
+
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = genrand_int64(d);
+        p2[j] = genrand_int64(d);
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+
+    //Calculate the correctly rounded reference result
+    r = (cl_double *)gOut_Ref  + thread_id * buffer_elements;
+    s = (cl_double *)gIn  + thread_id * buffer_elements;
+    s2 = (cl_double *)gIn2  + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = (cl_double) func.f_ff( s[j], s2[j] );
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+
+    // Wait for the last buffer
+    out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+
+    //Verify data
+    t = (cl_ulong *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_ulong *q = out[k];
+
+            // If we aren't getting the correctly rounded result
+            if( t[j] != q[j] )
+            {
+                cl_double test = ((cl_double*) q)[j];
+                long double correct = func.f_ff( s[j], s2[j] );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
+                int fail = ! (fabsf(err) <= ulps);
+
+                if( fail && ftz )
+                {
+                    // retry per section 6.5.3.2
+                    if( IsDoubleResultSubnormal(correct, ulps ) )
+                    {
+                        fail = fail && ( test != 0.0f );
+                        if( ! fail )
+                            err = 0.0f;
+                    }
+
+
+                    // retry per section 6.5.3.3
+                    if( IsDoubleSubnormal( s[j] ) )
+                    {
+                        long double correct2 = func.f_ff( 0.0, s2[j] );
+                        long double correct3 = func.f_ff( -0.0, s2[j] );
+                        float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                        float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                        if( fabsf( err2 ) < fabsf(err ) )
+                            err = err2;
+                        if( fabsf( err3 ) < fabsf(err ) )
+                            err = err3;
+
+                        // retry per section 6.5.3.4
+                        if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f);
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+
+                        //try with both args as zero
+                        if( IsDoubleSubnormal( s2[j] )  )
+                        {
+                            correct2 = func.f_ff( 0.0, 0.0 );
+                            correct3 = func.f_ff( -0.0, 0.0 );
+                            long double correct4 = func.f_ff( 0.0, -0.0 );
+                            long double correct5 = func.f_ff( -0.0, -0.0 );
+                            err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                            float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
+                                             (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            if( fabsf( err4 ) < fabsf(err ) )
+                                err = err4;
+                            if( fabsf( err5 ) < fabsf(err ) )
+                                err = err5;
+
+                            // retry per section 6.5.3.4
+                            if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ||
+                                IsDoubleResultSubnormal( correct4, ulps ) || IsDoubleResultSubnormal( correct5, ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    else if(IsDoubleSubnormal(s2[j]) )
+                    {
+                        long double correct2 = func.f_ff( s[j], 0.0 );
+                        long double correct3 = func.f_ff( s[j], -0.0 );
+                        float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                        float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                        if( fabsf( err2 ) < fabsf(err ) )
+                            err = err2;
+                        if( fabsf( err3 ) < fabsf(err ) )
+                            err = err3;
+
+                        // retry per section 6.5.3.4
+                        if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f);
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                    }
+                }
+
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a}: *%a vs. %a\n", name, sizeNames[k], err, s[j], s2[j], r[j], test );
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+
+
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements,  job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+
+exit:
+    return error;
+
+}
+
+
+
+

diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
deleted file mode 100644
index 4baa499..0000000
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ /dev/null

@@ -1,827 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022); // 2^-1022; Test() combines it with copysign() as the accepted flush-to-zero result of nextafter.
-
-// Assembles the OpenCL C source of the kernel "math_kernel<size suffix>",
-// which applies the two-argument double builtin `name` to in1/in2 and stores
-// the result in out, then hands the source fragments to MakeKernels().
-//
-//   name          builtin name spliced into the kernel source
-//   vectorSize    index into sizeNames[] / sizeValues[]
-//   kernel_count  forwarded unchanged to MakeKernels()
-//   k, p          kernel array / program slot forwarded to MakeKernels()
-//   relaxedMode   forwarded unchanged to MakeKernels()
-//
-// Returns whatever MakeKernels() returns.
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    // Fragments used for every vector width other than 3: each work-item
-    // handles exactly one vector element of out/in1/in2.
-    const char *genericSource[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double",
-        sizeNames[vectorSize],
-        "* out, __global double",
-        sizeNames[vectorSize],
-        "* in1, __global double",
-        sizeNames[vectorSize],
-        "* in2 )\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   out[i] = ",
-        name,
-        "( in1[i], in2[i] );\n"
-        "}\n"
-    };
-
-    // Fragments used for the 3-wide case: the buffers are addressed as scalar
-    // doubles. Every work-item but the last uses vload3/vstore3; the last one
-    // loads the one or two leftover elements, pads the rest with NAN and
-    // stores back only the lanes it actually loaded.
-    const char *vec3Source[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       d0 = ",
-        name,
-        "( d0, d1 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, d1 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    // Pick the fragment table that matches the requested vector width.
-    const char **fragments;
-    size_t fragmentCount;
-    if (sizeValues[vectorSize] == 3)
-    {
-        fragments = vec3Source;
-        fragmentCount = sizeof(vec3Source) / sizeof(vec3Source[0]);
-    }
-    else
-    {
-        fragments = genericSource;
-        fragmentCount = sizeof(genericSource) / sizeof(genericSource[0]);
-    }
-
-    // Kernel entry-point name; it must match the name emitted in the source.
-    char kernelName[32];
-    snprintf(kernelName, sizeof(kernelName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(fragments, (cl_uint)fragmentCount, kernelName,
-                       kernel_count, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo // Job description that BuildKernelFn receives through its void* argument.
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count; // forwarded to BuildKernel as its kernel_count argument
-    cl_kernel **kernels; // indexed by vector size index; kernels[i] is handed to BuildKernel
-    cl_program *programs; // indexed by vector size index; programs + i is handed to BuildKernel
-    const char *nameInCode; // function name handed to BuildKernel
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-// ThreadPool_Do callback that builds the kernels for a single vector size.
-// The vector size index is BuildKernelInfo::offset + job_id; thread_id is
-// not used. `p` must point at a BuildKernelInfo. Returns BuildKernel()'s
-// result.
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *buildInfo = static_cast<BuildKernelInfo *>(p);
-    const cl_uint sizeIndex = buildInfo->offset + job_id;
-    cl_kernel *kernelsForSize = buildInfo->kernels[sizeIndex];
-    cl_program *programSlot = &buildInfo->programs[sizeIndex];
-
-    return BuildKernel(buildInfo->nameInCode, sizeIndex,
-                       buildInfo->kernel_count, kernelsForSize, programSlot,
-                       buildInfo->relaxedMode);
-}
-
-// Thread specific data for a worker thread; TestInfo::tinfo holds one entry per worker thread.
-typedef struct ThreadInfo
-{
-    cl_mem inBuf; // input buffer for the thread (sub-buffer of gInBuffer)
-    cl_mem inBuf2; // second input buffer for the thread (sub-buffer of gInBuffer2)
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread (sub-buffers of gOutBuffer[j])
-    float maxError; // max error value. Init to 0.
-    double
-        maxErrorValue; // position of the max error value (param 1).  Init to 0.
-    double maxErrorValue2; // position of the max error value (param 2).  Init
-                           // to 0.
-    MTdata d; // per-thread random generator state, created with init_genrand
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo // State shared by all Test() jobs of one TestFunc_Double_Double_Double run.
-{
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-
-    int isFDim; // non-zero when f->nameInCode is "fdim"
-    int skipNanInf; // always set to 0 by TestFunc_Double_Double_Double
-    int isNextafter; // non-zero when f->nameInCode is "nextafter"
-    bool relaxedMode; // True if test is running in relaxed mode, false
-                      // otherwise.
-} TestInfo;
-
-// A table of more difficult cases to get right.
-//
-// Layout: a negative half (NaN, infinity, large magnitudes, values around
-// small integers and powers of two, then subnormals down to -0.0) followed by
-// the same sequence with a positive sign. Test() feeds every ordered pair of
-// entries (specialValuesCount * specialValuesCount combinations) to the
-// kernel before it switches to random inputs, so a missing value here is a
-// whole row and column of edge cases that never gets exercised.
-static const double specialValues[] = {
-    -NAN,
-    -INFINITY,
-    -DBL_MAX,
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
-    -1000.0,
-    -100.0,
-    -4.0,
-    -3.5,
-    -3.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
-    -2.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
-    -2.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
-    -1.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    -1.0,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53),
-    -0.5,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54),
-    -0.25,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
-    -DBL_MIN,
-    MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
-    -0.0,
-
-    +NAN,
-    +INFINITY,
-    +DBL_MAX,
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
-    +1000.0,
-    +100.0,
-    +4.0,
-    +3.5,
-    +3.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
-    +2.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
-    +2.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
-    +1.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
-    // 1 + 1ulp. This entry used to repeat the negative value from the first
-    // half, so the positive counterpart was never tested.
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52),
-    +1.0,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
-    +0.5,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
-    +0.25,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
-    +DBL_MIN,
-    MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
-    +0.0,
-};
-
-// Number of entries in specialValues.
-static size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); // Forward declaration; defined further down and passed to ThreadPool_Do as the per-job worker.
-
-// Brute-force test driver for a double-precision builtin that takes two
-// double arguments and returns a double.
-//
-//   f            function table entry; name, nameInCode, double_ulps and ftz
-//                are read here, the rest is reached through test_info.f
-//   d            random generator used to seed one generator per worker thread
-//   relaxedMode  passed to logFunctionInfo() and to the kernel build
-//
-// Sets up per-thread sub-buffers, command queues and kernels, runs Test() for
-// every job through ThreadPool_Do, logs the largest observed error, and
-// releases everything again. Returns the error code of the first step that
-// failed; on the success path this is the (zero) status of the last
-// ThreadPool_Do call.
-int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
-{
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    double maxErrorVal2 = 0.0;
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    // Init test_info
-    // NOTE(review): test_info.relaxedMode is never assigned from the
-    // relaxedMode parameter and therefore stays false after the memset --
-    // confirm that is intended.
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-
-    // step is computed in 32 bits; dividing it back detects wrap-around.
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ulps = f->double_ulps;
-    test_info.ftz = f->ftz || gForceFTZ;
-
-    test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
-    test_info.skipNanInf = 0;
-    test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
-
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        // Zero the handles so the cleanup path never sees garbage pointers.
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        // Thread i owns the i-th subBufferSize-element slice of every global
-        // buffer.
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            // region.origin / region.size are size_t, hence %zu.
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zu, %zu}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zu, %zu}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zu, %zu}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-
-        // Each worker gets its own generator, seeded from the caller's.
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        // One build job per vector size in [gMinVectorSizeIndex,
-        // gMaxVectorSizeIndex).
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        // Accumulate the arithmetic errors
-        // (done before checking `error`, so the maxima are collected even
-        // when a job failed).
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
-            }
-        }
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    // Reached on success and on every failure above; entries that were never
-    // created are still zero from the memsets.
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_double);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    float ulps = job->ulps;
-    dptr func = job->f->dfunc;
-    int ftz = job->ftz;
-    MTdata d = tinfo->d;
-    cl_int error;
-    const char *name = job->f->name;
-
-    int isNextafter = job->isNextafter;
-    cl_ulong *t;
-    cl_double *r;
-    cl_double *s;
-    cl_double *s2;
-
-    Force64BitFPUPrecision();
-
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-
-    // Init input array
-    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
-    cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
-    cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        cl_double *fp = (cl_double *)p;
-        cl_double *fp2 = (cl_double *)p2;
-        uint32_t x, y;
-
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-
-        for (; idx < buffer_elements; idx++)
-        {
-            fp[idx] = specialValues[x];
-            fp2[idx] = specialValues[y];
-            if (++x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesCount) break;
-            }
-        }
-    }
-
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = genrand_int64(d);
-        p2[idx] = genrand_int64(d);
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-
-    // Calculate the correctly rounded reference result
-    r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
-    s = (cl_double *)gIn + thread_id * buffer_elements;
-    s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_ff(s[j], s2[j]);
-
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-
-    // Verify data
-    t = (cl_ulong *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_ulong *q = out[k];
-
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_ff(s[j], s2[j]);
-                float err = Bruteforce_Ulp_Error_Double(test, correct);
-                int fail = !(fabsf(err) <= ulps);
-
-                if (fail && ftz)
-                {
-                    // retry per section 6.5.3.2
-                    if (IsDoubleResultSubnormal(correct, ulps))
-                    {
-                        fail = fail && (test != 0.0f);
-                        if (!fail) err = 0.0f;
-                    }
-
-                    // nextafter on FTZ platforms may return the smallest
-                    // normal float (2^-126) given a denormal or a zero
-                    // as the first argument. The rationale here is that
-                    // nextafter flushes the argument to zero and then
-                    // returns the next representable number in the
-                    // direction of the second argument, and since
-                    // denorms are considered as zero, the smallest
-                    // normal number is the next representable number.
-                    // In which case, it should have the same sign as the
-                    // second argument.
-                    if (isNextafter)
-                    {
-                        if (IsDoubleSubnormal(s[j]) || s[j] == 0.0f)
-                        {
-                            cl_double value = copysign(twoToMinus1022, s2[j]);
-                            fail = fail && (test != value);
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                    else
-                    {
-                        // retry per section 6.5.3.3
-                        if (IsDoubleSubnormal(s[j]))
-                        {
-                            long double correct2 = func.f_ff(0.0, s2[j]);
-                            long double correct3 = func.f_ff(-0.0, s2[j]);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct2);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= ulps))
-                                    && (!(fabsf(err3) <= ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-
-                            // retry per section 6.5.3.4
-                            if (IsDoubleResultSubnormal(correct2, ulps)
-                                || IsDoubleResultSubnormal(correct3, ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-
-                            // try with both args as zero
-                            if (IsDoubleSubnormal(s2[j]))
-                            {
-                                correct2 = func.f_ff(0.0, 0.0);
-                                correct3 = func.f_ff(-0.0, 0.0);
-                                long double correct4 = func.f_ff(0.0, -0.0);
-                                long double correct5 = func.f_ff(-0.0, -0.0);
-                                err2 =
-                                    Bruteforce_Ulp_Error_Double(test, correct2);
-                                err3 =
-                                    Bruteforce_Ulp_Error_Double(test, correct3);
-                                float err4 =
-                                    Bruteforce_Ulp_Error_Double(test, correct4);
-                                float err5 =
-                                    Bruteforce_Ulp_Error_Double(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= ulps))
-                                        && (!(fabsf(err3) <= ulps))
-                                        && (!(fabsf(err4) <= ulps))
-                                        && (!(fabsf(err5) <= ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-
-                                // retry per section 6.5.3.4
-                                if (IsDoubleResultSubnormal(correct2, ulps)
-                                    || IsDoubleResultSubnormal(correct3, ulps)
-                                    || IsDoubleResultSubnormal(correct4, ulps)
-                                    || IsDoubleResultSubnormal(correct5, ulps))
-                                {
-                                    fail = fail && (test != 0.0f);
-                                    if (!fail) err = 0.0f;
-                                }
-                            }
-                        }
-                        else if (IsDoubleSubnormal(s2[j]))
-                        {
-                            long double correct2 = func.f_ff(s[j], 0.0);
-                            long double correct3 = func.f_ff(s[j], -0.0);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct2);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= ulps))
-                                    && (!(fabsf(err3) <= ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-
-                            // retry per section 6.5.3.4
-                            if (IsDoubleResultSubnormal(correct2, ulps)
-                                || IsDoubleResultSubnormal(correct3, ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                }
-
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                    tinfo->maxErrorValue2 = s2[j];
-                }
-                if (fail)
-                {
-                    vlog_error("\nERROR: %s%s: %f ulp error at {%.13la, "
-                               "%.13la}: *%.13la vs. %.13la\n",
-                               name, sizeNames[k], err, s[j], s2[j], r[j],
-                               test);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-
-
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-
-exit:
-    return error;
-}

diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp
deleted file mode 100644
index 32caafa..0000000
--- a/test_conformance/math_brute_force/binary_float.cpp
+++ /dev/null

@@ -1,988 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
-
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-}
-
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-{
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double
-        maxErrorValue; // position of the max error value (param 1).  Init to 0.
-    double maxErrorValue2; // position of the max error value (param 2).  Init
-                           // to 0.
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
-{
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-
-    int isFDim;
-    int skipNanInf;
-    int isNextafter;
-    bool relaxedMode; // True if test is running in relaxed mode, false
-                      // otherwise.
-} TestInfo;
-
-// A table of more difficult cases to get right
-static const float specialValues[] = {
-    -NAN,
-    -INFINITY,
-    -FLT_MAX,
-    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
-    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
-    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
-    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
-    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
-    MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
-    MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
-    MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
-    MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
-    -1000.f,
-    -100.f,
-    -4.0f,
-    -3.5f,
-    -3.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
-    -2.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
-    -2.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
-    -1.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
-    -1.0f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
-    -0.5f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
-    -0.25f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
-    -FLT_MIN,
-    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
-    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
-    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
-    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
-    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
-    -0.0f,
-
-    +NAN,
-    +INFINITY,
-    +FLT_MAX,
-    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
-    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
-    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
-    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
-    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
-    MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
-    MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
-    MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
-    MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
-    +1000.f,
-    +100.f,
-    +4.0f,
-    +3.5f,
-    +3.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
-    2.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
-    +2.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
-    1.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
-    +1.0f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
-    +0.5f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
-    +0.25f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
-    +FLT_MIN,
-    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
-    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
-    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
-    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
-    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
-    +0.0f,
-};
-
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
-{
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    double maxErrorVal2 = 0.0;
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_float));
-
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
-    test_info.ftz =
-        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    test_info.relaxedMode = relaxedMode;
-    test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
-    test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
-    test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
-
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_float),
-            test_info.subBufferSize * sizeof(cl_float)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
-            }
-        }
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_float);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    fptr func = job->f->func;
-    int ftz = job->ftz;
-    bool relaxedMode = job->relaxedMode;
-    float ulps = getAllowedUlpError(job->f, relaxedMode);
-    MTdata d = tinfo->d;
-    cl_int error;
-    cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
-    const char *name = job->f->name;
-    int isFDim = job->isFDim;
-    int skipNanInf = job->skipNanInf;
-    int isNextafter = job->isNextafter;
-    cl_uint *t = 0;
-    cl_float *r = 0;
-    cl_float *s = 0;
-    cl_float *s2 = 0;
-    cl_int copysign_test = 0;
-    RoundingMode oldRoundMode;
-    int skipVerification = 0;
-
-    if (relaxedMode)
-    {
-        func = job->f->rfunc;
-        if (strcmp(name, "pow") == 0 && gFastRelaxedDerived)
-        {
-            ulps = INFINITY;
-            skipVerification = 1;
-        }
-    }
-
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_uint *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-
-    // Init input array
-    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
-    cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        float *fp = (float *)p;
-        float *fp2 = (float *)p2;
-        uint32_t x, y;
-
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-
-        for (; idx < buffer_elements; idx++)
-        {
-            fp[idx] = specialValues[x];
-            fp2[idx] = specialValues[y];
-            ++x;
-            if (x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesCount) break;
-            }
-        }
-    }
-
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = genrand_int32(d);
-        p2[idx] = genrand_int32(d);
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-
-    if (gSkipCorrectnessTesting)
-    {
-        if ((error = clFinish(tinfo->tQueue)))
-        {
-            vlog_error("Error: clFinish failed! err: %d\n", error);
-            goto exit;
-        }
-        free(overflow);
-        return CL_SUCCESS;
-    }
-
-    FPU_mode_type oldMode;
-    oldRoundMode = kRoundToNearestEven;
-    if (isFDim)
-    {
-        // Calculate the correctly rounded reference result
-        memset(&oldMode, 0, sizeof(oldMode));
-        if (ftz) ForceFTZ(&oldMode);
-
-        // Set the rounding mode to match the device
-        if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
-    }
-
-    if (!strcmp(name, "copysign")) copysign_test = 1;
-
-#define ref_func(s, s2) (copysign_test ? func.f_ff_f(s, s2) : func.f_ff(s, s2))
-
-    // Calculate the correctly rounded reference result
-    r = (float *)gOut_Ref + thread_id * buffer_elements;
-    s = (float *)gIn + thread_id * buffer_elements;
-    s2 = (float *)gIn2 + thread_id * buffer_elements;
-    if (skipNanInf)
-    {
-        for (size_t j = 0; j < buffer_elements; j++)
-        {
-            feclearexcept(FE_OVERFLOW);
-            r[j] = (float)ref_func(s[j], s2[j]);
-            overflow[j] =
-                FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
-        }
-    }
-    else
-    {
-        for (size_t j = 0; j < buffer_elements; j++)
-            r[j] = (float)ref_func(s[j], s2[j]);
-    }
-
-    if (isFDim && ftz) RestoreFPState(&oldMode);
-
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-
-    if (!skipVerification)
-    {
-        // Verify data
-        t = (cl_uint *)r;
-        for (size_t j = 0; j < buffer_elements; j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                cl_uint *q = out[k];
-
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    float test = ((float *)q)[j];
-                    double correct = ref_func(s[j], s2[j]);
-
-                    // Per section 10 paragraph 6, accept any result if an input
-                    // or output is a infinity or NaN or overflow As per
-                    // OpenCL 2.0 spec, section 5.8.4.3, enabling
-                    // fast-relaxed-math mode also enables -cl-finite-math-only
-                    // optimization. This optimization allows to assume that
-                    // arguments and results are not NaNs or +/-INFs. Hence,
-                    // accept any result if inputs or results are NaNs or INFs.
-                    if (relaxedMode || skipNanInf)
-                    {
-                        if (skipNanInf && overflow[j]) continue;
-                        // Note: no double rounding here.  Reference functions
-                        // calculate in single precision.
-                        if (IsFloatInfinity(correct) || IsFloatNaN(correct)
-                            || IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j])
-                            || IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
-                            continue;
-                    }
-
-                    float err = Ulp_Error(test, correct);
-                    int fail = !(fabsf(err) <= ulps);
-
-                    if (fail && ftz)
-                    {
-                        // retry per section 6.5.3.2
-                        if (IsFloatResultSubnormal(correct, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-
-                        // nextafter on FTZ platforms may return the smallest
-                        // normal float (2^-126) given a denormal or a zero
-                        // as the first argument. The rationale here is that
-                        // nextafter flushes the argument to zero and then
-                        // returns the next representable number in the
-                        // direction of the second argument, and since
-                        // denorms are considered as zero, the smallest
-                        // normal number is the next representable number.
-                        // In which case, it should have the same sign as the
-                        // second argument.
-                        if (isNextafter)
-                        {
-                            if (IsFloatSubnormal(s[j]) || s[j] == 0.0f)
-                            {
-                                float value = copysignf(twoToMinus126, s2[j]);
-                                fail = fail && (test != value);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                        else
-                        {
-                            // retry per section 6.5.3.3
-                            if (IsFloatSubnormal(s[j]))
-                            {
-                                double correct2, correct3;
-                                float err2, err3;
-
-                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
-
-                                correct2 = ref_func(0.0, s2[j]);
-                                correct3 = ref_func(-0.0, s2[j]);
-
-                                // Per section 10 paragraph 6, accept any result
-                                // if an input or output is a infinity or NaN or
-                                // overflow As per OpenCL 2.0 spec,
-                                // section 5.8.4.3, enabling fast-relaxed-math
-                                // mode also enables -cl-finite-math-only
-                                // optimization. This optimization allows to
-                                // assume that arguments and results are not
-                                // NaNs or +/-INFs. Hence, accept any result if
-                                // inputs or results are NaNs or INFs.
-                                if (relaxedMode || skipNanInf)
-                                {
-                                    if (fetestexcept(FE_OVERFLOW) && skipNanInf)
-                                        continue;
-
-                                    // Note: no double rounding here.  Reference
-                                    // functions calculate in single precision.
-                                    if (IsFloatInfinity(correct2)
-                                        || IsFloatNaN(correct2)
-                                        || IsFloatInfinity(correct3)
-                                        || IsFloatNaN(correct3))
-                                        continue;
-                                }
-
-                                err2 = Ulp_Error(test, correct2);
-                                err3 = Ulp_Error(test, correct3);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= ulps))
-                                        && (!(fabsf(err3) <= ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-
-                                // retry per section 6.5.3.4
-                                if (IsFloatResultSubnormal(correct2, ulps)
-                                    || IsFloatResultSubnormal(correct3, ulps))
-                                {
-                                    fail = fail && (test != 0.0f);
-                                    if (!fail) err = 0.0f;
-                                }
-
-                                // try with both args as zero
-                                if (IsFloatSubnormal(s2[j]))
-                                {
-                                    double correct4, correct5;
-                                    float err4, err5;
-
-                                    if (skipNanInf) feclearexcept(FE_OVERFLOW);
-
-                                    correct2 = ref_func(0.0, 0.0);
-                                    correct3 = ref_func(-0.0, 0.0);
-                                    correct4 = ref_func(0.0, -0.0);
-                                    correct5 = ref_func(-0.0, -0.0);
-
-                                    // Per section 10 paragraph 6, accept any
-                                    // result if an input or output is a
-                                    // infinity or NaN or overflow As per
-                                    // OpenCL 2.0 spec, section 5.8.4.3,
-                                    // enabling fast-relaxed-math mode also
-                                    // enables -cl-finite-math-only
-                                    // optimization. This optimization allows to
-                                    // assume that arguments and results are not
-                                    // NaNs or +/-INFs. Hence, accept any result
-                                    // if inputs or results are NaNs or INFs.
-                                    if (relaxedMode || skipNanInf)
-                                    {
-                                        if (fetestexcept(FE_OVERFLOW)
-                                            && skipNanInf)
-                                            continue;
-
-                                        // Note: no double rounding here.
-                                        // Reference functions calculate in
-                                        // single precision.
-                                        if (IsFloatInfinity(correct2)
-                                            || IsFloatNaN(correct2)
-                                            || IsFloatInfinity(correct3)
-                                            || IsFloatNaN(correct3)
-                                            || IsFloatInfinity(correct4)
-                                            || IsFloatNaN(correct4)
-                                            || IsFloatInfinity(correct5)
-                                            || IsFloatNaN(correct5))
-                                            continue;
-                                    }
-
-                                    err2 = Ulp_Error(test, correct2);
-                                    err3 = Ulp_Error(test, correct3);
-                                    err4 = Ulp_Error(test, correct4);
-                                    err5 = Ulp_Error(test, correct5);
-                                    fail = fail
-                                        && ((!(fabsf(err2) <= ulps))
-                                            && (!(fabsf(err3) <= ulps))
-                                            && (!(fabsf(err4) <= ulps))
-                                            && (!(fabsf(err5) <= ulps)));
-                                    if (fabsf(err2) < fabsf(err)) err = err2;
-                                    if (fabsf(err3) < fabsf(err)) err = err3;
-                                    if (fabsf(err4) < fabsf(err)) err = err4;
-                                    if (fabsf(err5) < fabsf(err)) err = err5;
-
-                                    // retry per section 6.5.3.4
-                                    if (IsFloatResultSubnormal(correct2, ulps)
-                                        || IsFloatResultSubnormal(correct3,
-                                                                  ulps)
-                                        || IsFloatResultSubnormal(correct4,
-                                                                  ulps)
-                                        || IsFloatResultSubnormal(correct5,
-                                                                  ulps))
-                                    {
-                                        fail = fail && (test != 0.0f);
-                                        if (!fail) err = 0.0f;
-                                    }
-                                }
-                            }
-                            else if (IsFloatSubnormal(s2[j]))
-                            {
-                                double correct2, correct3;
-                                float err2, err3;
-
-                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
-
-                                correct2 = ref_func(s[j], 0.0);
-                                correct3 = ref_func(s[j], -0.0);
-
-                                // Per section 10 paragraph 6, accept any result
-                                // if an input or output is a infinity or NaN or
-                                // overflow As per OpenCL 2.0 spec,
-                                // section 5.8.4.3, enabling fast-relaxed-math
-                                // mode also enables -cl-finite-math-only
-                                // optimization. This optimization allows to
-                                // assume that arguments and results are not
-                                // NaNs or +/-INFs. Hence, accept any result if
-                                // inputs or results are NaNs or INFs.
-                                if (relaxedMode || skipNanInf)
-                                {
-                                    // Note: no double rounding here.  Reference
-                                    // functions calculate in single precision.
-                                    if (overflow[j] && skipNanInf) continue;
-
-                                    if (IsFloatInfinity(correct2)
-                                        || IsFloatNaN(correct2)
-                                        || IsFloatInfinity(correct3)
-                                        || IsFloatNaN(correct3))
-                                        continue;
-                                }
-
-                                err2 = Ulp_Error(test, correct2);
-                                err3 = Ulp_Error(test, correct3);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= ulps))
-                                        && (!(fabsf(err3) <= ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-
-                                // retry per section 6.5.3.4
-                                if (IsFloatResultSubnormal(correct2, ulps)
-                                    || IsFloatResultSubnormal(correct3, ulps))
-                                {
-                                    fail = fail && (test != 0.0f);
-                                    if (!fail) err = 0.0f;
-                                }
-                            }
-                        }
-                    }
-
-                    if (fabsf(err) > tinfo->maxError)
-                    {
-                        tinfo->maxError = fabsf(err);
-                        tinfo->maxErrorValue = s[j];
-                        tinfo->maxErrorValue2 = s2[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error(
-                            "\nERROR: %s%s: %f ulp error at {%a (0x%x), %a "
-                            "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %d\n",
-                            name, sizeNames[k], err, s[j], ((cl_uint *)s)[j],
-                            s2[j], ((cl_uint *)s2)[j], r[j], test,
-                            ((cl_uint *)&test)[0], j);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-    }
-
-    if (isFDim && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-
-
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-
-exit:
-    if (overflow) free(overflow);
-    return error;
-}

diff --git a/test_conformance/math_brute_force/binary_i.cpp b/test_conformance/math_brute_force/binary_i.cpp
new file mode 100644
index 0000000..a29a876
--- /dev/null
+++ b/test_conformance/math_brute_force/binary_i.cpp

@@ -0,0 +1,1233 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <string.h>
+#include <limits.h>
+#include "FunctionList.h"
+
+int TestFunc_Float_Float_Int(const Func *f, MTdata);
+int TestFunc_Double_Double_Int(const Func *f, MTdata);
+// Table record named "binary_i" that bundles the float and double test entry points declared above.
+extern const vtbl _binary_i = { "binary_i", TestFunc_Float_Float_Int,
+                                TestFunc_Double_Double_Int };
+// Forward declarations of the two kernel-source builders defined later in this file.
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+// Assembles the OpenCL C source of "math_kernel<size>" (out = name(float in1, int in2)) for vector-size index vectorSize and returns the result of MakeKernels.
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global int", sizeNames[vectorSize], "* in2 )\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in1[i], in2[i] );\n"
+                            "}\n"
+                        };
+    // c3: source used when the vector size is 3; it goes through vload3/vstore3 and, in the last work-item, pads the 1 or 2 leftover elements with NAN / 0xdead.
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global int* in2)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       int3 i0 = vload3( 0, in2 + 3 * i );\n"
+                            "       f0 = ", name, "( f0, i0 );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       float3 f0;\n"
+                            "       int3 i0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( f0, i0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    // Start from the generic source; kernSize is the number of string fragments in the selected array.
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    // Kernel name handed to MakeKernels; spelled the same way as in the source fragments above.
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+}
+// Double-precision counterpart of BuildKernel: same kernel shape with double operands and the cl_khr_fp64 pragma as the first source fragment.
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    const char *c[] = {     "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global int", sizeNames[vectorSize], "* in2 )\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in1[i], in2[i] );\n"
+                            "}\n"
+                        };
+    // c3: source used when the vector size is 3; it goes through vload3/vstore3 and, in the last work-item, pads the 1 or 2 leftover elements with NAN / 0xdead.
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global int* in2)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       double3 d0 = vload3( 0, in + 3 * i );\n"
+                            "       int3 i0 = vload3( 0, in2 + 3 * i );\n"
+                            "       d0 = ", name, "( d0, i0 );\n"
+                            "       vstore3( d0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       double3 d0;\n"
+                            "       int3 i0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+                            "               i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       d0 = ", name, "( d0, i0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = d0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = d0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    // Start from the generic source; kernSize is the number of string fragments in the selected array.
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    // Kernel name handed to MakeKernels; spelled the same way as in the source fragments above.
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+}
+// Argument bundle that BuildKernel_FloatFn / BuildKernel_DoubleFn receive through their void* parameter.
+typedef struct BuildKernelInfo
+{
+    cl_uint     offset;            // the first vector size to build; the callbacks add job_id to it
+    cl_uint     kernel_count;      // forwarded unchanged as the kernel_count argument of the builder
+    cl_kernel   **kernels;         // kernels[i] is the cl_kernel array passed for vector-size index i
+    cl_program  *programs;         // programs + i is the cl_program slot passed for vector-size index i
+    const char  *nameInCode;       // function name spliced into the generated kernel source
+}BuildKernelInfo;
+
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );  // prototype
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{   // Builds the float kernels for vector-size index (offset + job_id); p points to a BuildKernelInfo.
+    BuildKernelInfo *const request = static_cast<BuildKernelInfo *>( p );
+    const cl_uint sizeIndex = request->offset + job_id;
+    return BuildKernel( request->nameInCode, sizeIndex, request->kernel_count, request->kernels[sizeIndex], &request->programs[sizeIndex] );
+}
+
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );  // prototype
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{   // Builds the double kernels for vector-size index (offset + job_id); p points to a BuildKernelInfo.
+    BuildKernelInfo *const request = static_cast<BuildKernelInfo *>( p );
+    const cl_uint sizeIndex = request->offset + job_id;
+    return BuildKernelDouble( request->nameInCode, sizeIndex, request->kernel_count, request->kernels[sizeIndex], &request->programs[sizeIndex] );
+}
+
+
+// A table of more difficult cases to get right: NaN, infinities, FLT_MAX, neighbours of powers of two and of small integers, subnormals and signed zero
+static const float specialValuesFloat[] = {
+    -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),  MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f,  -4.0f, -3.5f,
+    -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),  MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
+    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
+    // The same magnitudes again with positive sign.
+    +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
+    +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
+    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
+};
+static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] ); // number of entries in the table
+
+// Hand-picked int values: 0..3, values around 127, and a few large magnitudes, each in both signs (presumably the int operand of the tested functions -- confirm).
+static const int specialValuesInt[] = { 0, 1, 2, 3, 126, 127, 128, 0x02000001, 0x04000001, 1465264071, 1488522147,
+                                            -1, -2, -3, -126, -127, -128, -0x02000001, -0x04000001, -1465264071, -1488522147 };
+static size_t specialValuesIntCount = sizeof( specialValuesInt ) / sizeof( specialValuesInt[0] ); // number of entries in the table
+
+//Thread specific data for a worker thread: its sub-buffers, its command queue and the worst error it has seen
+typedef struct ThreadInfo
+{
+    cl_mem      inBuf;                              // first-operand input sub-buffer for the thread (region of gInBuffer)
+    cl_mem      inBuf2;                             // second-operand (int) input sub-buffer for the thread (region of gInBuffer2)
+    cl_mem      outBuf[ VECTOR_SIZE_COUNT ];        // output sub-buffers for the thread, one per vector size index
+    float       maxError;                           // max error value. Init to 0.
+    double      maxErrorValue;                      // position of the max error value (param 1).  Init to 0.
+    cl_int      maxErrorValue2;                     // position of the max error value (param 2).  Init to 0.
+    MTdata      d;                                  // NOTE(review): presumably the thread's random-number state -- confirm
+    cl_command_queue tQueue;                        // per thread command queue to improve performance
+}ThreadInfo;
+// Parameters and per-thread state of one test run; TestFunc_Float_Float_Int zero-fills it and then sets the fields below.
+typedef struct TestInfo
+{
+    size_t      subBufferSize;                      // Size of the sub-buffer in elements
+    const Func  *f;                                 // A pointer to the function info
+    cl_program  programs[ VECTOR_SIZE_COUNT ];      // programs for various vector sizes
+    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
+    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
+    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs: 2^32 / step, or 1 when computing step overflowed
+    cl_uint     step;                               // step between each chunk and the next (subBufferSize * scale).
+    cl_uint     scale;                              // stride between individual test values (1 unless gWimpyMode is set)
+    float       ulps;                               // max_allowed ulps (embedded or full-profile limit taken from f)
+    int         ftz;                                // non-zero if running in flush to zero mode
+
+    // no special values
+}TestInfo;
+
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
+
+/*
+ * Entry point for testing the single-precision variant of a function that
+ * takes a float and an int argument.
+ *
+ * Sets up per-thread sub-buffers, command queues, random generators and
+ * kernels, runs TestFloat over test_info.jobCount jobs via ThreadPool_Do,
+ * reports the largest ulp error recorded by the workers and, when
+ * gMeasureTimes is set, times the kernels on the full-size buffers.
+ *
+ *   f  function table entry under test
+ *   d  random generator state; seeds the per-thread generators and fills the
+ *      timing inputs
+ *
+ * Returns 0 on success, otherwise the first error encountered.  Every path
+ * leaves through the exit label so that kernels, programs, sub-buffers and
+ * queues are always released.
+ */
+int TestFunc_Float_Float_Int(const Func *f, MTdata d)
+{
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    cl_int      maxErrorVal2 = 0;
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+
+    // Init test_info
+    // Zeroing first means the cleanup code at exit only sees NULL for anything not yet created.
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    // Dividing back detects a wrapped 32-bit multiplication above.
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+
+    // Give every worker thread its own slice of the shared buffers, its own queue and its own RNG.
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        cl_buffer_region region2 = { i * test_info.subBufferSize * sizeof( cl_int), test_info.subBufferSize * sizeof( cl_int) };
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
+        // Check the buffer that was just created (inBuf2) and report the region that was requested for it.
+        if( error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zd, %zd}\n", region2.origin, region2.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+
+    // Run the kernels
+    error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
+
+
+    // Accumulate the arithmetic errors
+    // Done before the error check so the worst case is collected even if a job failed.
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        if( test_info.tinfo[i].maxError > maxError )
+        {
+            maxError = test_info.tinfo[i].maxError;
+            maxErrorVal = test_info.tinfo[i].maxErrorValue;
+            maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+        }
+    }
+
+    if( error )
+        goto exit;
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+        {
+            p[j] = (genrand_int32(d) & ~0x40000000) | 0x38000000;
+            p2[j] = 3;
+        }
+
+        // Failures here must go through exit (not return) so the resources created above are released.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            // Timing runs on the main queue with thread 0's kernel and the full-size buffers.
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report the mean instead of the minimum when requested.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2 );
+    vlog( "\n" );
+
+
+exit:
+    // Release everything that was created; entries never created are still zero from the memsets above.
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata(test_info.tinfo[i].d);
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+
+
+/*
+ * Worker run by ThreadPool_Do for one job of the float(float, int) test.
+ *
+ * Fills this thread's slice of gIn/gIn2 (special-value pairs for the first
+ * jobs, random bits otherwise), runs the kernel for every enabled vector
+ * size, computes the host reference with func.f_fi and compares the device
+ * results against it, applying the flush-to-zero retries when ftz is set.
+ *
+ *   job_id     index of the job; also selects which special values are used
+ *   thread_id  index of the calling worker; selects the ThreadInfo and the
+ *              slices of the global host arrays
+ *   data       pointer to the shared TestInfo
+ *
+ * Returns CL_SUCCESS, an OpenCL error code, or -1 when a result is outside
+ * the allowed ulp tolerance.
+ */
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data  )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_float );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    float       ulps = job->ulps;
+    fptr        func = job->f->func;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    const char  *name = job->f->name;
+    cl_uint     *t;
+    cl_float    *r,*s;
+    cl_int      *s2;
+
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_uint  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    //Init input array
+    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
+    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount = specialValuesFloatCount * specialValuesIntCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    // Only the first indx+1 jobs are needed to walk the full float x int special-value grid.
+    if( job_id <= (cl_uint)indx )
+    { // test edge cases
+        float *fp = (float *)p;
+        cl_int *ip2 = (cl_int *)p2;
+        uint32_t x, y;
+
+        // Resume the grid walk where the previous job stopped.
+        x = (job_id * buffer_elements) % specialValuesFloatCount;
+        y = (job_id * buffer_elements) / specialValuesFloatCount;
+
+        for( ; j < buffer_elements; j++ )
+        {
+            fp[j] = specialValuesFloat[x];
+            ip2[j] = specialValuesInt[y];
+            if( ++x >= specialValuesFloatCount )
+            {
+                x = 0;
+                y++;
+                // NOTE(review): breaking here skips the j++, so the element just written is
+                // overwritten by the random fill below -- confirm whether that is intended.
+                if( y >= specialValuesIntCount )
+                    break;
+            }
+        }
+    }
+
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = genrand_int32(d);
+        p2[j] = genrand_int32(d);
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+
+    //Calculate the correctly rounded reference result
+    r = (float *)gOut_Ref  + thread_id * buffer_elements;
+    s = (float *)gIn  + thread_id * buffer_elements;
+    s2 = (cl_int *)gIn2  + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = (float) func.f_fi( s[j], s2[j] );
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+
+    // Wait for the last buffer
+    out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+
+    //Verify data
+    t = (cl_uint *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_uint *q = out[k];
+
+            // If we aren't getting the correctly rounded result
+            // (bitwise comparison; only mismatches go through the ulp check below)
+            if( t[j] != q[j] )
+            {
+                float test = ((float*) q)[j];
+                double correct = func.f_fi( s[j], s2[j] );
+                float err = Ulp_Error( test, correct );
+                // Written as !(x <= ulps) so that a NaN error counts as a failure.
+                int fail = ! (fabsf(err) <= ulps);
+
+                if( fail && ftz )
+                {
+                    // retry per section 6.5.3.2
+                    if( IsFloatResultSubnormal(correct, ulps ) )
+                    {
+                        fail = fail && ( test != 0.0f );
+                        if( ! fail )
+                            err = 0.0f;
+                    }
+
+                    // retry per section 6.5.3.3
+                    if( IsFloatSubnormal( s[j] ) )
+                    {
+                        double correct2, correct3;
+                        float err2, err3;
+                        correct2 = func.f_fi( 0.0, s2[j] );
+                        correct3 = func.f_fi( -0.0, s2[j] );
+                        err2 = Ulp_Error( test, correct2  );
+                        err3 = Ulp_Error( test, correct3  );
+                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                        if( fabsf( err2 ) < fabsf(err ) )
+                            err = err2;
+                        if( fabsf( err3 ) < fabsf(err ) )
+                            err = err3;
+
+                        // retry per section 6.5.3.4
+                        if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f);
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                    }
+                }
+
+                // Track this thread's worst error and the inputs that produced it.
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %d}: *%a vs. %a (0x%8.8x) at index: %d\n", name, sizeNames[k], err, s[j], s2[j], r[j], test, ((cl_uint*)&test)[0], j );
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+
+
+    // Emit progress only when the low 28 bits of base are zero.
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            // scale is a cl_uint and buffer_elements a size_t; the conversion specifiers must match.
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f ThreadCount:%2u\n", base, job->step,  job->scale, buffer_elements, job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+
+exit:
+    return error;
+
+}
+
+
+
+// A table of more difficult cases to get right
+// The first half lists negative values (NaN, infinity, values around powers of two,
+// small integers and halves, subnormals, -0.0); the second half mirrors it with
+// positive signs.
+// NOTE(review): the positive half contains MAKE_HEX_DOUBLE(-0x1.0000000000001p0, ...)
+// where the mirrored entry would be +0x1.0000000000001p0 -- looks like a copy/paste
+// slip; confirm before changing, since it alters the tested inputs.
+static const double specialValuesDouble[] = {
+    -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100.,  -4.0, -3.5,
+    -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
+
+    +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100.,  +4.0, +3.5,
+    +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
+};
+// Number of entries in specialValuesDouble.
+static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] );
+
+// Integer second-argument edge cases paired with specialValuesDouble in the double test.
+// NOTE(review): -11024 breaks the +/- symmetry with 1024 and may be a typo for -1024 -- confirm.
+static const int specialValuesInt2[] = { 0, 1, 2, 3, 1022, 1023, 1024, INT_MIN, INT_MAX,
+                                            -1, -2, -3, -1022, -1023, -11024, -INT_MAX };
+// Must be derived from specialValuesInt2 itself: sizing it from another array makes
+// the count disagree with the table it is used to index.
+static size_t specialValuesInt2Count = sizeof( specialValuesInt2 ) / sizeof( specialValuesInt2[0] );
+
+// Forward declaration of the per-job worker that TestFunc_Double_Double_Int hands to ThreadPool_Do.
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p );
+
+/*
+ * Entry point for testing the double-precision variant of a function that
+ * takes a double and an int argument.
+ *
+ * Sets up per-thread sub-buffers, command queues, random generators and
+ * kernels, runs TestDouble over test_info.jobCount jobs via ThreadPool_Do
+ * (unless gSkipCorrectnessTesting is set), reports the largest ulp error
+ * recorded by the workers and, when gMeasureTimes is set, times the kernels
+ * on the full-size buffers.
+ *
+ *   f  function table entry under test
+ *   d  random generator state; seeds the per-thread generators and fills the
+ *      timing inputs
+ *
+ * Returns 0 on success, otherwise the first error encountered.  Every path
+ * leaves through the exit label so that kernels, programs, sub-buffers and
+ * queues are always released.
+ */
+int TestFunc_Double_Double_Int(const Func *f, MTdata d)
+{
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    cl_int      maxErrorVal2 = 0;
+
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+
+    // Init test_info
+    // Zeroing first means the cleanup code at exit only sees NULL for anything not yet created.
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    // Dividing back detects a wrapped 32-bit multiplication above.
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = f->double_ulps;
+    test_info.ftz = f->ftz || gForceFTZ;
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+
+    // Give every worker thread its own slice of the shared buffers, its own queue and its own RNG.
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        // The int argument is half the width of the double one, so it gets its own region.
+        cl_buffer_region region2 = { i * test_info.subBufferSize * sizeof( cl_int), test_info.subBufferSize * sizeof( cl_int) };
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
+        // Check the buffer that was just created (inBuf2) and report the region that was requested for it.
+        if( error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zd, %zd}\n", region2.origin, region2.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            /* Qualcomm fix: end */
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+
+    // Run the kernels
+    if( !gSkipCorrectnessTesting )
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
+
+
+    // Accumulate the arithmetic errors
+    // Done before the error check so the worst case is collected even if a job failed.
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        if( test_info.tinfo[i].maxError > maxError )
+        {
+            maxError = test_info.tinfo[i].maxError;
+            maxErrorVal = test_info.tinfo[i].maxErrorValue;
+            maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+        }
+    }
+
+    if( error )
+        goto exit;
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        double *p = (double *)gIn;
+        cl_int *p2 = (cl_int *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = 3;
+        }
+
+        // Failures here must go through exit (not return) so the resources created above are released.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        // Only half the bytes are needed: one cl_int per cl_double element.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE/2, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            // Timing runs on the main queue with thread 0's kernel and the full-size buffers.
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report the mean instead of the minimum when requested.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        // j already equals gMaxVectorSizeIndex after the loop above, so this prints nothing.
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2 );
+    vlog( "\n" );
+
+
+exit:
+    // Release
+    // Entries never created are still zero from the memsets above.
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata(test_info.tinfo[i].d);
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+// Thread-pool job: runs job->f on one chunk of (double, int) inputs at every enabled vector size and checks the device results against the host reference.
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_double );
+    cl_uint     base = job_id * (cl_uint) job->step;   // only used to pace the progress output at the end
+    ThreadInfo  *tinfo = job->tinfo + thread_id;       // this worker's buffers, queue, RNG and max-error record
+    float       ulps = job->ulps;
+    dptr        func = job->f->dfunc;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    const char  *name = job->f->name;
+    cl_ulong    *t;
+    cl_double   *r,*s;
+    cl_int      *s2;
+
+    Force64BitFPUPrecision();
+
+    // Start non-blocking maps of the output buffers; e[j] is waited on below before out[j] is touched.
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_ulong  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    // Get that moving (a flush failure is only logged; error is reassigned by the next CL call)
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    // Init input array: this thread's slices of the shared host staging buffers gIn / gIn2
+    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
+    cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount = specialValuesDoubleCount * specialValuesInt2Count;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;   // last job that still holds special-value pairs
+    if( job_id <= (cl_uint)indx )
+    { // test edge cases
+        cl_double *fp = (cl_double *)p;
+        cl_int *ip2 = (cl_int *)p2;
+        uint32_t x, y;
+
+        x = (job_id * buffer_elements) % specialValuesDoubleCount;
+        y = (job_id * buffer_elements) / specialValuesDoubleCount;
+        // Walk the cross product: x (double table) runs fastest, y (int table) advances on each wrap of x.
+        for( ; j < buffer_elements; j++ )
+        {
+            fp[j] = specialValuesDouble[x];
+            ip2[j] = specialValuesInt2[y];
+            if( ++x >= specialValuesDoubleCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesInt2Count )
+                    { j++; break; }   // step past the pair just stored so the random fill below does not overwrite it
+            }
+        }
+    }
+
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = DoubleFromUInt32(genrand_int32(d));
+        p2[j] = genrand_int32(d);
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    // NOTE(review): buffer_size/2 presumes sizeof(cl_int) is half of sizeof(cl_double) - confirm.
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size/2, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // run the kernel: one work-item per vector, rounding the element count up
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+
+    //Calculate the correctly rounded reference result on the host while the device works
+    r = (cl_double *)gOut_Ref  + thread_id * buffer_elements;
+    s = (cl_double *)gIn  + thread_id * buffer_elements;
+    s2 = (cl_int *)gIn2  + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = (cl_double) func.f_fi( s[j], s2[j] );
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+
+    // Wait for the last buffer (j is left at the final vector size index by the loop above); this map is blocking
+    out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+
+    //Verify data: compare bit patterns first, and compute a ulp error only on mismatch
+    t = (cl_ulong *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_ulong *q = out[k];
+
+            // If we aren't getting the correctly rounded result
+            if( t[j] != q[j] )
+            {
+                cl_double test = ((cl_double*) q)[j];
+                long double correct = func.f_fi( s[j], s2[j] );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
+                int fail = ! (fabsf(err) <= ulps);   // written so that a NaN err counts as a failure
+
+                if( fail && ftz )
+                {
+                    // retry per section 6.5.3.2
+                    if( IsDoubleResultSubnormal(correct, ulps ) )
+                    {
+                        fail = fail && ( test != 0.0f );
+                        if( ! fail )
+                            err = 0.0f;
+                    }
+
+                    // retry per section 6.5.3.3
+                    if( IsDoubleSubnormal( s[j] ) )
+                    {
+                        long double correct2 = func.f_fi( 0.0, s2[j] );
+                        long double correct3 = func.f_fi( -0.0, s2[j] );
+                        float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                        float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                        if( fabsf( err2 ) < fabsf(err ) )
+                            err = err2;
+                        if( fabsf( err3 ) < fabsf(err ) )
+                            err = err3;
+
+                        // retry per section 6.5.3.4
+                        if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f);
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                    }
+                }
+                // Remember the worst error seen by this thread together with the inputs that produced it.
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at {%.13la, %d}: *%.13la vs. %.13la\n", name, sizeNames[k], err, s[j], s2[j], r[j], test );
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+
+    // Progress output: emitted only for jobs whose base has its low 28 bits clear.
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+
+exit:
+    return error;
+
+}
+
+
+

diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp
deleted file mode 100644
index 69e620a..0000000
--- a/test_conformance/math_brute_force/binary_i_double.cpp
+++ /dev/null

@@ -1,746 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <climits>
-#include <cstring>
-// Assembles the OpenCL C source of "math_kernel<size>" calling `name` on (double, int) inputs and hands it to MakeKernels; returns MakeKernels' result.
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global int",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-    // 3-wide variant: takes scalar pointers, uses vload3/vstore3, and lets the last work-item handle the leftover elements.
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global int* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = vload3( 0, in2 + 3 * i );\n"
-        "       d0 = ",
-        name,
-        "( d0, i0 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       int3 i0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, i0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    // Use the generic source unless this vector size index denotes 3-element vectors.
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    // Entry-point name; must agree with the "math_kernel" + size suffix emitted in the source above.
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-// Arguments shared by all BuildKernelFn jobs; job n builds vector size index offset + n.
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count; // forwarded to BuildKernel as the number of kernels to create
-    cl_kernel **kernels; // kernels[i]: kernel array filled for vector size index i
-    cl_program *programs; // programs[i]: program slot for vector size index i
-    const char *nameInCode; // function name spliced into the kernel source
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-// ThreadPool_Do callback: builds the kernels for vector size index info->offset + job_id; p is a BuildKernelInfo*.
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id; // vector size index handled by this job
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-}
-
-// Per-worker-thread state: device sub-buffers, worst-error record, RNG and command queue
-typedef struct ThreadInfo
-{
-    cl_mem inBuf; // sub-buffer of gInBuffer: this thread's double inputs
-    cl_mem inBuf2; // sub-buffer of gInBuffer2: this thread's int inputs
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output sub-buffers, one per vector size
-    float maxError; // largest |ulp error| seen by this thread. Init to 0.
-    double
-        maxErrorValue; // double input that produced maxError (param 1).  Init to 0.
-    cl_int maxErrorValue2; // int input that produced maxError (param 2).  Init
-                           // to 0.
-    MTdata d; // per-thread random generator state
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-// Read-only description of one function's test run, shared by every Test job
-typedef struct TestInfo
-{
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-
-    // no special values
-} TestInfo;
-
-// A table of more difficult cases to get right: negative values first, then the same magnitudes with a positive sign
-static const double specialValues[] = {
-    -NAN,
-    -INFINITY,
-    -DBL_MAX,
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
-    -1000.0,
-    -100.0,
-    -4.0,
-    -3.5,
-    -3.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
-    -2.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
-    -2.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
-    -1.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    -1.0,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53),
-    -0.5,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54),
-    -0.25,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
-    -DBL_MIN,
-    MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
-    -0.0,
-    // Positive counterparts of the entries above, in the same order.
-    +NAN,
-    +INFINITY,
-    +DBL_MAX,
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
-    +1000.0,
-    +100.0,
-    +4.0,
-    +3.5,
-    +3.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
-    +2.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
-    +2.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
-    +1.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52),
-    +1.0,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
-    +0.5,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
-    +0.25,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
-    +DBL_MIN,
-    MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
-    +0.0,
-};
-// Element count of the double table; the Test job pairs every double with every int listed below.
-static constexpr size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-// Integer second arguments: small values, values around 1023, the int limits, and the negated counterparts.
-static const int specialValuesInt[] = {
-    0,       1,  2,  3,  1022,  1023,  1024,  INT_MIN,
-    INT_MAX, -1, -2, -3, -1022, -1023, -1024, -INT_MAX,
-};
-static constexpr size_t specialValuesIntCount =
-    sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
-{
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    cl_int maxErrorVal2 = 0;
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ulps = f->double_ulps;
-    test_info.ftz = f->ftz || gForceFTZ;
-
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        cl_buffer_region region2 = { i * test_info.subBufferSize
-                                         * sizeof(cl_int),
-                                     test_info.subBufferSize * sizeof(cl_int) };
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
-            }
-        }
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_double);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    float ulps = job->ulps;
-    dptr func = job->f->dfunc;
-    int ftz = job->ftz;
-    MTdata d = tinfo->d;
-    cl_int error;
-    const char *name = job->f->name;
-    cl_ulong *t;
-    cl_double *r;
-    cl_double *s;
-    cl_int *s2;
-
-    Force64BitFPUPrecision();
-
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-
-    // Init input array
-    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
-    cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
-    size_t idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        cl_double *fp = (cl_double *)p;
-        cl_int *ip2 = (cl_int *)p2;
-        uint32_t x, y;
-
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-
-        for (; idx < buffer_elements; idx++)
-        {
-            fp[idx] = specialValues[x];
-            ip2[idx] = specialValuesInt[y];
-            if (++x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesIntCount) break;
-            }
-        }
-    }
-
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = DoubleFromUInt32(genrand_int32(d));
-        p2[idx] = genrand_int32(d);
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size / 2, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-
-    // Calculate the correctly rounded reference result
-    r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
-    s = (cl_double *)gIn + thread_id * buffer_elements;
-    s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_fi(s[j], s2[j]);
-
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-
-    // Verify data
-    t = (cl_ulong *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_ulong *q = out[k];
-
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_fi(s[j], s2[j]);
-                float err = Bruteforce_Ulp_Error_Double(test, correct);
-                int fail = !(fabsf(err) <= ulps);
-
-                if (fail && ftz)
-                {
-                    // retry per section 6.5.3.2
-                    if (IsDoubleResultSubnormal(correct, ulps))
-                    {
-                        fail = fail && (test != 0.0f);
-                        if (!fail) err = 0.0f;
-                    }
-
-                    // retry per section 6.5.3.3
-                    if (IsDoubleSubnormal(s[j]))
-                    {
-                        long double correct2 = func.f_fi(0.0, s2[j]);
-                        long double correct3 = func.f_fi(-0.0, s2[j]);
-                        float err2 =
-                            Bruteforce_Ulp_Error_Double(test, correct2);
-                        float err3 =
-                            Bruteforce_Ulp_Error_Double(test, correct3);
-                        fail = fail
-                            && ((!(fabsf(err2) <= ulps))
-                                && (!(fabsf(err3) <= ulps)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-
-                        // retry per section 6.5.3.4
-                        if (IsDoubleResultSubnormal(correct2, ulps)
-                            || IsDoubleResultSubnormal(correct3, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                }
-
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                    tinfo->maxErrorValue2 = s2[j];
-                }
-                if (fail)
-                {
-                    vlog_error("\nERROR: %s%s: %f ulp error at {%.13la, %d}: "
-                               "*%.13la vs. %.13la\n",
-                               name, sizeNames[k], err, s[j], s2[j], r[j],
-                               test);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-
-
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-
-exit:
-    return error;
-}

diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp
deleted file mode 100644
index e65a9aa..0000000
--- a/test_conformance/math_brute_force/binary_i_float.cpp
+++ /dev/null

@@ -1,740 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <climits>
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global int",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global int* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = vload3( 0, in2 + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0, i0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       int3 i0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, i0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-}
-
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-{
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double
-        maxErrorValue; // position of the max error value (param 1).  Init to 0.
-    cl_int maxErrorValue2; // position of the max error value (param 2).  Init
-                           // to 0.
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
-{
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-
-    // no special values
-} TestInfo;
-
-// A table of more difficult cases to get right
-static const float specialValues[] = {
-    -NAN,
-    -INFINITY,
-    -FLT_MAX,
-    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
-    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
-    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
-    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
-    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
-    MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
-    MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
-    MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
-    MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
-    -1000.f,
-    -100.f,
-    -4.0f,
-    -3.5f,
-    -3.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
-    -2.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
-    -2.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
-    -1.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
-    -1.0f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
-    -0.5f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
-    -0.25f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
-    -FLT_MIN,
-    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
-    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
-    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
-    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
-    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
-    -0.0f,
-
-    +NAN,
-    +INFINITY,
-    +FLT_MAX,
-    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
-    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
-    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
-    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
-    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
-    MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
-    MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
-    MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
-    MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
-    +1000.f,
-    +100.f,
-    +4.0f,
-    +3.5f,
-    +3.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
-    2.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
-    +2.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
-    1.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
-    +1.0f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
-    +0.5f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
-    +0.25f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
-    +FLT_MIN,
-    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
-    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
-    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
-    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
-    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
-    +0.0f,
-};
-
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-
-static const int specialValuesInt[] = {
-    0,           1,           2,           3,          126,        127,
-    128,         0x02000001,  0x04000001,  1465264071, 1488522147, -1,
-    -2,          -3,          -126,        -127,       -128,       -0x02000001,
-    -0x04000001, -1465264071, -1488522147,
-};
-static size_t specialValuesIntCount =
-    sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
-{
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    cl_int maxErrorVal2 = 0;
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_float));
-
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
-    test_info.ftz =
-        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_float),
-            test_info.subBufferSize * sizeof(cl_float)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        cl_buffer_region region2 = { i * test_info.subBufferSize
-                                         * sizeof(cl_int),
-                                     test_info.subBufferSize * sizeof(cl_int) };
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
-            }
-        }
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_float);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    fptr func = job->f->func;
-    int ftz = job->ftz;
-    float ulps = job->ulps;
-    MTdata d = tinfo->d;
-    cl_int error;
-    const char *name = job->f->name;
-    cl_uint *t = 0;
-    cl_float *r = 0;
-    cl_float *s = 0;
-    cl_int *s2 = 0;
-
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_uint *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-
-    // Init input array
-    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
-    size_t idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        float *fp = (float *)p;
-        cl_int *ip2 = (cl_int *)p2;
-        uint32_t x, y;
-
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-
-        for (; idx < buffer_elements; idx++)
-        {
-            fp[idx] = specialValues[x];
-            ip2[idx] = specialValuesInt[y];
-            ++x;
-            if (x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesIntCount) break;
-            }
-        }
-    }
-
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = genrand_int32(d);
-        p2[idx] = genrand_int32(d);
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-
-    // Calculate the correctly rounded reference result
-    r = (float *)gOut_Ref + thread_id * buffer_elements;
-    s = (float *)gIn + thread_id * buffer_elements;
-    s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (float)func.f_fi(s[j], s2[j]);
-
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-
-    // Verify data
-    t = (cl_uint *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_uint *q = out[k];
-
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                float test = ((float *)q)[j];
-                double correct = func.f_fi(s[j], s2[j]);
-                float err = Ulp_Error(test, correct);
-                int fail = !(fabsf(err) <= ulps);
-
-                if (fail && ftz)
-                {
-                    // retry per section 6.5.3.2
-                    if (IsFloatResultSubnormal(correct, ulps))
-                    {
-                        fail = fail && (test != 0.0f);
-                        if (!fail) err = 0.0f;
-                    }
-
-                    // retry per section 6.5.3.3
-                    if (IsFloatSubnormal(s[j]))
-                    {
-                        double correct2, correct3;
-                        float err2, err3;
-                        correct2 = func.f_fi(0.0, s2[j]);
-                        correct3 = func.f_fi(-0.0, s2[j]);
-                        err2 = Ulp_Error(test, correct2);
-                        err3 = Ulp_Error(test, correct3);
-                        fail = fail
-                            && ((!(fabsf(err2) <= ulps))
-                                && (!(fabsf(err3) <= ulps)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-
-                        // retry per section 6.5.3.4
-                        if (IsFloatResultSubnormal(correct2, ulps)
-                            || IsFloatResultSubnormal(correct3, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                }
-
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                    tinfo->maxErrorValue2 = s2[j];
-                }
-                if (fail)
-                {
-                    vlog_error(
-                        "\nERROR: %s%s: %f ulp error at {%a (0x%8.8x), %d}: "
-                        "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %d\n",
-                        name, sizeNames[k], err, s[j], ((uint32_t *)s)[j],
-                        s2[j], r[j], ((uint32_t *)r)[j], test,
-                        ((cl_uint *)&test)[0], j);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-
-
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-
-exit:
-    return error;
-}

diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp
deleted file mode 100644
index 21e76c8..0000000
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ /dev/null

@@ -1,795 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *operator_symbol, int vectorSize,
-                       cl_uint kernel_count, cl_kernel *k, cl_program *p,
-                       bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = in1[i] ",
-                        operator_symbol,
-                        " in2[i];\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       d0 = d0 ",
-        operator_symbol,
-        " d1;\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = d0 ",
-        operator_symbol,
-        " d1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *operator_symbol;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->operator_symbol, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-}
-
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-{
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double
-        maxErrorValue; // position of the max error value (param 1).  Init to 0.
-    double maxErrorValue2; // position of the max error value (param 2).  Init
-                           // to 0.
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
-{
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-    bool relaxedMode; // True if the test is being run in relaxed mode, false
-                      // otherwise.
-
-    // no special fields
-} TestInfo;
-
-// A table of more difficult cases to get right
-static const double specialValues[] = {
-    -NAN,
-    -INFINITY,
-    -DBL_MAX,
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
-    -1000.,
-    -100.,
-    -4.0,
-    -3.5,
-    -3.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
-    -2.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
-    -2.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
-    -1.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    -1.0,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53),
-    -0.5,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54),
-    -0.25,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
-    -DBL_MIN,
-    MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
-    -0.0,
-
-    +NAN,
-    +INFINITY,
-    +DBL_MAX,
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
-    +1000.0,
-    +100.0,
-    +4.0,
-    +3.5,
-    +3.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
-    +2.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
-    +2.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
-    +1.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    +1.0,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
-    +0.5,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
-    +0.25,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
-    +DBL_MIN,
-    MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
-    +0.0,
-};
-
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
-                                           bool relaxedMode)
-{
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    double maxErrorVal2 = 0.0;
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ulps = f->double_ulps;
-    test_info.ftz = f->ftz || gForceFTZ;
-
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
-            }
-        }
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_double);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    float ulps = job->ulps;
-    dptr func = job->f->dfunc;
-    int ftz = job->ftz;
-    MTdata d = tinfo->d;
-    cl_int error;
-    const char *name = job->f->name;
-    cl_ulong *t;
-    cl_double *r;
-    cl_double *s;
-    cl_double *s2;
-
-    Force64BitFPUPrecision();
-
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-
-    // Init input array
-    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
-    cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
-    cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        cl_double *fp = (cl_double *)p;
-        cl_double *fp2 = (cl_double *)p2;
-        uint32_t x, y;
-
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-
-        for (; idx < buffer_elements; idx++)
-        {
-            fp[idx] = specialValues[x];
-            fp2[idx] = specialValues[y];
-            if (++x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesCount) break;
-            }
-        }
-    }
-
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = genrand_int64(d);
-        p2[idx] = genrand_int64(d);
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-
-    // Calculate the correctly rounded reference result
-    r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
-    s = (cl_double *)gIn + thread_id * buffer_elements;
-    s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_ff(s[j], s2[j]);
-
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-
-    // Verify data
-    t = (cl_ulong *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_ulong *q = out[k];
-
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_ff(s[j], s2[j]);
-                float err = Bruteforce_Ulp_Error_Double(test, correct);
-                int fail = !(fabsf(err) <= ulps);
-
-                if (fail && ftz)
-                {
-                    // retry per section 6.5.3.2
-                    if (IsDoubleResultSubnormal(correct, ulps))
-                    {
-                        fail = fail && (test != 0.0f);
-                        if (!fail) err = 0.0f;
-                    }
-
-
-                    // retry per section 6.5.3.3
-                    if (IsDoubleSubnormal(s[j]))
-                    {
-                        long double correct2 = func.f_ff(0.0, s2[j]);
-                        long double correct3 = func.f_ff(-0.0, s2[j]);
-                        float err2 =
-                            Bruteforce_Ulp_Error_Double(test, correct2);
-                        float err3 =
-                            Bruteforce_Ulp_Error_Double(test, correct3);
-                        fail = fail
-                            && ((!(fabsf(err2) <= ulps))
-                                && (!(fabsf(err3) <= ulps)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-
-                        // retry per section 6.5.3.4
-                        if (IsDoubleResultSubnormal(correct2, ulps)
-                            || IsDoubleResultSubnormal(correct3, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-
-                        // try with both args as zero
-                        if (IsDoubleSubnormal(s2[j]))
-                        {
-                            correct2 = func.f_ff(0.0, 0.0);
-                            correct3 = func.f_ff(-0.0, 0.0);
-                            long double correct4 = func.f_ff(0.0, -0.0);
-                            long double correct5 = func.f_ff(-0.0, -0.0);
-                            err2 = Bruteforce_Ulp_Error_Double(test, correct2);
-                            err3 = Bruteforce_Ulp_Error_Double(test, correct3);
-                            float err4 =
-                                Bruteforce_Ulp_Error_Double(test, correct4);
-                            float err5 =
-                                Bruteforce_Ulp_Error_Double(test, correct5);
-                            fail = fail
-                                && ((!(fabsf(err2) <= ulps))
-                                    && (!(fabsf(err3) <= ulps))
-                                    && (!(fabsf(err4) <= ulps))
-                                    && (!(fabsf(err5) <= ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            if (fabsf(err4) < fabsf(err)) err = err4;
-                            if (fabsf(err5) < fabsf(err)) err = err5;
-
-                            // retry per section 6.5.3.4
-                            if (IsDoubleResultSubnormal(correct2, ulps)
-                                || IsDoubleResultSubnormal(correct3, ulps)
-                                || IsDoubleResultSubnormal(correct4, ulps)
-                                || IsDoubleResultSubnormal(correct5, ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    else if (IsDoubleSubnormal(s2[j]))
-                    {
-                        long double correct2 = func.f_ff(s[j], 0.0);
-                        long double correct3 = func.f_ff(s[j], -0.0);
-                        float err2 =
-                            Bruteforce_Ulp_Error_Double(test, correct2);
-                        float err3 =
-                            Bruteforce_Ulp_Error_Double(test, correct3);
-                        fail = fail
-                            && ((!(fabsf(err2) <= ulps))
-                                && (!(fabsf(err3) <= ulps)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-
-                        // retry per section 6.5.3.4
-                        if (IsDoubleResultSubnormal(correct2, ulps)
-                            || IsDoubleResultSubnormal(correct3, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                }
-
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                    tinfo->maxErrorValue2 = s2[j];
-                }
-                if (fail)
-                {
-                    vlog_error(
-                        "\nERROR: %s%s: %f ulp error at {%a, %a}: *%a vs. %a\n",
-                        name, sizeNames[k], err, s[j], s2[j], r[j], test);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-
-
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-
-exit:
-    return error;
-}

diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
deleted file mode 100644
index ccaef60..0000000
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ /dev/null

@@ -1,925 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *operator_symbol, int vectorSize,
-                       cl_uint kernel_count, cl_kernel *k, cl_program *p,
-                       bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = in1[i] ",
-                        operator_symbol,
-                        " in2[i];\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       f0 = f0 ",
-        operator_symbol,
-        " f1;\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = f0 ",
-        operator_symbol,
-        " f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *operator_symbol;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->operator_symbol, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-}
-
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-{
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double
-        maxErrorValue; // position of the max error value (param 1).  Init to 0.
-    double maxErrorValue2; // position of the max error value (param 2).  Init
-                           // to 0.
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
-{
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-    bool relaxedMode; // True if the test is being run in relaxed mode, false
-                      // otherwise.
-
-    // no special fields
-} TestInfo;
-
-// A table of more difficult cases to get right
-static const float specialValues[] = {
-    -NAN,
-    -INFINITY,
-    -FLT_MAX,
-    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
-    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
-    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
-    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
-    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
-    MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
-    MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
-    MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
-    MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
-    -1000.f,
-    -100.f,
-    -4.0f,
-    -3.5f,
-    -3.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
-    -2.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
-    -2.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
-    -1.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
-    -1.0f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
-    -0.5f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
-    -0.25f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
-    -FLT_MIN,
-    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
-    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
-    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
-    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
-    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
-    -0.0f,
-
-    +NAN,
-    +INFINITY,
-    +FLT_MAX,
-    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
-    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
-    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
-    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
-    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
-    MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
-    MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
-    MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
-    MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
-    +1000.f,
-    +100.f,
-    +4.0f,
-    +3.5f,
-    +3.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
-    2.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
-    +2.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
-    1.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
-    +1.0f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
-    +0.5f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
-    +0.25f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
-    +FLT_MIN,
-    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
-    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
-    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
-    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
-    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
-    +0.0f,
-};
-
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
-                                        bool relaxedMode)
-{
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    double maxErrorVal2 = 0.0;
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_float));
-
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
-    test_info.ftz =
-        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    test_info.relaxedMode = relaxedMode;
-
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_float),
-            test_info.subBufferSize * sizeof(cl_float)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
-            }
-        }
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_float);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    fptr func = job->f->func;
-    int ftz = job->ftz;
-    bool relaxedMode = job->relaxedMode;
-    float ulps = getAllowedUlpError(job->f, relaxedMode);
-    MTdata d = tinfo->d;
-    cl_int error;
-    cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
-    const char *name = job->f->name;
-    cl_uint *t = 0;
-    cl_float *r = 0;
-    cl_float *s = 0;
-    cl_float *s2 = 0;
-    RoundingMode oldRoundMode;
-
-    if (relaxedMode)
-    {
-        func = job->f->rfunc;
-    }
-
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_uint *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-
-    // Init input array
-    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
-    cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    {
-        // Insert special values
-        uint32_t x, y;
-
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-
-        for (; idx < buffer_elements; idx++)
-        {
-            p[idx] = ((cl_uint *)specialValues)[x];
-            p2[idx] = ((cl_uint *)specialValues)[y];
-            ++x;
-            if (x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesCount) break;
-            }
-            if (relaxedMode && strcmp(name, "divide") == 0)
-            {
-                cl_uint pj = p[idx] & 0x7fffffff;
-                cl_uint p2j = p2[idx] & 0x7fffffff;
-                // Replace values outside [2^-62, 2^62] with QNaN
-                if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
-                if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
-            }
-        }
-    }
-
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = genrand_int32(d);
-        p2[idx] = genrand_int32(d);
-
-        if (relaxedMode && strcmp(name, "divide") == 0)
-        {
-            cl_uint pj = p[idx] & 0x7fffffff;
-            cl_uint p2j = p2[idx] & 0x7fffffff;
-            // Replace values outside [2^-62, 2^62] with QNaN
-            if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
-            if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
-        }
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-
-    if (gSkipCorrectnessTesting)
-    {
-        free(overflow);
-        return CL_SUCCESS;
-    }
-
-    // Calculate the correctly rounded reference result
-    FPU_mode_type oldMode;
-    memset(&oldMode, 0, sizeof(oldMode));
-    if (ftz) ForceFTZ(&oldMode);
-
-    // Set the rounding mode to match the device
-    oldRoundMode = kRoundToNearestEven;
-    if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
-
-    // Calculate the correctly rounded reference result
-    r = (float *)gOut_Ref + thread_id * buffer_elements;
-    s = (float *)gIn + thread_id * buffer_elements;
-    s2 = (float *)gIn2 + thread_id * buffer_elements;
-    if (gInfNanSupport)
-    {
-        for (size_t j = 0; j < buffer_elements; j++)
-            r[j] = (float)func.f_ff(s[j], s2[j]);
-    }
-    else
-    {
-        for (size_t j = 0; j < buffer_elements; j++)
-        {
-            feclearexcept(FE_OVERFLOW);
-            r[j] = (float)func.f_ff(s[j], s2[j]);
-            overflow[j] =
-                FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
-        }
-    }
-
-    if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
-
-    if (ftz) RestoreFPState(&oldMode);
-
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-
-    // Verify data
-    t = (cl_uint *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_uint *q = out[k];
-
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                float test = ((float *)q)[j];
-                double correct = func.f_ff(s[j], s2[j]);
-
-                // Per section 10 paragraph 6, accept any result if an input or
-                // output is a infinity or NaN or overflow
-                if (!gInfNanSupport)
-                {
-                    // Note: no double rounding here.  Reference functions
-                    // calculate in single precision.
-                    if (overflow[j] || IsFloatInfinity(correct)
-                        || IsFloatNaN(correct) || IsFloatInfinity(s2[j])
-                        || IsFloatNaN(s2[j]) || IsFloatInfinity(s[j])
-                        || IsFloatNaN(s[j]))
-                        continue;
-                }
-
-                // Per section 10 paragraph 6, accept embedded devices always
-                // returning positive 0.0.
-                if (gIsEmbedded && (t[j] == 0x80000000) && (q[j] == 0x00000000))
-                    continue;
-
-                float err = Ulp_Error(test, correct);
-                float errB = Ulp_Error(test, (float)correct);
-
-                int fail =
-                    ((!(fabsf(err) <= ulps)) && (!(fabsf(errB) <= ulps)));
-                if (fabsf(errB) < fabsf(err)) err = errB;
-
-                if (fail && ftz)
-                {
-                    // retry per section 6.5.3.2
-                    if (IsFloatResultSubnormal(correct, ulps))
-                    {
-                        fail = fail && (test != 0.0f);
-                        if (!fail) err = 0.0f;
-                    }
-
-                    // retry per section 6.5.3.3
-                    if (IsFloatSubnormal(s[j]))
-                    {
-                        double correct2, correct3;
-                        float err2, err3;
-
-                        if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);
-
-                        correct2 = func.f_ff(0.0, s2[j]);
-                        correct3 = func.f_ff(-0.0, s2[j]);
-
-                        // Per section 10 paragraph 6, accept any result if an
-                        // input or output is a infinity or NaN or overflow
-                        if (!gInfNanSupport)
-                        {
-                            if (fetestexcept(FE_OVERFLOW)) continue;
-
-                            // Note: no double rounding here.  Reference
-                            // functions calculate in single precision.
-                            if (IsFloatInfinity(correct2)
-                                || IsFloatNaN(correct2)
-                                || IsFloatInfinity(correct3)
-                                || IsFloatNaN(correct3))
-                                continue;
-                        }
-
-                        err2 = Ulp_Error(test, correct2);
-                        err3 = Ulp_Error(test, correct3);
-                        fail = fail
-                            && ((!(fabsf(err2) <= ulps))
-                                && (!(fabsf(err3) <= ulps)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-
-                        // retry per section 6.5.3.4
-                        if (IsFloatResultSubnormal(correct2, ulps)
-                            || IsFloatResultSubnormal(correct3, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-
-                        // try with both args as zero
-                        if (IsFloatSubnormal(s2[j]))
-                        {
-                            double correct4, correct5;
-                            float err4, err5;
-
-                            if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);
-
-                            correct2 = func.f_ff(0.0, 0.0);
-                            correct3 = func.f_ff(-0.0, 0.0);
-                            correct4 = func.f_ff(0.0, -0.0);
-                            correct5 = func.f_ff(-0.0, -0.0);
-
-                            // Per section 10 paragraph 6, accept any result if
-                            // an input or output is a infinity or NaN or
-                            // overflow
-                            if (!gInfNanSupport)
-                            {
-                                if (fetestexcept(FE_OVERFLOW)) continue;
-
-                                // Note: no double rounding here.  Reference
-                                // functions calculate in single precision.
-                                if (IsFloatInfinity(correct2)
-                                    || IsFloatNaN(correct2)
-                                    || IsFloatInfinity(correct3)
-                                    || IsFloatNaN(correct3)
-                                    || IsFloatInfinity(correct4)
-                                    || IsFloatNaN(correct4)
-                                    || IsFloatInfinity(correct5)
-                                    || IsFloatNaN(correct5))
-                                    continue;
-                            }
-
-                            err2 = Ulp_Error(test, correct2);
-                            err3 = Ulp_Error(test, correct3);
-                            err4 = Ulp_Error(test, correct4);
-                            err5 = Ulp_Error(test, correct5);
-                            fail = fail
-                                && ((!(fabsf(err2) <= ulps))
-                                    && (!(fabsf(err3) <= ulps))
-                                    && (!(fabsf(err4) <= ulps))
-                                    && (!(fabsf(err5) <= ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            if (fabsf(err4) < fabsf(err)) err = err4;
-                            if (fabsf(err5) < fabsf(err)) err = err5;
-
-                            // retry per section 6.5.3.4
-                            if (IsFloatResultSubnormal(correct2, ulps)
-                                || IsFloatResultSubnormal(correct3, ulps)
-                                || IsFloatResultSubnormal(correct4, ulps)
-                                || IsFloatResultSubnormal(correct5, ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    else if (IsFloatSubnormal(s2[j]))
-                    {
-                        double correct2, correct3;
-                        float err2, err3;
-
-                        if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);
-
-                        correct2 = func.f_ff(s[j], 0.0);
-                        correct3 = func.f_ff(s[j], -0.0);
-
-                        // Per section 10 paragraph 6, accept any result if an
-                        // input or output is a infinity or NaN or overflow
-                        if (!gInfNanSupport)
-                        {
-                            // Note: no double rounding here.  Reference
-                            // functions calculate in single precision.
-                            if (overflow[j] || IsFloatInfinity(correct)
-                                || IsFloatNaN(correct)
-                                || IsFloatInfinity(correct2)
-                                || IsFloatNaN(correct2))
-                                continue;
-                        }
-
-                        err2 = Ulp_Error(test, correct2);
-                        err3 = Ulp_Error(test, correct3);
-                        fail = fail
-                            && ((!(fabsf(err2) <= ulps))
-                                && (!(fabsf(err3) <= ulps)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-
-                        // retry per section 6.5.3.4
-                        if (IsFloatResultSubnormal(correct2, ulps)
-                            || IsFloatResultSubnormal(correct3, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                }
-
-
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                    tinfo->maxErrorValue2 = s2[j];
-                }
-                if (fail)
-                {
-                    vlog_error("\nERROR: %s%s: %f ulp error at {%a, %a}: *%a "
-                               "vs. %a (0x%8.8x) at index: %d\n",
-                               name, sizeNames[k], err, s[j], s2[j], r[j], test,
-                               ((cl_uint *)&test)[0], j);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-
-
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-
-exit:
-    if (overflow) free(overflow);
-    return error;
-}

diff --git a/test_conformance/math_brute_force/binary_two_results_i.cpp b/test_conformance/math_brute_force/binary_two_results_i.cpp
new file mode 100644
index 0000000..91cebf5
--- /dev/null
+++ b/test_conformance/math_brute_force/binary_two_results_i.cpp

@@ -0,0 +1,1132 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <limits.h>
+#include <string.h>
+#include "FunctionList.h"
+
+#define PARALLEL_REFERENCE
+
+int TestFunc_FloatI_Float_Float(const Func *f, MTdata);
+int TestFunc_DoubleI_Double_Double(const Func *f, MTdata);
+
+// Table entry for this test family: the family name followed by the
+// single-precision and double-precision test entry points declared above.
+extern const vtbl _binary_two_results_i = { "binary_two_results_i",
+                                            TestFunc_FloatI_Float_Float,
+                                            TestFunc_DoubleI_Double_Double };
+
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+
+// Assembles the OpenCL C source of the single-precision test kernel and hands
+// it to MakeKernel.
+//
+//   name        function name spliced into the kernel source as the call under
+//               test (two float inputs, one int output pointer)
+//   vectorSize  index into sizeNames / sizeValues selecting the vector width
+//   k, p        receive the kernel and program produced by MakeKernel
+//
+// Returns whatever MakeKernel returns.
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+{
+    // Source fragments used for every width except 3: one work-item handles
+    // one (vector) element of each buffer.
+    const char *genericSrc[] = {
+        "__kernel void math_kernel",
+        sizeNames[vectorSize],
+        "( __global float",
+        sizeNames[vectorSize],
+        "* out, __global int",
+        sizeNames[vectorSize],
+        "* out2, __global float",
+        sizeNames[vectorSize],
+        "* in1, __global float",
+        sizeNames[vectorSize],
+        "* in2)\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ",
+        name,
+        "( in1[i], in2[i], out2 + i );\n"
+        "}\n"
+    };
+
+    // Source fragments for width 3: the buffers are addressed as scalars via
+    // vload3/vstore3, and the last work-item handles the one or two leftover
+    // elements, padding the unused lanes with NAN.
+    const char *vec3Src[] = {
+        "__kernel void math_kernel",
+        sizeNames[vectorSize],
+        "( __global float* out, __global int* out2, __global float* in, __global float* in2)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       float3 f0 = vload3( 0, in + 3 * i );\n"
+        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
+        "       int3 i0 = 0xdeaddead;\n"
+        "       f0 = ",
+        name,
+        "( f0, f1, &i0 );\n"
+        "       vstore3( f0, 0, out + 3*i );\n"
+        "       vstore3( i0, 0, out2 + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       float3 f0, f1;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       int3 i0 = 0xdeaddead;\n"
+        "       f0 = ",
+        name,
+        "( f0, f1, &i0 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = f0.y; \n"
+        "               out2[3*i+1] = i0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = f0.x; \n"
+        "               out2[3*i] = i0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+
+    // Start from the width-3 variant and switch to the generic one for every
+    // other width.
+    const char **source = vec3Src;
+    size_t fragmentCount = sizeof(vec3Src)/sizeof(vec3Src[0]);
+
+    if( sizeValues[vectorSize] != 3 )
+    {
+        source = genericSrc;
+        fragmentCount = sizeof(genericSrc)/sizeof(genericSrc[0]);
+    }
+
+    // The kernel entry point is named after the vector width suffix.
+    char kernelName[32];
+    snprintf( kernelName, sizeof( kernelName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernel(source, (cl_uint) fragmentCount, kernelName, k, p);
+}
+
+// Assembles the OpenCL C source of the double-precision test kernel and hands
+// it to MakeKernel. Both variants begin by enabling cl_khr_fp64.
+//
+//   name        function name spliced into the kernel source as the call under
+//               test (two double inputs, one int output pointer)
+//   vectorSize  index into sizeNames / sizeValues selecting the vector width
+//   k, p        receive the kernel and program produced by MakeKernel
+//
+// Returns whatever MakeKernel returns.
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+{
+    // Source fragments used for every width except 3: one work-item handles
+    // one (vector) element of each buffer.
+    const char *genericSrc[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel",
+        sizeNames[vectorSize],
+        "( __global double",
+        sizeNames[vectorSize],
+        "* out, __global int",
+        sizeNames[vectorSize],
+        "* out2, __global double",
+        sizeNames[vectorSize],
+        "* in1, __global double",
+        sizeNames[vectorSize],
+        "* in2)\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ",
+        name,
+        "( in1[i], in2[i], out2 + i );\n"
+        "}\n"
+    };
+
+    // Source fragments for width 3: the buffers are addressed as scalars via
+    // vload3/vstore3, and the last work-item handles the one or two leftover
+    // elements, padding the unused lanes with NAN.
+    const char *vec3Src[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel",
+        sizeNames[vectorSize],
+        "( __global double* out, __global int* out2, __global double* in, __global double* in2)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       double3 d0 = vload3( 0, in + 3 * i );\n"
+        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
+        "       int3 i0 = 0xdeaddead;\n"
+        "       d0 = ",
+        name,
+        "( d0, d1, &i0 );\n"
+        "       vstore3( d0, 0, out + 3*i );\n"
+        "       vstore3( i0, 0, out2 + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       double3 d0, d1;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       int3 i0 = 0xdeaddead;\n"
+        "       d0 = ",
+        name,
+        "( d0, d1, &i0 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = d0.y; \n"
+        "               out2[3*i+1] = i0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = d0.x; \n"
+        "               out2[3*i] = i0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+
+    // Start from the width-3 variant and switch to the generic one for every
+    // other width.
+    const char **source = vec3Src;
+    size_t fragmentCount = sizeof(vec3Src)/sizeof(vec3Src[0]);
+
+    if( sizeValues[vectorSize] != 3 )
+    {
+        source = genericSrc;
+        fragmentCount = sizeof(genericSrc)/sizeof(genericSrc[0]);
+    }
+
+    // The kernel entry point is named after the vector width suffix.
+    char kernelName[32];
+    snprintf( kernelName, sizeof( kernelName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernel(source, (cl_uint) fragmentCount, kernelName, k, p);
+}
+
+// Arguments shared by the kernel-build jobs in this file. Each job adds its
+// job id to `offset` and uses the sum as the index into `kernels`/`programs`.
+typedef struct BuildKernelInfo
+{
+    cl_uint     offset;            // index of the first vector size to build
+    cl_kernel   *kernels;          // output array; a job writes kernels[offset + job_id]
+    cl_program  *programs;         // output array; a job writes programs[offset + job_id]
+    const char  *nameInCode;       // function name spliced into the generated kernel source
+}BuildKernelInfo;
+
+// Build job for the single-precision kernels: job `job_id` builds the kernel
+// for vector-size index (offset + job_id) and stores the kernel and program in
+// the matching slots of the arrays carried by the BuildKernelInfo in `p`.
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *buildInfo = (BuildKernelInfo*) p;
+    cl_uint sizeIndex = buildInfo->offset + job_id;
+    cl_kernel *kernelSlot = buildInfo->kernels + sizeIndex;
+    cl_program *programSlot = buildInfo->programs + sizeIndex;
+
+    return BuildKernel( buildInfo->nameInCode, sizeIndex, kernelSlot, programSlot );
+}
+
+// Build job for the double-precision kernels: job `job_id` builds the kernel
+// for vector-size index (offset + job_id) and stores the kernel and program in
+// the matching slots of the arrays carried by the BuildKernelInfo in `p`.
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *buildInfo = (BuildKernelInfo*) p;
+    cl_uint sizeIndex = buildInfo->offset + job_id;
+    cl_kernel *kernelSlot = buildInfo->kernels + sizeIndex;
+    cl_program *programSlot = buildInfo->programs + sizeIndex;
+
+    return BuildKernelDouble( buildInfo->nameInCode, sizeIndex, kernelSlot, programSlot );
+}
+
+#if defined PARALLEL_REFERENCE
+// Work description shared by all ReferenceF jobs. Job `jid` processes the
+// elements starting at index jid * count.
+typedef struct ComputeReferenceInfoF_
+{
+    const float *x;  // first input array
+    const float *y;  // second input array
+    float *r;  // receives the reference result narrowed to float
+    int *i;  // receives the int the reference function writes through its third argument
+    double (*f_ffpI)(double, double, int*);  // host reference function
+    cl_uint lim;  // element index bound used to clamp the last job's slice
+    cl_uint count;  // number of elements assigned to each job
+} ComputeReferenceInfoF;
+
+// Work description shared by all ReferenceD jobs. Job `jid` processes the
+// elements starting at index jid * count.
+typedef struct ComputeReferenceInfoD_
+{
+    const double *x;  // first input array
+    const double *y;  // second input array
+    double *r;  // receives the reference result narrowed to double
+    int *i;  // receives the int the reference function writes through its third argument
+    long double (*f_ffpI)(long double, long double, int*);  // host reference function
+    cl_uint lim;  // element index bound used to clamp the last job's slice
+    cl_uint count;  // number of elements assigned to each job
+} ComputeReferenceInfoD;
+
+// Job body that computes one slice of the single-precision reference results.
+//
+//   jid       job index; the slice starts at element jid * count
+//   tid       worker thread index (not used)
+//   userInfo  pointer to the ComputeReferenceInfoF describing the work
+//
+// For each element of the slice the reference function is evaluated in double
+// precision, the value is narrowed to float into r[], and the integer result
+// is written to i[]. Always returns CL_SUCCESS.
+static cl_int
+ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
+{
+    ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo;
+    cl_uint lim = cri->lim;
+    cl_uint count = cri->count;
+    cl_uint off = jid * count;
+    cl_uint j;
+
+    // When the per-job count is rounded up, trailing jobs can start at or past
+    // the end of the data. They have nothing to do; without this guard the
+    // unsigned subtraction below would wrap and the loop would run far past
+    // the buffers.
+    if (off >= lim)
+        return CL_SUCCESS;
+
+    // Clamp the last partial slice to the end of the data.
+    if (count > lim - off)
+        count = lim - off;
+
+    const float *x = cri->x + off;
+    const float *y = cri->y + off;
+    float *r = cri->r + off;
+    int *i = cri->i + off;
+    double (*f)(double, double, int *) = cri->f_ffpI;
+
+    for (j = 0; j < count; ++j)
+        r[j] = (float)f((double)x[j], (double)y[j], i + j);
+
+    return CL_SUCCESS;
+}
+
+// Job body that computes one slice of the double-precision reference results.
+//
+//   jid       job index; the slice starts at element jid * count
+//   tid       worker thread index (not used)
+//   userInfo  pointer to the ComputeReferenceInfoD describing the work
+//
+// For each element of the slice the reference function is evaluated in long
+// double precision, the value is narrowed to double into r[], and the integer
+// result is written to i[]. Always returns CL_SUCCESS.
+static cl_int
+ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
+{
+    ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo;
+    cl_uint lim = cri->lim;
+    cl_uint count = cri->count;
+    cl_uint off = jid * count;
+    cl_uint j;
+
+    // Called on every path, including the early return below, exactly as
+    // before this guard was added.
+    Force64BitFPUPrecision();
+
+    // When the per-job count is rounded up, trailing jobs can start at or past
+    // the end of the data. They have nothing to do; without this guard the
+    // unsigned subtraction below would wrap and the loop would run far past
+    // the buffers.
+    if (off >= lim)
+        return CL_SUCCESS;
+
+    // Clamp the last partial slice to the end of the data.
+    if (count > lim - off)
+        count = lim - off;
+
+    const double *x = cri->x + off;
+    const double *y = cri->y + off;
+    double *r = cri->r + off;
+    int *i = cri->i + off;
+    long double (*f)(long double, long double, int *) = cri->f_ffpI;
+
+    for (j = 0; j < count; ++j)
+        r[j] = (double)f((long double)x[j], (long double)y[j], i + j);
+
+    return CL_SUCCESS;
+}
+
+#endif
+
+// Brute-force test of a single-precision function that takes two floats and
+// returns a float plus an int written through a pointer.
+//
+//   f  description of the function under test (name, reference function,
+//      ULP tolerances, flush-to-zero flag)
+//   d  random number generator state used to fill the input buffers
+//
+// For every vector-size index in [gMinVectorSizeIndex, gMaxVectorSizeIndex) a
+// kernel is built. Each pass fills both inputs with random 32-bit patterns,
+// runs every kernel, computes the host reference, and compares the results:
+// the float result within the ULP tolerance and the int result exactly (for
+// remquo only the low seven bits are compared). When flush-to-zero applies,
+// results that match a flushed input or a flushed result are also accepted.
+// If gMeasureTimes is set the kernels are timed afterwards.
+//
+// Returns the last error code (0 when every call succeeded), or -1 after a
+// verification failure. Once the kernels have been built, every exit path
+// goes through the `exit` label so the kernels and programs are released.
+int TestFunc_FloatI_Float_Float(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    float float_ulps;
+    int64_t maxError2 = 0;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    float maxErrorVal = 0.0f;
+    float maxErrorVal2 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( float );
+
+#if defined PARALLEL_REFERENCE
+    cl_uint threadCount = GetThreadCount();
+#endif
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+
+    if(gWimpyMode ){
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+
+    if( gIsEmbedded )
+        float_ulps = f->float_embedded_ulps;
+    else
+        float_ulps = f->float_ulps;
+
+    int testingRemquo = !strcmp(f->name, "remquo");
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+        // A failed build returns directly: the kernel/program arrays may be
+        // only partially filled, so the release loop at `exit` must not run.
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            return error;
+    }
+/*
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+*/
+
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        cl_uint *p = (cl_uint *)gIn;
+        cl_uint *p2 = (cl_uint *)gIn2;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            p[j] = genrand_int32(d);
+            p2[j] = genrand_int32(d);
+        }
+
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            // Kernels are built at this point; release them on the way out.
+            goto exit;
+        }
+
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_TRUE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+
+            memset_pattern4(gOut2[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        // Calculate the correctly rounded reference result
+        float *s = (float *)gIn;
+        float *s2 = (float *)gIn2;
+
+#if defined PARALLEL_REFERENCE
+        if (threadCount > 1) {
+            // Split the buffer into threadCount slices of `count` elements
+            // (rounded up) and compute them in parallel.
+            ComputeReferenceInfoF cri;
+            cri.x = s;
+            cri.y = s2;
+            cri.r = (float *)gOut_Ref;
+            cri.i = (int *)gOut_Ref2;
+            cri.f_ffpI = f->func.f_ffpI;
+            cri.lim = bufferSize / sizeof( float );
+            cri.count = (cri.lim + threadCount - 1) / threadCount;
+            ThreadPool_Do(ReferenceF, threadCount, &cri);
+        } else {
+#endif
+            float *r = (float *)gOut_Ref;
+            int *r2 = (int *)gOut_Ref2;
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                r[j] = (float) f->func.f_ffpI( s[j], s2[j], r2+j );
+#if defined PARALLEL_REFERENCE
+        }
+#endif
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray2 failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        if( gSkipCorrectnessTesting )
+            break;
+
+        //Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        int32_t *t2 = (int32_t *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t *)gOut[k];
+                int32_t *q2 = (int32_t *)gOut2[k];
+
+                // Check for exact match to correctly rounded result
+                if (t[j] == q[j] && t2[j] == q2[j])
+                    continue;
+
+                // Check for paired NaNs
+                if ((t[j] & 0x7fffffff) > 0x7f800000 && (q[j] & 0x7fffffff) > 0x7f800000 && t2[j] == q2[j])
+                    continue;
+
+                // if( t[j] != q[j] || t2[j] != q2[j] )
+                {
+                    float test = ((float*) q)[j];
+                    int correct2 = INT_MIN;
+                    double correct = f->func.f_ffpI( s[j], s2[j], &correct2 );
+                    float err = Ulp_Error( test, correct );
+                    int64_t iErr;
+
+                    // in case of remquo, we only care about the sign and last seven bits of
+                    // integer as per the spec.
+                    if(testingRemquo)
+                        iErr = (long long) (q2[j] & 0x0000007f) - (long long) (correct2 & 0x0000007f);
+                    else
+                        iErr = (long long) q2[j] - (long long) correct2;
+
+                    //For remquo, if y = 0, x is infinite, or either is NaN then the standard either neglects
+                    //to say what is returned in iptr or leaves it undefined or implementation defined.
+                    int iptrUndefined = fabs(((float*) gIn)[j]) == INFINITY ||
+                                        ((float*) gIn2)[j] == 0.0f          ||
+                                        isnan(((float*) gIn2)[j])           ||
+                                        isnan(((float*) gIn)[j]);
+                    if(iptrUndefined)
+                         iErr = 0;
+
+                    int fail = ! (fabsf(err) <= float_ulps && iErr == 0 );
+                    if( ftz && fail )
+                    {
+                        // retry per section 6.5.3.2
+                        if( IsFloatResultSubnormal(correct, float_ulps ) )
+                        {
+                            fail = fail && ! ( test == 0.0f && iErr == 0 );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if( IsFloatSubnormal( s[j] ) )
+                        {
+                            int correct3i, correct4i;
+                            double correct3 = f->func.f_ffpI( 0.0, s2[j], &correct3i );
+                            double correct4 = f->func.f_ffpI( -0.0, s2[j], &correct4i );
+                            float err2 = Ulp_Error( test, correct3  );
+                            float err3 = Ulp_Error( test, correct4  );
+                            int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
+                            int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
+                            fail =  fail && ((!(fabsf(err2) <= float_ulps && iErr3 == 0)) && (!(fabsf(err3) <= float_ulps && iErr4 == 0)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            if( llabs(iErr3) < llabs( iErr ) )
+                                iErr = iErr3;
+                            if( llabs(iErr4) < llabs( iErr ) )
+                                iErr = iErr4;
+
+                            // retry per section 6.5.3.4
+                            // NOTE(review): correct2 is the integer quotient, not a float
+                            // result; correct4 may have been intended here -- confirm.
+                            if( IsFloatResultSubnormal(correct2, float_ulps ) || IsFloatResultSubnormal(correct3, float_ulps ) )
+                            {
+                                fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) );
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+
+                            //try with both args as zero
+                            if( IsFloatSubnormal( s2[j] ) )
+                            {
+                                int correct7i, correct8i;
+                                correct3 = f->func.f_ffpI( 0.0, 0.0, &correct3i );
+                                correct4 = f->func.f_ffpI( -0.0, 0.0, &correct4i );
+                                double correct7 = f->func.f_ffpI( 0.0, -0.0, &correct7i );
+                                double correct8 = f->func.f_ffpI( -0.0, -0.0, &correct8i );
+                                err2 = Ulp_Error( test, correct3  );
+                                err3 = Ulp_Error( test, correct4  );
+                                float err4 = Ulp_Error( test, correct7  );
+                                float err5 = Ulp_Error( test, correct8  );
+                                iErr3 = (long long) q2[j] - (long long) correct3i;
+                                iErr4 = (long long) q2[j] - (long long) correct4i;
+                                int64_t iErr7 = (long long) q2[j] - (long long) correct7i;
+                                int64_t iErr8 = (long long) q2[j] - (long long) correct8i;
+                                fail =  fail && ((!(fabsf(err2) <= float_ulps && iErr3 == 0)) && (!(fabsf(err3) <= float_ulps  && iErr4 == 0)) &&
+                                                 (!(fabsf(err4) <= float_ulps  && iErr7 == 0)) && (!(fabsf(err5) <= float_ulps  && iErr8 == 0)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                if( llabs(iErr3) < llabs( iErr ) )
+                                    iErr = iErr3;
+                                if( llabs(iErr4) < llabs( iErr ) )
+                                    iErr = iErr4;
+                                if( llabs(iErr7) < llabs( iErr ) )
+                                    iErr = iErr7;
+                                if( llabs(iErr8) < llabs( iErr ) )
+                                    iErr = iErr8;
+
+                                // retry per section 6.5.3.4
+                                if( IsFloatResultSubnormal(correct3, float_ulps ) || IsFloatResultSubnormal(correct4, float_ulps )  ||
+                                    IsFloatResultSubnormal(correct7, float_ulps ) || IsFloatResultSubnormal(correct8, float_ulps ) )
+                                {
+                                    fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0 || iErr7 == 0 || iErr8 == 0));
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( IsFloatSubnormal( s2[j] ) )
+                        {
+                            int correct3i, correct4i;
+                            double correct3 = f->func.f_ffpI( s[j], 0.0, &correct3i );
+                            double correct4 = f->func.f_ffpI( s[j], -0.0, &correct4i );
+                            float err2 = Ulp_Error( test, correct3  );
+                            float err3 = Ulp_Error( test, correct4  );
+                            int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
+                            int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
+                            fail =  fail && ((!(fabsf(err2) <= float_ulps && iErr3 == 0)) && (!(fabsf(err3) <= float_ulps && iErr4 == 0)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            if( llabs(iErr3) < llabs( iErr ) )
+                                iErr = iErr3;
+                            if( llabs(iErr4) < llabs( iErr ) )
+                                iErr = iErr4;
+
+                            // retry per section 6.5.3.4
+                            // NOTE(review): correct2 is the integer quotient, not a float
+                            // result; correct4 may have been intended here -- confirm.
+                            if( IsFloatResultSubnormal(correct2, float_ulps ) || IsFloatResultSubnormal(correct3, float_ulps ) )
+                            {
+                                fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) );
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                    }
+                    if( llabs(iErr) > maxError2 )
+                    {
+                        maxError2 = llabs(iErr );
+                        maxErrorVal2 = s[j];
+                    }
+
+                    if( fail )
+                    {
+                        // iErr is int64_t; cast so the argument matches %lld on every platform.
+                        vlog_error( "\nERROR: %s%s: {%f, %lld} ulp error at {%a, %a} ({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, 0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n",
+                                    f->name, sizeNames[k], err, (long long) iErr,
+                                   ((float*) gIn)[j], ((float*) gIn2)[j],
+                                   ((cl_uint*) gIn)[j], ((cl_uint*) gIn2)[j],
+                                   ((float*) gOut_Ref)[j], ((int*) gOut_Ref2)[j],
+                                   ((cl_uint*) gOut_Ref)[j], ((cl_uint*) gOut_Ref2)[j],
+                                   test, q2[j],
+                                   ((cl_uint*)&test)[0], ((cl_uint*) q2)[j] );
+                      error = -1;
+                      goto exit;
+                    }
+                }
+            }
+        }
+
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               // i and step are uint64_t and bufferSize is size_t; the
+               // conversions and casts below match those types.
+               vlog("base:%14llu step:%10llu  bufferSize:%10zu \n",
+                    (unsigned long long) i, (unsigned long long) step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t{%8.2f, %lld} @ %a", maxError, (long long) maxError2, maxErrorVal );
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+// Brute-force test for double-precision functions that take two doubles and
+// return a double plus an int through a pointer (called through
+// f->dfunc.f_ffpI; remquo is handled specially below).
+//
+// The function:
+//   1. builds one kernel per vector-size index in
+//      [gMinVectorSizeIndex, gMaxVectorSizeIndex),
+//   2. repeatedly fills gIn / gIn2 with values derived from the random
+//      stream d, runs every kernel, computes the host reference and compares
+//      both outputs element by element (with FTZ retries when ftz is set),
+//   3. optionally times the kernels when gMeasureTimes is set.
+//
+// Parameters:
+//   f - descriptor of the function under test (name, reference function,
+//       allowed ulp error, ftz flag).
+//   d - random number generator state used to create the inputs.
+//
+// Returns 0 on success, -1 when a result is outside tolerance, or the error
+// code of the failing OpenCL / helper call.
+int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int64_t maxError2 = 0;
+    int ftz = f->ftz || gForceFTZ;
+    double maxErrorVal = 0.0f;
+    double maxErrorVal2 = 0.0f;     // recorded below but not included in the summary line
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( double );
+
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    if(gWimpyMode ){
+       step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+
+#if defined PARALLEL_REFERENCE
+    cl_uint threadCount = GetThreadCount();
+#endif
+
+    Force64BitFPUPrecision();
+
+    int testingRemquo = !strcmp(f->name, "remquo");
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                                    gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                    &build_info ) ))
+        {
+            // Not every kernel/program handle is known to be valid here, so
+            // return directly instead of going through the release loop.
+            return error;
+        }
+    }
+/*
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+*/
+
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+
+        // From here on every failure goes through exit: so that the kernels
+        // and programs built above are released.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_TRUE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+
+            memset_pattern4(gOut2[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        //Calculate the correctly rounded reference result
+        double *s = (double *)gIn;
+        double *s2 = (double *)gIn2;
+
+#if defined PARALLEL_REFERENCE
+        if (threadCount > 1) {
+            // Split the buffer into threadCount slices of cri.count elements
+            // (rounded up) and compute them on the thread pool.
+            ComputeReferenceInfoD cri;
+            cri.x = s;
+            cri.y = s2;
+            cri.r = (double *)gOut_Ref;
+            cri.i = (int *)gOut_Ref2;
+            cri.f_ffpI = f->dfunc.f_ffpI;
+            cri.lim = bufferSize / sizeof( double );
+            cri.count = (cri.lim + threadCount - 1) / threadCount;
+            ThreadPool_Do(ReferenceD, threadCount, &cri);
+        } else {
+#endif
+            double *r = (double *)gOut_Ref;
+            int *r2 = (int *)gOut_Ref2;
+            for( j = 0; j < bufferSize / sizeof( double ); j++ )
+                r[j] = (double) f->dfunc.f_ffpI( s[j], s2[j], r2+j );
+#if defined PARALLEL_REFERENCE
+        }
+#endif
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray2 failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        if( gSkipCorrectnessTesting )
+            break;
+
+        //Verify data
+        uint64_t *t = (uint64_t *)gOut_Ref;
+        int32_t *t2 = (int32_t *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint64_t *q = (uint64_t *)gOut[k];
+                int32_t *q2 = (int32_t *)gOut2[k];
+
+                // Check for exact match to correctly rounded result
+                if (t[j] == q[j] && t2[j] == q2[j])
+                    continue;
+
+                // Check for paired NaNs
+                if ((t[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL &&
+                    (q[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL &&
+                    t2[j] == q2[j])
+                    continue;
+
+                // Bit patterns differ: measure the error against the reference
+                // recomputed in long double precision.
+                {
+                    double test = ((double*) q)[j];
+                    int correct2 = INT_MIN;
+                    long double correct = f->dfunc.f_ffpI( s[j], s2[j], &correct2 );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
+                    int64_t iErr;
+
+                    // in case of remquo, we only care about the sign and last seven bits of
+                    // integer as per the spec.
+                    if(testingRemquo)
+                        iErr = (long long) (q2[j] & 0x0000007f) - (long long) (correct2 & 0x0000007f);
+                    else
+                        iErr = (long long) q2[j] - (long long) correct2;
+
+                    //For remquo, if y = 0, x is infinite, or either is NaN then the standard either neglects
+                    //to say what is returned in iptr or leaves it undefined or implementation defined.
+                    int iptrUndefined = fabs(((double*) gIn)[j]) == INFINITY ||
+                                        ((double*) gIn2)[j] == 0.0          ||
+                                        isnan(((double*) gIn2)[j])           ||
+                                        isnan(((double*) gIn)[j]);
+                    if(iptrUndefined)
+                         iErr = 0;
+
+                    int fail = ! (fabsf(err) <= f->double_ulps && iErr == 0 );
+                    if( ftz && fail )
+                    {
+                        // retry per section 6.5.3.2
+                        if( IsDoubleResultSubnormal(correct, f->double_ulps ) )
+                        {
+                            fail = fail && ! ( test == 0.0f && iErr == 0 );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if( IsDoubleSubnormal( s[j] ) )
+                        {
+                            // First input is subnormal: also accept the results
+                            // for that input flushed to +0 or -0.
+                            int correct3i, correct4i;
+                            long double correct3 = f->dfunc.f_ffpI( 0.0, s2[j], &correct3i );
+                            long double correct4 = f->dfunc.f_ffpI( -0.0, s2[j], &correct4i );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                            int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
+                            int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            if( llabs(iErr3) < llabs( iErr ) )
+                                iErr = iErr3;
+                            if( llabs(iErr4) < llabs( iErr ) )
+                                iErr = iErr4;
+
+                            // retry per section 6.5.3.4
+                            // NOTE(review): correct2 is the int result of the unflushed call, while the
+                            // both-subnormal branch below tests correct3/correct4 -- confirm whether
+                            // correct3/correct4 was intended here.
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            {
+                                fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) );
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+
+                            //try with both args as zero
+                            if( IsDoubleSubnormal( s2[j] ) )
+                            {
+                                int correct7i, correct8i;
+                                correct3 = f->dfunc.f_ffpI( 0.0, 0.0, &correct3i );
+                                correct4 = f->dfunc.f_ffpI( -0.0, 0.0, &correct4i );
+                                long double correct7 = f->dfunc.f_ffpI( 0.0, -0.0, &correct7i );
+                                long double correct8 = f->dfunc.f_ffpI( -0.0, -0.0, &correct8i );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct7  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct8  );
+                                iErr3 = (long long) q2[j] - (long long) correct3i;
+                                iErr4 = (long long) q2[j] - (long long) correct4i;
+                                int64_t iErr7 = (long long) q2[j] - (long long) correct7i;
+                                int64_t iErr8 = (long long) q2[j] - (long long) correct8i;
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps  && iErr4 == 0)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps  && iErr7 == 0)) && (!(fabsf(err5) <= f->double_ulps  && iErr8 == 0)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                if( llabs(iErr3) < llabs( iErr ) )
+                                    iErr = iErr3;
+                                if( llabs(iErr4) < llabs( iErr ) )
+                                    iErr = iErr4;
+                                if( llabs(iErr7) < llabs( iErr ) )
+                                    iErr = iErr7;
+                                if( llabs(iErr8) < llabs( iErr ) )
+                                    iErr = iErr8;
+
+                                // retry per section 6.5.3.4
+                                if( IsDoubleResultSubnormal( correct3, f->double_ulps ) || IsDoubleResultSubnormal( correct4, f->double_ulps )  ||
+                                    IsDoubleResultSubnormal( correct7, f->double_ulps ) || IsDoubleResultSubnormal( correct8, f->double_ulps ) )
+                                {
+                                    fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0 || iErr7 == 0 || iErr8 == 0));
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( IsDoubleSubnormal( s2[j] ) )
+                        {
+                            // Only the second input is subnormal: also accept the
+                            // results for that input flushed to +0 or -0.
+                            int correct3i, correct4i;
+                            long double correct3 = f->dfunc.f_ffpI( s[j], 0.0, &correct3i );
+                            long double correct4 = f->dfunc.f_ffpI( s[j], -0.0, &correct4i );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                            int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
+                            int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            if( llabs(iErr3) < llabs( iErr ) )
+                                iErr = iErr3;
+                            if( llabs(iErr4) < llabs( iErr ) )
+                                iErr = iErr4;
+
+                            // retry per section 6.5.3.4
+                            // NOTE(review): same correct2 (int) argument as in the branch above -- confirm.
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            {
+                                fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) );
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+
+                    // Track the worst errors seen so far for the summary line.
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                    }
+                    if( llabs(iErr) > maxError2 )
+                    {
+                        maxError2 = llabs(iErr );
+                        maxErrorVal2 = s[j];
+                    }
+
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %sD%s: {%f, %lld} ulp error at {%.13la, %.13la} ({ 0x%16.16llx, 0x%16.16llx}): *{%.13la, %d} ({ 0x%16.16llx, 0x%8.8x}) vs. {%.13la, %d} ({ 0x%16.16llx, 0x%8.8x})\n",
+                                    f->name, sizeNames[k], err, (long long) iErr,
+                                   ((double*) gIn)[j], ((double*) gIn2)[j],
+                                   ((cl_ulong*) gIn)[j], ((cl_ulong*) gIn2)[j],
+                                   ((double*) gOut_Ref)[j], ((int*) gOut_Ref2)[j],
+                                   ((cl_ulong*) gOut_Ref)[j], ((cl_uint*) gOut_Ref2)[j],
+                                   test, q2[j],
+                                   ((cl_ulong*) q)[j], ((cl_uint*) q2)[j]);
+                      error = -1;
+                      goto exit;
+                    }
+                }
+            }
+        }
+
+        // Periodic progress output.
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               // i and step are uint64_t and bufferSize is size_t; the
+               // conversion specifiers must match the argument types.
+               vlog("base:%14llu step:%10llu  bufferSize:%10zu \n", (unsigned long long) i, (unsigned long long) step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+
+           fflush(stdout);
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        // Only gIn is refilled here; gIn2 keeps the contents of the last test pass.
+        double *p = (double *)gIn;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            p[j] = DoubleFromUInt32( genrand_int32(d) );
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_TRUE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+
+            // Time PERF_LOOP_COUNT runs; report the best run, or the average
+            // when gReportAverageTimes is set.
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t{%8.2f, %lld} @ %a", maxError, (long long) maxError2, maxErrorVal );
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+
+

diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
deleted file mode 100644
index 14f4109..0000000
--- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp
+++ /dev/null

@@ -1,580 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <climits>
-#include <cstring>
-
-// Assembles the OpenCL C source for the test kernel "math_kernel<suffix>"
-// (suffix = sizeNames[vectorSize]) that calls the function `name` on two
-// double inputs and stores the double result plus the int result written
-// through the third argument, then hands the source fragments to MakeKernel.
-//
-//   name        - function name spliced into the kernel source.
-//   vectorSize  - index into sizeNames / sizeValues selecting the vector width.
-//   k, p        - forwarded to MakeKernel (kernel and program outputs).
-//   relaxedMode - forwarded to MakeKernel.
-//
-// Returns whatever MakeKernel returns.
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    // Generic kernel: one work-item handles one (vector) element.
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global int",
-                        sizeNames[vectorSize],
-                        "* out2, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], out2 + i );\n"
-                        "}\n" };
-
-    // 3-element variant: works on scalar buffers through vload3/vstore3; the
-    // last work-item handles the leftover one or two elements, padding the
-    // unused lanes with NAN.
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global int* out2, __global double* in, "
-        "__global double* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       int3 i0 = 0xdeaddead;\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, &i0 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "       vstore3( i0, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       int3 i0 = 0xdeaddead;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, &i0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               out2[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               out2[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    // Default to the generic source; switch to the 3-element source when the
-    // selected vector width is 3.
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    // Kernel entry-point name; must match the name emitted in the source above.
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-// Arguments shared by all kernel-build jobs handed to the thread pool.
-// Field order matters: callers use positional aggregate initialization.
-struct BuildKernelInfo
-{
-    // Index of the first vector size to build; job N builds offset + N.
-    cl_uint offset;
-    // Output arrays, indexed by vector-size index.
-    cl_kernel *kernels;
-    cl_program *programs;
-    // Function name to splice into the generated kernel source.
-    const char *nameInCode;
-    // Whether to build with -cl-fast-relaxed-math.
-    bool relaxedMode;
-};
-
-// Thread-pool job: builds the kernel for vector-size index offset + job_id
-// and stores the kernel/program in the matching slots of the shared arrays.
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    const BuildKernelInfo *job = static_cast<const BuildKernelInfo *>(p);
-    const cl_uint vecIndex = job->offset + job_id;
-    cl_kernel *kernelSlot = &job->kernels[vecIndex];
-    cl_program *programSlot = &job->programs[vecIndex];
-    return BuildKernel(job->nameInCode, vecIndex, kernelSlot, programSlot,
-                       job->relaxedMode);
-}
-
-// Work description shared by all reference-computation jobs.
-struct ComputeReferenceInfoD_
-{
-    const double *x; // first input array
-    const double *y; // second input array
-    double *r; // output: double results
-    int *i; // output: int results written through the function's pointer arg
-    long double (*f_ffpI)(long double, long double, int *); // reference function
-    cl_uint lim; // total number of elements
-    cl_uint count; // number of elements per job
-};
-typedef struct ComputeReferenceInfoD_ ComputeReferenceInfoD;
-
-// Thread-pool job: computes the reference results for one slice of the
-// inputs described by the ComputeReferenceInfoD passed in userInfo.
-//
-//   jid      - job index; the slice starts at element jid * count.
-//   tid      - worker thread index (unused).
-//   userInfo - pointer to the shared ComputeReferenceInfoD.
-//
-// Always returns CL_SUCCESS.
-static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
-{
-    ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo;
-    cl_uint lim = cri->lim;
-    cl_uint count = cri->count;
-    cl_uint off = jid * count;
-
-    Force64BitFPUPrecision();
-
-    // Because count is rounded up, trailing jobs can start at or beyond lim.
-    // They have nothing to do; computing lim - off for them would wrap around
-    // (unsigned) and run far past the end of the buffers.
-    if (off >= lim) return CL_SUCCESS;
-
-    // Clamp the last non-empty slice; written so that it cannot overflow.
-    if (count > lim - off) count = lim - off;
-
-    // Only form the slice pointers once the offset is known to be in range.
-    const double *x = cri->x + off;
-    const double *y = cri->y + off;
-    double *r = cri->r + off;
-    int *i = cri->i + off;
-    long double (*f)(long double, long double, int *) = cri->f_ffpI;
-
-    for (cl_uint j = 0; j < count; ++j)
-        r[j] = (double)f((long double)x[j], (long double)y[j], i + j);
-
-    return CL_SUCCESS;
-}
-
-int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int64_t maxError2 = 0;
-    int ftz = f->ftz || gForceFTZ;
-    double maxErrorVal = 0.0f;
-    double maxErrorVal2 = 0.0f;
-    uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE);
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    cl_uint threadCount = GetThreadCount();
-
-    Force64BitFPUPrecision();
-
-    int testingRemquo = !strcmp(f->name, "remquo");
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        double *p = (double *)gIn;
-        double *p2 = (double *)gIn2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-        {
-            p[j] = DoubleFromUInt32(genrand_int32(d));
-            p2[j] = DoubleFromUInt32(genrand_int32(d));
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        double *s = (double *)gIn;
-        double *s2 = (double *)gIn2;
-
-        if (threadCount > 1)
-        {
-            ComputeReferenceInfoD cri;
-            cri.x = s;
-            cri.y = s2;
-            cri.r = (double *)gOut_Ref;
-            cri.i = (int *)gOut_Ref2;
-            cri.f_ffpI = f->dfunc.f_ffpI;
-            cri.lim = BUFFER_SIZE / sizeof(double);
-            cri.count = (cri.lim + threadCount - 1) / threadCount;
-            ThreadPool_Do(ReferenceD, threadCount, &cri);
-        }
-        else
-        {
-            double *r = (double *)gOut_Ref;
-            int *r2 = (int *)gOut_Ref2;
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-                r[j] = (double)f->dfunc.f_ffpI(s[j], s2[j], r2 + j);
-        }
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data
-        uint64_t *t = (uint64_t *)gOut_Ref;
-        int32_t *t2 = (int32_t *)gOut_Ref2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint64_t *q = (uint64_t *)gOut[k];
-                int32_t *q2 = (int32_t *)gOut2[k];
-
-                // Check for exact match to correctly rounded result
-                if (t[j] == q[j] && t2[j] == q2[j]) continue;
-
-                // Check for paired NaNs
-                if ((t[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL
-                    && (q[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL
-                    && t2[j] == q2[j])
-                    continue;
-
-                double test = ((double *)q)[j];
-                int correct2 = INT_MIN;
-                long double correct = f->dfunc.f_ffpI(s[j], s2[j], &correct2);
-                float err = Bruteforce_Ulp_Error_Double(test, correct);
-                int64_t iErr;
-
-                // in case of remquo, we only care about the sign and last
-                // seven bits of integer as per the spec.
-                if (testingRemquo)
-                    iErr = (long long)(q2[j] & 0x0000007f)
-                        - (long long)(correct2 & 0x0000007f);
-                else
-                    iErr = (long long)q2[j] - (long long)correct2;
-
-                // For remquo, if y = 0, x is infinite, or either is NaN
-                // then the standard either neglects to say what is returned
-                // in iptr or leaves it undefined or implementation defined.
-                int iptrUndefined = fabs(((double *)gIn)[j]) == INFINITY
-                    || ((double *)gIn2)[j] == 0.0 || isnan(((double *)gIn2)[j])
-                    || isnan(((double *)gIn)[j]);
-                if (iptrUndefined) iErr = 0;
-
-                int fail = !(fabsf(err) <= f->double_ulps && iErr == 0);
-                if (ftz && fail)
-                {
-                    // retry per section 6.5.3.2
-                    if (IsDoubleResultSubnormal(correct, f->double_ulps))
-                    {
-                        fail = fail && !(test == 0.0f && iErr == 0);
-                        if (!fail) err = 0.0f;
-                    }
-
-                    // retry per section 6.5.3.3
-                    if (IsDoubleSubnormal(s[j]))
-                    {
-                        int correct3i, correct4i;
-                        long double correct3 =
-                            f->dfunc.f_ffpI(0.0, s2[j], &correct3i);
-                        long double correct4 =
-                            f->dfunc.f_ffpI(-0.0, s2[j], &correct4i);
-                        float err2 =
-                            Bruteforce_Ulp_Error_Double(test, correct3);
-                        float err3 =
-                            Bruteforce_Ulp_Error_Double(test, correct4);
-                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
-                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
-                        fail = fail
-                            && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0))
-                                && (!(fabsf(err3) <= f->double_ulps
-                                      && iErr4 == 0)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
-                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
-
-                        // retry per section 6.5.3.4
-                        if (IsDoubleResultSubnormal(correct2, f->double_ulps)
-                            || IsDoubleResultSubnormal(correct3,
-                                                       f->double_ulps))
-                        {
-                            fail = fail
-                                && !(test == 0.0f
-                                     && (iErr3 == 0 || iErr4 == 0));
-                            if (!fail) err = 0.0f;
-                        }
-
-                        // try with both args as zero
-                        if (IsDoubleSubnormal(s2[j]))
-                        {
-                            int correct7i, correct8i;
-                            correct3 = f->dfunc.f_ffpI(0.0, 0.0, &correct3i);
-                            correct4 = f->dfunc.f_ffpI(-0.0, 0.0, &correct4i);
-                            long double correct7 =
-                                f->dfunc.f_ffpI(0.0, -0.0, &correct7i);
-                            long double correct8 =
-                                f->dfunc.f_ffpI(-0.0, -0.0, &correct8i);
-                            err2 = Bruteforce_Ulp_Error_Double(test, correct3);
-                            err3 = Bruteforce_Ulp_Error_Double(test, correct4);
-                            float err4 =
-                                Bruteforce_Ulp_Error_Double(test, correct7);
-                            float err5 =
-                                Bruteforce_Ulp_Error_Double(test, correct8);
-                            iErr3 = (long long)q2[j] - (long long)correct3i;
-                            iErr4 = (long long)q2[j] - (long long)correct4i;
-                            int64_t iErr7 =
-                                (long long)q2[j] - (long long)correct7i;
-                            int64_t iErr8 =
-                                (long long)q2[j] - (long long)correct8i;
-                            fail = fail
-                                && ((!(fabsf(err2) <= f->double_ulps
-                                       && iErr3 == 0))
-                                    && (!(fabsf(err3) <= f->double_ulps
-                                          && iErr4 == 0))
-                                    && (!(fabsf(err4) <= f->double_ulps
-                                          && iErr7 == 0))
-                                    && (!(fabsf(err5) <= f->double_ulps
-                                          && iErr8 == 0)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            if (fabsf(err4) < fabsf(err)) err = err4;
-                            if (fabsf(err5) < fabsf(err)) err = err5;
-                            if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
-                            if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
-                            if (llabs(iErr7) < llabs(iErr)) iErr = iErr7;
-                            if (llabs(iErr8) < llabs(iErr)) iErr = iErr8;
-
-                            // retry per section 6.5.3.4
-                            if (IsDoubleResultSubnormal(correct3,
-                                                        f->double_ulps)
-                                || IsDoubleResultSubnormal(correct4,
-                                                           f->double_ulps)
-                                || IsDoubleResultSubnormal(correct7,
-                                                           f->double_ulps)
-                                || IsDoubleResultSubnormal(correct8,
-                                                           f->double_ulps))
-                            {
-                                fail = fail
-                                    && !(test == 0.0f
-                                         && (iErr3 == 0 || iErr4 == 0
-                                             || iErr7 == 0 || iErr8 == 0));
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    else if (IsDoubleSubnormal(s2[j]))
-                    {
-                        int correct3i, correct4i;
-                        long double correct3 =
-                            f->dfunc.f_ffpI(s[j], 0.0, &correct3i);
-                        long double correct4 =
-                            f->dfunc.f_ffpI(s[j], -0.0, &correct4i);
-                        float err2 =
-                            Bruteforce_Ulp_Error_Double(test, correct3);
-                        float err3 =
-                            Bruteforce_Ulp_Error_Double(test, correct4);
-                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
-                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
-                        fail = fail
-                            && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0))
-                                && (!(fabsf(err3) <= f->double_ulps
-                                      && iErr4 == 0)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
-                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
-
-                        // retry per section 6.5.3.4
-                        if (IsDoubleResultSubnormal(correct2, f->double_ulps)
-                            || IsDoubleResultSubnormal(correct3,
-                                                       f->double_ulps))
-                        {
-                            fail = fail
-                                && !(test == 0.0f
-                                     && (iErr3 == 0 || iErr4 == 0));
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                }
-                if (fabsf(err) > maxError)
-                {
-                    maxError = fabsf(err);
-                    maxErrorVal = s[j];
-                }
-                if (llabs(iErr) > maxError2)
-                {
-                    maxError2 = llabs(iErr);
-                    maxErrorVal2 = s[j];
-                }
-
-                if (fail)
-                {
-                    vlog_error(
-                        "\nERROR: %sD%s: {%f, %lld} ulp error at {%.13la, "
-                        "%.13la} ({ 0x%16.16llx, 0x%16.16llx}): *{%.13la, "
-                        "%d} ({ 0x%16.16llx, 0x%8.8x}) vs. {%.13la, %d} ({ "
-                        "0x%16.16llx, 0x%8.8x})\n",
-                        f->name, sizeNames[k], err, iErr, ((double *)gIn)[j],
-                        ((double *)gIn2)[j], ((cl_ulong *)gIn)[j],
-                        ((cl_ulong *)gIn2)[j], ((double *)gOut_Ref)[j],
-                        ((int *)gOut_Ref2)[j], ((cl_ulong *)gOut_Ref)[j],
-                        ((cl_uint *)gOut_Ref2)[j], test, q2[j],
-                        ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
-             maxErrorVal2);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
deleted file mode 100644
index 5ef44b6..0000000
--- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp
+++ /dev/null

@@ -1,565 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <climits>
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global int",
-                        sizeNames[vectorSize],
-                        "* out2, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], out2 + i );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global int* out2, __global float* in, "
-        "__global float* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       int3 i0 = 0xdeaddead;\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, &i0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( i0, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       int3 i0 = 0xdeaddead;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, &i0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-}
-
-typedef struct ComputeReferenceInfoF_
-{
-    const float *x;
-    const float *y;
-    float *r;
-    int *i;
-    double (*f_ffpI)(double, double, int *);
-    cl_uint lim;
-    cl_uint count;
-} ComputeReferenceInfoF;
-
-static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
-{
-    ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo;
-    cl_uint lim = cri->lim;
-    cl_uint count = cri->count;
-    cl_uint off = jid * count;
-    const float *x = cri->x + off;
-    const float *y = cri->y + off;
-    float *r = cri->r + off;
-    int *i = cri->i + off;
-    double (*f)(double, double, int *) = cri->f_ffpI;
-
-    if (off + count > lim) count = lim - off;
-
-    for (cl_uint j = 0; j < count; ++j)
-        r[j] = (float)f((double)x[j], (double)y[j], i + j);
-
-    return CL_SUCCESS;
-}
-
-int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    int64_t maxError2 = 0;
-    float maxErrorVal = 0.0f;
-    float maxErrorVal2 = 0.0f;
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-
-    cl_uint threadCount = GetThreadCount();
-
-    float float_ulps;
-    if (gIsEmbedded)
-        float_ulps = f->float_embedded_ulps;
-    else
-        float_ulps = f->float_ulps;
-
-    int testingRemquo = !strcmp(f->name, "remquo");
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        cl_uint *p = (cl_uint *)gIn;
-        cl_uint *p2 = (cl_uint *)gIn2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            p[j] = genrand_int32(d);
-            p2[j] = genrand_int32(d);
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        float *s = (float *)gIn;
-        float *s2 = (float *)gIn2;
-
-        if (threadCount > 1)
-        {
-            ComputeReferenceInfoF cri;
-            cri.x = s;
-            cri.y = s2;
-            cri.r = (float *)gOut_Ref;
-            cri.i = (int *)gOut_Ref2;
-            cri.f_ffpI = f->func.f_ffpI;
-            cri.lim = BUFFER_SIZE / sizeof(float);
-            cri.count = (cri.lim + threadCount - 1) / threadCount;
-            ThreadPool_Do(ReferenceF, threadCount, &cri);
-        }
-        else
-        {
-            float *r = (float *)gOut_Ref;
-            int *r2 = (int *)gOut_Ref2;
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                r[j] = (float)f->func.f_ffpI(s[j], s2[j], r2 + j);
-        }
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        int32_t *t2 = (int32_t *)gOut_Ref2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)(gOut[k]);
-                int32_t *q2 = (int32_t *)gOut2[k];
-
-                // Check for exact match to correctly rounded result
-                if (t[j] == q[j] && t2[j] == q2[j]) continue;
-
-                // Check for paired NaNs
-                if ((t[j] & 0x7fffffff) > 0x7f800000
-                    && (q[j] & 0x7fffffff) > 0x7f800000 && t2[j] == q2[j])
-                    continue;
-
-                float test = ((float *)q)[j];
-                int correct2 = INT_MIN;
-                double correct = f->func.f_ffpI(s[j], s2[j], &correct2);
-                float err = Ulp_Error(test, correct);
-                int64_t iErr;
-
-                // in case of remquo, we only care about the sign and last
-                // seven bits of integer as per the spec.
-                if (testingRemquo)
-                    iErr = (long long)(q2[j] & 0x0000007f)
-                        - (long long)(correct2 & 0x0000007f);
-                else
-                    iErr = (long long)q2[j] - (long long)correct2;
-
-                // For remquo, if y = 0, x is infinite, or either is NaN
-                // then the standard either neglects to say what is returned
-                // in iptr or leaves it undefined or implementation defined.
-                int iptrUndefined = fabs(((float *)gIn)[j]) == INFINITY
-                    || ((float *)gIn2)[j] == 0.0f || isnan(((float *)gIn2)[j])
-                    || isnan(((float *)gIn)[j]);
-                if (iptrUndefined) iErr = 0;
-
-                int fail = !(fabsf(err) <= float_ulps && iErr == 0);
-                if (ftz && fail)
-                {
-                    // retry per section 6.5.3.2
-                    if (IsFloatResultSubnormal(correct, float_ulps))
-                    {
-                        fail = fail && !(test == 0.0f && iErr == 0);
-                        if (!fail) err = 0.0f;
-                    }
-
-                    // retry per section 6.5.3.3
-                    if (IsFloatSubnormal(s[j]))
-                    {
-                        int correct3i, correct4i;
-                        double correct3 =
-                            f->func.f_ffpI(0.0, s2[j], &correct3i);
-                        double correct4 =
-                            f->func.f_ffpI(-0.0, s2[j], &correct4i);
-                        float err2 = Ulp_Error(test, correct3);
-                        float err3 = Ulp_Error(test, correct4);
-                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
-                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
-                        fail = fail
-                            && ((!(fabsf(err2) <= float_ulps && iErr3 == 0))
-                                && (!(fabsf(err3) <= float_ulps
-                                      && iErr4 == 0)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
-                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
-
-                        // retry per section 6.5.3.4
-                        if (IsFloatResultSubnormal(correct2, float_ulps)
-                            || IsFloatResultSubnormal(correct3, float_ulps))
-                        {
-                            fail = fail
-                                && !(test == 0.0f
-                                     && (iErr3 == 0 || iErr4 == 0));
-                            if (!fail) err = 0.0f;
-                        }
-
-                        // try with both args as zero
-                        if (IsFloatSubnormal(s2[j]))
-                        {
-                            int correct7i, correct8i;
-                            correct3 = f->func.f_ffpI(0.0, 0.0, &correct3i);
-                            correct4 = f->func.f_ffpI(-0.0, 0.0, &correct4i);
-                            double correct7 =
-                                f->func.f_ffpI(0.0, -0.0, &correct7i);
-                            double correct8 =
-                                f->func.f_ffpI(-0.0, -0.0, &correct8i);
-                            err2 = Ulp_Error(test, correct3);
-                            err3 = Ulp_Error(test, correct4);
-                            float err4 = Ulp_Error(test, correct7);
-                            float err5 = Ulp_Error(test, correct8);
-                            iErr3 = (long long)q2[j] - (long long)correct3i;
-                            iErr4 = (long long)q2[j] - (long long)correct4i;
-                            int64_t iErr7 =
-                                (long long)q2[j] - (long long)correct7i;
-                            int64_t iErr8 =
-                                (long long)q2[j] - (long long)correct8i;
-                            fail = fail
-                                && ((!(fabsf(err2) <= float_ulps && iErr3 == 0))
-                                    && (!(fabsf(err3) <= float_ulps
-                                          && iErr4 == 0))
-                                    && (!(fabsf(err4) <= float_ulps
-                                          && iErr7 == 0))
-                                    && (!(fabsf(err5) <= float_ulps
-                                          && iErr8 == 0)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            if (fabsf(err4) < fabsf(err)) err = err4;
-                            if (fabsf(err5) < fabsf(err)) err = err5;
-                            if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
-                            if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
-                            if (llabs(iErr7) < llabs(iErr)) iErr = iErr7;
-                            if (llabs(iErr8) < llabs(iErr)) iErr = iErr8;
-
-                            // retry per section 6.5.3.4
-                            if (IsFloatResultSubnormal(correct3, float_ulps)
-                                || IsFloatResultSubnormal(correct4, float_ulps)
-                                || IsFloatResultSubnormal(correct7, float_ulps)
-                                || IsFloatResultSubnormal(correct8, float_ulps))
-                            {
-                                fail = fail
-                                    && !(test == 0.0f
-                                         && (iErr3 == 0 || iErr4 == 0
-                                             || iErr7 == 0 || iErr8 == 0));
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    else if (IsFloatSubnormal(s2[j]))
-                    {
-                        int correct3i, correct4i;
-                        double correct3 = f->func.f_ffpI(s[j], 0.0, &correct3i);
-                        double correct4 =
-                            f->func.f_ffpI(s[j], -0.0, &correct4i);
-                        float err2 = Ulp_Error(test, correct3);
-                        float err3 = Ulp_Error(test, correct4);
-                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
-                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
-                        fail = fail
-                            && ((!(fabsf(err2) <= float_ulps && iErr3 == 0))
-                                && (!(fabsf(err3) <= float_ulps
-                                      && iErr4 == 0)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
-                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
-
-                        // retry per section 6.5.3.4
-                        if (IsFloatResultSubnormal(correct2, float_ulps)
-                            || IsFloatResultSubnormal(correct3, float_ulps))
-                        {
-                            fail = fail
-                                && !(test == 0.0f
-                                     && (iErr3 == 0 || iErr4 == 0));
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                }
-                if (fabsf(err) > maxError)
-                {
-                    maxError = fabsf(err);
-                    maxErrorVal = s[j];
-                }
-                if (llabs(iErr) > maxError2)
-                {
-                    maxError2 = llabs(iErr);
-                    maxErrorVal2 = s[j];
-                }
-
-                if (fail)
-                {
-                    vlog_error(
-                        "\nERROR: %s%s: {%f, %lld} ulp error at {%a, %a} "
-                        "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, "
-                        "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n",
-                        f->name, sizeNames[k], err, iErr, ((float *)gIn)[j],
-                        ((float *)gIn2)[j], ((cl_uint *)gIn)[j],
-                        ((cl_uint *)gIn2)[j], ((float *)gOut_Ref)[j],
-                        ((int *)gOut_Ref2)[j], ((cl_uint *)gOut_Ref)[j],
-                        ((cl_uint *)gOut_Ref2)[j], test, q2[j],
-                        ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
-             maxErrorVal2);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
deleted file mode 100644
index 3edbb48..0000000
--- a/test_conformance/math_brute_force/function_list.cpp
+++ /dev/null

@@ -1,391 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "reference_math.h"
-#include "test_functions.h"
-
-#define FTZ_ON 1
-#define FTZ_OFF 0
-#define EXACT 0.0f
-#define RELAXED_ON 1
-#define RELAXED_OFF 0
-
-#define STRINGIFY(_s) #_s
-
-// Only use ulps information in spir test
-#ifdef FUNCTION_LIST_ULPS_ONLY
-
-#define ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                       \
-    {                                                                          \
-        STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL },      \
-            _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,             \
-            RELAXED_OFF, _type                                                 \
-    }
-#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type,     \
-                  _relaxed_embedded_ulp)                                       \
-    {                                                                          \
-        STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL },      \
-            _ulp, _ulp, _embedded_ulp, _relaxed_ulp, _relaxed_embedded_ulp,    \
-            _rmode, RELAXED_ON, _type                                          \
-    }
-#define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                  \
-    {                                                                          \
-        "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), { NULL },          \
-            { NULL }, { NULL }, _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, \
-            _rmode, RELAXED_OFF, _type                                         \
-    }
-#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type)   \
-    {                                                                          \
-        STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
-            _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
-    }
-#define unaryF NULL
-#define i_unaryF NULL
-#define unaryF_u NULL
-#define macro_unaryF NULL
-#define binaryF NULL
-#define binaryOperatorF NULL
-#define binaryF_i NULL
-#define macro_binaryF NULL
-#define ternaryF NULL
-#define unaryF_two_results NULL
-#define unaryF_two_results_i NULL
-#define binaryF_two_results_i NULL
-#define mad_function NULL
-
-#define reference_sqrt NULL
-#define reference_sqrtl NULL
-#define reference_divide NULL
-#define reference_dividel NULL
-#define reference_relaxed_divide NULL
-
-#else // FUNCTION_LIST_ULPS_ONLY
-
-#define ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                       \
-    {                                                                          \
-        STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name },      \
-            { (void*)reference_##_name##l }, { (void*)reference_##_name },     \
-            _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,             \
-            RELAXED_OFF, _type                                                 \
-    }
-#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type,     \
-                  _relaxed_embedded_ulp)                                       \
-    {                                                                          \
-        STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name },      \
-            { (void*)reference_##_name##l },                                   \
-            { (void*)reference_##relaxed_##_name }, _ulp, _ulp, _embedded_ulp, \
-            _relaxed_ulp, _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type     \
-    }
-#define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                  \
-    {                                                                          \
-        "half_" STRINGIFY(_name), "half_" STRINGIFY(_name),                    \
-            { (void*)reference_##_name }, { NULL }, { NULL }, _ulp, _ulp,      \
-            _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
-    }
-#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type)   \
-    {                                                                          \
-        STRINGIFY(_name), _operator, { (void*)reference_##_name },             \
-            { (void*)reference_##_name##l }, { NULL }, _ulp, _ulp,             \
-            _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
-    }
-
-static constexpr vtbl _unary = {
-    "unary",
-    TestFunc_Float_Float,
-    TestFunc_Double_Double,
-};
-
-static constexpr vtbl _i_unary = {
-    "i_unary",
-    TestFunc_Int_Float,
-    TestFunc_Int_Double,
-};
-
-static constexpr vtbl _unary_u = {
-    "unary_u",
-    TestFunc_Float_UInt,
-    TestFunc_Double_ULong,
-};
-
-static constexpr vtbl _macro_unary = {
-    "macro_unary",
-    TestMacro_Int_Float,
-    TestMacro_Int_Double,
-};
-
-static constexpr vtbl _binary = {
-    "binary",
-    TestFunc_Float_Float_Float,
-    TestFunc_Double_Double_Double,
-};
-
-static constexpr vtbl _binary_operator = {
-    "binaryOperator",
-    TestFunc_Float_Float_Float_Operator,
-    TestFunc_Double_Double_Double_Operator,
-};
-
-static constexpr vtbl _binary_i = {
-    "binary_i",
-    TestFunc_Float_Float_Int,
-    TestFunc_Double_Double_Int,
-};
-
-static constexpr vtbl _macro_binary = {
-    "macro_binary",
-    TestMacro_Int_Float_Float,
-    TestMacro_Int_Double_Double,
-};
-
-static constexpr vtbl _ternary = {
-    "ternary",
-    TestFunc_Float_Float_Float_Float,
-    TestFunc_Double_Double_Double_Double,
-};
-
-static constexpr vtbl _unary_two_results = {
-    "unary_two_results",
-    TestFunc_Float2_Float,
-    TestFunc_Double2_Double,
-};
-
-static constexpr vtbl _unary_two_results_i = {
-    "unary_two_results_i",
-    TestFunc_FloatI_Float,
-    TestFunc_DoubleI_Double,
-};
-
-static constexpr vtbl _binary_two_results_i = {
-    "binary_two_results_i",
-    TestFunc_FloatI_Float_Float,
-    TestFunc_DoubleI_Double_Double,
-};
-
-static constexpr vtbl _mad_tbl = {
-    "ternary",
-    TestFunc_mad_Float,
-    TestFunc_mad_Double,
-};
-
-#define unaryF &_unary
-#define i_unaryF &_i_unary
-#define unaryF_u &_unary_u
-#define macro_unaryF &_macro_unary
-#define binaryF &_binary
-#define binaryOperatorF &_binary_operator
-#define binaryF_i &_binary_i
-#define macro_binaryF &_macro_binary
-#define ternaryF &_ternary
-#define unaryF_two_results &_unary_two_results
-#define unaryF_two_results_i &_unary_two_results_i
-#define binaryF_two_results_i &_binary_two_results_i
-#define mad_function &_mad_tbl
-
-#endif // FUNCTION_LIST_ULPS_ONLY
-
-const Func functionList[] = {
-    ENTRY_EXT(acos, 4.0f, 4.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(acosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(acospi, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(asin, 4.0f, 4.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(asinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(asinpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(atan, 5.0f, 5.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(atanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY(atanpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY(atan2, 6.0f, 6.0f, FTZ_OFF, binaryF),
-    ENTRY(atan2pi, 6.0f, 6.0f, FTZ_OFF, binaryF),
-    ENTRY(cbrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(ceil, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(copysign, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY_EXT(cos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
-              0.00048828125f), // relaxed ulp 2^-11
-    ENTRY(cosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(cospi, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
-              0.00048828125f), // relaxed ulp 2^-11
-    //                                  ENTRY( erfc,                  16.0f,
-    //                                  16.0f,         FTZ_OFF,     unaryF),
-    //                                  //disabled for 1.0 due to lack of
-    //                                  reference implementation ENTRY( erf,
-    //                                  16.0f,         16.0f,         FTZ_OFF,
-    //                                  unaryF), //disabled for 1.0 due to lack
-    //                                  of reference implementation
-    ENTRY_EXT(exp, 3.0f, 4.0f, 3.0f, FTZ_OFF, unaryF,
-              4.0f), // relaxed error is actually overwritten in unary.c as it
-                     // is 3+floor(fabs(2*x))
-    ENTRY_EXT(exp2, 3.0f, 4.0f, 3.0f, FTZ_OFF, unaryF,
-              4.0f), // relaxed error is actually overwritten in unary.c as it
-                     // is 3+floor(fabs(2*x))
-    ENTRY_EXT(exp10, 3.0f, 4.0f, 8192.0f, FTZ_OFF, unaryF,
-              8192.0f), // relaxed error is actually overwritten in unary.c as
-                        // it is 3+floor(fabs(2*x)) in derived mode,
-    // in non-derived mode it uses the ulp error for half_exp10.
-    ENTRY(expm1, 3.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(fabs, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(fdim, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(floor, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(fma, 0.0f, 0.0f, FTZ_OFF, ternaryF),
-    ENTRY(fmax, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fmin, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fmod, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fract, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
-    ENTRY(frexp, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
-    ENTRY(hypot, 4.0f, 4.0f, FTZ_OFF, binaryF),
-    ENTRY(ilogb, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
-    ENTRY(isequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isfinite, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isgreaterequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isinf, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isless, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(islessequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(islessgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isnan, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isnormal, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isnotequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isunordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(ldexp, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
-    ENTRY(lgamma, INFINITY, INFINITY, FTZ_OFF, unaryF),
-    ENTRY(lgamma_r, INFINITY, INFINITY, FTZ_OFF, unaryF_two_results_i),
-    ENTRY_EXT(log, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
-              4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY_EXT(log2, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
-              4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY_EXT(log10, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
-              4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY(log1p, 2.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(logb, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(mad, INFINITY, INFINITY, INFINITY, FTZ_OFF, mad_function,
-              INFINITY), // in fast-relaxed-math mode it has to be either
-                         // exactly rounded fma or exactly rounded a*b+c
-    ENTRY(maxmag, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(minmag, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(modf, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
-    ENTRY(nan, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
-    ENTRY(nextafter, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY_EXT(pow, 16.0f, 16.0f, 8192.0f, FTZ_OFF, binaryF,
-              8192.0f), // in derived mode the ulp error is calculated as
-                        // exp2(y*log2(x)) and in non-derived it is the same as
-                        // half_pow
-    ENTRY(pown, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
-    ENTRY(powr, 16.0f, 16.0f, FTZ_OFF, binaryF),
-    //                                  ENTRY( reciprocal,            1.0f,
-    //                                  1.0f,         FTZ_OFF,     unaryF),
-    ENTRY(remainder, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(remquo, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
-    ENTRY(rint, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(rootn, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
-    ENTRY(round, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(rsqrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(signbit, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY_EXT(sin, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
-              0.00048828125f), // relaxed ulp 2^-11
-    ENTRY_EXT(sincos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF_two_results,
-              0.00048828125f), // relaxed ulp 2^-11
-    ENTRY(sinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(sinpi, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
-              0.00048828125f), // relaxed ulp 2^-11
-    { "sqrt",
-      "sqrt",
-      { (void*)reference_sqrt },
-      { (void*)reference_sqrtl },
-      { NULL },
-      3.0f,
-      0.0f,
-      4.0f,
-      INFINITY,
-      INFINITY,
-      FTZ_OFF,
-      RELAXED_OFF,
-      unaryF },
-    { "sqrt_cr",
-      "sqrt",
-      { (void*)reference_sqrt },
-      { (void*)reference_sqrtl },
-      { NULL },
-      0.0f,
-      0.0f,
-      0.0f,
-      INFINITY,
-      INFINITY,
-      FTZ_OFF,
-      RELAXED_OFF,
-      unaryF },
-    ENTRY_EXT(
-        tan, 5.0f, 5.0f, 8192.0f, FTZ_OFF, unaryF,
-        8192.0f), // in derived mode it the ulp error is calculated as sin/cos
-                  // and in non-derived mode it is the same as half_tan.
-    ENTRY(tanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY(tanpi, 6.0f, 6.0f, FTZ_OFF, unaryF),
-    //                                    ENTRY( tgamma,                 16.0f,
-    //                                    16.0f,         FTZ_OFF,     unaryF),
-    //                                    // Commented this out until we can be
-    //                                    sure this requirement is realistic
-    ENTRY(trunc, 0.0f, 0.0f, FTZ_OFF, unaryF),
-
-    HALF_ENTRY(cos, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(divide, 8192.0f, 8192.0f, FTZ_ON, binaryF),
-    HALF_ENTRY(exp, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(exp2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(exp10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(log, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(log2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(log10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(powr, 8192.0f, 8192.0f, FTZ_ON, binaryF),
-    HALF_ENTRY(recip, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(rsqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(sin, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(sqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(tan, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-
-    // basic operations
-    OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
-    OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
-    { "divide",
-      "/",
-      { (void*)reference_divide },
-      { (void*)reference_dividel },
-      { (void*)reference_relaxed_divide },
-      2.5f,
-      0.0f,
-      3.0f,
-      2.5f,
-      INFINITY,
-      FTZ_OFF,
-      RELAXED_ON,
-      binaryOperatorF },
-    { "divide_cr",
-      "/",
-      { (void*)reference_divide },
-      { (void*)reference_dividel },
-      { (void*)reference_relaxed_divide },
-      0.0f,
-      0.0f,
-      0.0f,
-      0.f,
-      INFINITY,
-      FTZ_OFF,
-      RELAXED_OFF,
-      binaryOperatorF },
-    OPERATOR_ENTRY(multiply, "*", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
-    OPERATOR_ENTRY(assignment, "", 0.0f, 0.0f, FTZ_OFF,
-                   unaryF), // A simple copy operation
-    OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-};
-
-const size_t functionListCount = sizeof(functionList) / sizeof(functionList[0]);

diff --git a/test_conformance/math_brute_force/function_list.h b/test_conformance/math_brute_force/function_list.h
deleted file mode 100644
index 38f739c..0000000
--- a/test_conformance/math_brute_force/function_list.h
+++ /dev/null

@@ -1,98 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef FUNCTION_LIST_H
-#define FUNCTION_LIST_H
-
-#include "harness/compat.h"
-
-#ifndef WIN32
-#include <unistd.h>
-#endif
-
-#if defined(__APPLE__)
-#include <OpenCL/opencl.h>
-#else
-#include <CL/cl.h>
-#endif
-
-#include "harness/mt19937.h"
-
-typedef union fptr {
-    void *p;
-    double (*f_f)(double);
-    double (*f_u)(cl_uint);
-    int (*i_f)(double);
-    int (*i_f_f)(float);
-    float (*f_ff_f)(float, float);
-    double (*f_ff)(double, double);
-    int (*i_ff)(double, double);
-    double (*f_fi)(double, int);
-    double (*f_fpf)(double, double *);
-    double (*f_fpI)(double, int *);
-    double (*f_ffpI)(double, double, int *);
-    double (*f_fff)(double, double, double);
-    float (*f_fma)(float, float, float, int);
-} fptr;
-
-typedef union dptr {
-    void *p;
-    long double (*f_f)(long double);
-    long double (*f_u)(cl_ulong);
-    int (*i_f)(long double);
-    long double (*f_ff)(long double, long double);
-    int (*i_ff)(long double, long double);
-    long double (*f_fi)(long double, int);
-    long double (*f_fpf)(long double, long double *);
-    long double (*f_fpI)(long double, int *);
-    long double (*f_ffpI)(long double, long double, int *);
-    long double (*f_fff)(long double, long double, long double);
-} dptr;
-
-struct Func;
-
-typedef struct vtbl
-{
-    const char *type_name;
-    int (*TestFunc)(const struct Func *, MTdata, bool);
-    int (*DoubleTestFunc)(
-        const struct Func *, MTdata,
-        bool); // may be NULL if function is single precision only
-} vtbl;
-
-typedef struct Func
-{
-    const char *name; // common name, to be used as an argument in the shell
-    const char *nameInCode; // name as it appears in the __kernel, usually the
-                            // same as name, but different for multiplication
-    fptr func;
-    dptr dfunc;
-    fptr rfunc;
-    float float_ulps;
-    float double_ulps;
-    float float_embedded_ulps;
-    float relaxed_error;
-    float relaxed_embedded_error;
-    int ftz;
-    int relaxed;
-    const vtbl *vtbl_ptr;
-} Func;
-
-
-extern const Func functionList[];
-
-extern const size_t functionListCount;
-
-#endif

diff --git a/test_conformance/math_brute_force/i_unary.cpp b/test_conformance/math_brute_force/i_unary.cpp
new file mode 100644
index 0000000..75b9424
--- /dev/null
+++ b/test_conformance/math_brute_force/i_unary.cpp

@@ -0,0 +1,627 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <string.h>
+#include "FunctionList.h"
+
+int TestFunc_Int_Float(const Func *f, MTdata); // prototype so the dispatch table below can take its address; defined further down in this file
+int TestFunc_Int_Double(const Func *f, MTdata); // prototype for the second routine referenced by the dispatch table below
+
+extern const vtbl _i_unary = { "i_unary", TestFunc_Int_Float, // `extern` on the definition gives this const object external linkage (namespace-scope const objects are internal by default in C++)
+                               TestFunc_Int_Double };
+
+
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); // forward declaration; the definition follows immediately below
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); // forward declaration; same parameter list as BuildKernel
+
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+{
+    // Assembles the OpenCL C source of "math_kernel<suffix>", which applies the
+    // built-in `name` to a float input buffer and stores int results, then hands
+    // the pieces to MakeKernel.  The return value is MakeKernel's.
+    const char *suffix = sizeNames[vectorSize];
+
+    // Generic variant: each work-item converts one element of the given width.
+    const char *plainSrc[] = {
+        "__kernel void math_kernel", suffix,
+        "( __global int", suffix,
+        "* out, __global float", suffix,
+        "* in)\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name,
+        "( in[i] );\n"
+        "}\n"
+    };
+
+    // 3-wide variant: buffers are addressed as scalars through vload3/vstore3.
+    // The last work-item only touches the one or two scalars that remain and
+    // pads the unused lanes with NAN.
+    const char *vec3Src[] = {
+        "__kernel void math_kernel", suffix,
+        "( __global int* out, __global float* in)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       float3 f0 = vload3( 0, in + 3 * i );\n"
+        "       int3 i0 = ", name,
+        "( f0 );\n"
+        "       vstore3( i0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       float3 f0;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       int3 i0 = ", name,
+        "( f0 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = i0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = i0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+
+    // Select the source fragments that match the requested vector width.
+    const char **src;
+    size_t srcCount;
+    if( sizeValues[vectorSize] != 3 )
+    {
+        src = plainSrc;
+        srcCount = sizeof( plainSrc ) / sizeof( plainSrc[0] );
+    }
+    else
+    {
+        src = vec3Src;
+        srcCount = sizeof( vec3Src ) / sizeof( vec3Src[0] );
+    }
+
+    // Entry-point name must match the one spelled out in the source above.
+    char kernelName[32];
+    snprintf( kernelName, sizeof( kernelName ) -1, "math_kernel%s", suffix );
+
+    return MakeKernel( src, (cl_uint) srcCount, kernelName, k, p );
+}
+
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+{
+    // Double-precision twin of BuildKernel: the generated source enables
+    // cl_khr_fp64, reads a double buffer, applies the built-in `name` and
+    // stores int results.  The return value is MakeKernel's.
+    const char *suffix = sizeNames[vectorSize];
+
+    // Generic variant: each work-item converts one element of the given width.
+    const char *plainSrc[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", suffix,
+        "( __global int", suffix,
+        "* out, __global double", suffix,
+        "* in)\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name,
+        "( in[i] );\n"
+        "}\n"
+    };
+
+    // 3-wide variant: buffers are addressed as scalars through vload3/vstore3.
+    // The last work-item only touches the one or two scalars that remain and
+    // pads the unused lanes with NAN.
+    const char *vec3Src[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", suffix,
+        "( __global int* out, __global double* in)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       double3 f0 = vload3( 0, in + 3 * i );\n"
+        "       int3 i0 = ", name,
+        "( f0 );\n"
+        "       vstore3( i0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       double3 f0;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       int3 i0 = ", name,
+        "( f0 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = i0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = i0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+
+    // Select the source fragments that match the requested vector width.
+    const char **src;
+    size_t srcCount;
+    if( sizeValues[vectorSize] != 3 )
+    {
+        src = plainSrc;
+        srcCount = sizeof( plainSrc ) / sizeof( plainSrc[0] );
+    }
+    else
+    {
+        src = vec3Src;
+        srcCount = sizeof( vec3Src ) / sizeof( vec3Src[0] );
+    }
+
+    // Entry-point name must match the one spelled out in the source above.
+    char kernelName[32];
+    snprintf( kernelName, sizeof( kernelName ) -1, "math_kernel%s", suffix );
+
+    return MakeKernel( src, (cl_uint) srcCount, kernelName, k, p );
+}
+
+// Arguments shared by all kernel-build jobs; the job callbacks receive a
+// pointer to one of these through their `void *` parameter.
+struct BuildKernelInfo
+{
+    // Vector-size index of the first kernel to build; job N works on index
+    // offset + N.
+    cl_uint offset;
+    // Kernel slots, indexed by vector-size index.
+    cl_kernel *kernels;
+    // Program slots, indexed by vector-size index.
+    cl_program *programs;
+    // Function name forwarded to the kernel builder as its `name` argument.
+    const char *nameInCode;
+};
+
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    // Job callback: `p` points at a BuildKernelInfo, and job N builds the
+    // float kernel for vector-size index offset + N into the matching slots.
+    const BuildKernelInfo *job = static_cast<const BuildKernelInfo *>( p );
+    const cl_uint sizeIndex = job->offset + job_id;
+    cl_kernel *kernelSlot = &job->kernels[sizeIndex];
+    cl_program *programSlot = &job->programs[sizeIndex];
+
+    return BuildKernel( job->nameInCode, sizeIndex, kernelSlot, programSlot );
+}
+
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    // Job callback: `p` points at a BuildKernelInfo, and job N builds the
+    // double kernel for vector-size index offset + N into the matching slots.
+    const BuildKernelInfo *job = static_cast<const BuildKernelInfo *>( p );
+    const cl_uint sizeIndex = job->offset + job_id;
+    cl_kernel *kernelSlot = &job->kernels[sizeIndex];
+    cl_program *programSlot = &job->programs[sizeIndex];
+
+    return BuildKernelDouble( job->nameInCode, sizeIndex, kernelSlot, programSlot );
+}
+
+int TestFunc_Int_Float(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ;
+    size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( float );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+
+    // This test is not using ThreadPool so we need to disable FTZ here
+    // for reference computations
+    FPU_mode_type oldMode;
+    DisableFTZ(&oldMode);
+
+    Force64BitFPUPrecision();
+
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        return error;
+/*
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+*/
+
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        if( gWimpyMode )
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = (uint32_t) i + j * scale;
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = (uint32_t) i + j;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        //Calculate the correctly rounded reference result
+        int *r = (int *)gOut_Ref;
+        float *s = (float *)gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            r[j] = f->func.i_f( s[j] );
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        if( gSkipCorrectnessTesting )
+            break;
+
+        //Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t *)(gOut[k]);
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    if( ftz && IsFloatSubnormal(s[j]))
+                    {
+                        unsigned int correct0 = f->func.i_f( 0.0 );
+                        unsigned int correct1 = f->func.i_f( -0.0 );
+                        if( q[j] == correct0 || q[j] == correct1 )
+                            continue;
+                    }
+
+                    uint32_t err = t[j] - q[j];
+                    if( q[j] > t[j] )
+                        err = q[j] - t[j];
+                    vlog_error( "\nERROR: %s%s: %d ulp error at %a (0x%8.8x): *%d vs. %d\n", f->name, sizeNames[k], err, ((float*) gIn)[j], ((cl_uint*) gIn)[j], t[j], q[j] );
+                  error = -1;
+                  goto exit;
+                }
+            }
+        }
+
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    vlog( "\n" );
+exit:
+    RestoreFPState(&oldMode);
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+int TestFunc_Int_Double(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    int ftz = f->ftz || gForceFTZ;
+    size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( cl_double );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
+
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    // This test is not using ThreadPool so we need to disable FTZ here
+    // for reference computations
+    FPU_mode_type oldMode;
+    DisableFTZ(&oldMode);
+
+    Force64BitFPUPrecision();
+
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                &build_info ) ))
+    {
+        return error;
+    }
+/*
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+*/
+
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        if( gWimpyMode )
+        {
+            for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+                p[j] = DoubleFromUInt32( (uint32_t) i + j * scale );
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+                p[j] = DoubleFromUInt32( (uint32_t) i + j );
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        //Calculate the correctly rounded reference result
+        int *r = (int *)gOut_Ref;
+        double *s = (double *)gIn;
+        for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+            r[j] = f->dfunc.i_f( s[j] );
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        if( gSkipCorrectnessTesting )
+            break;
+
+        //Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t *)(gOut[k]);
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    if( ftz && IsDoubleSubnormal(s[j]))
+                    {
+                        unsigned int correct0 = f->dfunc.i_f( 0.0 );
+                        unsigned int correct1 = f->dfunc.i_f( -0.0 );
+                        if( q[j] == correct0 || q[j] == correct1 )
+                            continue;
+                    }
+
+                    uint32_t err = t[j] - q[j];
+                    if( q[j] > t[j] )
+                        err = q[j] - t[j];
+                    vlog_error( "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n", f->name, sizeNames[k], err, ((double*) gIn)[j], t[j], q[j] );
+                  error = -1;
+                  goto exit;
+                }
+            }
+        }
+
+        if( 0 == (i & 0x0fffffff) )
+        {
+            if (gVerboseBruteForce)
+            {
+                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+            } else
+            {
+               vlog("." );
+            }
+           fflush(stdout);
+
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+            p[j] = DoubleFromUInt32( genrand_int32(d) );
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+
+    vlog( "\n" );
+
+
+exit:
+    RestoreFPState(&oldMode);
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+
+

diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp
deleted file mode 100644
index 4383fa8..0000000
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ /dev/null

@@ -1,302 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global int",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global int* out, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( i0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-}
-
-int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    int ftz = f->ftz || gForceFTZ;
-    uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
-    int scale =
-        (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(cl_double)) + 1);
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    // This test is not using ThreadPool so we need to disable FTZ here
-    // for reference computations
-    FPU_mode_type oldMode;
-    DisableFTZ(&oldMode);
-
-    Force64BitFPUPrecision();
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        double *p = (double *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-                p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-                p[j] = DoubleFromUInt32((uint32_t)i + j);
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        int *r = (int *)gOut_Ref;
-        double *s = (double *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-            r[j] = f->dfunc.i_f(s[j]);
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)(gOut[k]);
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    if (ftz && IsDoubleSubnormal(s[j]))
-                    {
-                        unsigned int correct0 = f->dfunc.i_f(0.0);
-                        unsigned int correct1 = f->dfunc.i_f(-0.0);
-                        if (q[j] == correct0 || q[j] == correct1) continue;
-                    }
-
-                    uint32_t err = t[j] - q[j];
-                    if (q[j] > t[j]) err = q[j] - t[j];
-                    vlog_error(
-                        "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n",
-                        f->name, sizeNames[k], err, ((double *)gIn)[j], t[j],
-                        q[j]);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-
-    vlog("\n");
-
-exit:
-    RestoreFPState(&oldMode);
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
deleted file mode 100644
index c803aa3..0000000
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ /dev/null

@@ -1,298 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global int",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in)\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global int* out, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( i0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-}
-
-int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-    int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1);
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    // This test is not using ThreadPool so we need to disable FTZ here
-    // for reference computations
-    FPU_mode_type oldMode;
-    DisableFTZ(&oldMode);
-
-    Force64BitFPUPrecision();
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        cl_uint *p = (cl_uint *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                p[j] = (cl_uint)i + j * scale;
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                p[j] = (uint32_t)i + j;
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        int *r = (int *)gOut_Ref;
-        float *s = (float *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            r[j] = f->func.i_f(s[j]);
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)(gOut[k]);
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    if (ftz && IsFloatSubnormal(s[j]))
-                    {
-                        unsigned int correct0 = f->func.i_f(0.0);
-                        unsigned int correct1 = f->func.i_f(-0.0);
-                        if (q[j] == correct0 || q[j] == correct1) continue;
-                    }
-
-                    uint32_t err = t[j] - q[j];
-                    if (q[j] > t[j]) err = q[j] - t[j];
-                    vlog_error("\nERROR: %s%s: %d ulp error at %a (0x%8.8x): "
-                               "*%d vs. %d\n",
-                               f->name, sizeNames[k], err, ((float *)gIn)[j],
-                               ((cl_uint *)gIn)[j], t[j], q[j]);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-
-    vlog("\n");
-
-exit:
-    RestoreFPState(&oldMode);
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/macro_binary.cpp b/test_conformance/math_brute_force/macro_binary.cpp
new file mode 100644
index 0000000..0670990
--- /dev/null
+++ b/test_conformance/math_brute_force/macro_binary.cpp

@@ -0,0 +1,1234 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <string.h>
+#include "FunctionList.h"
+
+// Entry points of the macro_binary test family (single and double precision).
+int TestMacro_Int_Float_Float(const Func *f, MTdata);
+int TestMacro_Int_Double_Double(const Func *f, MTdata);
+
+// Function table for this family, initialized with its name and the two
+// entry points declared above.
+extern const vtbl _macro_binary = {
+    "macro_binary",
+    TestMacro_Int_Float_Float,
+    TestMacro_Int_Double_Double
+};
+
+// Kernel builders for the float and double kernels; both are defined below.
+static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+                       cl_kernel *k, cl_program *p);
+static int BuildKernelDouble(const char *name, int vectorSize,
+                             cl_uint kernel_count, cl_kernel *k,
+                             cl_program *p);
+
+// Assemble the single-precision kernel source for one vector width and hand
+// it to MakeKernels.
+//
+// name         - function to call inside the generated kernel
+// vectorSize   - index into sizeNames / sizeValues selecting the vector width
+// kernel_count - forwarded to MakeKernels, together with k and p
+// Returns the value returned by MakeKernels.
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    const char *widthSuffix = sizeNames[vectorSize];
+
+    // Source used for every width except 3: one vector element per work-item.
+    const char *genericSource[] = { "__kernel void math_kernel", widthSuffix, "( __global int", widthSuffix, "* out, __global float", widthSuffix, "* in1, __global float", widthSuffix, "* in2 )\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name, "( in1[i], in2[i] );\n"
+        "}\n"
+    };
+
+    // Source used for width 3: the buffers are addressed as scalars through
+    // vload3 / vstore3, and the last work-item pads the leftover elements
+    // with NAN and stores only the lanes that hold real data.
+    const char *vec3Source[] = {    "__kernel void math_kernel", widthSuffix, "( __global int* out, __global float* in, __global float* in2)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       float3 f0 = vload3( 0, in + 3 * i );\n"
+        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
+        "       int3 i0 = ", name, "( f0, f1 );\n"
+        "       vstore3( i0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       float3 f0, f1;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       int3 i0 = ", name, "( f0, f1 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = i0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = i0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+
+    // Pick the fragment list that matches the requested width.
+    const bool isVec3 = ( sizeValues[vectorSize] == 3 );
+    const char **source = isVec3 ? vec3Source : genericSource;
+    const size_t fragmentCount = isVec3 ? sizeof(vec3Source)/sizeof(vec3Source[0])
+                                        : sizeof(genericSource)/sizeof(genericSource[0]);
+
+    // The kernel entry point is named after the vector width.
+    char kernelName[32];
+    snprintf( kernelName, sizeof( kernelName ) -1, "math_kernel%s", widthSuffix );
+
+    return MakeKernels(source, (cl_uint) fragmentCount, kernelName, kernel_count, k, p);
+}
+
+
+// Assemble the double-precision kernel source for one vector width and hand
+// it to MakeKernels. The generated source enables cl_khr_fp64 and writes
+// long results.
+//
+// name         - function to call inside the generated kernel
+// vectorSize   - index into sizeNames / sizeValues selecting the vector width
+// kernel_count - forwarded to MakeKernels, together with k and p
+// Returns the value returned by MakeKernels.
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    const char *widthSuffix = sizeNames[vectorSize];
+
+    // Source used for every width except 3: one vector element per work-item.
+    const char *genericSource[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", widthSuffix, "( __global long", widthSuffix, "* out, __global double", widthSuffix, "* in1, __global double", widthSuffix, "* in2 )\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name, "( in1[i], in2[i] );\n"
+        "}\n"
+    };
+
+    // Source used for width 3: the buffers are addressed as scalars through
+    // vload3 / vstore3, and the last work-item pads the leftover elements
+    // with NAN and stores only the lanes that hold real data.
+    const char *vec3Source[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", widthSuffix, "( __global long* out, __global double* in, __global double* in2)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       double3 f0 = vload3( 0, in + 3 * i );\n"
+        "       double3 f1 = vload3( 0, in2 + 3 * i );\n"
+        "       long3 l0 = ", name, "( f0, f1 );\n"
+        "       vstore3( l0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       double3 f0, f1;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
+        "               f1 = (double3)( in2[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               f1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       long3 l0 = ", name, "( f0, f1 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = l0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = l0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+
+    // Pick the fragment list that matches the requested width.
+    const bool isVec3 = ( sizeValues[vectorSize] == 3 );
+    const char **source = isVec3 ? vec3Source : genericSource;
+    const size_t fragmentCount = isVec3 ? sizeof(vec3Source)/sizeof(vec3Source[0])
+                                        : sizeof(genericSource)/sizeof(genericSource[0]);
+
+    // The kernel entry point is named after the vector width.
+    char kernelName[32];
+    snprintf( kernelName, sizeof( kernelName ) -1, "math_kernel%s", widthSuffix );
+
+    return MakeKernels(source, (cl_uint) fragmentCount, kernelName, kernel_count, k, p);
+}
+
+// Argument bundle passed (as void *) to the BuildKernel_*Fn callbacks.
+struct BuildKernelInfo
+{
+    // Index of the first vector size to build; job N builds index offset + N.
+    cl_uint offset;
+    // Forwarded unchanged to BuildKernel / BuildKernelDouble.
+    cl_uint kernel_count;
+    // Indexed by vector size index; each entry is the cl_kernel array that
+    // the builder fills in.
+    cl_kernel **kernels;
+    // Indexed by vector size index; receives the built program.
+    cl_program *programs;
+    // Name of the function to call from the generated kernel source.
+    const char *nameInCode;
+};
+
+// Callback that builds the float kernel for vector size index
+// (info->offset + job_id). thread_id is not used.
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *buildInfo = static_cast<BuildKernelInfo *>( p );
+    const cl_uint sizeIndex = buildInfo->offset + job_id;
+
+    return BuildKernel( buildInfo->nameInCode,
+                        sizeIndex,
+                        buildInfo->kernel_count,
+                        buildInfo->kernels[sizeIndex],
+                        &buildInfo->programs[sizeIndex] );
+}
+
+// Callback that builds the double kernel for vector size index
+// (info->offset + job_id). thread_id is not used.
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *buildInfo = static_cast<BuildKernelInfo *>( p );
+    const cl_uint sizeIndex = buildInfo->offset + job_id;
+
+    return BuildKernelDouble( buildInfo->nameInCode,
+                              sizeIndex,
+                              buildInfo->kernel_count,
+                              buildInfo->kernels[sizeIndex],
+                              &buildInfo->programs[sizeIndex] );
+}
+
+
+// A table of more difficult cases to get right: NaN, infinities, FLT_MAX, values just above/at/below powers of two, values around half-integers, values around FLT_MIN, subnormals and signed zero. The negative half comes first, then the same values with positive sign.
+static const float specialValuesFloat[] = {
+    -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),  MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f,  -4.0f, -3.5f,
+    -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),  MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
+    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
+
+    +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
+    +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
+    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
+};
+
+static const size_t specialValuesFloatCount = sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]); // number of entries in specialValuesFloat
+
+// State owned by a single worker thread.
+struct ThreadInfo
+{
+    // Sub-buffer of gInBuffer holding this thread's first operands.
+    cl_mem inBuf;
+    // Sub-buffer of gInBuffer2 holding this thread's second operands.
+    cl_mem inBuf2;
+    // One output sub-buffer per vector size index.
+    cl_mem outBuf[ VECTOR_SIZE_COUNT ];
+    // Random number generator state used to fill this thread's inputs.
+    MTdata d;
+    // Per thread command queue to improve performance.
+    cl_command_queue tQueue;
+};
+
+// Description of one test run, shared read-only by all worker jobs.
+struct TestInfo
+{
+    // Size of the sub-buffer in elements.
+    size_t subBufferSize;
+    // A pointer to the function info.
+    const Func *f;
+    // Programs for the various vector sizes.
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    // Arrays of thread-specific kernels: k[vector_size][thread_id].
+    cl_kernel *k[VECTOR_SIZE_COUNT ];
+    // An array with one ThreadInfo per worker thread.
+    ThreadInfo *tinfo;
+    // Number of worker threads.
+    cl_uint threadCount;
+    // Number of jobs.
+    cl_uint jobCount;
+    // Step between each chunk and the next.
+    cl_uint step;
+    // Stride between individual test values.
+    cl_uint scale;
+    // Non-zero if running in flush to zero mode.
+    int ftz;
+};
+
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
+
+// Run the macro_binary test for the single-precision function described by f.
+//
+// Creates per-thread sub-buffers, command queues, random generators and
+// kernels, runs TestFloat over all jobs through ThreadPool_Do (unless
+// gSkipCorrectnessTesting is set), optionally times the kernels when
+// gMeasureTimes is set, and finally releases everything it created.
+//
+// f - the function under test
+// d - random generator used to seed the per-thread generators and to fill
+//     the input buffers for the timing run
+// Returns the error code of the last operation performed; zero on success.
+int TestMacro_Int_Float_Float(const Func *f, MTdata d)
+{
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+
+    // Init test_info. Zeroing it first lets the cleanup code at exit: run
+    // safely even when setup stops half way.
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        // Enough jobs for the job bases (job_id * step) to span 2^32.
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+
+    // Give every worker thread its own slice of the global buffers, its own
+    // command queue and its own random number generator.
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        // Check the buffer that was just created (inBuf2, not inBuf).
+        if( error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+
+    // Init the kernels: one thread-pool job per vector size.
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+
+
+    // Run the kernels
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
+
+        if( error )
+            goto exit;
+
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+        {
+            p[j] = genrand_int32(d);
+            p2[j] = genrand_int32(d);
+        }
+
+        // On failure go through exit: so that the kernels, programs,
+        // sub-buffers and queues created above are released.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels; timing uses thread 0's kernel on the full buffers.
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report the average instead of the best run when requested.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    vlog( "\n" );
+
+exit:
+    // Release everything created above.
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata(test_info.tinfo[i].d);
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+
+// Worker job: test one chunk of single-precision inputs on one thread.
+//
+// Fills this thread's slice of gIn / gIn2 (special-value pairs first, random
+// bits afterwards), runs the kernel of every enabled vector size on it,
+// computes the reference results on the host and compares them with the
+// device results.
+//
+// job_id    - index of the chunk; the first jobs carry the special values
+// thread_id - index of the worker thread, selects the ThreadInfo and kernels
+// data      - pointer to the shared TestInfo
+// Returns zero on success, an OpenCL error code if an API call failed, or -1
+// when a result mismatch was found.
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data  )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_float );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    fptr        func = job->f->func;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    const char  *name = job->f->name;
+    cl_int      *t,*r;
+    cl_float    *s,*s2;
+
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_int  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    //Init input array
+    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
+    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+
+    if( job_id <= (cl_uint)indx )
+    { // test edge cases
+        float *fp = (float *)p;
+        float *fp2 = (float *)p2;
+        uint32_t x, y;
+
+        // Position of this job's first pair in the special-value cross
+        // product: x walks the first operand, y the second.
+        x = (job_id * buffer_elements) % specialValuesFloatCount;
+        y = (job_id * buffer_elements) / specialValuesFloatCount;
+
+        for( ; j < buffer_elements; j++ )
+        {
+            fp[j] = specialValuesFloat[x];
+            fp2[j] = specialValuesFloat[y];
+            if( ++x >= specialValuesFloatCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesFloatCount )
+                {
+                    // Step past the pair just written before leaving, so the
+                    // random fill below does not overwrite the last pair.
+                    j++;
+                    break;
+                }
+            }
+        }
+    }
+
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = genrand_int32(d);
+        p2[j] = genrand_int32(d);
+    }
+
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+
+    //Calculate the correctly rounded reference result
+    r = (cl_int *)gOut_Ref  + thread_id * buffer_elements;
+    s = (float *)gIn  + thread_id * buffer_elements;
+    s2 = (float *)gIn2  + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = func.i_ff( s[j], s2[j] );
+
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+
+    // Wait for the last buffer
+    out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+
+    //Verify data
+    t = (cl_int *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        cl_int *q = out[0];
+
+        // Scalar kernel (vector size index 0): the result must equal the
+        // reference value.
+        if( gMinVectorSizeIndex == 0 && t[j] != q[j] )
+        {
+            if( ftz )
+            {
+                // In flush-to-zero mode a subnormal input may have been
+                // treated as +0 or -0, so accept the reference result for any
+                // such substitution.
+                // NOTE(review): the `continue` statements below move on to
+                // the next element and so also skip the vector-width checks
+                // for this element -- confirm that this is intended.
+                if( IsFloatSubnormal( s[j])  )
+                {
+                    if( IsFloatSubnormal( s2[j] )  )
+                    {
+                        int correct = func.i_ff( 0.0f, 0.0f );
+                        int correct2 = func.i_ff( 0.0f, -0.0f );
+                        int correct3 = func.i_ff( -0.0f, 0.0f );
+                        int correct4 = func.i_ff( -0.0f, -0.0f );
+
+                        if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] )
+                            continue;
+                    }
+                    else
+                    {
+                        int correct = func.i_ff( 0.0f, s2[j] );
+                        int correct2 = func.i_ff( -0.0f, s2[j] );
+                        if( correct == q[j] || correct2 == q[j]  )
+                            continue;
+                    }
+                }
+                else if( IsFloatSubnormal( s2[j] ) )
+                {
+                    int correct = func.i_ff( s[j], 0.0f );
+                    int correct2 = func.i_ff( s[j], -0.0f );
+                    if( correct == q[j] || correct2 == q[j]  )
+                        continue;
+                }
+
+            }
+
+            uint32_t err = t[j] - q[j];
+            if( q[j] > t[j] )
+                err = q[j] - t[j];
+            vlog_error( "\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. 0x%8.8x (index: %d)\n", name, err, ((float*) s)[j], ((float*) s2)[j], t[j], q[j], j );
+            error = -1;
+            goto exit;
+        }
+
+        // Vector kernels: the result is compared against the negated
+        // reference value.
+        for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ )
+        {
+            q = out[k];
+            // If we aren't getting the correctly rounded result
+            if( -t[j] != q[j] )
+            {
+                if( ftz )
+                {
+                    // Same flush-to-zero allowances as for the scalar case.
+                    if( IsFloatSubnormal( s[j])  )
+                    {
+                        if( IsFloatSubnormal( s2[j] )  )
+                        {
+                            int correct = -func.i_ff( 0.0f, 0.0f );
+                            int correct2 = -func.i_ff( 0.0f, -0.0f );
+                            int correct3 = -func.i_ff( -0.0f, 0.0f );
+                            int correct4 = -func.i_ff( -0.0f, -0.0f );
+
+                            if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] )
+                                continue;
+                        }
+                        else
+                        {
+                            int correct = -func.i_ff( 0.0f, s2[j] );
+                            int correct2 = -func.i_ff( -0.0f, s2[j] );
+                            if( correct == q[j] || correct2 == q[j]  )
+                                continue;
+                        }
+                    }
+                    else if( IsFloatSubnormal( s2[j] ) )
+                    {
+                        int correct = -func.i_ff( s[j], 0.0f );
+                        int correct2 = -func.i_ff( s[j], -0.0f );
+                        if( correct == q[j] || correct2 == q[j]  )
+                            continue;
+                    }
+
+                }
+                cl_uint err = -t[j] - q[j];
+                if( q[j] > -t[j] )
+                    err = q[j] + t[j];
+                vlog_error( "\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x vs. 0x%8.8x (index: %d)\n", name, sizeNames[k], err, ((float*) s)[j], ((float*) s2)[j], -t[j], q[j], j );
+                error = -1;
+                goto exit;
+            }
+        }
+    }
+
+    // Hand the output buffers back to the device.
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+
+
+    // Progress output whenever the low 28 bits of base are zero.
+    if( 0 == ( base & 0x0fffffff) )
+    {
+       if (gVerboseBruteForce)
+       {
+           vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount);
+       } else
+       {
+          vlog("." );
+       }
+       fflush(stdout);
+    }
+
+exit:
+    return error;
+}
+
+
+// A table of more difficult cases to get right: NaN, infinities, DBL_MAX, values around powers of two and half-integers, subnormals and signed zero (negative half first, then its positive mirror).
+static const double specialValuesDouble[] = {
+    -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100.,  -4.0, -3.5,
+    -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
+
+    +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100.,  +4.0, +3.5,
+    +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), // the entry before +1.0 is +(1 + 1ulp), the mirror of the negative half
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
+};
+
+static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] ); // number of entries in specialValuesDouble
+
+
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p );
+
+int TestMacro_Int_Double_Double(const Func *f, MTdata d) // Test driver for one function taking two doubles and producing an integer: sets up per-thread resources, runs TestDouble as thread-pool jobs, optionally times the kernels.
+{ // f: function under test (name, nameInCode and ftz are read here); d: random generator used to seed the per-thread generators and the timing inputs. Returns `error`, which is 0 when every step succeeded.
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) ); // zeroed so the cleanup at exit: sees NULL for anything not yet created
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); // per-thread slice, counted in cl_double elements
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+         test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
+    }
+
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale) // dividing back detects wrap-around of the 32-bit product
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ftz = f->ftz || gForceFTZ;
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) }; // {byte offset, byte size} of thread i's slice
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf2) // validate the buffer that was just created (inBuf2, not inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            /* Qualcomm fix: end */
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d)); // each thread gets its own generator, seeded from d
+    }
+
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) // one build job per enabled vector size
+            goto exit;
+    }
+
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
+
+        if( error )
+            goto exit;
+
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        uint64_t *p = (uint64_t *)gIn;
+        uint64_t *p2 = (uint64_t *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ )
+        {
+            p[j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); // random 64-bit pattern assembled from two 32-bit draws
+            p2[j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
+        }
+
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit; // go through the common cleanup instead of leaking the per-thread resources
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit; // go through the common cleanup instead of leaking the per-thread resources
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // work-item count: whole buffer divided by bytes per vector, rounded up
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT; // report the mean instead of the minimum when requested
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ ) // j already equals gMaxVectorSizeIndex after the loop above, so this body does not run
+            vlog( "\t     -- " );
+    }
+
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata(test_info.tinfo[i].d);
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) // Thread-pool job: fills this thread's input slices, runs the kernel for every enabled vector size and checks the results against the host reference.
+{ // job_id selects the inputs (special-value pairs for the first jobs), thread_id selects the per-thread resources, data is the shared TestInfo. Returns 0, an OpenCL error code, or -1 on a result mismatch.
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_double );
+    cl_uint     base = job_id * (cl_uint) job->step; // only used for the progress output at the end
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    dptr        dfunc = job->f->dfunc;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    const char  *name = job->f->name;
+    cl_long     *t,*r;
+    cl_double   *s,*s2;
+
+    Force64BitFPUPrecision();
+
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_long  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); // non-blocking map; e[j] is waited on further down
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    //Init input array
+    double *p = (double *)gIn + thread_id * buffer_elements; // this thread's slice of the shared host input buffers
+    double *p2 = (double *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount; // number of ordered (x, y) pairs of special values
+    int indx = (totalSpecialValueCount - 1) / buffer_elements; // last job_id that still draws from the special-value pairs
+
+    if( job_id <= (cl_uint)indx )
+    { // test edge cases
+        uint32_t x, y;
+
+    x = (job_id * buffer_elements) % specialValuesDoubleCount;
+    y = (job_id * buffer_elements) / specialValuesDoubleCount;
+
+        for( ; j < buffer_elements; j++ )
+        {
+            p[j] = specialValuesDouble[x];
+            p2[j] = specialValuesDouble[y];
+            if( ++x >= specialValuesDoubleCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesDoubleCount )
+                    break; // all pairs consumed; j is left on the last element written
+            }
+        }
+    }
+
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        ((cl_ulong*)p)[j] = genrand_int64(d); // random 64-bit patterns stored directly as the doubles' bits
+        ((cl_ulong*)p2)[j] = genrand_int64(d);
+    }
+
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error ); // names the call that actually failed
+            goto exit;
+        }
+
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; // elements divided by vector width, rounded up
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+
+    //Calculate the correctly rounded reference result
+    r = (cl_long *)gOut_Ref  + thread_id * buffer_elements;
+    s = (cl_double *)gIn  + thread_id * buffer_elements;
+    s2 = (cl_double *)gIn2  + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = dfunc.i_ff( s[j], s2[j] ); // host reference, computed while the device works
+
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+
+    // Wait for the last buffer
+    out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); // blocking map of the last buffer
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+
+    //Verify data
+    t = (cl_long *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        cl_long *q = (cl_long *) out[0]; // results for vector-size index 0, compared against the reference as-is
+
+        // If we aren't getting the correctly rounded result
+        if( gMinVectorSizeIndex == 0 && t[j] != q[j] )
+        {
+            if( ftz ) // flush-to-zero: a subnormal input may have been read as +0 or -0, so accept the reference result for either
+            {
+                if( IsDoubleSubnormal( s[j])  )
+                {
+                    if( IsDoubleSubnormal( s2[j] )  )
+                    {
+                        int64_t correct = dfunc.i_ff( 0.0f, 0.0f );
+                        int64_t correct2 = dfunc.i_ff( 0.0f, -0.0f );
+                        int64_t correct3 = dfunc.i_ff( -0.0f, 0.0f );
+                        int64_t correct4 = dfunc.i_ff( -0.0f, -0.0f );
+
+                        if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] )
+                            continue;
+                    }
+                    else
+                    {
+                        int64_t correct = dfunc.i_ff( 0.0f, s2[j] );
+                        int64_t correct2 = dfunc.i_ff( -0.0f, s2[j] );
+                        if( correct == q[j] || correct2 == q[j]  )
+                            continue;
+                    }
+                }
+                else if( IsDoubleSubnormal( s2[j] ) )
+                {
+                    int64_t correct = dfunc.i_ff( s[j], 0.0f );
+                    int64_t correct2 = dfunc.i_ff( s[j], -0.0f );
+                    if( correct == q[j] || correct2 == q[j]  )
+                        continue;
+                }
+
+            }
+
+            uint64_t err = t[j] - q[j]; // absolute difference between reference and device result, for the report
+            if( q[j] > t[j] )
+                err = q[j] - t[j];
+            vlog_error( "\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld vs. %lld  (index: %d)\n", name, err, ((double*) s)[j], ((double*) s2)[j], t[j], q[j], j );
+            error = -1;
+            goto exit;
+        }
+
+
+        for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ )
+        {
+            q = (cl_long*) out[k];
+            // If we aren't getting the correctly rounded result
+            if( -t[j] != q[j] ) // for vector-size indices >= 1 the expected value is the negated reference
+            {
+                if( ftz ) // same flush-to-zero allowance as above, with the negated expectation
+                {
+                    if( IsDoubleSubnormal( s[j])  )
+                    {
+                        if( IsDoubleSubnormal( s2[j] )  )
+                        {
+                            int64_t correct = -dfunc.i_ff( 0.0f, 0.0f );
+                            int64_t correct2 = -dfunc.i_ff( 0.0f, -0.0f );
+                            int64_t correct3 = -dfunc.i_ff( -0.0f, 0.0f );
+                            int64_t correct4 = -dfunc.i_ff( -0.0f, -0.0f );
+
+                            if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] )
+                                continue;
+                        }
+                        else
+                        {
+                            int64_t correct = -dfunc.i_ff( 0.0f, s2[j] );
+                            int64_t correct2 = -dfunc.i_ff( -0.0f, s2[j] );
+                            if( correct == q[j] || correct2 == q[j]  )
+                                continue;
+                        }
+                    }
+                    else if( IsDoubleSubnormal( s2[j] ) )
+                    {
+                        int64_t correct = -dfunc.i_ff( s[j], 0.0f );
+                        int64_t correct2 = -dfunc.i_ff( s[j], -0.0f );
+                        if( correct == q[j] || correct2 == q[j]  )
+                            continue;
+                    }
+
+                }
+
+                uint64_t err = -t[j] - q[j];
+                if( q[j] > -t[j] )
+                    err = q[j] + t[j];
+                vlog_error( "\nERROR: %sD%s: %lld ulp error at {%.13la, %.13la}: *%lld vs. %lld  (index: %d)\n", name, sizeNames[k], err, ((double*) s)[j], ((double*) s2)[j], -t[j], q[j], j );
+                error = -1;
+                goto exit;
+            }
+        }
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+
+
+    if( 0 == ( base & 0x0fffffff) ) // progress output only when the low 28 bits of base are zero
+    {
+       if (gVerboseBruteForce)
+       {
+           vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount);
+       } else
+       {
+          vlog("." );
+       }
+       fflush(stdout);
+    }
+
+exit:
+    return error;
+}
+

diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp
deleted file mode 100644
index d09915f..0000000
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ /dev/null

@@ -1,737 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global long",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global long* out, __global double* in, __global double* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       double3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       long3 l0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       vstore3( l0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 f0;\n"
-        "       double3 f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       long3 l0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = l0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = l0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-}
-
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-{
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
-{
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    int ftz; // non-zero if running in flush to zero mode
-
-} TestInfo;
-
-// A table of more difficult cases to get right
-static const double specialValues[] = {
-    -NAN,
-    -INFINITY,
-    -DBL_MAX,
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
-    -1000.0,
-    -100.0,
-    -4.0,
-    -3.5,
-    -3.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
-    -2.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
-    -2.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
-    -1.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    -1.0,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53),
-    -0.5,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54),
-    -0.25,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
-    -DBL_MIN,
-    MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
-    -0.0,
-
-    +NAN,
-    +INFINITY,
-    +DBL_MAX,
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
-    +1000.0,
-    +100.0,
-    +4.0,
-    +3.5,
-    +3.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
-    +2.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
-    +2.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
-    +1.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    +1.0,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
-    +0.5,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
-    +0.25,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
-    +DBL_MIN,
-    MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
-    +0.0,
-};
-
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
-{
-    TestInfo test_info;
-    cl_int error;
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ftz = f->ftz || gForceFTZ;
-
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (size_t i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_double);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    dptr dfunc = job->f->dfunc;
-    int ftz = job->ftz;
-    MTdata d = tinfo->d;
-    cl_int error;
-    const char *name = job->f->name;
-    cl_long *t;
-    cl_long *r;
-    cl_double *s;
-    cl_double *s2;
-
-    Force64BitFPUPrecision();
-
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_long *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_long *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-
-    // Init input array
-    double *p = (double *)gIn + thread_id * buffer_elements;
-    double *p2 = (double *)gIn2 + thread_id * buffer_elements;
-    cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        uint32_t x, y;
-
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-
-        for (; idx < buffer_elements; idx++)
-        {
-            p[idx] = specialValues[x];
-            p2[idx] = specialValues[y];
-            if (++x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesCount) break;
-            }
-        }
-    }
-
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        ((cl_ulong *)p)[idx] = genrand_int64(d);
-        ((cl_ulong *)p2)[idx] = genrand_int64(d);
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-
-    // Calculate the correctly rounded reference result
-    r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
-    s = (cl_double *)gIn + thread_id * buffer_elements;
-    s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++) r[j] = dfunc.i_ff(s[j], s2[j]);
-
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_long *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-
-    // Verify data
-    t = (cl_long *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        cl_long *q = out[0];
-
-        // If we aren't getting the correctly rounded result
-        if (gMinVectorSizeIndex == 0 && t[j] != q[j])
-        {
-            // If we aren't getting the correctly rounded result
-            if (ftz)
-            {
-                if (IsDoubleSubnormal(s[j]))
-                {
-                    if (IsDoubleSubnormal(s2[j]))
-                    {
-                        int64_t correct = dfunc.i_ff(0.0f, 0.0f);
-                        int64_t correct2 = dfunc.i_ff(0.0f, -0.0f);
-                        int64_t correct3 = dfunc.i_ff(-0.0f, 0.0f);
-                        int64_t correct4 = dfunc.i_ff(-0.0f, -0.0f);
-
-                        if (correct == q[j] || correct2 == q[j]
-                            || correct3 == q[j] || correct4 == q[j])
-                            continue;
-                    }
-                    else
-                    {
-                        int64_t correct = dfunc.i_ff(0.0f, s2[j]);
-                        int64_t correct2 = dfunc.i_ff(-0.0f, s2[j]);
-                        if (correct == q[j] || correct2 == q[j]) continue;
-                    }
-                }
-                else if (IsDoubleSubnormal(s2[j]))
-                {
-                    int64_t correct = dfunc.i_ff(s[j], 0.0f);
-                    int64_t correct2 = dfunc.i_ff(s[j], -0.0f);
-                    if (correct == q[j] || correct2 == q[j]) continue;
-                }
-            }
-
-            cl_ulong err = t[j] - q[j];
-            if (q[j] > t[j]) err = q[j] - t[j];
-            vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld "
-                       "vs. %lld  (index: %d)\n",
-                       name, err, ((double *)s)[j], ((double *)s2)[j], t[j],
-                       q[j], j);
-            error = -1;
-            goto exit;
-        }
-
-
-        for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
-        {
-            q = (cl_long *)out[k];
-            // If we aren't getting the correctly rounded result
-            if (-t[j] != q[j])
-            {
-                if (ftz)
-                {
-                    if (IsDoubleSubnormal(s[j]))
-                    {
-                        if (IsDoubleSubnormal(s2[j]))
-                        {
-                            int64_t correct = -dfunc.i_ff(0.0f, 0.0f);
-                            int64_t correct2 = -dfunc.i_ff(0.0f, -0.0f);
-                            int64_t correct3 = -dfunc.i_ff(-0.0f, 0.0f);
-                            int64_t correct4 = -dfunc.i_ff(-0.0f, -0.0f);
-
-                            if (correct == q[j] || correct2 == q[j]
-                                || correct3 == q[j] || correct4 == q[j])
-                                continue;
-                        }
-                        else
-                        {
-                            int64_t correct = -dfunc.i_ff(0.0f, s2[j]);
-                            int64_t correct2 = -dfunc.i_ff(-0.0f, s2[j]);
-                            if (correct == q[j] || correct2 == q[j]) continue;
-                        }
-                    }
-                    else if (IsDoubleSubnormal(s2[j]))
-                    {
-                        int64_t correct = -dfunc.i_ff(s[j], 0.0f);
-                        int64_t correct2 = -dfunc.i_ff(s[j], -0.0f);
-                        if (correct == q[j] || correct2 == q[j]) continue;
-                    }
-                }
-
-                cl_ulong err = -t[j] - q[j];
-                if (q[j] > -t[j]) err = q[j] + t[j];
-                vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, "
-                           "%.13la}: *%lld vs. %lld  (index: %d)\n",
-                           name, sizeNames[k], err, ((double *)s)[j],
-                           ((double *)s2)[j], -t[j], q[j], j);
-                error = -1;
-                goto exit;
-            }
-        }
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-
-
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-
-exit:
-    return error;
-}

diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp
deleted file mode 100644
index c530cda..0000000
--- a/test_conformance/math_brute_force/macro_binary_float.cpp
+++ /dev/null

@@ -1,726 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global int",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global int* out, __global float* in, __global float* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       int3 i0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       vstore3( i0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       int3 i0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-}
-
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-{
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
-{
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    int ftz; // non-zero if running in flush to zero mode
-
-} TestInfo;
-
-// A table of more difficult cases to get right
-static const float specialValues[] = {
-    -NAN,
-    -INFINITY,
-    -FLT_MAX,
-    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
-    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
-    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
-    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
-    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
-    MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
-    MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
-    MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
-    MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
-    -1000.f,
-    -100.f,
-    -4.0f,
-    -3.5f,
-    -3.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
-    -2.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
-    -2.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
-    -1.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
-    -1.0f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
-    -0.5f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
-    -0.25f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
-    -FLT_MIN,
-    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
-    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
-    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
-    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
-    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
-    -0.0f,
-
-    +NAN,
-    +INFINITY,
-    +FLT_MAX,
-    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
-    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
-    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
-    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
-    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
-    MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
-    MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
-    MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
-    MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
-    +1000.f,
-    +100.f,
-    +4.0f,
-    +3.5f,
-    +3.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
-    2.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
-    +2.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
-    1.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
-    +1.0f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
-    +0.5f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
-    +0.25f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
-    +FLT_MIN,
-    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
-    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
-    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
-    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
-    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
-    +0.0f,
-};
-
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
-{
-    TestInfo test_info;
-    cl_int error;
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_float));
-
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ftz =
-        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_float),
-            test_info.subBufferSize * sizeof(cl_float)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_float);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    fptr func = job->f->func;
-    int ftz = job->ftz;
-    MTdata d = tinfo->d;
-    cl_int error;
-    const char *name = job->f->name;
-    cl_int *t = 0;
-    cl_int *r = 0;
-    cl_float *s = 0;
-    cl_float *s2 = 0;
-
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_int *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_int *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-
-    // Init input array
-    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
-    cl_uint idx = 0;
-
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        float *fp = (float *)p;
-        float *fp2 = (float *)p2;
-        uint32_t x, y;
-
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-
-        for (; idx < buffer_elements; idx++)
-        {
-            fp[idx] = specialValues[x];
-            fp2[idx] = specialValues[y];
-            ++x;
-            if (x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesCount) break;
-            }
-        }
-    }
-
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = genrand_int32(d);
-        p2[idx] = genrand_int32(d);
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-
-    // Calculate the correctly rounded reference result
-    r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
-    s = (float *)gIn + thread_id * buffer_elements;
-    s2 = (float *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++) r[j] = func.i_ff(s[j], s2[j]);
-
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_int *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-
-    // Verify data
-    t = (cl_int *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        cl_int *q = out[0];
-
-        if (gMinVectorSizeIndex == 0 && t[j] != q[j])
-        {
-            if (ftz)
-            {
-                if (IsFloatSubnormal(s[j]))
-                {
-                    if (IsFloatSubnormal(s2[j]))
-                    {
-                        int correct = func.i_ff(0.0f, 0.0f);
-                        int correct2 = func.i_ff(0.0f, -0.0f);
-                        int correct3 = func.i_ff(-0.0f, 0.0f);
-                        int correct4 = func.i_ff(-0.0f, -0.0f);
-
-                        if (correct == q[j] || correct2 == q[j]
-                            || correct3 == q[j] || correct4 == q[j])
-                            continue;
-                    }
-                    else
-                    {
-                        int correct = func.i_ff(0.0f, s2[j]);
-                        int correct2 = func.i_ff(-0.0f, s2[j]);
-                        if (correct == q[j] || correct2 == q[j]) continue;
-                    }
-                }
-                else if (IsFloatSubnormal(s2[j]))
-                {
-                    int correct = func.i_ff(s[j], 0.0f);
-                    int correct2 = func.i_ff(s[j], -0.0f);
-                    if (correct == q[j] || correct2 == q[j]) continue;
-                }
-            }
-
-            uint32_t err = t[j] - q[j];
-            if (q[j] > t[j]) err = q[j] - t[j];
-            vlog_error("\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. "
-                       "0x%8.8x (index: %d)\n",
-                       name, err, ((float *)s)[j], ((float *)s2)[j], t[j], q[j],
-                       j);
-            error = -1;
-            goto exit;
-        }
-
-        for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
-        {
-            q = out[k];
-            // If we aren't getting the correctly rounded result
-            if (-t[j] != q[j])
-            {
-                if (ftz)
-                {
-                    if (IsFloatSubnormal(s[j]))
-                    {
-                        if (IsFloatSubnormal(s2[j]))
-                        {
-                            int correct = -func.i_ff(0.0f, 0.0f);
-                            int correct2 = -func.i_ff(0.0f, -0.0f);
-                            int correct3 = -func.i_ff(-0.0f, 0.0f);
-                            int correct4 = -func.i_ff(-0.0f, -0.0f);
-
-                            if (correct == q[j] || correct2 == q[j]
-                                || correct3 == q[j] || correct4 == q[j])
-                                continue;
-                        }
-                        else
-                        {
-                            int correct = -func.i_ff(0.0f, s2[j]);
-                            int correct2 = -func.i_ff(-0.0f, s2[j]);
-                            if (correct == q[j] || correct2 == q[j]) continue;
-                        }
-                    }
-                    else if (IsFloatSubnormal(s2[j]))
-                    {
-                        int correct = -func.i_ff(s[j], 0.0f);
-                        int correct2 = -func.i_ff(s[j], -0.0f);
-                        if (correct == q[j] || correct2 == q[j]) continue;
-                    }
-                }
-                cl_uint err = -t[j] - q[j];
-                if (q[j] > -t[j]) err = q[j] + t[j];
-                vlog_error("\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x "
-                           "vs. 0x%8.8x (index: %d)\n",
-                           name, sizeNames[k], err, ((float *)s)[j],
-                           ((float *)s2)[j], -t[j], q[j], j);
-                error = -1;
-                goto exit;
-            }
-        }
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-
-
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-
-exit:
-    return error;
-}

diff --git a/test_conformance/math_brute_force/macro_unary.cpp b/test_conformance/math_brute_force/macro_unary.cpp
new file mode 100644
index 0000000..c8d125b
--- /dev/null
+++ b/test_conformance/math_brute_force/macro_unary.cpp

@@ -0,0 +1,989 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <string.h>
+#include "FunctionList.h"
+
+int TestMacro_Int_Float(const Func *f, MTdata);
+int TestMacro_Int_Double(const Func *f, MTdata);
+
+extern const vtbl _macro_unary = { "macro_unary", TestMacro_Int_Float,
+                                   TestMacro_Int_Double };
+
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       int3 i0 = ", name, "( f0 );\n"
+                            "       vstore3( i0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       int3 i0;\n"
+                            "       float3 f0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], 0xdead, 0xdead ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], 0xdead ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       i0 = ", name, "( f0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = i0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = i0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+}
+
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global long", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global long* out, __global double* in)\n"
+                        "{\n"
+                        "   size_t i = get_global_id(0);\n"
+                        "   if( i + 1 < get_global_size(0) )\n"
+                        "   {\n"
+                        "       double3 d0 = vload3( 0, in + 3 * i );\n"
+                        "       long3 l0 = ", name, "( d0 );\n"
+                        "       vstore3( l0, 0, out + 3*i );\n"
+                        "   }\n"
+                        "   else\n"
+                        "   {\n"
+                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                        "       double3 d0;\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 1:\n"
+                        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+                        "               break;\n"
+                        "           case 0:\n"
+                        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                        "               break;\n"
+                        "       }\n"
+                        "       long3 l0 = ", name, "( d0 );\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 0:\n"
+                        "               out[3*i+1] = l0.y; \n"
+                        "               // fall through\n"
+                        "           case 1:\n"
+                        "               out[3*i] = l0.x; \n"
+                        "               break;\n"
+                        "       }\n"
+                        "   }\n"
+                        "}\n"
+                    };
+
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+
+
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+}
+
+typedef struct BuildKernelInfo
+{
+    cl_uint     offset;            // the first vector size to build
+    cl_uint     kernel_count;
+    cl_kernel   **kernels;
+    cl_program  *programs;
+    const char  *nameInCode;
+}BuildKernelInfo;
+
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
+}
+
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
+}
+
+//Thread specific data for a worker thread
+typedef struct ThreadInfo
+{
+    cl_mem      inBuf;                              // input buffer for the thread
+    cl_mem      outBuf[ VECTOR_SIZE_COUNT ];        // output buffers for the thread
+    cl_command_queue tQueue;                        // per thread command queue to improve performance
+}ThreadInfo;
+
+typedef struct TestInfo
+{
+    size_t      subBufferSize;                      // Size of the sub-buffer in elements
+    const Func  *f;                                 // A pointer to the function info
+    cl_program  programs[ VECTOR_SIZE_COUNT ];      // programs for various vector sizes
+    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
+    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
+    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
+    cl_uint     step;                               // step between each chunk and the next.
+    cl_uint     scale;                              // stride between individual test values
+    int         ftz;                                // non-zero if running in flush to zero mode
+
+}TestInfo;
+
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
+
+int TestMacro_Int_Float(const Func *f, MTdata d)
+{
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode )
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
+
+        if( error )
+            goto exit;
+
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        cl_uint *p = (cl_uint *)gIn;
+        for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+            p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    vlog( "\n" );
+
+exit:
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+
+// Worker job for the float variant of the int-returning macro tests.
+//
+// job_id     selects the chunk of input bit patterns to test
+//            (base = job_id * job->step, stride = job->scale).
+// thread_id  selects this worker's sub-buffers, kernels and command queue.
+// data       points to the shared TestInfo describing the run.
+//
+// For every enabled vector size the kernel is run on the same inputs and the
+// results are compared with the host reference: index 0 must equal the
+// reference value, larger indices must equal its negation.
+//
+// Returns CL_SUCCESS on success (or when correctness testing is skipped),
+// -1 on a result mismatch, or the OpenCL error code of a failing API call.
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t  buffer_elements = job->subBufferSize;
+    size_t  buffer_size = buffer_elements * sizeof( cl_float );
+    cl_uint scale = job->scale;
+    cl_uint base = job_id * (cl_uint) job->step;
+    ThreadInfo *tinfo = job->tinfo + thread_id;
+    fptr    func = job->f->func;
+    int     ftz = job->ftz;
+    cl_uint j, k;
+    cl_int error = CL_SUCCESS;
+    cl_int ret   = CL_SUCCESS;
+    const char *name = job->f->name;
+
+    // "signbit" uses a different member of the reference function union
+    int signbit_test = 0;
+    if(!strcmp(name, "signbit"))
+        signbit_test = 1;
+
+    #define ref_func(s) ( signbit_test ? func.i_f_f( s ) : func.i_f( s ) )
+
+    // start the map of the output arrays
+    // (zero-initialised so that unused slots are never read as garbage)
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_int  *out[ VECTOR_SIZE_COUNT ] = { NULL };
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    // Write the new values to the input array
+    cl_uint *p = (cl_uint*) gIn + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        p[j] = base + j * scale;
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        return error;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            return error;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            return error;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            return error;
+        }
+
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            return error;
+        }
+    }
+
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+
+    //Calculate the correctly rounded reference result
+    cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
+    float *s = (float *)p;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = ref_func( s[j] );
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        return error;
+    }
+
+    //Verify data
+    cl_int *t = (cl_int *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        cl_int *q = out[0];
+
+        // Vector index 0: the result must equal the reference value.
+        // q is only dereferenced when index 0 is actually being tested.
+        if( gMinVectorSizeIndex == 0 && t[j] != q[j])
+        {
+            // In flush-to-zero mode a subnormal input may be treated as +/-0
+            if( ftz )
+            {
+                if( IsFloatSubnormal( s[j]) )
+                {
+                    int correct = ref_func( +0.0f );
+                    int correct2 = ref_func( -0.0f );
+                    if( correct == q[j] || correct2 == q[j] )
+                        continue;   // accepted; move on to the next element
+                }
+            }
+
+            uint32_t err = t[j] - q[j];
+            if( q[j] > t[j] )
+                err = q[j] - t[j];
+            vlog_error( "\nERROR: %s: %d ulp error at %a: *%d vs. %d\n", name,  err, ((float*) s)[j], t[j], q[j] );
+            error = -1;
+            goto exit;
+        }
+
+
+        // Remaining vector indices: the result must equal the negated reference
+        for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ )
+        {
+            q = out[k];
+            // If we aren't getting the correctly rounded result
+            if( -t[j] != q[j] )
+            {
+                if( ftz )
+                {
+                    if( IsFloatSubnormal( s[j]))
+                    {
+                        int correct = -ref_func( +0.0f );
+                        int correct2 = -ref_func( -0.0f );
+                        if( correct == q[j] || correct2 == q[j] )
+                            continue;
+                    }
+                }
+
+                uint32_t err = -t[j] - q[j];
+                if( q[j] > -t[j] )
+                    err = q[j] + t[j];
+                vlog_error( "\nERROR: %s%s: %d ulp error at %a: *%d vs. %d\n", name, sizeNames[k], err, ((float*) s)[j], -t[j], q[j] );
+                error = -1;
+                goto exit;
+            }
+        }
+    }
+
+exit:
+    // Remember the verification outcome; "error" is reused for the cleanup calls
+    ret = error;
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+    {
+        vlog( "clFlush 3 failed\n" );
+        return error;
+    }
+
+
+    // Progress output, emitted only when the low 28 bits of base are zero
+    if( 0 == ( base & 0x0fffffff) )
+    {
+       if (gVerboseBruteForce)
+       {
+           vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount);
+       } else
+       {
+          vlog("." );
+       }
+       fflush(stdout);
+    }
+
+    return ret;
+}
+// ref_func is only meaningful inside TestFloat; do not leak it further
+#undef ref_func
+
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data );
+
+// Brute-force test driver for an int-returning macro applied to doubles.
+//
+// f  describes the function under test (this routine reads name, nameInCode
+//    and ftz from it).
+// d  random number state; used only to fill the input buffer for the optional
+//    timing runs.
+//
+// Builds one kernel per vector size and worker thread, runs TestDouble over
+// the job range through ThreadPool_Do and, if gMeasureTimes is set, times the
+// kernels on random input.
+//
+// Returns 0 on success, otherwise the first error code encountered. Every
+// exit path after setup goes through the cleanup code at "exit".
+int TestMacro_Int_Double(const Func *f, MTdata d)
+{
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode )
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
+    }
+
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ftz = f->ftz || gForceFTZ;
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        // Each worker thread gets its own slice of the global in/out buffers
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            /* Qualcomm fix: end */
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer[%d] for region {%zd, %zd}\n", (int) j, region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
+
+        if( error )
+            goto exit;
+
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        cl_ulong *p = (cl_ulong *)gIn;
+        for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ )
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            // Go through the common cleanup instead of returning directly,
+            // otherwise the kernels, programs, sub-buffers and queues leak.
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report the best run unless averages were requested
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+    }
+
+    vlog( "\n" );
+
+exit:
+    // Release everything created above; entries that were never created are
+    // still zero from the memset calls.
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+
+// Worker job for the double variant of the int-returning macro tests.
+//
+// job_id     selects the chunk of inputs to test (base = job_id * job->step,
+//            stride = job->scale); inputs are produced by DoubleFromUInt32.
+// thread_id  selects this worker's sub-buffers, kernels and command queue.
+// data       points to the shared TestInfo describing the run.
+//
+// For every enabled vector size the kernel is run on the same inputs and the
+// results are compared with the host reference: index 0 must equal the
+// reference value, larger indices must equal its negation.
+//
+// Returns CL_SUCCESS on success (or when correctness testing is skipped),
+// -1 on a result mismatch, or the OpenCL error code of a failing API call.
+// Once all output buffers are mapped for reading they are unmapped again on
+// both the success and the mismatch path.
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t  buffer_elements = job->subBufferSize;
+    size_t  buffer_size = buffer_elements * sizeof( cl_double );
+    cl_uint scale = job->scale;
+    cl_uint base = job_id * (cl_uint) job->step;
+    ThreadInfo *tinfo = job->tinfo + thread_id;
+    dptr    dfunc = job->f->dfunc;
+    cl_uint j, k;
+    cl_int error;
+    cl_int ret = CL_SUCCESS;
+    int ftz = job->ftz;
+    const char *name = job->f->name;
+
+    Force64BitFPUPrecision();
+
+    // start the map of the output arrays
+    // (zero-initialised so that unused slots are never read as garbage)
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_long *out[ VECTOR_SIZE_COUNT ] = { NULL };
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    // Write the new values to the input array
+    cl_double *p = (cl_double*) gIn + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        p[j] = DoubleFromUInt32( base + j * scale);
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        return error;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            return error;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            return error;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            return error;
+        }
+
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            return error;
+        }
+    }
+
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+
+    //Calculate the correctly rounded reference result
+    cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
+    cl_double *s = (cl_double *)p;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = dfunc.i_f( s[j] );
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        return error;
+    }
+
+
+    //Verify data
+    cl_long *t = (cl_long *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        cl_long *q = out[0];
+
+
+        // Vector index 0: the result must equal the reference value.
+        // q is only dereferenced when index 0 is actually being tested.
+        if( gMinVectorSizeIndex == 0 && t[j] != q[j])
+        {
+            // In flush-to-zero mode a subnormal input may be treated as +/-0
+            if( ftz )
+            {
+                if( IsDoubleSubnormal( s[j]) )
+                {
+                    cl_long correct = dfunc.i_f( +0.0f );
+                    cl_long correct2 = dfunc.i_f( -0.0f );
+                    if( correct == q[j] || correct2 == q[j] )
+                        continue;   // accepted; move on to the next element
+                }
+            }
+
+            cl_ulong err = t[j] - q[j];
+            if( q[j] > t[j] )
+                err = q[j] - t[j];
+            // Casts keep the 64-bit values matched to the format on every ABI;
+            // s[j] is this thread's input (gIn[j] would ignore the thread offset).
+            vlog_error( "\nERROR: %sD: %llu ulp error at %.13la: *%lld vs. %lld\n", name, (unsigned long long) err, s[j], (long long) t[j], (long long) q[j] );
+            ret = -1;
+            goto exit;
+        }
+
+
+        // Remaining vector indices: the result must equal the negated reference
+        for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ )
+        {
+            q = out[k];
+            // If we aren't getting the correctly rounded result
+            if( -t[j] != q[j] )
+            {
+                if( ftz )
+                {
+                    if( IsDoubleSubnormal( s[j]))
+                    {
+                        int64_t correct = -dfunc.i_f( +0.0f );
+                        int64_t correct2 = -dfunc.i_f( -0.0f );
+                        if( correct == q[j] || correct2 == q[j] )
+                            continue;
+                    }
+                }
+
+                cl_ulong err = -t[j] - q[j];
+                if( q[j] > -t[j] )
+                    err = q[j] + t[j];
+                vlog_error( "\nERROR: %sD%s: %llu ulp error at %.13la: *%lld vs. %lld\n", name, sizeNames[k], (unsigned long long) err, s[j], (long long) -t[j], (long long) q[j] );
+                ret = -1;
+                goto exit;
+            }
+        }
+
+    }
+
+exit:
+    // Unmap on both the success and the mismatch path so that a failing
+    // comparison does not leave the output buffers mapped.
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+
+
+    // Progress output, emitted only for successful jobs whose base has its
+    // low 28 bits clear
+    if( CL_SUCCESS == ret && 0 == ( base & 0x0fffffff) )
+    {
+       if (gVerboseBruteForce)
+       {
+           vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount);
+       } else
+       {
+          vlog("." );
+       }
+       fflush(stdout);
+    }
+
+    return ret;
+}
+
+
+
+

diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp
deleted file mode 100644
index 00e65a2..0000000
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ /dev/null

@@ -1,508 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-// Assembles the OpenCL C source of the "math_kernel<size>" test kernel for
-// one vector size and hands it to MakeKernels.
-//
-// name          name of the function to call inside the kernel
-// vectorSize    index into sizeNames / sizeValues
-// kernel_count  forwarded to MakeKernels
-// k, p          forwarded to MakeKernels (kernel array and program slot)
-// relaxedMode   forwarded to MakeKernels
-//
-// Returns whatever MakeKernels returns.
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    // Generic kernel: one work-item converts one (vector) element.
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global long",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    // Kernel used when the vector width is 3: buffers are addressed as scalar
-    // arrays through vload3/vstore3, and the last work-item handles the one or
-    // two leftover elements individually, padding the unused lanes with NAN.
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global long* out, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       long3 l0 = ",
-        name,
-        "( d0 );\n"
-        "       vstore3( l0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       long3 l0 = ",
-        name,
-        "( d0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = l0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = l0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    // Default to the generic source; switch to the 3-wide variant when needed.
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    // Kernel entry-point name, e.g. "math_kernel" followed by the size suffix.
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
-// Arguments passed through ThreadPool_Do to BuildKernelFn. The struct is
-// brace-initialised positionally, so the member order must not change.
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count; // forwarded to BuildKernel as its kernel_count
-    cl_kernel **kernels; // indexed by vector size index
-    cl_program *programs; // indexed by vector size index
-    const char *nameInCode; // function name as spelled in the kernel source
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-// ThreadPool_Do callback: builds the kernels for a single vector size.
-// job_id is relative to info->offset; thread_id is not used.
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *build = (BuildKernelInfo *)p;
-    cl_uint vector_index = build->offset + job_id;
-    cl_kernel *kernels_for_size = build->kernels[vector_index];
-    cl_program *program_slot = build->programs + vector_index;
-
-    return BuildKernel(build->nameInCode, vector_index, build->kernel_count,
-                       kernels_for_size, program_slot, build->relaxedMode);
-}
-
-// Thread specific data for a worker thread. One instance per worker; the
-// array is allocated and zero-filled by the test driver.
-typedef struct ThreadInfo
-{
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-// Shared description of one test run. It is filled in by the test driver and
-// passed to every worker job through ThreadPool_Do.
-typedef struct TestInfo
-{
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    int ftz; // non-zero if running in flush to zero mode
-
-} TestInfo;
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-// Brute-force test driver for an int-returning macro applied to doubles.
-//
-// f            describes the function under test (this routine reads name,
-//              nameInCode and ftz from it)
-// d            not used by this routine
-// relaxedMode  forwarded to logFunctionInfo and to the kernel build
-//
-// Returns 0 on success, otherwise the first error code encountered. All exit
-// paths go through the cleanup code at "exit".
-int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
-{
-    TestInfo test_info;
-    cl_int error;
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-
-    // step is the distance between the bases of consecutive jobs; it is
-    // computed in 32 bits, so the division below detects wrap-around.
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ftz = f->ftz || gForceFTZ;
-
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        // Zero the slots so cleanup sees NULL for kernels never created
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    // Per-thread resources: a slice of the input buffer, a slice of each
-    // output buffer, and a private command queue.
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-    }
-
-    // Init the kernels
-    // (one ThreadPool_Do job per vector size in [gMin, gMax))
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    // (reached on success and on every failure above)
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_double);
-    cl_uint scale = job->scale;
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    dptr dfunc = job->f->dfunc;
-    int ftz = job->ftz;
-    cl_int error;
-    const char *name = job->f->name;
-
-    Force64BitFPUPrecision();
-
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_long *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_long *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-
-    // Write the new values to the input array
-    cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        p[j] = DoubleFromUInt32(base + j * scale);
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        return error;
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            return error;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            return error;
-        }
-
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            return error;
-        }
-
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-
-    // Calculate the correctly rounded reference result
-    cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
-    cl_double *s = (cl_double *)p;
-    for (size_t j = 0; j < buffer_elements; j++) r[j] = dfunc.i_f(s[j]);
-
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_long *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Verify data
-    cl_long *t = (cl_long *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        cl_long *q = out[0];
-
-        // If we aren't getting the correctly rounded result
-        if (gMinVectorSizeIndex == 0 && t[j] != q[j])
-        {
-            // If we aren't getting the correctly rounded result
-            if (ftz)
-            {
-                if (IsDoubleSubnormal(s[j]))
-                {
-                    cl_long correct = dfunc.i_f(+0.0f);
-                    cl_long correct2 = dfunc.i_f(-0.0f);
-                    if (correct == q[j] || correct2 == q[j]) continue;
-                }
-            }
-
-            cl_ulong err = t[j] - q[j];
-            if (q[j] > t[j]) err = q[j] - t[j];
-            vlog_error("\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n",
-                       name, err, ((double *)gIn)[j], t[j], q[j]);
-            return -1;
-        }
-
-
-        for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
-        {
-            q = out[k];
-            // If we aren't getting the correctly rounded result
-            if (-t[j] != q[j])
-            {
-                if (ftz)
-                {
-                    if (IsDoubleSubnormal(s[j]))
-                    {
-                        int64_t correct = -dfunc.i_f(+0.0f);
-                        int64_t correct2 = -dfunc.i_f(-0.0f);
-                        if (correct == q[j] || correct2 == q[j]) continue;
-                    }
-                }
-
-                cl_ulong err = -t[j] - q[j];
-                if (q[j] > -t[j]) err = q[j] + t[j];
-                vlog_error(
-                    "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n",
-                    name, sizeNames[k], err, ((double *)gIn)[j], -t[j], q[j]);
-                return -1;
-            }
-        }
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-
-
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-
-    return CL_SUCCESS;
-}

diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp
deleted file mode 100644
index 3c1717a..0000000
--- a/test_conformance/math_brute_force/macro_unary_float.cpp
+++ /dev/null

@@ -1,523 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global int",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global int* out, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( i0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       int3 i0;\n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], 0xdead, 0xdead ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], 0xdead ); \n"
-        "               break;\n"
-        "       }\n"
-        "       i0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-}
-
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-{
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
-{
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    int ftz; // non-zero if running in flush to zero mode
-
-} TestInfo;
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
-{
-    TestInfo test_info;
-    cl_int error;
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_float));
-
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ftz =
-        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_float),
-            test_info.subBufferSize * sizeof(cl_float)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-    }
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_float);
-    cl_uint scale = job->scale;
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    fptr func = job->f->func;
-    int ftz = job->ftz;
-    cl_int error = CL_SUCCESS;
-    cl_int ret = CL_SUCCESS;
-    const char *name = job->f->name;
-
-    int signbit_test = 0;
-    if (!strcmp(name, "signbit")) signbit_test = 1;
-
-#define ref_func(s) (signbit_test ? func.i_f_f(s) : func.i_f(s))
-
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_int *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_int *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-
-    // Init input array
-    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++) p[j] = base + j * scale;
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        return error;
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            return error;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            return error;
-        }
-
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            return error;
-        }
-
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-
-    // Calculate the correctly rounded reference result
-    cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
-    float *s = (float *)p;
-    for (size_t j = 0; j < buffer_elements; j++) r[j] = ref_func(s[j]);
-
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_int *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Verify data
-    cl_int *t = (cl_int *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_int *q = out[0];
-
-            // If we aren't getting the correctly rounded result
-            if (gMinVectorSizeIndex == 0 && t[j] != q[j])
-            {
-                // If we aren't getting the correctly rounded result
-                if (ftz)
-                {
-                    if (IsFloatSubnormal(s[j]))
-                    {
-                        int correct = ref_func(+0.0f);
-                        int correct2 = ref_func(-0.0f);
-                        if (correct == q[j] || correct2 == q[j]) continue;
-                    }
-                }
-
-                uint32_t err = t[j] - q[j];
-                if (q[j] > t[j]) err = q[j] - t[j];
-                vlog_error("\nERROR: %s: %d ulp error at %a: *%d vs. %d\n",
-                           name, err, ((float *)s)[j], t[j], q[j]);
-                error = -1;
-                goto exit;
-            }
-
-
-            for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex;
-                 k++)
-            {
-                q = out[k];
-                // If we aren't getting the correctly rounded result
-                if (-t[j] != q[j])
-                {
-                    if (ftz)
-                    {
-                        if (IsFloatSubnormal(s[j]))
-                        {
-                            int correct = -ref_func(+0.0f);
-                            int correct2 = -ref_func(-0.0f);
-                            if (correct == q[j] || correct2 == q[j]) continue;
-                        }
-                    }
-
-                    uint32_t err = -t[j] - q[j];
-                    if (q[j] > -t[j]) err = q[j] + t[j];
-                    vlog_error(
-                        "\nERROR: %s%s: %d ulp error at %a: *%d vs. %d\n", name,
-                        sizeNames[k], err, ((float *)s)[j], -t[j], q[j]);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-    }
-
-exit:
-    ret = error;
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-
-    if ((error = clFlush(tinfo->tQueue)))
-    {
-        vlog("clFlush 3 failed\n");
-        return error;
-    }
-
-
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-
-    return ret;
-}

diff --git a/test_conformance/math_brute_force/mad.cpp b/test_conformance/math_brute_force/mad.cpp
new file mode 100644
index 0000000..5eeae35
--- /dev/null
+++ b/test_conformance/math_brute_force/mad.cpp

@@ -0,0 +1,1128 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <string.h>
+#include "FunctionList.h"
+
+int TestFunc_mad(const Func *f, MTdata); // single-precision driver, defined later in this file
+int TestFunc_mad_Double(const Func *f, MTdata); // double-precision driver, defined later in this file
+
+extern const vtbl _mad_tbl = { "ternary", TestFunc_mad, TestFunc_mad_Double }; // NOTE(review): presumably { type name, float test, double test } -- confirm against the vtbl declaration
+
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); // float kernel builder, defined directly below
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); // double kernel builder, defined below
+
+// Builds the single-precision test program for one vector size.
+//
+//   name        function name spliced into the kernel source as the call under test
+//   vectorSize  index into sizeNames / sizeValues (an index, not an element count)
+//   k, p        forwarded to MakeKernel, which receives the kernel and program
+//
+// Returns whatever MakeKernel returns.
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+{
+    const char *const suffix = sizeNames[vectorSize];
+
+    // Generic form: one work-item computes one (possibly vector) element.
+    const char *genericSource[] = {
+        "__kernel void math_kernel", suffix,
+        "( __global float", suffix,
+        "* out, __global float", suffix,
+        "* in1, __global float", suffix,
+        "* in2,  __global float", suffix,
+        "* in3 )\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name, "( in1[i], in2[i], in3[i] );\n"
+        "}\n"
+    };
+
+    // 3-component form: the buffers are addressed as plain floats through
+    // vload3/vstore3. The last work-item handles the one or two left-over
+    // elements, padding the unused lanes with NAN.
+    const char *float3Source[] = {
+        "__kernel void math_kernel", suffix,
+        "( __global float* out, __global float* in, __global float* in2, __global float* in3)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       float3 f0 = vload3( 0, in + 3 * i );\n"
+        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
+        "       float3 f2 = vload3( 0, in3 + 3 * i );\n"
+        "       f0 = ", name, "( f0, f1, f2 );\n"
+        "       vstore3( f0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       float3 f0, f1, f2;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+        "               f2 = (float3)( in3[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       f0 = ", name, "( f0, f1, f2 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = f0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = f0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+
+    // Kernel entry point name: "math_kernel" followed by the vector-size suffix.
+    char kernelName[32];
+    snprintf( kernelName, sizeof( kernelName ) - 1, "math_kernel%s", suffix );
+
+    if( 3 == sizeValues[vectorSize] )
+    {
+        const size_t pieces = sizeof( float3Source ) / sizeof( float3Source[0] );
+        return MakeKernel( float3Source, (cl_uint) pieces, kernelName, k, p );
+    }
+
+    const size_t pieces = sizeof( genericSource ) / sizeof( genericSource[0] );
+    return MakeKernel( genericSource, (cl_uint) pieces, kernelName, k, p );
+}
+
+// Builds the double-precision test program for one vector size.
+//
+//   name        function name spliced into the kernel source as the call under test
+//   vectorSize  index into sizeNames / sizeValues (an index, not an element count)
+//   k, p        forwarded to MakeKernel, which receives the kernel and program
+//
+// Every program starts by enabling cl_khr_fp64. Returns whatever MakeKernel
+// returns.
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+{
+    const char *const suffix = sizeNames[vectorSize];
+
+    // Generic form: one work-item computes one (possibly vector) element.
+    const char *genericSource[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", suffix,
+        "( __global double", suffix,
+        "* out, __global double", suffix,
+        "* in1, __global double", suffix,
+        "* in2,  __global double", suffix,
+        "* in3 )\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name, "( in1[i], in2[i], in3[i] );\n"
+        "}\n"
+    };
+
+    // 3-component form: the buffers are addressed as plain doubles through
+    // vload3/vstore3. The last work-item handles the one or two left-over
+    // elements, padding the unused lanes with NAN.
+    const char *double3Source[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", suffix,
+        "( __global double* out, __global double* in, __global double* in2, __global double* in3)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       double3 d0 = vload3( 0, in + 3 * i );\n"
+        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
+        "       double3 d2 = vload3( 0, in3 + 3 * i );\n"
+        "       d0 = ", name, "( d0, d1, d2 );\n"
+        "       vstore3( d0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       double3 d0, d1, d2;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
+        "               d2 = (double3)( in3[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       d0 = ", name, "( d0, d1, d2 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = d0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = d0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+
+    // Kernel entry point name: "math_kernel" followed by the vector-size suffix.
+    char kernelName[32];
+    snprintf( kernelName, sizeof( kernelName ) - 1, "math_kernel%s", suffix );
+
+    if( 3 == sizeValues[vectorSize] )
+    {
+        const size_t pieces = sizeof( double3Source ) / sizeof( double3Source[0] );
+        return MakeKernel( double3Source, (cl_uint) pieces, kernelName, k, p );
+    }
+
+    const size_t pieces = sizeof( genericSource ) / sizeof( genericSource[0] );
+    return MakeKernel( genericSource, (cl_uint) pieces, kernelName, k, p );
+}
+
+// Arguments handed to the kernel-building thread-pool callbacks.
+// Field order matters: instances are created with positional aggregate
+// initialisation.
+struct BuildKernelInfo
+{
+    // Index of the first vector size to build; job N handles offset + N.
+    cl_uint offset;
+    // Destination array for the built kernels, indexed by vector-size index.
+    cl_kernel *kernels;
+    // Destination array for the built programs, indexed by vector-size index.
+    cl_program *programs;
+    // Function name passed to the kernel builder as the call under test.
+    const char *nameInCode;
+};
+
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+
+// Thread-pool job: builds the float kernel for vector-size index
+// (offset + job_id). p must point to a BuildKernelInfo; thread_id is unused.
+// Returns the result of BuildKernel.
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *const job = (BuildKernelInfo *) p;
+    const cl_uint sizeIndex = job_id + job->offset;
+
+    cl_kernel *const kernelSlot = &job->kernels[sizeIndex];
+    cl_program *const programSlot = &job->programs[sizeIndex];
+    return BuildKernel( job->nameInCode, sizeIndex, kernelSlot, programSlot );
+}
+
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+
+// Thread-pool job: builds the double kernel for vector-size index
+// (offset + job_id). p must point to a BuildKernelInfo; thread_id is unused.
+// Returns the result of BuildKernelDouble.
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *const job = (BuildKernelInfo *) p;
+    const cl_uint sizeIndex = job_id + job->offset;
+
+    cl_kernel *const kernelSlot = &job->kernels[sizeIndex];
+    cl_program *const programSlot = &job->programs[sizeIndex];
+    return BuildKernelDouble( job->nameInCode, sizeIndex, kernelSlot, programSlot );
+}
+
+// Single-precision driver for the ternary function described by f.
+//
+// Steps:
+//   1. Build one kernel per vector-size index in
+//      [gMinVectorSizeIndex, gMaxVectorSizeIndex).
+//   2. Repeatedly fill the three input buffers with random 32-bit patterns
+//      drawn from d, run every kernel, compute the host reference with
+//      f->func.f_fff and read the device results back.
+//   3. Optionally (gMeasureTimes) time each kernel and report clocks/element.
+//
+// The device results are NOT compared against the reference: verification is
+// deliberately disabled (see the commented-out block below), so maxError and
+// the maxErrorVal* values are always reported as zero.
+//
+// Returns the last error code recorded (0 when every call succeeded).
+// Every failure after the kernels have been built leaves through the exit
+// label so that the kernels and programs are always released.
+int TestFunc_mad(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+//    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    float maxErrorVal = 0.0f;
+    float maxErrorVal2 = 0.0f;
+    float maxErrorVal3 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( float );
+
+    // Wimpy mode takes larger strides through the 2^32 iteration space.
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    // Init the kernels
+    // A failure here returns directly: kernels[] / programs[] are not
+    // initialised beforehand, so it is unknown which entries could be released.
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        return error;
+/*
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+*/
+
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        uint32_t *p3 = (uint32_t *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            p[j] = genrand_int32(d);
+            p2[j] = genrand_int32(d);
+            p3[j] = genrand_int32(d);
+        }
+        // From here on the kernels exist, so errors must go through exit
+        // (not return) to avoid leaking them.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            goto exit;
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            // Despite its name, localCount is passed as the global work size.
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        // A flush failure is only logged; the blocking reads below overwrite
+        // error and report any real problem.
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        //Calculate the correctly rounded reference result
+        float *r = (float *)gOut_Ref;
+        float *s = (float *)gIn;
+        float *s2 = (float *)gIn2;
+        float *s3 = (float *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            r[j] = (float) f->func.f_fff( s[j], s2[j], s3[j] );
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        if( gSkipCorrectnessTesting )
+            break;
+
+        //Verify data  -- Commented out on purpose. no verification possible. MAD is a random number generator.
+/*
+        uint32_t *t = gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = gOut[k];
+
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    float test = ((float*) q)[j];
+                    double correct = f->func.f_fff( s[j], s2[j], s3[j] );
+                    float err = Ulp_Error( test, correct );
+                    int fail = ! (fabsf(err) <= f->float_ulps);
+
+                    if( fail && ftz )
+                    {
+                        // retry per section 6.5.3.2
+                        if( IsFloatSubnormal(correct) )
+                        { // look at me,
+                            fail = fail && ( test != 0.0f );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if( fail && IsFloatSubnormal( s[j] ) )
+                        { // look at me,
+                            double correct2 = f->func.f_fff( 0.0, s2[j], s3[j] );
+                            double correct3 = f->func.f_fff( -0.0, s2[j], s3[j] );
+                            float err2 = Ulp_Error( test, correct2  );
+                            float err3 = Ulp_Error( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) )
+                            { // look at me now,
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+
+                            //try with first two args as zero
+                            if( IsFloatSubnormal( s2[j] ) )
+                            { // its fun to have fun,
+                                correct2 = f->func.f_fff( 0.0, 0.0, s3[j] );
+                                correct3 = f->func.f_fff( -0.0, 0.0, s3[j] );
+                                double correct4 = f->func.f_fff( 0.0, -0.0, s3[j] );
+                                double correct5 = f->func.f_fff( -0.0, -0.0, s3[j] );
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                float err4 = Ulp_Error( test, correct4  );
+                                float err5 = Ulp_Error( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) &&
+                                                 (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) ||
+                                    IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+
+                                if( IsFloatSubnormal( s3[j] )  )
+                                { // but you have to know how!
+                                    correct2 = f->func.f_fff( 0.0, 0.0, 0.0f );
+                                    correct3 = f->func.f_fff( -0.0, 0.0, 0.0f );
+                                    correct4 = f->func.f_fff( 0.0, -0.0, 0.0f );
+                                    correct5 = f->func.f_fff( -0.0, -0.0, 0.0f );
+                                    double correct6 = f->func.f_fff( 0.0, 0.0, -0.0f );
+                                    double correct7 = f->func.f_fff( -0.0, 0.0, -0.0f );
+                                    double correct8 = f->func.f_fff( 0.0, -0.0, -0.0f );
+                                    double correct9 = f->func.f_fff( -0.0, -0.0, -0.0f );
+                                    err2 = Ulp_Error( test, correct2  );
+                                    err3 = Ulp_Error( test, correct3  );
+                                    err4 = Ulp_Error( test, correct4  );
+                                    err5 = Ulp_Error( test, correct5  );
+                                    float err6 = Ulp_Error( test, correct6  );
+                                    float err7 = Ulp_Error( test, correct7  );
+                                    float err8 = Ulp_Error( test, correct8  );
+                                    float err9 = Ulp_Error( test, correct9  );
+                                    fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) &&
+                                                     (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps)) &&
+                                                     (!(fabsf(err5) <= f->float_ulps)) && (!(fabsf(err6) <= f->float_ulps)) &&
+                                                     (!(fabsf(err7) <= f->float_ulps)) && (!(fabsf(err8) <= f->float_ulps)));
+                                    if( fabsf( err2 ) < fabsf(err ) )
+                                        err = err2;
+                                    if( fabsf( err3 ) < fabsf(err ) )
+                                        err = err3;
+                                    if( fabsf( err4 ) < fabsf(err ) )
+                                        err = err4;
+                                    if( fabsf( err5 ) < fabsf(err ) )
+                                        err = err5;
+                                    if( fabsf( err6 ) < fabsf(err ) )
+                                        err = err6;
+                                    if( fabsf( err7 ) < fabsf(err ) )
+                                        err = err7;
+                                    if( fabsf( err8 ) < fabsf(err ) )
+                                        err = err8;
+                                    if( fabsf( err9 ) < fabsf(err ) )
+                                        err = err9;
+
+                                    // retry per section 6.5.3.4
+                                    if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps )  ||
+                                        IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps )  ||
+                                        IsFloatResultSubnormal( correct6, f->float_ulps ) || IsFloatResultSubnormal(correct7, f->float_ulps )  ||
+                                        IsFloatResultSubnormal(correct8, f->float_ulps ) || IsFloatResultSubnormal( correct9, f->float_ulps )  )
+                                    {
+                                        fail = fail && ( test != 0.0f);
+                                        if( ! fail )
+                                            err = 0.0f;
+                                    }
+                                }
+                            }
+                            else if( IsFloatSubnormal( s3[j] ) )
+                            {
+                                correct2 = f->func.f_fff( 0.0, s2[j], 0.0 );
+                                correct3 = f->func.f_fff( -0.0, s2[j], 0.0 );
+                                double correct4 = f->func.f_fff( 0.0,  s2[j], -0.0 );
+                                double correct5 = f->func.f_fff( -0.0, s2[j], -0.0 );
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                float err4 = Ulp_Error( test, correct4  );
+                                float err5 = Ulp_Error( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) &&
+                                                 (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps )  ||
+                                    IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsFloatSubnormal( s2[j] ) )
+                        {
+                            double correct2 = f->func.f_fff( s[j], 0.0, s3[j] );
+                            double correct3 = f->func.f_fff( s[j], -0.0, s3[j] );
+                            float err2 = Ulp_Error( test, correct2  );
+                            float err3 = Ulp_Error( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( IsFloatResultSubnormal(correct2, f->float_ulps )  || IsFloatResultSubnormal(correct3, f->float_ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+
+                            //try with second two args as zero
+                            if( IsFloatSubnormal( s3[j] ) )
+                            {
+                                correct2 = f->func.f_fff( s[j], 0.0, 0.0 );
+                                correct3 = f->func.f_fff( s[j], -0.0, 0.0 );
+                                double correct4 = f->func.f_fff( s[j], 0.0, -0.0 );
+                                double correct5 = f->func.f_fff( s[j], -0.0, -0.0 );
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                float err4 = Ulp_Error( test, correct4  );
+                                float err5 = Ulp_Error( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) &&
+                                                 (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) ||
+                                    IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsFloatSubnormal(s3[j]) )
+                        {
+                            double correct2 = f->func.f_fff( s[j], s2[j], 0.0 );
+                            double correct3 = f->func.f_fff( s[j], s2[j], -0.0 );
+                            float err2 = Ulp_Error( test, correct2  );
+                            float err3 = Ulp_Error( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                        maxErrorVal2 = s2[j];
+                        maxErrorVal3 = s3[j];
+                    }
+
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a, %a}: *%a vs. %a\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((float*) gOut_Ref)[j], test );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+*/
+        // Progress indicator
+        if( 0 == (i & 0x0fffffff) )
+        {
+            vlog("." );
+            fflush(stdout);
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "pass" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        uint32_t *p3 = (uint32_t *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            p[j] = genrand_int32(d);
+            p2[j] = genrand_int32(d);
+            p3[j] = genrand_int32(d);
+        }
+        // Errors go through exit so the kernels and programs are released.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            // Despite its name, localCount is passed as the global work size.
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+
+            // Time PERF_LOOP_COUNT launches; keep both the sum and the minimum.
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report the average instead of the best run when requested.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 );
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+// Brute-force driver for the double-precision three-argument (mad-style) kernels.
+//
+// Builds one kernel per vector size in [gMinVectorSizeIndex, gMaxVectorSizeIndex),
+// then repeatedly fills the three input buffers with random doubles, runs every
+// kernel, computes a host reference through f->dfunc.f_fff and reads the device
+// results back.  Comparing the results is deliberately disabled (see the
+// commented-out block in the main loop), so maxError / maxErrorVal* are never
+// updated and the summary line prints their zero-initialised values.
+// When gMeasureTimes is set, each kernel is additionally timed over
+// PERF_LOOP_COUNT runs.
+//
+// f: function table entry; name, nameInCode and dfunc.f_fff are used here.
+// d: random generator state handed to genrand_int32().
+// Returns the value of `error` after the last build/OpenCL call that assigned
+// it: 0 when everything succeeded, otherwise the failing call's error code.
+int TestFunc_mad_Double(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    // Only the (disabled) verification code writes the max-error trackers below.
+    float maxError = 0.0f;
+//    int ftz = f->ftz || gForceFTZ;
+    double maxErrorVal = 0.0f;
+    double maxErrorVal2 = 0.0f;
+    double maxErrorVal3 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    // Outer-loop stride: one buffer's worth of doubles per pass, or a stride
+    // derived from gWimpyReductionFactor in wimpy mode.
+    uint64_t step = bufferSize / sizeof( double );
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                &build_info ) ))
+    {
+        // NOTE(review): some kernels/programs may already have been created when
+        // the build fails; nothing is released on this path.  Left unchanged
+        // because it is not visible here which entries are valid -- confirm.
+        return error;
+    }
+/*
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+*/
+
+    // From here on every failure must leave through `exit:` so that the kernels
+    // and programs built above are released.
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        double *p3 = (double *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+            p3[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;    // was `return error`, which leaked kernels/programs
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;    // was `return error`, which leaked kernels/programs
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            goto exit;    // was `return error`, which leaked kernels/programs
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        // A clFlush failure is only logged; `error` is overwritten by the next call.
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        //Calculate the correctly rounded reference result
+        double *r = (double *)gOut_Ref;
+        double *s = (double *)gIn;
+        double *s2 = (double *)gIn2;
+        double *s3 = (double *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            r[j] = (double) f->dfunc.f_fff( s[j], s2[j], s3[j] );
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        // With correctness testing skipped, a single pass is enough.
+        if( gSkipCorrectnessTesting )
+            break;
+
+        // NOTE(review): before re-enabling the block below, note that it uses `ftz`
+        // (its declaration is commented out above) and that its eight-way check
+        // for three subnormal inputs tests err5 twice and never tests err9.
+        //Verify data  -- Commented out on purpose. no verification possible. MAD is a random number generator.
+/*
+        uint64_t *t = gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint64_t *q = gOut[k];
+
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    double test = ((double*) q)[j];
+                    long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
+                    int fail = ! (fabsf(err) <= f->double_ulps);
+
+                    if( fail && ftz )
+                    {
+                        // retry per section 6.5.3.2
+                        if( IsDoubleResultSubnormal(correct, f->double_ulps) )
+                        { // look at me,
+                            fail = fail && ( test != 0.0f );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if( fail && IsDoubleSubnormal( s[j] ) )
+                        { // look at me,
+                            long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] );
+                            long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            { // look at me now,
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+
+                            //try with first two args as zero
+                            if( IsDoubleSubnormal( s2[j] ) )
+                            { // its fun to have fun,
+                                correct2 = f->dfunc.f_fff( 0.0, 0.0, s3[j] );
+                                correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] );
+                                long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] );
+                                long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ||
+                                    IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+
+                                if( IsDoubleSubnormal( s3[j] )  )
+                                { // but you have to know how!
+                                    correct2 = f->dfunc.f_fff( 0.0, 0.0, 0.0f );
+                                    correct3 = f->dfunc.f_fff( -0.0, 0.0, 0.0f );
+                                    correct4 = f->dfunc.f_fff( 0.0, -0.0, 0.0f );
+                                    correct5 = f->dfunc.f_fff( -0.0, -0.0, 0.0f );
+                                    long double correct6 = f->dfunc.f_fff( 0.0, 0.0, -0.0f );
+                                    long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f );
+                                    long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f );
+                                    long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f );
+                                    err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                    err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                    err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                    err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                    float err6 = Bruteforce_Ulp_Error_Double( test, correct6  );
+                                    float err7 = Bruteforce_Ulp_Error_Double( test, correct7  );
+                                    float err8 = Bruteforce_Ulp_Error_Double( test, correct8  );
+                                    float err9 = Bruteforce_Ulp_Error_Double( test, correct9  );
+                                    fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                     (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) &&
+                                                     (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) &&
+                                                     (!(fabsf(err7) <= f->double_ulps)) && (!(fabsf(err8) <= f->double_ulps)));
+                                    if( fabsf( err2 ) < fabsf(err ) )
+                                        err = err2;
+                                    if( fabsf( err3 ) < fabsf(err ) )
+                                        err = err3;
+                                    if( fabsf( err4 ) < fabsf(err ) )
+                                        err = err4;
+                                    if( fabsf( err5 ) < fabsf(err ) )
+                                        err = err5;
+                                    if( fabsf( err6 ) < fabsf(err ) )
+                                        err = err6;
+                                    if( fabsf( err7 ) < fabsf(err ) )
+                                        err = err7;
+                                    if( fabsf( err8 ) < fabsf(err ) )
+                                        err = err8;
+                                    if( fabsf( err9 ) < fabsf(err ) )
+                                        err = err9;
+
+                                    // retry per section 6.5.3.4
+                                    if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps )  ||
+                                        IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps )  ||
+                                        IsDoubleResultSubnormal( correct6, f->double_ulps ) || IsDoubleResultSubnormal( correct7, f->double_ulps )  ||
+                                        IsDoubleResultSubnormal( correct8, f->double_ulps ) || IsDoubleResultSubnormal( correct9, f->double_ulps )  )
+                                    {
+                                        fail = fail && ( test != 0.0f);
+                                        if( ! fail )
+                                            err = 0.0f;
+                                    }
+                                }
+                            }
+                            else if( IsDoubleSubnormal( s3[j] ) )
+                            {
+                                correct2 = f->dfunc.f_fff( 0.0, s2[j], 0.0 );
+                                correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 );
+                                long double correct4 = f->dfunc.f_fff( 0.0,  s2[j], -0.0 );
+                                long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps )  ||
+                                    IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsDoubleSubnormal( s2[j] ) )
+                        {
+                            long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] );
+                            long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps )  || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+
+                            //try with second two args as zero
+                            if( IsDoubleSubnormal( s3[j] ) )
+                            {
+                                correct2 = f->dfunc.f_fff( s[j], 0.0, 0.0 );
+                                correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 );
+                                long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 );
+                                long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ||
+                                    IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsDoubleSubnormal(s3[j]) )
+                        {
+                            long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 );
+                            long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                        maxErrorVal2 = s2[j];
+                        maxErrorVal3 = s3[j];
+                    }
+
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %sD%s: %f ulp error at {%a, %a, %a}: *%a vs. %a\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((double*) gOut_Ref)[j], test );
+ error = -1;
+ goto exit;
+                    }
+                }
+            }
+        }
+*/
+        // Progress indicator: print a dot whenever the low 28 bits of i are zero.
+        if( 0 == (i & 0x0fffffff) )
+        {
+            vlog("." );
+            fflush(stdout);
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "pass" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        double *p3 = (double *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+            p3[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;    // was `return error`, which leaked kernels/programs
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;    // was `return error`, which leaked kernels/programs
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            goto exit;    // was `return error`, which leaked kernels/programs
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+
+            // Track both the sum (for the average) and the fastest single run.
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report the average instead of the best run when requested.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        // j is already >= gMaxVectorSizeIndex once the loop above completes, so
+        // this padding loop prints nothing in practice.
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 );
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+
+

diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp
deleted file mode 100644
index a32cd5a..0000000
--- a/test_conformance/math_brute_force/mad_double.cpp
+++ /dev/null

@@ -1,301 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2,  __global double",
-                        sizeNames[vectorSize],
-                        "* in3 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], in3[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2, "
-        "__global double* in3)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       double3 d2 = vload3( 0, in3 + 3 * i );\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, d2 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       double3 d2;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               d2 = (double3)( in3[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, d2 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-}
-
-int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0f;
-    double maxErrorVal2 = 0.0f;
-    double maxErrorVal3 = 0.0f;
-    uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE);
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        double *p = (double *)gIn;
-        double *p2 = (double *)gIn2;
-        double *p3 = (double *)gIn3;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-        {
-            p[j] = DoubleFromUInt32(genrand_int32(d));
-            p2[j] = DoubleFromUInt32(genrand_int32(d));
-            p3[j] = DoubleFromUInt32(genrand_int32(d));
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
-            return error;
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn3, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
-                                        &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        double *r = (double *)gOut_Ref;
-        double *s = (double *)gIn;
-        double *s2 = (double *)gIn2;
-        double *s3 = (double *)gIn3;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-            r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data -- No verification possible.
-        // MAD is a random number generator.
-        if (0 == (i & 0x0fffffff))
-        {
-            vlog(".");
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
-             maxErrorVal3);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp
deleted file mode 100644
index 095a22f..0000000
--- a/test_conformance/math_brute_force/mad_float.cpp
+++ /dev/null

@@ -1,300 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2,  __global float",
-                        sizeNames[vectorSize],
-                        "* in3 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], in3[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2, "
-        "__global float* in3)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       float3 f2 = vload3( 0, in3 + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, f2 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       float3 f2;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               f2 = (float3)( in3[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, f2 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-}
-
-int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    float maxErrorVal = 0.0f;
-    float maxErrorVal2 = 0.0f;
-    float maxErrorVal3 = 0.0f;
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        cl_uint *p = (cl_uint *)gIn;
-        cl_uint *p2 = (cl_uint *)gIn2;
-        cl_uint *p3 = (cl_uint *)gIn3;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            p[j] = genrand_int32(d);
-            p2[j] = genrand_int32(d);
-            p3[j] = genrand_int32(d);
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
-            return error;
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn3, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
-                                        &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        float *r = (float *)gOut_Ref;
-        float *s = (float *)gIn;
-        float *s2 = (float *)gIn2;
-        float *s3 = (float *)gIn3;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            r[j] = (float)f->func.f_fff(s[j], s2[j], s3[j]);
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data -- No verification possible.
-        // MAD is a random number generator.
-        if (0 == (i & 0x0fffffff))
-        {
-            vlog(".");
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
-             maxErrorVal3);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index d6c2f11..1e33b95 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,125 +13,128 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-
-#include "function_list.h"
-#include "sleep.h"
-#include "utility.h"
+#include "Utility.h"
 
 #include <cstdio>
 #include <cstdlib>
-#include <ctime>
 #include <string>
-#include <vector>
+#include <time.h>
+#include "FunctionList.h"
+#include "Sleep.h"
 
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
 #include "harness/parseParameters.h"
-#include "harness/typeWrappers.h"
 
-#if defined(__APPLE__)
-#include <sys/sysctl.h>
-#include <sys/mman.h>
-#include <libgen.h>
-#include <sys/time.h>
-#elif defined(__linux__)
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <linux/sysctl.h>
-#include <sys/param.h>
+#if defined( __APPLE__ )
+    #include <sys/sysctl.h>
+    #include <sys/mman.h>
+    #include <libgen.h>
+    #include <sys/time.h>
+#elif defined( __linux__ )
+    #include <unistd.h>
+    #include <sys/syscall.h>
+    #include <linux/sysctl.h>
+    #include <sys/param.h>
 #endif
 
-#if defined(__linux__) || (defined WIN32 && defined __MINGW32__)
+#if defined (__linux__) || (defined WIN32 && defined __MINGW32__)
 #include <sys/param.h>
 #endif
 
 #include "harness/testHarness.h"
 
-#define kPageSize 4096
-#define DOUBLE_REQUIRED_FEATURES                                               \
-    (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO                  \
-     | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)
+#define kPageSize           4096
+#define DOUBLE_REQUIRED_FEATURES    ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM  )
 
-static std::vector<const char *> gTestNames;
-static char appName[MAXPATHLEN] = "";
-cl_device_id gDevice = NULL;
-cl_context gContext = NULL;
+const char      **gTestNames = NULL;
+unsigned int    gTestNameCount = 0;
+char            appName[ MAXPATHLEN ] = "";
+cl_device_id    gDevice = NULL;
+cl_context      gContext = NULL;
 cl_command_queue gQueue = NULL;
-static int32_t gStartTestNumber = -1;
-static int32_t gEndTestNumber = -1;
-int gSkipCorrectnessTesting = 0;
-static int gStopOnError = 0;
-static bool gSkipRestOfTests;
-int gForceFTZ = 0;
-int gWimpyMode = 0;
-static int gHasDouble = 0;
-static int gTestFloat = 1;
-// This flag should be 'ON' by default and it can be changed through the command
-// line arguments.
-static int gTestFastRelaxed = 1;
-/*This flag corresponds to defining if the implementation has Derived Fast
-  Relaxed functions. The spec does not specify ULP for derived function.  The
-  derived functions are composed of base functions which are tested for ULP,
-  thus when this flag is enabled, Derived functions will not be tested for ULP,
-  as per table 7.1 of OpenCL 2.0 spec. Since there is no way of quering the
-  device whether it is a derived or non-derived implementation according to
-  OpenCL 2.0 spec then it has to be changed through a command line argument.
+static int32_t  gStartTestNumber;
+static int32_t  gEndTestNumber;
+int             gSkipCorrectnessTesting = 0;
+int             gStopOnError = 0;
+static bool     gSkipRestOfTests;
+#if defined( __APPLE__ )
+int             gMeasureTimes = 1;
+#else
+int             gMeasureTimes = 0;
+#endif
+int             gReportAverageTimes = 0;
+int             gForceFTZ = 0;
+int             gWimpyMode = 0;
+int             gHasDouble = 0;
+int             gTestFloat = 1;
+//This flag should be 'ON' by default and it can be changed through the command line arguments.
+volatile int             gTestFastRelaxed = 1;
+/*This flag corresponds to defining if the implementation has Derived Fast Relaxed functions.
+  The spec does not specify ULP for derived function.  The derived functions are composed of base functions which are tested for ULP, thus when this flag is enabled,
+  Derived functions will not be tested for ULP, as per table 7.1 of OpenCL 2.0 spec.
+  Since there is no way of quering the device whether it is a derived or non-derived implementation according to OpenCL 2.0 spec then it has to be changed through a command line argument.
 */
-int gFastRelaxedDerived = 1;
-static int gToggleCorrectlyRoundedDivideSqrt = 0;
-int gDeviceILogb0 = 1;
-int gDeviceILogbNaN = 1;
-int gCheckTininessBeforeRounding = 1;
-int gIsInRTZMode = 0;
-uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
-uint32_t gMinVectorSizeIndex = 0;
-void *gIn = NULL;
-void *gIn2 = NULL;
-void *gIn3 = NULL;
-void *gOut_Ref = NULL;
-void *gOut[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
-void *gOut_Ref2 = NULL;
-void *gOut2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
-cl_mem gInBuffer = NULL;
-cl_mem gInBuffer2 = NULL;
-cl_mem gInBuffer3 = NULL;
-cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
-cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
-static MTdata gMTdata;
+int             gFastRelaxedDerived = 1;
+int             gToggleCorrectlyRoundedDivideSqrt = 0;
+int             gDeviceILogb0 = 1;
+int             gDeviceILogbNaN = 1;
+int             gCheckTininessBeforeRounding = 1;
+int             gIsInRTZMode = 0;
+uint32_t        gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
+uint32_t        gMinVectorSizeIndex = 0;
+const char      *method[] = { "Best", "Average" };
+void            *gIn = NULL;
+void            *gIn2 = NULL;
+void            *gIn3 = NULL;
+void            *gOut_Ref = NULL;
+void            *gOut[VECTOR_SIZE_COUNT] = {NULL, NULL, NULL, NULL, NULL, NULL };
+void            *gOut_Ref2 = NULL;
+void            *gOut2[VECTOR_SIZE_COUNT] = {NULL, NULL, NULL, NULL, NULL, NULL };
+cl_mem          gInBuffer = NULL;
+cl_mem          gInBuffer2 = NULL;
+cl_mem          gInBuffer3 = NULL;
+cl_mem          gOutBuffer[VECTOR_SIZE_COUNT]= {NULL, NULL, NULL, NULL, NULL, NULL };
+cl_mem          gOutBuffer2[VECTOR_SIZE_COUNT]= {NULL, NULL, NULL, NULL, NULL, NULL };
+uint32_t        gComputeDevices = 0;
+uint32_t        gSimdSize = 1;
+uint32_t        gDeviceFrequency = 0;
+static MTdata   gMTdata;
 cl_device_fp_config gFloatCapabilities = 0;
-int gWimpyReductionFactor = 32;
-int gVerboseBruteForce = 0;
+cl_device_fp_config gDoubleCapabilities = 0;
+int             gWimpyReductionFactor = 32;
+int             gWimpyBufferSize = BUFFER_SIZE;
+int             gVerboseBruteForce = 0;
 
-static int ParseArgs(int argc, const char **argv);
-static void PrintUsage(void);
-static void PrintFunctions(void);
-static test_status InitCL(cl_device_id device);
-static void ReleaseCL(void);
-static int InitILogbConstants(void);
-static int IsTininessDetectedBeforeRounding(void);
-static int
-IsInRTZMode(void); // expensive. Please check gIsInRTZMode global instead.
+static int ParseArgs( int argc, const char **argv );
+static void PrintUsage( void );
+static void PrintFunctions( void );
+test_status InitCL( cl_device_id device );
+static void ReleaseCL( void );
+static int InitILogbConstants( void );
+static int IsTininessDetectedBeforeRounding( void );
+static int IsInRTZMode( void );         //expensive. Please check gIsInRTZMode global instead.
 
-static int doTest(const char *name)
+
+int doTest( const char* name )
 {
-    if (gSkipRestOfTests)
+    if( gSkipRestOfTests )
     {
-        vlog("Skipping function because of an earlier error.\n");
+        vlog( "Skipping function because of an earlier error.\n" );
         return 1;
     }
 
     int error = 0;
-    const Func *func_data = NULL;
+    const Func* func_data = NULL;
 
-    for (size_t i = 0; i < functionListCount; i++)
+    for( size_t i = 0; i < functionListCount; i++ )
     {
-        const Func *const temp_func = functionList + i;
-        if (strcmp(temp_func->name, name) == 0)
+        const Func* const temp_func = functionList + i;
+        if( strcmp( temp_func->name, name ) == 0 )
         {
-            if ((gStartTestNumber != -1 && i < gStartTestNumber)
-                || i > gEndTestNumber)
+            if( i < gStartTestNumber || i > gEndTestNumber )
             {
-                vlog("Skipping function #%d\n", i);
+                vlog( "Skipping function #%d\n", i );
                 return 0;
             }
 
@@ -140,164 +143,613 @@
         }
     }
 
-    if (func_data == NULL)
+    if( func_data == NULL )
     {
-        vlog("Function '%s' doesn't exist!\n", name);
-        exit(EXIT_FAILURE);
+        vlog( "Function '%s' doesn't exist!\n", name );
+        exit( EXIT_FAILURE );
     }
 
-    if (func_data->func.p == NULL)
+    if( func_data->func.p == NULL )
     {
-        vlog("'%s' is missing implementation, skipping function.\n",
-             func_data->name);
+        vlog( "'%s' is missing implementation, skipping function.\n", func_data->name );
         return 0;
     }
 
     // if correctly rounded divide & sqrt are supported by the implementation
     // then test it; otherwise skip the test
-    if (strcmp(func_data->name, "sqrt_cr") == 0
-        || strcmp(func_data->name, "divide_cr") == 0)
+    if( strcmp( func_data->name, "sqrt_cr" ) == 0 || strcmp( func_data->name, "divide_cr" ) == 0 )
     {
-        if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
+        if( ( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ) == 0 )
         {
-            vlog("Correctly rounded divide and sqrt are not supported, "
-                 "skipping function.\n");
+            vlog( "Correctly rounded divide and sqrt are not supported, skipping function.\n" );
             return 0;
         }
     }
 
     {
         extern int my_ilogb(double);
-        if (0 == strcmp("ilogb", func_data->name))
+        if( 0 == strcmp( "ilogb", func_data->name ) )
         {
             InitILogbConstants();
         }
 
-        if (gTestFastRelaxed && func_data->relaxed)
+        if ( gTestFastRelaxed )
         {
-            if (get_device_cl_version(gDevice) > Version(1, 2))
+            if( func_data->relaxed )
             {
                 gTestCount++;
-                vlog("%3d: ", gTestCount);
-                // Test with relaxed requirements here.
-                if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
-                                                  true /* relaxed mode */))
+                vlog( "%3d: ", gTestCount );
+                if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata )  )
                 {
                     gFailCount++;
                     error++;
-                    if (gStopOnError)
+                    if( gStopOnError )
                     {
                         gSkipRestOfTests = true;
                         return error;
                     }
                 }
             }
-            else
-            {
-                vlog("Skipping reduced precision testing for device with "
-                     "version 1.2 or less\n");
-            }
         }
 
-        if (gTestFloat)
+        if( gTestFloat )
         {
+            int testFastRelaxedTmp = gTestFastRelaxed;
+            gTestFastRelaxed = 0;
+
             gTestCount++;
-            vlog("%3d: ", gTestCount);
-            // Don't test with relaxed requirements.
-            if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
-                                              false /* relaxed mode */))
+            vlog( "%3d: ", gTestCount );
+            if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata )  )
             {
                 gFailCount++;
                 error++;
-                if (gStopOnError)
+                if( gStopOnError )
                 {
+                    gTestFastRelaxed = testFastRelaxedTmp;
                     gSkipRestOfTests = true;
                     return error;
                 }
             }
+            gTestFastRelaxed = testFastRelaxedTmp;
         }
 
-        if (gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc
-            && NULL != func_data->dfunc.p)
+        if( gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc && NULL != func_data->dfunc.p )
         {
+            //Disable fast-relaxed-math for double precision floating-point
+            int testFastRelaxedTmp = gTestFastRelaxed;
+            gTestFastRelaxed = 0;
+
             gTestCount++;
-            vlog("%3d: ", gTestCount);
-            // Don't test with relaxed requirements.
-            if (func_data->vtbl_ptr->DoubleTestFunc(func_data, gMTdata,
-                                                    false /* relaxed mode*/))
+            vlog( "%3d: ", gTestCount );
+            if( func_data->vtbl_ptr->DoubleTestFunc( func_data, gMTdata )  )
             {
                 gFailCount++;
                 error++;
-                if (gStopOnError)
+                if( gStopOnError )
                 {
+                    gTestFastRelaxed = testFastRelaxedTmp;
                     gSkipRestOfTests = true;
                     return error;
                 }
             }
+
+            //Re-enable testing fast-relaxed-math mode
+            gTestFastRelaxed = testFastRelaxedTmp;
         }
     }
 
     return error;
 }
 
+int test_acos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "acos" );
+}
+int test_acosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "acosh" );
+}
+int test_acospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "acospi" );
+}
+int test_asin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "asin" );
+}
+int test_asinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "asinh" );
+}
+int test_asinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "asinpi" );
+}
+int test_atan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "atan" );
+}
+int test_atanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "atanh" );
+}
+int test_atanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "atanpi" );
+}
+int test_atan2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "atan2" );
+}
+int test_atan2pi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "atan2pi" );
+}
+int test_cbrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "cbrt" );
+}
+int test_ceil( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "ceil" );
+}
+int test_copysign( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "copysign" );
+}
+int test_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "cos" );
+}
+int test_cosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "cosh" );
+}
+int test_cospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "cospi" );
+}
+int test_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "exp" );
+}
+int test_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "exp2" );
+}
+int test_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "exp10" );
+}
+int test_expm1( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "expm1" );
+}
+int test_fabs( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fabs" );
+}
+int test_fdim( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fdim" );
+}
+int test_floor( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "floor" );
+}
+int test_fma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fma" );
+}
+int test_fmax( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fmax" );
+}
+int test_fmin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fmin" );
+}
+int test_fmod( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fmod" );
+}
+int test_fract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fract" );
+}
+int test_frexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "frexp" );
+}
+int test_hypot( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "hypot" );
+}
+int test_ilogb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "ilogb" );
+}
+int test_isequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isequal" );
+}
+int test_isfinite( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isfinite" );
+}
+int test_isgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isgreater" );
+}
+int test_isgreaterequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isgreaterequal" );
+}
+int test_isinf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isinf" );
+}
+int test_isless( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isless" );
+}
+int test_islessequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "islessequal" );
+}
+int test_islessgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "islessgreater" );
+}
+int test_isnan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isnan" );
+}
+int test_isnormal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isnormal" );
+}
+int test_isnotequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isnotequal" );
+}
+int test_isordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isordered" );
+}
+int test_isunordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isunordered" );
+}
+int test_ldexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "ldexp" );
+}
+int test_lgamma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "lgamma" );
+}
+int test_lgamma_r( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "lgamma_r" );
+}
+int test_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "log" );
+}
+int test_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "log2" );
+}
+int test_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "log10" );
+}
+int test_log1p( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "log1p" );
+}
+int test_logb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "logb" );
+}
+int test_mad( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "mad" );
+}
+int test_maxmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "maxmag" );
+}
+int test_minmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "minmag" );
+}
+int test_modf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "modf" );
+}
+int test_nan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "nan" );
+}
+int test_nextafter( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "nextafter" );
+}
+int test_pow( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "pow" );
+}
+int test_pown( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "pown" );
+}
+int test_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "powr" );
+}
+int test_remainder( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "remainder" );
+}
+int test_remquo( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "remquo" );
+}
+int test_rint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "rint" );
+}
+int test_rootn( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "rootn" );
+}
+int test_round( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "round" );
+}
+int test_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "rsqrt" );
+}
+int test_signbit( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "signbit" );
+}
+int test_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "sin" );
+}
+int test_sincos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "sincos" );
+}
+int test_sinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "sinh" );
+}
+int test_sinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "sinpi" );
+}
+int test_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "sqrt" );
+}
+int test_sqrt_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "sqrt_cr" );
+}
+int test_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "tan" );
+}
+int test_tanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "tanh" );
+}
+int test_tanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "tanpi" );
+}
+int test_trunc( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "trunc" );
+}
+int test_half_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_cos" );
+}
+int test_half_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_divide" );
+}
+int test_half_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_exp" );
+}
+int test_half_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_exp2" );
+}
+int test_half_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_exp10" );
+}
+int test_half_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_log" );
+}
+int test_half_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_log2" );
+}
+int test_half_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_log10" );
+}
+int test_half_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_powr" );
+}
+int test_half_recip( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_recip" );
+}
+int test_half_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_rsqrt" );
+}
+int test_half_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_sin" );
+}
+int test_half_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_sqrt" );
+}
+int test_half_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_tan" );
+}
+int test_add( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "add" );
+}
+int test_subtract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "subtract" );
+}
+int test_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "divide" );
+}
+int test_divide_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "divide_cr" );
+}
+int test_multiply( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "multiply" );
+}
+int test_assignment( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "assignment" );
+}
+int test_not( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "not" );
+}
 
-#define TEST_LAMBDA(name)                                                      \
-    [](cl_device_id, cl_context, cl_command_queue, int) {                      \
-        return doTest(#name);                                                  \
-    }
-
-// Redefine ADD_TEST to use TEST_LAMBDA.
-#undef ADD_TEST
-#define ADD_TEST(name)                                                         \
-    {                                                                          \
-        TEST_LAMBDA(name), #name, Version(1, 0)                                \
-    }
-
-static test_definition test_list[] = {
-    ADD_TEST(acos),          ADD_TEST(acosh),      ADD_TEST(acospi),
-    ADD_TEST(asin),          ADD_TEST(asinh),      ADD_TEST(asinpi),
-    ADD_TEST(atan),          ADD_TEST(atanh),      ADD_TEST(atanpi),
-    ADD_TEST(atan2),         ADD_TEST(atan2pi),    ADD_TEST(cbrt),
-    ADD_TEST(ceil),          ADD_TEST(copysign),   ADD_TEST(cos),
-    ADD_TEST(cosh),          ADD_TEST(cospi),      ADD_TEST(exp),
-    ADD_TEST(exp2),          ADD_TEST(exp10),      ADD_TEST(expm1),
-    ADD_TEST(fabs),          ADD_TEST(fdim),       ADD_TEST(floor),
-    ADD_TEST(fma),           ADD_TEST(fmax),       ADD_TEST(fmin),
-    ADD_TEST(fmod),          ADD_TEST(fract),      ADD_TEST(frexp),
-    ADD_TEST(hypot),         ADD_TEST(ilogb),      ADD_TEST(isequal),
-    ADD_TEST(isfinite),      ADD_TEST(isgreater),  ADD_TEST(isgreaterequal),
-    ADD_TEST(isinf),         ADD_TEST(isless),     ADD_TEST(islessequal),
-    ADD_TEST(islessgreater), ADD_TEST(isnan),      ADD_TEST(isnormal),
-    ADD_TEST(isnotequal),    ADD_TEST(isordered),  ADD_TEST(isunordered),
-    ADD_TEST(ldexp),         ADD_TEST(lgamma),     ADD_TEST(lgamma_r),
-    ADD_TEST(log),           ADD_TEST(log2),       ADD_TEST(log10),
-    ADD_TEST(log1p),         ADD_TEST(logb),       ADD_TEST(mad),
-    ADD_TEST(maxmag),        ADD_TEST(minmag),     ADD_TEST(modf),
-    ADD_TEST(nan),           ADD_TEST(nextafter),  ADD_TEST(pow),
-    ADD_TEST(pown),          ADD_TEST(powr),       ADD_TEST(remainder),
-    ADD_TEST(remquo),        ADD_TEST(rint),       ADD_TEST(rootn),
-    ADD_TEST(round),         ADD_TEST(rsqrt),      ADD_TEST(signbit),
-    ADD_TEST(sin),           ADD_TEST(sincos),     ADD_TEST(sinh),
-    ADD_TEST(sinpi),         ADD_TEST(sqrt),       ADD_TEST(sqrt_cr),
-    ADD_TEST(tan),           ADD_TEST(tanh),       ADD_TEST(tanpi),
-    ADD_TEST(trunc),         ADD_TEST(half_cos),   ADD_TEST(half_divide),
-    ADD_TEST(half_exp),      ADD_TEST(half_exp2),  ADD_TEST(half_exp10),
-    ADD_TEST(half_log),      ADD_TEST(half_log2),  ADD_TEST(half_log10),
-    ADD_TEST(half_powr),     ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
-    ADD_TEST(half_sin),      ADD_TEST(half_sqrt),  ADD_TEST(half_tan),
-    ADD_TEST(add),           ADD_TEST(subtract),   ADD_TEST(divide),
-    ADD_TEST(divide_cr),     ADD_TEST(multiply),   ADD_TEST(assignment),
-    ADD_TEST(not),
+test_definition test_list[] = {
+    ADD_TEST( acos ),
+    ADD_TEST( acosh ),
+    ADD_TEST( acospi ),
+    ADD_TEST( asin ),
+    ADD_TEST( asinh ),
+    ADD_TEST( asinpi ),
+    ADD_TEST( atan ),
+    ADD_TEST( atanh ),
+    ADD_TEST( atanpi ),
+    ADD_TEST( atan2 ),
+    ADD_TEST( atan2pi ),
+    ADD_TEST( cbrt ),
+    ADD_TEST( ceil ),
+    ADD_TEST( copysign ),
+    ADD_TEST( cos ),
+    ADD_TEST( cosh ),
+    ADD_TEST( cospi ),
+    ADD_TEST( exp ),
+    ADD_TEST( exp2 ),
+    ADD_TEST( exp10 ),
+    ADD_TEST( expm1 ),
+    ADD_TEST( fabs ),
+    ADD_TEST( fdim ),
+    ADD_TEST( floor ),
+    ADD_TEST( fma ),
+    ADD_TEST( fmax ),
+    ADD_TEST( fmin ),
+    ADD_TEST( fmod ),
+    ADD_TEST( fract ),
+    ADD_TEST( frexp ),
+    ADD_TEST( hypot ),
+    ADD_TEST( ilogb ),
+    ADD_TEST( isequal ),
+    ADD_TEST( isfinite ),
+    ADD_TEST( isgreater ),
+    ADD_TEST( isgreaterequal ),
+    ADD_TEST( isinf ),
+    ADD_TEST( isless ),
+    ADD_TEST( islessequal ),
+    ADD_TEST( islessgreater ),
+    ADD_TEST( isnan ),
+    ADD_TEST( isnormal ),
+    ADD_TEST( isnotequal ),
+    ADD_TEST( isordered ),
+    ADD_TEST( isunordered ),
+    ADD_TEST( ldexp ),
+    ADD_TEST( lgamma ),
+    ADD_TEST( lgamma_r ),
+    ADD_TEST( log ),
+    ADD_TEST( log2 ),
+    ADD_TEST( log10 ),
+    ADD_TEST( log1p ),
+    ADD_TEST( logb ),
+    ADD_TEST( mad ),
+    ADD_TEST( maxmag ),
+    ADD_TEST( minmag ),
+    ADD_TEST( modf ),
+    ADD_TEST( nan ),
+    ADD_TEST( nextafter ),
+    ADD_TEST( pow ),
+    ADD_TEST( pown ),
+    ADD_TEST( powr ),
+    ADD_TEST( remainder ),
+    ADD_TEST( remquo ),
+    ADD_TEST( rint ),
+    ADD_TEST( rootn ),
+    ADD_TEST( round ),
+    ADD_TEST( rsqrt ),
+    ADD_TEST( signbit ),
+    ADD_TEST( sin ),
+    ADD_TEST( sincos ),
+    ADD_TEST( sinh ),
+    ADD_TEST( sinpi ),
+    ADD_TEST( sqrt ),
+    ADD_TEST( sqrt_cr ),
+    ADD_TEST( tan ),
+    ADD_TEST( tanh ),
+    ADD_TEST( tanpi ),
+    ADD_TEST( trunc ),
+    ADD_TEST( half_cos ),
+    ADD_TEST( half_divide ),
+    ADD_TEST( half_exp ),
+    ADD_TEST( half_exp2 ),
+    ADD_TEST( half_exp10 ),
+    ADD_TEST( half_log ),
+    ADD_TEST( half_log2 ),
+    ADD_TEST( half_log10 ),
+    ADD_TEST( half_powr ),
+    ADD_TEST( half_recip ),
+    ADD_TEST( half_rsqrt ),
+    ADD_TEST( half_sin ),
+    ADD_TEST( half_sqrt ),
+    ADD_TEST( half_tan ),
+    ADD_TEST( add ),
+    ADD_TEST( subtract ),
+    ADD_TEST( divide ),
+    ADD_TEST( divide_cr ),
+    ADD_TEST( multiply ),
+    ADD_TEST( assignment ),
+    ADD_TEST( not ),
 };
 
-#undef ADD_TEST
-#undef TEST_LAMBDA
-
-static const int test_num = ARRAY_SIZE(test_list);
+const int test_num = ARRAY_SIZE( test_list );
 
 #pragma mark -
 
-int main(int argc, const char *argv[])
+int main (int argc, const char * argv[])
 {
     int error;
 
@@ -307,113 +759,180 @@
         return -1;
     }
 
-    error = ParseArgs(argc, argv);
-    if (error) return error;
+#if defined( __APPLE__ )
+    struct timeval startTime;
+    gettimeofday( &startTime, NULL );
+#endif
+
+    error = ParseArgs( argc, argv );
+    if( error )
+        return error;
 
     // This takes a while, so prevent the machine from going to sleep.
     PreventSleep();
-    atexit(ResumeSleep);
+    atexit( ResumeSleep );
 
-    if (gSkipCorrectnessTesting)
-        vlog("*** Skipping correctness testing! ***\n\n");
-    else if (gStopOnError)
-        vlog("Stopping at first error.\n");
+    if( gSkipCorrectnessTesting )
+        vlog( "*** Skipping correctness testing! ***\n\n" );
+    else if( gStopOnError )
+        vlog( "Stopping at first error.\n" );
 
-    vlog("   \t                                        ");
-    if (gWimpyMode) vlog("   ");
-    if (!gSkipCorrectnessTesting) vlog("\t  max_ulps");
-
-    vlog("\n-------------------------------------------------------------------"
-         "----------------------------------------\n");
-
-    gMTdata = init_genrand(gRandomSeed);
-
-    FPU_mode_type oldMode;
-    DisableFTZ(&oldMode);
-
-    int ret = runTestHarnessWithCheck(gTestNames.size(), gTestNames.data(),
-                                      test_num, test_list, true, 0, InitCL);
-
-    RestoreFPState(&oldMode);
-
-    free_mtdata(gMTdata);
-
-    if (gQueue)
+    if( gMeasureTimes )
     {
-        int error_code = clFinish(gQueue);
-        if (error_code) vlog_error("clFinish failed:%d\n", error_code);
+        vlog( "%s times are reported at right (cycles per element):\n", method[gReportAverageTimes] );
+        vlog( "\n" );
+        if( gSkipCorrectnessTesting )
+            vlog( "   \t               ");
+        else
+            vlog( "   \t                                        ");
+        if( gWimpyMode )
+            vlog( "   " );
+        for( int i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+            vlog( "\t  float%s", sizeNames[i] );
+    }
+    else
+    {
+        vlog( "   \t                                        ");
+        if( gWimpyMode )
+            vlog( "   " );
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t  max_ulps" );
+
+    vlog( "\n-----------------------------------------------------------------------------------------------------------\n" );
+
+    gMTdata = init_genrand( gRandomSeed );
+    if( gEndTestNumber == 0 )
+    {
+        gEndTestNumber = functionListCount;
     }
 
+    FPU_mode_type oldMode;
+    DisableFTZ( &oldMode );
+
+    int ret = runTestHarnessWithCheck( gTestNameCount, gTestNames, test_num, test_list, true, 0, InitCL );
+
+    RestoreFPState( &oldMode );
+
+    free_mtdata(gMTdata);
+    free(gTestNames);
+
+    int error_code = clFinish(gQueue);
+    if (error_code)
+        vlog_error("clFinish failed:%d\n", error_code);
+
     ReleaseCL();
 
+#if defined( __APPLE__ )
+    struct timeval endTime;
+    gettimeofday( &endTime, NULL );
+    double time = (double) endTime.tv_sec - (double) startTime.tv_sec;
+    time += 1e-6 * ((double) endTime.tv_usec - (double) startTime.tv_usec);
+    vlog( "time: %f s\n", time );
+#endif
+
     return ret;
 }
 
-static int ParseArgs(int argc, const char **argv)
+static int ParseArgs( int argc, const char **argv )
 {
-    // We only pass test names to runTestHarnessWithCheck, hence global command
-    // line options defined by the harness cannot be used by the user.
-    // To respect the implementation details of runTestHarnessWithCheck,
-    // gTestNames[0] has to exist although its value is not important.
-    gTestNames.push_back("");
-
+    int i;
+    gTestNames = (const char**) calloc( argc - 1, sizeof( char*) );
+    if( NULL == gTestNames )
+    {
+        vlog( "Failed to allocate memory for gTestNames array.\n" );
+        return 1;
+    }
+    gTestNames[0] = argv[0];
+    gTestNameCount = 1;
     int singleThreaded = 0;
 
     { // Extract the app name
-        strncpy(appName, argv[0], MAXPATHLEN);
+        strncpy( appName, argv[0], MAXPATHLEN );
 
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
         char baseName[MAXPATHLEN];
         char *base = NULL;
-        strncpy(baseName, argv[0], MAXPATHLEN);
-        base = basename(baseName);
-        if (NULL != base)
+        strncpy( baseName, argv[0], MAXPATHLEN );
+        base = basename( baseName );
+        if( NULL != base )
         {
-            strncpy(appName, base, sizeof(appName));
-            appName[sizeof(appName) - 1] = '\0';
+            strncpy( appName, base, sizeof( appName )  );
+            appName[ sizeof( appName ) -1 ] = '\0';
         }
 #endif
     }
 
-    vlog("\n%s\t", appName);
-    for (int i = 1; i < argc; i++)
+    vlog( "\n%s\t", appName );
+    for( i = 1; i < argc; i++ )
     {
         const char *arg = argv[i];
-        if (NULL == arg) break;
+        if( NULL == arg )
+            break;
 
-        vlog("\t%s", arg);
+        vlog( "\t%s", arg );
         int optionFound = 0;
-        if (arg[0] == '-')
+        if( arg[0] == '-' )
         {
-            while (arg[1] != '\0')
+            while( arg[1] != '\0' )
             {
                 arg++;
                 optionFound = 1;
-                switch (*arg)
+                switch( *arg )
                 {
-                    case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
+                    case 'a':
+                        gReportAverageTimes ^= 1;
+                        break;
 
-                    case 'd': gHasDouble ^= 1; break;
+                    case 'c':
+                        gToggleCorrectlyRoundedDivideSqrt ^= 1;
+                        break;
 
-                    case 'e': gFastRelaxedDerived ^= 1; break;
+                    case 'd':
+                        gHasDouble ^= 1;
+                        break;
 
-                    case 'f': gTestFloat ^= 1; break;
+                    case 'e':
+                        gFastRelaxedDerived ^= 1;
+                        break;
 
-                    case 'h': PrintUsage(); return -1;
+                    case 'f':
+                        gTestFloat ^= 1;
+                        break;
 
-                    case 'p': PrintFunctions(); return -1;
+                    case 'h':
+                        PrintUsage();
+                        return -1;
 
-                    case 'l': gSkipCorrectnessTesting ^= 1; break;
+                    case 'p':
+                      PrintFunctions();
+                      return -1;
 
-                    case 'm': singleThreaded ^= 1; break;
+                    case 'l':
+                        gSkipCorrectnessTesting ^= 1;
+                        break;
 
-                    case 'r': gTestFastRelaxed ^= 1; break;
+                    case 'm':
+                        singleThreaded ^= 1;
+                        break;
 
-                    case 's': gStopOnError ^= 1; break;
+                    case 'r':
+                        gTestFastRelaxed ^= 1;
+                        break;
 
-                    case 'v': gVerboseBruteForce ^= 1; break;
+                    case 's':
+                        gStopOnError ^= 1;
+                        break;
 
-                    case 'w': // wimpy mode
+                    case 't':
+                        gMeasureTimes ^= 1;
+                        break;
+
+                    case 'v':
+                        gVerboseBruteForce ^= 1;
+                        break;
+
+                    case 'w':   // wimpy mode
                         gWimpyMode ^= 1;
                         break;
 
@@ -421,10 +940,12 @@
                         parseWimpyReductionFactor(arg, gWimpyReductionFactor);
                         break;
 
-                    case 'z': gForceFTZ ^= 1; break;
+                    case 'z':
+                        gForceFTZ ^= 1;
+                        break;
 
                     case '1':
-                        if (arg[1] == '6')
+                        if( arg[1] == '6' )
                         {
                             gMinVectorSizeIndex = 5;
                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
@@ -437,194 +958,181 @@
                         }
                         break;
                     case '2':
-                        gMinVectorSizeIndex = 1;
-                        gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
-                        break;
+                            gMinVectorSizeIndex = 1;
+                            gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
+                            break;
                     case '3':
-                        gMinVectorSizeIndex = 2;
-                        gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
-                        break;
+                            gMinVectorSizeIndex = 2;
+                            gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
+                            break;
                     case '4':
-                        gMinVectorSizeIndex = 3;
-                        gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
-                        break;
+                            gMinVectorSizeIndex = 3;
+                            gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
+                            break;
                     case '8':
-                        gMinVectorSizeIndex = 4;
-                        gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
+                            gMinVectorSizeIndex = 4;
+                            gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
+                            break;
                         break;
 
                     default:
-                        vlog(" <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg);
+                        vlog( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
                         PrintUsage();
                         return -1;
                 }
             }
         }
 
-        if (!optionFound)
+        if( ! optionFound )
         {
             char *t = NULL;
-            long number = strtol(arg, &t, 0);
-            if (t != arg)
+            long number = strtol( arg, &t, 0 );
+            if( t != arg )
             {
-                if (-1 == gStartTestNumber)
-                    gStartTestNumber = (int32_t)number;
+                if( 0 == gStartTestNumber )
+                    gStartTestNumber = (int32_t) number;
                 else
-                    gEndTestNumber = gStartTestNumber + (int32_t)number;
+                    gEndTestNumber = gStartTestNumber + (int32_t) number;
             }
             else
             {
                 // Make sure this is a valid name
                 unsigned int k;
-                for (k = 0; k < functionListCount; k++)
+                for (k=0; k<functionListCount; k++)
                 {
-                    const Func *f = functionList + k;
+                    const Func *f = functionList+k;
                     if (strcmp(arg, f->name) == 0)
                     {
-                        gTestNames.push_back(arg);
+                        gTestNames[ gTestNameCount ] = arg;
+                        gTestNameCount++;
                         break;
                     }
                 }
                 // If we didn't find it in the list of test names
                 if (k >= functionListCount)
                 {
-                    gTestNames.push_back(arg);
+                    gTestNames[gTestNameCount] = arg;
+                    gTestNameCount++;
                 }
             }
         }
     }
 
     // Check for the wimpy mode environment variable
-    if (getenv("CL_WIMPY_MODE"))
-    {
-        vlog("\n");
-        vlog("*** Detected CL_WIMPY_MODE env                          ***\n");
-        gWimpyMode = 1;
+    if (getenv("CL_WIMPY_MODE")) {
+      vlog( "\n" );
+      vlog( "*** Detected CL_WIMPY_MODE env                          ***\n" );
+      gWimpyMode = 1;
     }
 
-    vlog("\nTest binary built %s %s\n", __DATE__, __TIME__);
+    vlog( "\nTest binary built %s %s\n", __DATE__, __TIME__ );
 
     PrintArch();
 
-    if (gWimpyMode)
+    if( gWimpyMode )
     {
-        vlog("\n");
-        vlog("*** WARNING: Testing in Wimpy mode!                     ***\n");
-        vlog("*** Wimpy mode is not sufficient to verify correctness. ***\n");
-        vlog("*** Wimpy Reduction Factor: %-27u ***\n\n",
-             gWimpyReductionFactor);
+        vlog( "\n" );
+        vlog( "*** WARNING: Testing in Wimpy mode!                     ***\n" );
+        vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
+        vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor );
     }
 
-    if (singleThreaded) SetThreadCount(1);
+    if( singleThreaded )
+        SetThreadCount(1);
 
     return 0;
 }
 
 
-static void PrintFunctions(void)
+static void PrintFunctions ( void )
 {
-    vlog("\nMath function names:\n");
-    for (int i = 0; i < functionListCount; i++)
-    {
-        vlog("\t%s\n", functionList[i].name);
-    }
+  vlog( "\nMath function names:\n" );
+  for( int i = 0; i < functionListCount; i++ )
+  {
+    vlog( "\t%s\n", functionList[ i ].name );
+  }
 }
 
-static void PrintUsage(void)
+static void PrintUsage( void )
 {
-    vlog("%s [-cglsz]: <optional: math function names>\n", appName);
-    vlog("\toptions:\n");
-    vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
-         "off)\n");
-    vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
-         "on)\n");
-    vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
-    vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
-    vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
-         "precision. (Default: on)\n");
-    vlog("\t\t-h\tPrint this message and quit\n");
-    vlog("\t\t-p\tPrint all math function names and quit\n");
-    vlog("\t\t-l\tlink check only (make sure functions are present, skip "
-         "accuracy checks.)\n");
-    vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n");
-    vlog("\t\t-s\tStop on error\n");
-    vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
-    vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
-         "1-10, default factor(%u)\n",
-         gWimpyReductionFactor);
-    vlog("\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by "
-         "device capabilities by default.)\n");
-    vlog("\t\t-v\tToggle Verbosity (Default: off)\n ");
-    vlog("\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, "
-         "\"-16\" tests 16-wide vectors only.\n");
-    vlog("\n\tYou may also pass a number instead of a function name.\n");
-    vlog("\tThis causes the first N tests to be skipped. The tests are "
-         "numbered.\n");
-    vlog("\tIf you pass a second number, that is the number tests to run after "
-         "the first one.\n");
-    vlog("\tA name list may be used in conjunction with a number range. In "
-         "that case,\n");
-    vlog("\tonly the named cases in the number range will run.\n");
-    vlog("\tYou may also choose to pass no arguments, in which case all tests "
-         "will be run.\n");
-    vlog("\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the "
-         "device.\n");
-    vlog("\n");
+    vlog( "%s [-acglstz]: <optional: math function names>\n", appName );
+    vlog( "\toptions:\n" );
+    vlog( "\t\t-a\tReport average times instead of best times\n" );
+    vlog( "\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: off)\n");
+    vlog( "\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 on)\n" );
+    vlog( "\t\t-f\tToggle float precision testing. (Default: on)\n" );
+    vlog( "\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n" );
+    vlog( "\t\t-e\tToggle test as derived implementations for fast relaxed math precision. (Default: on)\n" );
+    vlog( "\t\t-h\tPrint this message and quit\n" );
+    vlog( "\t\t-p\tPrint all math function names and quit\n" );
+    vlog( "\t\t-l\tlink check only (make sure functions are present, skip accuracy checks.)\n" );
+    vlog( "\t\t-m\tToggle run multi-threaded. (Default: on) )\n" );
+    vlog( "\t\t-s\tStop on error\n" );
+    vlog( "\t\t-t\tToggle timing  (on by default)\n" );
+    vlog( "\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
+    vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-10, default factor(%u)\n",gWimpyReductionFactor );
+    vlog( "\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by device capabilities by default.)\n" );
+    vlog( "\t\t-v\tToggle Verbosity (Default: off)\n ");
+    vlog( "\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, \"-16\" tests 16-wide vectors only.\n" );
+    vlog( "\n\tYou may also pass a number instead of a function name.\n" );
+    vlog( "\tThis causes the first N tests to be skipped. The tests are numbered.\n" );
+    vlog( "\tIf you pass a second number, that is the number tests to run after the first one.\n" );
+    vlog( "\tA name list may be used in conjunction with a number range. In that case,\n" );
+    vlog( "\tonly the named cases in the number range will run.\n" );
+    vlog( "\tYou may also choose to pass no arguments, in which case all tests will be run.\n" );
+    vlog( "\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the device.\n" );
+    vlog( "\n" );
 }
 
-static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo,
-                                                   const void *private_info,
-                                                   size_t cb, void *user_data)
+static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
 {
-    vlog("%s  (%p, %zd, %p)\n", errinfo, private_info, cb, user_data);
+    vlog( "%s  (%p, %zd, %p)\n", errinfo, private_info, cb, user_data );
 }
 
-test_status InitCL(cl_device_id device)
+test_status InitCL( cl_device_id device )
 {
     int error;
     uint32_t i;
+    size_t configSize = sizeof( gComputeDevices );
     cl_device_type device_type;
 
-    error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type),
-                            &device_type, NULL);
-    if (error)
+    error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL );
+    if( error )
     {
-        print_error(error, "Unable to get device type");
+        print_error( error, "Unable to get device type" );
         return TEST_FAIL;
     }
 
     gDevice = device;
 
+
+    if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_COMPUTE_UNITS, configSize, &gComputeDevices, NULL )) )
+        gComputeDevices = 1;
+
     // Check extensions
-    if (is_extension_available(gDevice, "cl_khr_fp64"))
+    if(is_extension_available(gDevice, "cl_khr_fp64"))
     {
         gHasDouble ^= 1;
-#if defined(CL_DEVICE_DOUBLE_FP_CONFIG)
-        cl_device_fp_config doubleCapabilities = 0;
-        if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG,
-                                     sizeof(doubleCapabilities),
-                                     &doubleCapabilities, NULL)))
+#if defined( CL_DEVICE_DOUBLE_FP_CONFIG )
+        if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(gDoubleCapabilities), &gDoubleCapabilities, NULL)))
         {
-            vlog_error("ERROR: Unable to get device "
-                       "CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n",
-                       error);
+            vlog_error( "ERROR: Unable to get device CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n", error );
             return TEST_FAIL;
         }
 
-        if (DOUBLE_REQUIRED_FEATURES
-            != (doubleCapabilities & DOUBLE_REQUIRED_FEATURES))
+        if( DOUBLE_REQUIRED_FEATURES != (gDoubleCapabilities & DOUBLE_REQUIRED_FEATURES) )
         {
             std::string list;
-            if (0 == (doubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
-            if (0 == (doubleCapabilities & CL_FP_ROUND_TO_NEAREST))
+            if (0 == (gDoubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
+            if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_NEAREST) )
                 list += "CL_FP_ROUND_TO_NEAREST, ";
-            if (0 == (doubleCapabilities & CL_FP_ROUND_TO_ZERO))
+            if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_ZERO) )
                 list += "CL_FP_ROUND_TO_ZERO, ";
-            if (0 == (doubleCapabilities & CL_FP_ROUND_TO_INF))
+            if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_INF) )
                 list += "CL_FP_ROUND_TO_INF, ";
-            if (0 == (doubleCapabilities & CL_FP_INF_NAN))
+            if( 0 == (gDoubleCapabilities & CL_FP_INF_NAN) )
                 list += "CL_FP_INF_NAN, ";
-            if (0 == (doubleCapabilities & CL_FP_DENORM))
+            if( 0 == (gDoubleCapabilities & CL_FP_DENORM) )
                 list += "CL_FP_DENORM, ";
             vlog_error("ERROR: required double features are missing: %s\n",
                        list.c_str());
@@ -632,102 +1140,100 @@
             return TEST_FAIL;
         }
 #else
-        vlog_error("FAIL: device says it supports cl_khr_fp64 but "
-                   "CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n");
+        vlog_error( "FAIL: device says it supports cl_khr_fp64 but CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n" );
         return TEST_FAIL;
 #endif
     }
 
-    uint32_t deviceFrequency = 0;
-    size_t configSize = sizeof(deviceFrequency);
-    if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
-                                 configSize, &deviceFrequency, NULL)))
-        deviceFrequency = 0;
+    configSize = sizeof( gDeviceFrequency );
+    if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency, NULL )) )
+        gDeviceFrequency = 0;
 
-    if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG,
-                                 sizeof(gFloatCapabilities),
-                                 &gFloatCapabilities, NULL)))
+    if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(gFloatCapabilities), &gFloatCapabilities, NULL)))
     {
-        vlog_error(
-            "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n",
-            error);
+        vlog_error( "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n", error );
         return TEST_FAIL;
     }
 
-    gContext = clCreateContext(NULL, 1, &gDevice, bruteforce_notify_callback,
-                               NULL, &error);
-    if (NULL == gContext || error)
+    gContext = clCreateContext( NULL, 1, &gDevice, bruteforce_notify_callback, NULL, &error );
+    if( NULL == gContext || error )
     {
-        vlog_error("clCreateContext failed. (%d) \n", error);
+        vlog_error( "clCreateContext failed. (%d) \n", error );
         return TEST_FAIL;
     }
 
     gQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
-    if (NULL == gQueue || error)
+    if( NULL == gQueue || error )
     {
-        vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+        vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
         return TEST_FAIL;
     }
 
-    // Allocate buffers
+#if defined( __APPLE__ )
+    // FIXME: use clProtectedArray
+#endif
+    //Allocate buffers
     cl_uint min_alignment = 0;
-    error = clGetDeviceInfo(gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
-                            sizeof(cl_uint), (void *)&min_alignment, NULL);
+    error = clGetDeviceInfo (gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), (void*)&min_alignment, NULL);
     if (CL_SUCCESS != error)
     {
-        vlog_error("clGetDeviceInfo failed. (%d)\n", error);
+        vlog_error( "clGetDeviceInfo failed. (%d)\n", error );
         return TEST_FAIL;
     }
-    min_alignment >>= 3; // convert bits to bytes
+    min_alignment >>= 3;    // convert bits to bytes
 
-    gIn = align_malloc(BUFFER_SIZE, min_alignment);
-    if (NULL == gIn) return TEST_FAIL;
-    gIn2 = align_malloc(BUFFER_SIZE, min_alignment);
-    if (NULL == gIn2) return TEST_FAIL;
-    gIn3 = align_malloc(BUFFER_SIZE, min_alignment);
-    if (NULL == gIn3) return TEST_FAIL;
-    gOut_Ref = align_malloc(BUFFER_SIZE, min_alignment);
-    if (NULL == gOut_Ref) return TEST_FAIL;
-    gOut_Ref2 = align_malloc(BUFFER_SIZE, min_alignment);
-    if (NULL == gOut_Ref2) return TEST_FAIL;
+    gIn   = align_malloc( BUFFER_SIZE, min_alignment );
+    if( NULL == gIn )
+        return TEST_FAIL;
+    gIn2   = align_malloc( BUFFER_SIZE, min_alignment );
+    if( NULL == gIn2 )
+        return TEST_FAIL;
+    gIn3   = align_malloc( BUFFER_SIZE, min_alignment );
+    if( NULL == gIn3 )
+        return TEST_FAIL;
+    gOut_Ref   = align_malloc( BUFFER_SIZE, min_alignment );
+    if( NULL == gOut_Ref )
+        return TEST_FAIL;
+    gOut_Ref2   = align_malloc( BUFFER_SIZE, min_alignment );
+    if( NULL == gOut_Ref2 )
+        return TEST_FAIL;
 
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
     {
-        gOut[i] = align_malloc(BUFFER_SIZE, min_alignment);
-        if (NULL == gOut[i]) return TEST_FAIL;
-        gOut2[i] = align_malloc(BUFFER_SIZE, min_alignment);
-        if (NULL == gOut2[i]) return TEST_FAIL;
+        gOut[i] = align_malloc( BUFFER_SIZE, min_alignment );
+        if( NULL == gOut[i] )
+            return TEST_FAIL;
+        gOut2[i] = align_malloc( BUFFER_SIZE, min_alignment );
+        if( NULL == gOut2[i] )
+            return TEST_FAIL;
     }
 
     cl_mem_flags device_flags = CL_MEM_READ_ONLY;
     // save a copy on the host device to make this go faster
-    if (CL_DEVICE_TYPE_CPU == device_type)
+    if( CL_DEVICE_TYPE_CPU == device_type )
         device_flags |= CL_MEM_USE_HOST_PTR;
-    else
-        device_flags |= CL_MEM_COPY_HOST_PTR;
+      else
+          device_flags |= CL_MEM_COPY_HOST_PTR;
 
     // setup input buffers
-    gInBuffer =
-        clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
-    if (gInBuffer == NULL || error)
+    gInBuffer = clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
+    if( gInBuffer == NULL || error )
     {
-        vlog_error("clCreateBuffer1 failed for input (%d)\n", error);
+        vlog_error( "clCreateBuffer1 failed for input (%d)\n", error );
         return TEST_FAIL;
     }
 
-    gInBuffer2 =
-        clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn2, &error);
-    if (gInBuffer2 == NULL || error)
+    gInBuffer2 = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gIn2, &error );
+    if( gInBuffer2 == NULL || error )
     {
-        vlog_error("clCreateBuffer2 failed for input (%d)\n", error);
+        vlog_error( "clCreateArray2 failed for input (%d)\n" , error );
         return TEST_FAIL;
     }
 
-    gInBuffer3 =
-        clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn3, &error);
-    if (gInBuffer3 == NULL || error)
+    gInBuffer3 = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gIn3, &error );
+    if( gInBuffer3 == NULL  || error)
     {
-        vlog_error("clCreateBuffer3 failed for input (%d)\n", error);
+        vlog_error( "clCreateArray3 failed for input (%d)\n", error );
         return TEST_FAIL;
     }
 
@@ -735,40 +1241,38 @@
     // setup output buffers
     device_flags = CL_MEM_READ_WRITE;
     // save a copy on the host device to make this go faster
-    if (CL_DEVICE_TYPE_CPU == device_type)
+    if( CL_DEVICE_TYPE_CPU == device_type )
         device_flags |= CL_MEM_USE_HOST_PTR;
-    else
-        device_flags |= CL_MEM_COPY_HOST_PTR;
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+      else
+          device_flags |= CL_MEM_COPY_HOST_PTR;
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
     {
-        gOutBuffer[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
-                                       gOut[i], &error);
-        if (gOutBuffer[i] == NULL || error)
+        gOutBuffer[i] = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gOut[i], &error );
+        if( gOutBuffer[i] == NULL || error )
         {
-            vlog_error("clCreateBuffer failed for output (%d)\n", error);
+            vlog_error( "clCreateArray failed for output (%d)\n", error  );
             return TEST_FAIL;
         }
-        gOutBuffer2[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
-                                        gOut2[i], &error);
-        if (gOutBuffer2[i] == NULL || error)
+        gOutBuffer2[i] = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gOut2[i], &error );
+        if( gOutBuffer2[i] == NULL || error)
         {
-            vlog_error("clCreateBuffer2 failed for output (%d)\n", error);
+            vlog_error( "clCreateArray2 failed for output (%d)\n", error );
             return TEST_FAIL;
         }
     }
 
     // we are embedded, check current rounding mode
-    if (gIsEmbedded)
+    if( gIsEmbedded )
     {
         gIsInRTZMode = IsInRTZMode();
     }
 
-    // Check tininess detection
+    //Check tininess detection
     IsTininessDetectedBeforeRounding();
 
     cl_platform_id platform;
     int err = clGetPlatformIDs(1, &platform, NULL);
-    if (err)
+    if( err )
     {
         print_error(err, "clGetPlatformIDs failed");
         return TEST_FAIL;
@@ -776,97 +1280,78 @@
 
     char c[1024];
     static const char *no_yes[] = { "NO", "YES" };
-    vlog("\nCompute Device info:\n");
+    vlog( "\nCompute Device info:\n" );
     clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL);
-    vlog("\tPlatform Version: %s\n", c);
+    vlog( "\tPlatform Version: %s\n", c );
     clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL);
-    vlog("\tDevice Name: %s\n", c);
+    vlog( "\tDevice Name: %s\n", c );
     clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL);
-    vlog("\tVendor: %s\n", c);
+    vlog( "\tVendor: %s\n", c );
     clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL);
-    vlog("\tDevice Version: %s\n", c);
+    vlog( "\tDevice Version: %s\n", c );
     clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
-    vlog("\tCL C Version: %s\n", c);
+    vlog( "\tCL C Version: %s\n", c );
     clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL);
-    vlog("\tDriver Version: %s\n", c);
-    vlog("\tDevice Frequency: %d MHz\n", deviceFrequency);
-    vlog("\tSubnormal values supported for floats? %s\n",
-         no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)]);
-    vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
-         no_yes[0
-                != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
-    if (gToggleCorrectlyRoundedDivideSqrt)
+    vlog( "\tDriver Version: %s\n", c );
+    vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency );
+    vlog( "\tSubnormal values supported for floats? %s\n", no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)] );
+    vlog( "\tCorrectly rounded divide and sqrt supported for floats? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)] );
+    if( gToggleCorrectlyRoundedDivideSqrt )
     {
         gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
     }
-    vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
-         no_yes[0
-                != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
-    vlog("\tTesting with FTZ mode ON for floats? %s\n",
-         no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
-    vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
-    vlog("\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed]);
-    if (gTestFastRelaxed)
+    vlog( "\tTesting with correctly rounded float divide and sqrt? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)] );
+    vlog( "\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)] );
+    vlog( "\tTesting single precision? %s\n", no_yes[0 != gTestFloat] );
+    vlog( "\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed] );
+    if(gTestFastRelaxed)
     {
-        vlog("\tFast relaxed math has derived implementations? %s\n",
-             no_yes[0 != gFastRelaxedDerived]);
+      vlog( "\tFast relaxed math has derived implementations? %s\n", no_yes[0 != gFastRelaxedDerived] );
     }
-    vlog("\tTesting double precision? %s\n", no_yes[0 != gHasDouble]);
-    if (sizeof(long double) == sizeof(double) && gHasDouble)
+    vlog( "\tTesting double precision? %s\n", no_yes[0 != gHasDouble] );
+    if( sizeof( long double) == sizeof( double ) && gHasDouble )
     {
-        vlog("\n\t\tWARNING: Host system long double does not have better "
-             "precision than double!\n");
-        vlog("\t\t         All double results that do not match the reference "
-             "result have their reported\n");
-        vlog("\t\t         error inflated by 0.5 ulps to account for the fact "
-             "that this system\n");
-        vlog("\t\t         can not accurately represent the right result to an "
-             "accuracy closer\n");
-        vlog("\t\t         than half an ulp. See comments in "
-             "Bruteforce_Ulp_Error_Double() for more details.\n\n");
+        vlog( "\n\t\tWARNING: Host system long double does not have better precision than double!\n" );
+        vlog( "\t\t         All double results that do not match the reference result have their reported\n" );
+        vlog( "\t\t         error inflated by 0.5 ulps to account for the fact that this system\n" );
+        vlog( "\t\t         can not accurately represent the right result to an accuracy closer\n" );
+        vlog( "\t\t         than half an ulp. See comments in Bruteforce_Ulp_Error_Double() for more details.\n\n" );
     }
 
-    vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
-    if (gIsEmbedded)
-        vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
-    vlog("\tTininess is detected before rounding? %s\n",
-         no_yes[0 != gCheckTininessBeforeRounding]);
-    vlog("\tWorker threads: %d\n", GetThreadCount());
-    vlog("\tTesting vector sizes:");
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-        vlog("\t%d", sizeValues[i]);
+    vlog( "\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded] );
+    if( gIsEmbedded )
+        vlog( "\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode] );
+    vlog( "\tTininess is detected before rounding? %s\n", no_yes[0 != gCheckTininessBeforeRounding] );
+    vlog( "\tWorker threads: %d\n", GetThreadCount() );
+    vlog( "\tTesting vector sizes:" );
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        vlog( "\t%d", sizeValues[i] );
 
     vlog("\n");
     vlog("\tVerbose? %s\n", no_yes[0 != gVerboseBruteForce]);
-    vlog("\n\n");
+    vlog( "\n\n" );
 
-    // Check to see if we are using single threaded mode on other than a 1.0
-    // device
-    if (getenv("CL_TEST_SINGLE_THREADED"))
-    {
+    // Check to see if we are using single threaded mode on other than a 1.0 device
+    if (getenv( "CL_TEST_SINGLE_THREADED" )) {
 
-        char device_version[1024] = { 0 };
-        clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(device_version),
-                        device_version, NULL);
+      char device_version[1024] = { 0 };
+      clGetDeviceInfo( gDevice, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL );
 
-        if (strcmp("OpenCL 1.0 ", device_version))
-        {
-            vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. "
-                 "Running single threaded.\n");
-        }
+      if (strcmp("OpenCL 1.0 ",device_version)) {
+        vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n");
+      }
     }
 
     return TEST_PASS;
 }
 
-static void ReleaseCL(void)
+static void ReleaseCL( void )
 {
     uint32_t i;
     clReleaseMemObject(gInBuffer);
     clReleaseMemObject(gInBuffer2);
     clReleaseMemObject(gInBuffer3);
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
+    for ( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) {
         clReleaseMemObject(gOutBuffer[i]);
         clReleaseMemObject(gOutBuffer2[i]);
     }
@@ -879,182 +1364,170 @@
     align_free(gOut_Ref);
     align_free(gOut_Ref2);
 
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
     {
         align_free(gOut[i]);
         align_free(gOut2[i]);
     }
 }
 
-void _LogBuildError(cl_program p, int line, const char *file)
+void _LogBuildError( cl_program p, int line, const char *file )
 {
     char the_log[2048] = "";
 
-    vlog_error("%s:%d: Build Log:\n", file, line);
-    if (0
-        == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG,
-                                 sizeof(the_log), the_log, NULL))
-        vlog_error("%s", the_log);
+    vlog_error( "%s:%d: Build Log:\n", file, line );
+    if( 0 == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(the_log), the_log, NULL) )
+        vlog_error( "%s", the_log );
     else
-        vlog_error("*** Error getting build log for program %p\n", p);
+        vlog_error( "*** Error getting build log for program %p\n", p );
 }
 
-int InitILogbConstants(void)
+int InitILogbConstants( void )
 {
     int error;
-    const char *kernelSource =
-        R"(__kernel void GetILogBConstants( __global int *out )
-        {
-            out[0] = FP_ILOGB0;
-            out[1] = FP_ILOGBNAN;
-        })";
+    const char *kernel =
+    "__kernel void GetILogBConstants( __global int *out )\n"
+    "{\n"
+    "   out[0] = FP_ILOGB0;\n"
+    "   out[1] = FP_ILOGBNAN;\n"
+    "}\n";
 
-    clProgramWrapper query;
-    clKernelWrapper kernel;
-    error = create_single_kernel_helper(gContext, &query, &kernel, 1,
-                                        &kernelSource, "GetILogBConstants");
-    if (error != CL_SUCCESS)
+    cl_program query;
+    error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL);
+    if (NULL == query || error)
     {
-        vlog_error("Error: Unable to create kernel to get FP_ILOGB0 and "
-                   "FP_ILOGBNAN for the device. (%d)",
-                   error);
+        vlog_error( "Error: Unable to create program to get FP_ILOGB0 and FP_ILOGBNAN for the device. (%d)", error );
         return error;
     }
 
-    if ((error =
-             clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
-                            &gOutBuffer[gMinVectorSizeIndex])))
+    cl_kernel k = clCreateKernel( query, "GetILogBConstants", &error );
+    if( NULL == k || error)
     {
-        vlog_error("Error: Unable to set kernel arg to get FP_ILOGB0 and "
-                   "FP_ILOGBNAN for the device. Err = %d",
-                   error);
+      vlog_error( "Error: Unable to create kernel to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error );
+        return error;
+    }
+
+    if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex])))
+    {
+        vlog_error( "Error: Unable to set kernel arg to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error );
         return error;
     }
 
     size_t dim = 1;
-    if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
-                                        NULL, NULL)))
+    if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) ))
     {
-        vlog_error("Error: Unable to execute kernel to get FP_ILOGB0 and "
-                   "FP_ILOGBNAN for the device. Err = %d",
-                   error);
+        vlog_error( "Error: Unable to execute kernel to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error );
         return error;
     }
 
-    struct
+    struct{ cl_int ilogb0, ilogbnan; }data;
+    if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL)))
     {
-        cl_int ilogb0, ilogbnan;
-    } data;
-    if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
-                                     CL_TRUE, 0, sizeof(data), &data, 0, NULL,
-                                     NULL)))
-    {
-        vlog_error("Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the "
-                   "device. Err = %d",
-                   error);
+        vlog_error( "Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the device. Err = %d", error );
         return error;
     }
 
     gDeviceILogb0 = data.ilogb0;
     gDeviceILogbNaN = data.ilogbnan;
 
+    clReleaseKernel(k);
+    clReleaseProgram(query);
+
     return 0;
 }
 
-int IsTininessDetectedBeforeRounding(void)
+int IsTininessDetectedBeforeRounding( void )
 {
     int error;
-    const char *kernelSource =
-        R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out )
-        {
-           volatile float a = 0x1.000002p-126f;
-           volatile float b = 0x1.fffffcp-1f;
-           out[0] = a * b; // product is 0x1.fffffffffff8p-127
-        })";
+    const char *kernel =
+    "__kernel void IsTininessDetectedBeforeRounding( __global float *out )\n"
+    "{\n"
+    "   volatile float a = 0x1.000002p-126f;\n"
+    "   volatile float b = 0x1.fffffcp-1f;\n"       // product is 0x1.fffffffffff8p-127
+    "   out[0] = a * b;\n"
+    "}\n";
 
-    clProgramWrapper query;
-    clKernelWrapper kernel;
-    error =
-        create_single_kernel_helper(gContext, &query, &kernel, 1, &kernelSource,
-                                    "IsTininessDetectedBeforeRounding");
-    if (error != CL_SUCCESS)
-    {
-        vlog_error("Error: Unable to create kernel to detect how tininess is "
-                   "detected for the device. (%d)",
-                   error);
+    cl_program query;
+    error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL);
+    if (error != CL_SUCCESS) {
+        vlog_error( "Error: Unable to create program to detect how tininess is detected for the device. (%d)", error );
         return error;
     }
 
-    if ((error =
-             clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
-                            &gOutBuffer[gMinVectorSizeIndex])))
+    cl_kernel k = clCreateKernel( query, "IsTininessDetectedBeforeRounding", &error );
+    if( NULL == k || error)
     {
-        vlog_error("Error: Unable to set kernel arg to detect how tininess is "
-                   "detected  for the device. Err = %d",
-                   error);
+      vlog_error( "Error: Unable to create kernel to detect how tininess is detected  for the device. Err = %d", error );
+        return error;
+    }
+
+    if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex])))
+    {
+        vlog_error( "Error: Unable to set kernel arg to detect how tininess is detected  for the device. Err = %d", error );
         return error;
     }
 
     size_t dim = 1;
-    if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
-                                        NULL, NULL)))
+    if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) ))
     {
-        vlog_error("Error: Unable to execute kernel to detect how tininess is "
-                   "detected  for the device. Err = %d",
-                   error);
+        vlog_error( "Error: Unable to execute kernel to detect how tininess is detected  for the device. Err = %d", error );
         return error;
     }
 
-    struct
+    struct{ cl_uint f; }data;
+    if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL)))
     {
-        cl_uint f;
-    } data;
-    if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
-                                     CL_TRUE, 0, sizeof(data), &data, 0, NULL,
-                                     NULL)))
-    {
-        vlog_error("Error: unable to read result from tininess test from the "
-                   "device. Err = %d",
-                   error);
+        vlog_error( "Error: unable to read result from tininess test from the device. Err = %d", error );
         return error;
     }
 
     gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff);
 
+    clReleaseKernel(k);
+    clReleaseProgram(query);
+
     return 0;
 }
 
 
-int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
-               cl_program *p, bool relaxedMode)
+int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p )
 {
     int error = 0;
     char options[200] = "";
 
-    if (gForceFTZ)
+    if( gForceFTZ )
     {
-        strcat(options, " -cl-denorms-are-zero");
+      strcat(options," -cl-denorms-are-zero");
     }
 
-    if (relaxedMode)
+    if( gTestFastRelaxed )
     {
-        strcat(options, " -cl-fast-relaxed-math");
+      strcat(options, " -cl-fast-relaxed-math");
     }
 
-    error =
-        create_single_kernel_helper(gContext, p, k, count, c, name, options);
+    error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
     if (error != CL_SUCCESS)
     {
-        vlog_error("\t\tFAILED -- Failed to create kernel. (%d)\n", error);
+        vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
+        return error;
+    }
+
+    *k = clCreateKernel( *p, name, &error );
+    if( NULL == *k || error )
+    {
+        char    buffer[2048] = "";
+
+        vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
+        clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
+        vlog_error("Log: %s\n", buffer);
+        clReleaseProgram( *p );
         return error;
     }
 
     return error;
 }
 
-int MakeKernels(const char **c, cl_uint count, const char *name,
-                cl_uint kernel_count, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
+int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p )
 {
     int error = 0;
     cl_uint i;
@@ -1062,41 +1535,39 @@
 
     if (gForceFTZ)
     {
-        strcat(options, " -cl-denorms-are-zero ");
+      strcat(options," -cl-denorms-are-zero ");
     }
 
-    if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
+    if( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT )
     {
-        strcat(options, " -cl-fp32-correctly-rounded-divide-sqrt ");
+      strcat(options," -cl-fp32-correctly-rounded-divide-sqrt ");
     }
 
-    if (relaxedMode)
+    if( gTestFastRelaxed )
     {
-        strcat(options, " -cl-fast-relaxed-math");
+      strcat(options, " -cl-fast-relaxed-math");
     }
 
-    error =
-        create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
-    if (error != CL_SUCCESS)
+    error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
+    if ( error != CL_SUCCESS )
     {
-        vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
+        vlog_error( "\t\tFAILED -- Failed to create program. (%d)\n", error );
         return error;
     }
 
 
-    memset(k, 0, kernel_count * sizeof(*k));
-    for (i = 0; i < kernel_count; i++)
+    memset( k, 0, kernel_count * sizeof( *k) );
+    for( i = 0; i< kernel_count; i++ )
     {
-        k[i] = clCreateKernel(*p, name, &error);
-        if (NULL == k[i] || error)
+        k[i] = clCreateKernel( *p, name, &error );
+        if( NULL == k[i]|| error )
         {
-            char buffer[2048] = "";
+            char    buffer[2048] = "";
 
             vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
-            clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG,
-                                  sizeof(buffer), buffer, NULL);
+            clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
             vlog_error("Log: %s\n", buffer);
-            clReleaseProgram(*p);
+            clReleaseProgram( *p );
             return error;
         }
     }
@@ -1105,116 +1576,99 @@
 }
 
 
-static int IsInRTZMode(void)
+static int IsInRTZMode( void )
 {
     int error;
-    const char *kernelSource =
-        R"(__kernel void GetRoundingMode( __global int *out )
-        {
-            volatile float a = 0x1.0p23f;
-            volatile float b = -0x1.0p23f;
-            out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);
-        })";
+    const char *kernel =
+    "__kernel void GetRoundingMode( __global int *out )\n"
+    "{\n"
+    "   volatile float a = 0x1.0p23f;\n"
+    "   volatile float b = -0x1.0p23f;\n"
+    "   out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);\n"
+    "}\n";
 
-    clProgramWrapper query;
-    clKernelWrapper kernel;
-    error = create_single_kernel_helper(gContext, &query, &kernel, 1,
-                                        &kernelSource, "GetRoundingMode");
-    if (error != CL_SUCCESS)
-    {
-        vlog_error("Error: Unable to create kernel to detect RTZ mode for the "
-                   "device. (%d)",
-                   error);
+    cl_program query;
+    error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL);
+    if (error != CL_SUCCESS) {
+        vlog_error( "Error: Unable to create program to detect RTZ mode for the device. (%d)", error );
         return error;
     }
 
-    if ((error =
-             clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
-                            &gOutBuffer[gMinVectorSizeIndex])))
+    cl_kernel k = clCreateKernel( query, "GetRoundingMode", &error );
+    if( NULL == k || error)
     {
-        vlog_error("Error: Unable to set kernel arg to detect RTZ mode for the "
-                   "device. Err = %d",
-                   error);
+        vlog_error( "Error: Unable to create kernel to gdetect RTZ mode for the device. Err = %d", error );
+        return error;
+    }
+
+    if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex])))
+    {
+        vlog_error( "Error: Unable to set kernel arg to detect RTZ mode for the device. Err = %d", error );
         return error;
     }
 
     size_t dim = 1;
-    if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
-                                        NULL, NULL)))
+    if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) ))
     {
-        vlog_error("Error: Unable to execute kernel to detect RTZ mode for the "
-                   "device. Err = %d",
-                   error);
+        vlog_error( "Error: Unable to execute kernel to detect RTZ mode for the device. Err = %d", error );
         return error;
     }
 
-    struct
+    struct{ cl_int isRTZ; }data;
+    if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL)))
     {
-        cl_int isRTZ;
-    } data;
-    if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
-                                     CL_TRUE, 0, sizeof(data), &data, 0, NULL,
-                                     NULL)))
-    {
-        vlog_error(
-            "Error: unable to read RTZ mode data from the device. Err = %d",
-            error);
+        vlog_error( "Error: unable to read RTZ mode data from the device. Err = %d", error );
         return error;
     }
 
+    clReleaseKernel(k);
+    clReleaseProgram(query);
+
     return data.isRTZ;
 }
 
 #pragma mark -
 
-const char *sizeNames[VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
-const int sizeValues[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
+const char *sizeNames[ VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
+const int  sizeValues[ VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
 
-// TODO: There is another version of Ulp_Error_Double defined in
-// test_common/harness/errorHelpers.c
-float Bruteforce_Ulp_Error_Double(double test, long double reference)
+// TODO: There is another version of Ulp_Error_Double defined in test_common/harness/errorHelpers.c
+float Bruteforce_Ulp_Error_Double( double test, long double reference )
 {
-    // Check for Non-power-of-two and NaN
+//Check for Non-power-of-two and NaN
 
-    // Note: This function presumes that someone has already tested whether the
-    // result is correctly, rounded before calling this function.  That test:
-    //
-    //    if( (float) reference == test )
-    //        return 0.0f;
-    //
-    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
-    // before we get here. Otherwise, we'll return inf ulp error here, for what
-    // are otherwise correctly rounded results.
+  // Note: This function presumes that someone has already tested whether the result is correctly,
+  // rounded before calling this function.  That test:
+  //
+  //    if( (float) reference == test )
+  //        return 0.0f;
+  //
+  // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+  // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+  // results.
 
-    // Deal with long double = double
-    // On most systems long double is a higher precision type than double. They
-    // provide either a 80-bit or greater floating point type, or they provide a
-    // head-tail double double format. That is sufficient to represent the
-    // accuracy of a floating point result to many more bits than double and we
-    // can calculate sub-ulp errors. This is the standard system for which this
-    // test suite is designed.
-    //
-    // On some systems double and long double are the same thing. Then we run
-    // into a problem, because our representation of the infinitely precise
-    // result (passed in as reference above) can be off by as much as a half
-    // double precision ulp itself.  In this case, we inflate the reported error
-    // by half an ulp to take this into account.  A more correct and permanent
-    // fix would be to undertake refactoring the reference code to return
-    // results in this format:
-    //
-    //    typedef struct DoubleReference
-    //    { // true value = correctlyRoundedResult + ulps *
-    //    ulp(correctlyRoundedResult)        (infinitely precise)
-    //        double  correctlyRoundedResult;     // as best we can
-    //        double  ulps;                       // plus a fractional amount to
-    //        account for the difference
-    //    }DoubleReference;                       //     between infinitely
-    //    precise result and correctlyRoundedResult, in units of ulps.
-    //
-    // This would provide a useful higher-than-double precision format for
-    // everyone that we can use, and would solve a few problems with
-    // representing absolute errors below DBL_MIN and over DBL_MAX for systems
-    // that use a head to tail double double for long double.
+  // Deal with long double = double
+  // On most systems long double is a higher precision type than double. They provide either
+  // a 80-bit or greater floating point type, or they provide a head-tail double double format.
+  // That is sufficient to represent the accuracy of a floating point result to many more bits
+  // than double and we can calculate sub-ulp errors. This is the standard system for which this
+  // test suite is designed.
+  //
+  // On some systems double and long double are the same thing. Then we run into a problem,
+  // because our representation of the infinitely precise result (passed in as reference above)
+  // can be off by as much as a half double precision ulp itself.  In this case, we inflate the
+  // reported error by half an ulp to take this into account.  A more correct and permanent fix
+  // would be to undertake refactoring the reference code to return results in this format:
+  //
+  //    typedef struct DoubleReference
+  //    { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult)        (infinitely precise)
+  //        double  correctlyRoundedResult;     // as best we can
+  //        double  ulps;                       // plus a fractional amount to account for the difference
+  //    }DoubleReference;                       //     between infinitely precise result and correctlyRoundedResult, in units of ulps.
+  //
+  // This would provide a useful higher-than-double precision format for everyone that we can use,
+  // and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
+  // that use a head to tail double double for long double.
 
     int x;
     long double testVal = test;
@@ -1222,73 +1676,166 @@
     // First, handle special reference values
     if (isinf(reference))
     {
-        if (reference == testVal) return 0.0f;
+    if (reference == testVal)
+        return 0.0f;
 
-        return INFINITY;
+    return INFINITY;
     }
 
     if (isnan(reference))
     {
-        if (isnan(testVal)) return 0.0f;
+    if (isnan(testVal))
+        return 0.0f;
 
-        return INFINITY;
+    return INFINITY;
     }
 
-    if (0.0L != reference && 0.5L != frexpl(reference, &x))
+    if ( 0.0L != reference && 0.5L != frexpl(reference, &x) )
     { // Non-zero and Non-power of two
 
-        // allow correctly rounded results to pass through unmolested. (We might
-        // add error to it below.) There is something of a performance
-        // optimization here.
-        if (testVal == reference) return 0.0f;
+       // allow correctly rounded results to pass through unmolested. (We might add error to it below.)
+       // There is something of a performance optimization here.
+        if( testVal == reference )
+            return 0.0f;
 
         // The unbiased exponent of the ulp unit place
-        int ulp_exp =
-            DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1);
+        int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
 
         // Scale the exponent of the error
-        float result = (float)scalbnl(testVal - reference, ulp_exp);
+        float result = (float) scalbnl( testVal - reference, ulp_exp );
 
-        // account for rounding error in reference result on systems that do not
-        // have a higher precision floating point type (see above)
-        if (sizeof(long double) == sizeof(double))
-            result += copysignf(0.5f, result);
+        // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
+        if( sizeof(long double) == sizeof( double ) )
+            result += copysignf( 0.5f, result);
 
         return result;
     }
 
     // reference is a normal power of two or a zero
     // The unbiased exponent of the ulp unit place
-    int ulp_exp =
-        DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
+    int ulp_exp =  DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
 
-    // allow correctly rounded results to pass through unmolested. (We might add
-    // error to it below.) There is something of a performance optimization here
-    // too.
-    if (testVal == reference) return 0.0f;
+   // allow correctly rounded results to pass through unmolested. (We might add error to it below.)
+   // There is something of a performance optimization here too.
+    if( testVal == reference )
+        return 0.0f;
 
     // Scale the exponent of the error
-    float result = (float)scalbnl(testVal - reference, ulp_exp);
+    float result = (float) scalbnl( testVal - reference, ulp_exp );
 
-    // account for rounding error in reference result on systems that do not
-    // have a higher precision floating point type (see above)
-    if (sizeof(long double) == sizeof(double))
-        result += copysignf(0.5f, result);
+    // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
+    if( sizeof(long double) == sizeof( double ) )
+        result += copysignf( 0.5f, result);
 
     return result;
 }
 
-float Abs_Error(float test, double reference)
+float Abs_Error( float test, double reference )
 {
-    if (isnan(test) && isnan(reference)) return 0.0f;
-    return fabs((float)(reference - (double)test));
+  if( isnan(test) && isnan(reference) )
+    return 0.0f;
+  return fabs((float)(reference-(double)test));
 }
 
-cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
+/*
+#define HALF_MIN_EXP    -13
+#define HALF_MANT_DIG    11
+float Ulp_Error_Half( float test, double reference )
 {
-    if (0 == (x & (x - 1))) return x;
+    union{ double d; uint64_t u; }u;     u.d = reference;
 
-    while (x & (x - 1)) x &= x - 1;
+  // Note: This function presumes that someone has already tested whether the result is correctly,
+  // rounded before calling this function.  That test:
+  //
+  //    if( (float) reference == test )
+  //        return 0.0f;
+  //
+  // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+  // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+  // results.
 
-    return x + x;
+    double testVal = test;
+    if( u.u & 0x000fffffffffffffULL )
+    { // Non-power of two and NaN
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
+
+        // The unbiased exponent of the ulp unit place
+        int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
+
+        // Scale the exponent of the error
+        return (float) scalbn( testVal - reference, ulp_exp );
+    }
+
+    if( isinf( reference ) )
+    {
+        if( (double) test == reference )
+            return 0.0f;
+
+        return (float) (testVal - reference );
+    }
+
+    // reference is a normal power of two or a zero
+    int ulp_exp =  HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
+
+    // Scale the exponent of the error
+    return (float) scalbn( testVal - reference, ulp_exp );
 }
+*/
+
+
+#if defined( __APPLE__ )
+    #include <mach/mach_time.h>
+#endif
+
+uint64_t GetTime( void )
+{
+#if defined( __APPLE__ )
+    return mach_absolute_time();
+#elif defined(_WIN32) && defined(_MSC_VER)
+    return  ReadTime();
+#else
+    //mach_absolute_time is a high precision timer with precision < 1 microsecond.
+    #warning need accurate clock here.  Times are invalid.
+    return 0;
+#endif
+}
+
+
+#if defined(_WIN32) && defined (_MSC_VER)
+/* function is defined in "compat.h" */
+#else
+double SubtractTime( uint64_t endTime, uint64_t startTime )
+{
+    uint64_t diff = endTime - startTime;
+    static double conversion = 0.0;
+
+    if( 0.0 == conversion )
+    {
+#if defined( __APPLE__ )
+        mach_timebase_info_data_t info = {0,0};
+        kern_return_t   err = mach_timebase_info( &info );
+        if( 0 == err )
+            conversion = 1e-9 * (double) info.numer / (double) info.denom;
+#else
+    // This function consumes output from GetTime() above, and converts the time to secionds.
+    #warning need accurate ticks to seconds conversion factor here. Times are invalid.
+#endif
+    }
+
+    // strictly speaking we should also be subtracting out timer latency here
+    return conversion * (double) diff;
+}
+#endif
+
+cl_uint RoundUpToNextPowerOfTwo( cl_uint x )
+{
+    if( 0 == (x & (x-1)))
+        return x;
+
+    while( x & (x-1) )
+        x &= x-1;
+
+    return x+x;
+}
+

diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
index 3a6516b..99c8eb3 100644
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,55 +13,56 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-
-#include "reference_math.h"
 #include "harness/compat.h"
-
-#include <climits>
+#include "reference_math.h"
+#include <limits.h>
 
 #if !defined(_WIN32)
-#include <cstring>
+#include <string.h>
 #endif
 
-#include "utility.h"
+#include "Utility.h"
 
-#if defined(__SSE__)                                                           \
-    || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
-#include <xmmintrin.h>
+#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64)))
+    #include <xmmintrin.h>
 #endif
-#if defined(__SSE2__)                                                          \
-    || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
-#include <emmintrin.h>
+#if defined( __SSE2__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64)))
+    #include <emmintrin.h>
 #endif
 
 #ifndef M_PI_4
-#define M_PI_4 (M_PI / 4)
+    #define M_PI_4 (M_PI/4)
 #endif
 
+#define EVALUATE( x )       x
+#define CONCATENATE(x, y)  x ## EVALUATE(y)
+
 #pragma STDC FP_CONTRACT OFF
 static void __log2_ep(double *hi, double *lo, double x);
 
-typedef union {
+typedef union
+{
     uint64_t i;
     double d;
-} uint64d_t;
+}uint64d_t;
 
 static const uint64d_t _CL_NAN = { 0x7ff8000000000000ULL };
 
 #define cl_make_nan() _CL_NAN.d
 
-static double reduce1(double x)
+static double reduce1( double x );
+static double reduce1( double x )
 {
-    if (fabs(x) >= HEX_DBL(+, 1, 0, +, 53))
+    if( fabs(x) >= HEX_DBL( +, 1, 0, +, 53 ) )
     {
-        if (fabs(x) == INFINITY) return cl_make_nan();
+        if( fabs(x) == INFINITY )
+            return cl_make_nan();
 
-        return 0.0; // we patch up the sign for sinPi and cosPi later, since
-                    // they need different signs
+        return 0.0; //we patch up the sign for sinPi and cosPi later, since they need different signs
     }
 
     // Find the nearest multiple of 2
-    const double r = copysign(HEX_DBL(+, 1, 0, +, 53), x);
+    const double r = copysign( HEX_DBL( +, 1, 0, +, 53 ), x );
     double z = x + r;
     z -= r;
 
@@ -69,374 +70,382 @@
     return x - z;
 }
 
-double reference_acospi(double x) { return reference_acos(x) / M_PI; }
-double reference_asinpi(double x) { return reference_asin(x) / M_PI; }
-double reference_atanpi(double x) { return reference_atan(x) / M_PI; }
-double reference_atan2pi(double y, double x)
+/*
+static double reduceHalf( double x );
+static double reduceHalf( double x )
 {
-    return reference_atan2(y, x) / M_PI;
-}
-double reference_cospi(double x)
-{
-    if (reference_fabs(x) >= HEX_DBL(+, 1, 0, +, 52))
+    if( fabs(x) >= HEX_DBL( +, 1, 0, +, 52 ) )
     {
-        if (reference_fabs(x) == INFINITY) return cl_make_nan();
+        if( fabs(x) == INFINITY )
+            return cl_make_nan();
 
-        // Note this probably fails for odd values between 0x1.0p52 and
-        // 0x1.0p53. However, when starting with single precision inputs, there
-        // will be no odd values.
+        return 0.0; //we patch up the sign for sinPi and cosPi later, since they need different signs
+    }
+
+    // Find the nearest multiple of 1
+    const double r = copysign( HEX_DBL( +, 1, 0, +, 52 ), x );
+    double z = x + r;
+    z -= r;
+
+    // subtract it from x. Value is now in the range -0.5 <= x <= 0.5
+    return x - z;
+}
+*/
+
+double reference_acospi( double x) {  return reference_acos( x ) / M_PI;    }
+double reference_asinpi( double x) {  return reference_asin( x ) / M_PI;    }
+double reference_atanpi( double x) {  return reference_atan( x ) / M_PI;    }
+double reference_atan2pi( double y, double x ) { return reference_atan2( y, x) / M_PI; }
+double reference_cospi( double x)
+{
+    if( reference_fabs(x) >= HEX_DBL( +, 1, 0, +, 52 ) )
+    {
+        if( reference_fabs(x) == INFINITY )
+            return cl_make_nan();
+
+        //Note this probably fails for odd values between 0x1.0p52 and 0x1.0p53.
+        //However, when starting with single precision inputs, there will be no odd values.
 
         return 1.0;
     }
 
-    x = reduce1(x + 0.5);
+    x = reduce1(x+0.5);
 
     // reduce to [-0.5, 0.5]
-    if (x < -0.5)
+    if( x < -0.5 )
         x = -1 - x;
-    else if (x > 0.5)
+    else if ( x > 0.5 )
         x = 1 - x;
 
     // cosPi zeros are all +0
-    if (x == 0.0) return 0.0;
+    if( x == 0.0 )
+        return 0.0;
 
-    return reference_sin(x * M_PI);
+    return reference_sin( x * M_PI );
 }
 
-double reference_relaxed_cospi(double x) { return reference_cospi(x); }
+double reference_relaxed_divide( double x, double y ) { return (float)(((float) x ) / ( (float) y )); }
 
-double reference_relaxed_divide(double x, double y)
-{
-    return (float)(((float)x) / ((float)y));
-}
-
-double reference_divide(double x, double y) { return x / y; }
+double reference_divide( double x, double y ) { return x / y; }
 
 // Add a + b. If the result modulo overflowed, write 1 to *carry, otherwise 0
-static inline cl_ulong add_carry(cl_ulong a, cl_ulong b, cl_ulong *carry)
+static inline cl_ulong  add_carry( cl_ulong a, cl_ulong b, cl_ulong *carry )
 {
     cl_ulong result = a + b;
     *carry = result < a;
     return result;
 }
 
-// Subtract a - b. If the result modulo overflowed, write 1 to *carry, otherwise
-// 0
-static inline cl_ulong sub_carry(cl_ulong a, cl_ulong b, cl_ulong *carry)
+// Subtract a - b. If the result modulo overflowed, write 1 to *carry, otherwise 0
+static inline cl_ulong  sub_carry( cl_ulong a, cl_ulong b, cl_ulong *carry )
 {
     cl_ulong result = a - b;
     *carry = result > a;
     return result;
 }
 
-static float fallback_frexpf(float x, int *iptr)
+static float fallback_frexpf( float x, int *iptr )
 {
     cl_uint u, v;
     float fu, fv;
 
-    memcpy(&u, &x, sizeof(u));
+    memcpy( &u, &x, sizeof(u));
 
-    cl_uint exponent = u & 0x7f800000U;
+    cl_uint exponent = u &  0x7f800000U;
     cl_uint mantissa = u & ~0x7f800000U;
 
     // add 1 to the exponent
     exponent += 0x00800000U;
 
-    if ((cl_int)exponent < (cl_int)0x01000000)
+    if( (cl_int) exponent < (cl_int) 0x01000000 )
     { // subnormal, NaN, Inf
         mantissa |= 0x3f000000U;
 
         v = mantissa & 0xff800000U;
         u = mantissa;
-        memcpy(&fv, &v, sizeof(v));
-        memcpy(&fu, &u, sizeof(u));
+        memcpy( &fv, &v, sizeof(v));
+        memcpy( &fu, &u, sizeof(u));
 
         fu -= fv;
 
-        memcpy(&v, &fv, sizeof(v));
-        memcpy(&u, &fu, sizeof(u));
+        memcpy( &v, &fv, sizeof(v));
+        memcpy( &u, &fu, sizeof(u));
 
-        exponent = u & 0x7f800000U;
+        exponent = u &  0x7f800000U;
         mantissa = u & ~0x7f800000U;
 
-        *iptr = (exponent >> 23) + (-126 + 1 - 126);
+        *iptr = (exponent >> 23) + (-126 + 1 -126);
         u = mantissa | 0x3f000000U;
-        memcpy(&fu, &u, sizeof(u));
+        memcpy( &fu, &u, sizeof(u));
         return fu;
     }
 
     *iptr = (exponent >> 23) - 127;
     u = mantissa | 0x3f000000U;
-    memcpy(&fu, &u, sizeof(u));
+    memcpy( &fu, &u, sizeof(u));
     return fu;
 }
 
-static inline int extractf(float x, cl_uint *mant)
+static inline int extractf( float, cl_uint * );
+static inline int extractf( float x, cl_uint *mant )
 {
-    static float (*frexppf)(float, int *) = NULL;
+    static float (*frexppf)(float, int*) = NULL;
     int e;
 
     // verify that frexp works properly
-    if (NULL == frexppf)
+    if( NULL == frexppf )
     {
-        if (0.5f == frexpf(HEX_FLT(+, 1, 0, -, 130), &e) && e == -129)
+        if( 0.5f == frexpf( HEX_FLT( +, 1, 0, -, 130 ), &e ) && e == -129 )
             frexppf = frexpf;
         else
             frexppf = fallback_frexpf;
     }
 
-    *mant = (cl_uint)(HEX_FLT(+, 1, 0, +, 32) * fabsf(frexppf(x, &e)));
+    *mant = (cl_uint) (HEX_FLT( +, 1, 0, +, 32 ) * fabsf( frexppf( x, &e )));
     return e - 1;
 }
 
-// Shift right by shift bits. Any bits lost on the right side are bitwise OR'd
-// together and ORd into the LSB of the result
-static inline void shift_right_sticky_64(cl_ulong *p, int shift)
+// Shift right by shift bits. Any bits lost on the right side are bitwise OR'd together and ORd into the LSB of the result
+static inline void shift_right_sticky_64( cl_ulong *p, int shift );
+static inline void shift_right_sticky_64( cl_ulong *p, int shift )
 {
     cl_ulong sticky = 0;
     cl_ulong r = *p;
 
     // C doesn't handle shifts greater than the size of the variable dependably
-    if (shift >= 64)
+    if( shift >= 64 )
     {
         sticky |= (0 != r);
         r = 0;
     }
     else
     {
-        sticky |= (0 != (r << (64 - shift)));
+        sticky |= (0 != (r << (64-shift)));
         r >>= shift;
     }
 
     *p = r | sticky;
 }
 
-// Add two 64 bit mantissas. Bits that are below the LSB of the result are OR'd
-// into the LSB of the result
-static inline void add64(cl_ulong *p, cl_ulong c, int *exponent)
+// Add two 64 bit mantissas. Bits that are below the LSB of the result are OR'd into the LSB of the result
+static inline void add64( cl_ulong *p, cl_ulong c, int *exponent );
+static inline void add64( cl_ulong *p, cl_ulong c, int *exponent )
 {
     cl_ulong carry;
     c = add_carry(c, *p, &carry);
-    if (carry)
+    if( carry )
     {
-        carry = c & 1; // set aside sticky bit
-        c >>= 1; // right shift to deal with overflow
-        c |= carry
-            | 0x8000000000000000ULL; // or in carry bit, and sticky bit. The
-                                     // latter is to prevent rounding from
-                                     // believing we are exact half way case
-        *exponent = *exponent + 1; // adjust exponent
+        carry = c & 1;                              // set aside sticky bit
+        c >>= 1;                                    // right shift to deal with overflow
+        c |= carry | 0x8000000000000000ULL;         // or in carry bit, and sticky bit. The latter is to prevent rounding from believing we are exact half way case
+        *exponent = *exponent + 1;                  // adjust exponent
     }
 
     *p = c;
 }
 
 // IEEE-754 round to nearest, ties to even rounding
-static float round_to_nearest_even_float(cl_ulong p, int exponent)
+static float round_to_nearest_even_float( cl_ulong p, int exponent );
+static float round_to_nearest_even_float( cl_ulong p, int exponent )
 {
-    union {
-        cl_uint u;
-        cl_float d;
-    } u;
+    union{ cl_uint u; cl_float d;} u;
 
     // If mantissa is zero, return 0.0f
     if (p == 0) return 0.0f;
 
     // edges
-    if (exponent > 127)
+    if( exponent > 127 )
     {
-        volatile float r = exponent * CL_FLT_MAX; // signal overflow
+        volatile float r = exponent * CL_FLT_MAX;       // signal overflow
 
         // attempt to fool the compiler into not optimizing the above line away
-        if (r > CL_FLT_MAX) return INFINITY;
+        if( r > CL_FLT_MAX )
+            return INFINITY;
 
         return r;
     }
-    if (exponent == -150 && p > 0x8000000000000000ULL)
-        return HEX_FLT(+, 1, 0, -, 149);
-    if (exponent <= -150) return 0.0f;
+    if( exponent == -150 && p > 0x8000000000000000ULL)
+        return HEX_FLT( +, 1, 0, -, 149 );
+    if( exponent <= -150 )       return 0.0f;
 
-    // Figure out which bits go where
+    //Figure out which bits go where
     int shift = 8 + 32;
-    if (exponent < -126)
+    if( exponent < -126 )
     {
-        shift -= 126 + exponent; // subnormal: shift is not 52
-        exponent = -127; //            set exponent to 0
+        shift -= 126 + exponent;                    // subnormal: shift is not 52
+        exponent = -127;                            //            set exponent to 0
     }
     else
-        p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove
-                                    // it.
+        p &= 0x7fffffffffffffffULL;                 // normal: leading bit is implicit. Remove it.
 
     // Assemble the double (round toward zero)
-    u.u = (cl_uint)(p >> shift) | ((cl_uint)(exponent + 127) << 23);
+    u.u = (cl_uint)(p >> shift) | ((cl_uint) (exponent + 127) << 23);
 
     // put a representation of the residual bits into hi
-    p <<= (64 - shift);
+    p <<= (64-shift);
 
-    // round to nearest, ties to even  based on the unused portion of p
-    if (p < 0x8000000000000000ULL) return u.d;
-    if (p == 0x8000000000000000ULL)
-        u.u += u.u & 1U;
-    else
-        u.u++;
+    //round to nearest, ties to even  based on the unused portion of p
+    if( p < 0x8000000000000000ULL )        return u.d;
+    if( p == 0x8000000000000000ULL )       u.u += u.u & 1U;
+    else                                   u.u++;
 
     return u.d;
 }
 
-static float round_to_nearest_even_float_ftz(cl_ulong p, int exponent)
+static float round_to_nearest_even_float_ftz( cl_ulong p, int exponent );
+static float round_to_nearest_even_float_ftz( cl_ulong p, int exponent )
 {
     extern int gCheckTininessBeforeRounding;
 
-    union {
-        cl_uint u;
-        cl_float d;
-    } u;
+    union{ cl_uint u; cl_float d;} u;
     int shift = 8 + 32;
 
     // If mantissa is zero, return 0.0f
     if (p == 0) return 0.0f;
 
     // edges
-    if (exponent > 127)
+    if( exponent > 127 )
     {
-        volatile float r = exponent * CL_FLT_MAX; // signal overflow
+        volatile float r = exponent * CL_FLT_MAX;       // signal overflow
 
         // attempt to fool the compiler into not optimizing the above line away
-        if (r > CL_FLT_MAX) return INFINITY;
+        if( r > CL_FLT_MAX )
+        return INFINITY;
 
         return r;
     }
 
     // Deal with FTZ for gCheckTininessBeforeRounding
-    if (exponent < (gCheckTininessBeforeRounding - 127)) return 0.0f;
+    if( exponent < (gCheckTininessBeforeRounding - 127) )
+        return 0.0f;
 
-    if (exponent
-        == -127) // only happens for machines that check tininess after rounding
-        p = (p & 1) | (p >> 1);
+    if( exponent == -127 ) // only happens for machines that check tininess after rounding
+        p = (p&1) | (p>>1);
     else
-        p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove
-                                    // it.
+        p &= 0x7fffffffffffffffULL;     // normal: leading bit is implicit. Remove it.
 
     cl_ulong q = p;
 
 
     // Assemble the double (round toward zero)
-    u.u = (cl_uint)(q >> shift) | ((cl_uint)(exponent + 127) << 23);
+    u.u = (cl_uint)(q >> shift) | ((cl_uint) (exponent + 127) << 23);
 
     // put a representation of the residual bits into hi
-    q <<= (64 - shift);
+    q <<= (64-shift);
 
-    // round to nearest, ties to even  based on the unused portion of p
-    if (q > 0x8000000000000000ULL)
+    //round to nearest, ties to even  based on the unused portion of p
+    if( q > 0x8000000000000000ULL )
         u.u++;
-    else if (q == 0x8000000000000000ULL)
+    else if( q == 0x8000000000000000ULL )
         u.u += u.u & 1U;
 
     // Deal with FTZ for ! gCheckTininessBeforeRounding
-    if (0 == (u.u & 0x7f800000U)) return 0.0f;
+    if( 0 == (u.u & 0x7f800000U )  )
+        return 0.0f;
 
     return u.d;
 }
 
 
 // IEEE-754 round toward zero.
-static float round_toward_zero_float(cl_ulong p, int exponent)
+static float round_toward_zero_float( cl_ulong p, int exponent );
+static float round_toward_zero_float( cl_ulong p, int exponent )
 {
-    union {
-        cl_uint u;
-        cl_float d;
-    } u;
+    union{ cl_uint u; cl_float d;} u;
 
     // If mantissa is zero, return 0.0f
     if (p == 0) return 0.0f;
 
     // edges
-    if (exponent > 127)
+    if( exponent > 127 )
     {
-        volatile float r = exponent * CL_FLT_MAX; // signal overflow
+        volatile float r = exponent * CL_FLT_MAX;       // signal overflow
 
         // attempt to fool the compiler into not optimizing the above line away
-        if (r > CL_FLT_MAX) return CL_FLT_MAX;
+        if( r > CL_FLT_MAX )
+            return CL_FLT_MAX;
 
         return r;
     }
 
-    if (exponent <= -149) return 0.0f;
+    if( exponent <= -149 )
+        return 0.0f;
 
-    // Figure out which bits go where
+    //Figure out which bits go where
     int shift = 8 + 32;
-    if (exponent < -126)
+    if( exponent < -126 )
     {
-        shift -= 126 + exponent; // subnormal: shift is not 52
-        exponent = -127; //            set exponent to 0
+        shift -= 126 + exponent;                    // subnormal: shift is not 52
+        exponent = -127;                            //            set exponent to 0
     }
     else
-        p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove
-                                    // it.
+        p &= 0x7fffffffffffffffULL;                 // normal: leading bit is implicit. Remove it.
 
     // Assemble the double (round toward zero)
-    u.u = (cl_uint)(p >> shift) | ((cl_uint)(exponent + 127) << 23);
+    u.u = (cl_uint)(p >> shift) | ((cl_uint) (exponent + 127) << 23);
 
     return u.d;
 }
 
-static float round_toward_zero_float_ftz(cl_ulong p, int exponent)
+static float round_toward_zero_float_ftz( cl_ulong p, int exponent );
+static float round_toward_zero_float_ftz( cl_ulong p, int exponent )
 {
-    union {
-        cl_uint u;
-        cl_float d;
-    } u;
+    extern int gCheckTininessBeforeRounding;
+
+    union{ cl_uint u; cl_float d;} u;
     int shift = 8 + 32;
 
     // If mantissa is zero, return 0.0f
     if (p == 0) return 0.0f;
 
     // edges
-    if (exponent > 127)
+    if( exponent > 127 )
     {
-        volatile float r = exponent * CL_FLT_MAX; // signal overflow
+        volatile float r = exponent * CL_FLT_MAX;       // signal overflow
 
         // attempt to fool the compiler into not optimizing the above line away
-        if (r > CL_FLT_MAX) return CL_FLT_MAX;
+        if( r > CL_FLT_MAX )
+            return CL_FLT_MAX;
 
         return r;
     }
 
     // Deal with FTZ for gCheckTininessBeforeRounding
-    if (exponent < -126) return 0.0f;
+    if( exponent < -126 )
+        return 0.0f;
 
-    cl_ulong q = p &=
-        0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it.
+    cl_ulong q = p &= 0x7fffffffffffffffULL;     // normal: leading bit is implicit. Remove it.
 
     // Assemble the double (round toward zero)
-    u.u = (cl_uint)(q >> shift) | ((cl_uint)(exponent + 127) << 23);
+    u.u = (cl_uint)(q >> shift) | ((cl_uint) (exponent + 127) << 23);
 
     // put a representation of the residual bits into hi
-    q <<= (64 - shift);
+    q <<= (64-shift);
 
     return u.d;
 }
 
 // Subtract two significands.
-static inline void sub64(cl_ulong *c, cl_ulong p, cl_uint *signC, int *expC)
+static inline void sub64( cl_ulong *c, cl_ulong p, cl_uint *signC, int *expC );
+static inline void sub64( cl_ulong *c, cl_ulong p, cl_uint *signC, int *expC )
 {
     cl_ulong carry;
-    p = sub_carry(*c, p, &carry);
+    p = sub_carry( *c, p, &carry );
 
-    if (carry)
+    if( carry )
     {
         *signC ^= 0x80000000U;
         p = -p;
     }
 
     // normalize
-    if (p)
+    if( p )
     {
         int shift = 32;
         cl_ulong test = 1ULL << 32;
-        while (0 == (p & 0x8000000000000000ULL))
+        while( 0 == (p & 0x8000000000000000ULL))
         {
-            if (p < test)
+            if( p < test )
             {
                 p <<= shift;
                 *expC = *expC - shift;
@@ -449,60 +458,49 @@
     {
         // zero result.
         *expC = -200;
-        *signC =
-            0; // IEEE rules say a - a = +0 for all rounding modes except -inf
+        *signC = 0;     // IEEE rules say a - a = +0 for all rounding modes except -inf
     }
 
     *c = p;
 }
 
 
-float reference_fma(float a, float b, float c, int shouldFlush)
+float reference_fma( float a, float b, float c, int shouldFlush )
 {
     static const cl_uint kMSB = 0x80000000U;
 
     // Make bits accessible
-    union {
-        cl_uint u;
-        cl_float d;
-    } ua;
-    ua.d = a;
-    union {
-        cl_uint u;
-        cl_float d;
-    } ub;
-    ub.d = b;
-    union {
-        cl_uint u;
-        cl_float d;
-    } uc;
-    uc.d = c;
+    union{ cl_uint u; cl_float d; } ua; ua.d = a;
+    union{ cl_uint u; cl_float d; } ub; ub.d = b;
+    union{ cl_uint u; cl_float d; } uc; uc.d = c;
 
     // deal with Nans, infinities and zeros
-    if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b) || isinf(c)
-        || 0 == (ua.u & ~kMSB) || // a == 0, defeat host FTZ behavior
-        0 == (ub.u & ~kMSB) || // b == 0, defeat host FTZ behavior
-        0 == (uc.u & ~kMSB)) // c == 0, defeat host FTZ behavior
+    if( isnan( a ) || isnan( b ) || isnan(c)    ||
+        isinf( a ) || isinf( b ) || isinf(c)    ||
+        0 == ( ua.u & ~kMSB)                ||  // a == 0, defeat host FTZ behavior
+        0 == ( ub.u & ~kMSB)                ||  // b == 0, defeat host FTZ behavior
+        0 == ( uc.u & ~kMSB)                )   // c == 0, defeat host FTZ behavior
     {
         FPU_mode_type oldMode;
         RoundingMode oldRoundMode = kRoundToNearestEven;
-        if (isinf(c) && !isinf(a) && !isinf(b)) return (c + a) + b;
+        if( isinf( c ) && !isinf(a) && !isinf(b) )
+            return (c + a) + b;
 
-        if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
+        if (gIsInRTZMode)
+            oldRoundMode = set_round(kRoundTowardZero, kfloat);
 
-        memset(&oldMode, 0, sizeof(oldMode));
-        if (shouldFlush) ForceFTZ(&oldMode);
+        memset( &oldMode, 0, sizeof( oldMode ) );
+        if( shouldFlush )
+            ForceFTZ( &oldMode );
 
-        a = (float)reference_multiply(
-            a, b); // some risk that the compiler will insert a non-compliant
-                   // fma here on some platforms.
-        a = (float)reference_add(
-            a,
-            c); // We use STDC FP_CONTRACT OFF above to attempt to defeat that.
+        a = (float) reference_multiply( a, b );    // some risk that the compiler will insert a non-compliant fma here on some platforms.
+        a = (float) reference_add( a, c );           // We use STDC FP_CONTRACT OFF above to attempt to defeat that.
 
-        if (shouldFlush) RestoreFPState(&oldMode);
+        if( shouldFlush )
+            RestoreFPState( &oldMode );
 
-        if (gIsInRTZMode) set_round(oldRoundMode, kfloat);
+        if( gIsInRTZMode )
+            set_round(oldRoundMode, kfloat);
         return a;
     }
 
@@ -510,70 +508,67 @@
     //   exponent is a standard unbiased signed integer
     //   mantissa is a cl_uint, with leading non-zero bit positioned at the MSB
     cl_uint mantA, mantB, mantC;
-    int expA = extractf(a, &mantA);
-    int expB = extractf(b, &mantB);
-    int expC = extractf(c, &mantC);
-    cl_uint signC = uc.u & kMSB; // We'll need the sign bit of C later to decide
-                                 // if we are adding or subtracting
+    int expA = extractf( a, &mantA );
+    int expB = extractf( b, &mantB );
+    int expC = extractf( c, &mantC );
+    cl_uint signC = uc.u & kMSB;                // We'll need the sign bit of C later to decide if we are adding or subtracting
 
-    // exact product of A and B
+// exact product of A and B
     int exponent = expA + expB;
     cl_uint sign = (ua.u ^ ub.u) & kMSB;
-    cl_ulong product = (cl_ulong)mantA * (cl_ulong)mantB;
+    cl_ulong product = (cl_ulong) mantA * (cl_ulong) mantB;
 
     // renormalize -- 1.m * 1.n yields a number between 1.0 and 3.99999..
-    //  The MSB might not be set. If so, fix that. Otherwise, reflect the fact
-    //  that we got another power of two from the multiplication
-    if (0 == (0x8000000000000000ULL & product))
+    //  The MSB might not be set. If so, fix that. Otherwise, reflect the fact that we got another power of two from the multiplication
+    if( 0 == (0x8000000000000000ULL & product) )
         product <<= 1;
     else
-        exponent++; // 2**31 * 2**31 gives 2**62. If the MSB was set, then our
-                    // exponent increased.
+        exponent++;         // 2**31 * 2**31 gives 2**62. If the MSB was set, then our exponent increased.
 
-    // infinite precision add
-    cl_ulong addend = (cl_ulong)mantC << 32;
-    if (exponent >= expC)
+//infinite precision add
+    cl_ulong addend = (cl_ulong) mantC << 32;
+    if( exponent >= expC )
     {
         // Shift C relative to the product so that their exponents match
-        if (exponent > expC) shift_right_sticky_64(&addend, exponent - expC);
+        if( exponent > expC )
+            shift_right_sticky_64( &addend, exponent - expC );
 
         // Add
-        if (sign ^ signC)
-            sub64(&product, addend, &sign, &exponent);
+        if( sign ^ signC )
+            sub64( &product, addend, &sign, &exponent );
         else
-            add64(&product, addend, &exponent);
+            add64( &product, addend, &exponent );
     }
     else
     {
         // Shift the product relative to C so that their exponents match
-        shift_right_sticky_64(&product, expC - exponent);
+        shift_right_sticky_64( &product, expC - exponent );
 
         // add
-        if (sign ^ signC)
-            sub64(&addend, product, &signC, &expC);
+        if( sign ^ signC )
+            sub64( &addend, product, &signC, &expC );
         else
-            add64(&addend, product, &expC);
+            add64( &addend, product, &expC );
 
         product = addend;
         exponent = expC;
         sign = signC;
     }
 
-    // round to IEEE result -- we do not do flushing to zero here. That part is
-    // handled manually in ternary.c.
+    // round to IEEE result -- we do not do flushing to zero here. That part is handled manually in ternary.c.
     if (gIsInRTZMode)
     {
-        if (shouldFlush)
-            ua.d = round_toward_zero_float_ftz(product, exponent);
+        if( shouldFlush )
+            ua.d = round_toward_zero_float_ftz( product, exponent);
         else
-            ua.d = round_toward_zero_float(product, exponent);
+            ua.d = round_toward_zero_float( product, exponent);
     }
     else
     {
-        if (shouldFlush)
-            ua.d = round_to_nearest_even_float_ftz(product, exponent);
+        if( shouldFlush )
+            ua.d = round_to_nearest_even_float_ftz( product, exponent);
         else
-            ua.d = round_to_nearest_even_float(product, exponent);
+            ua.d = round_to_nearest_even_float( product, exponent);
     }
 
     // Set the sign
@@ -582,36 +577,35 @@
     return ua.d;
 }
 
-double reference_relaxed_exp10(double x) { return reference_exp10(x); }
-
-double reference_exp10(double x)
+double reference_relaxed_exp10( double x)
 {
-    return reference_exp2(x * HEX_DBL(+, 1, a934f0979a371, +, 1));
+  return reference_exp10(x);
 }
 
+double reference_exp10( double x) {   return reference_exp2( x * HEX_DBL( +, 1, a934f0979a371, +, 1 ) );    }
 
-int reference_ilogb(double x)
+
+int   reference_ilogb( double x )
 {
     extern int gDeviceILogb0, gDeviceILogbNaN;
-    union {
-        cl_double f;
-        cl_ulong u;
-    } u;
+    union { cl_double f; cl_ulong u;} u;
 
-    u.f = (float)x;
-    cl_int exponent = (cl_int)(u.u >> 52) & 0x7ff;
-    if (exponent == 0x7ff)
+    u.f = (float) x;
+    cl_int exponent = (cl_int) (u.u >> 52) & 0x7ff;
+    if( exponent == 0x7ff )
     {
-        if (u.u & 0x000fffffffffffffULL) return gDeviceILogbNaN;
+        if( u.u & 0x000fffffffffffffULL )
+            return gDeviceILogbNaN;
 
         return CL_INT_MAX;
     }
 
-    if (exponent == 0)
-    { // deal with denormals
-        u.f = x * HEX_DBL(+, 1, 0, +, 64);
-        exponent = (cl_int)(u.u >> 52) & 0x7ff;
-        if (exponent == 0) return gDeviceILogb0;
+    if( exponent == 0 )
+    {   // deal with denormals
+        u.f = x * HEX_DBL( +, 1, 0, +, 64 );
+        exponent = (cl_int) (u.u >> 52) & 0x7ff;
+        if( exponent == 0 )
+            return gDeviceILogb0;
 
         return exponent - (1023 + 64);
     }
@@ -619,205 +613,218 @@
     return exponent - 1023;
 }
 
-double reference_nan(cl_uint x)
+double reference_nan( cl_uint x )
 {
-    union {
-        cl_uint u;
-        cl_float f;
-    } u;
+    union{ cl_uint u; cl_float f; }u;
     u.u = x | 0x7fc00000U;
-    return (double)u.f;
+    return (double) u.f;
 }
 
-double reference_maxmag(double x, double y)
+double reference_maxmag( double x, double y )
 {
     double fabsx = fabs(x);
     double fabsy = fabs(y);
 
-    if (fabsx < fabsy) return y;
+    if( fabsx < fabsy )
+        return y;
 
-    if (fabsy < fabsx) return x;
+    if( fabsy < fabsx )
+        return x;
 
-    return reference_fmax(x, y);
+    return reference_fmax( x, y );
 }
 
-double reference_minmag(double x, double y)
+double reference_minmag( double x, double y )
 {
     double fabsx = fabs(x);
     double fabsy = fabs(y);
 
-    if (fabsx > fabsy) return y;
+    if( fabsx > fabsy )
+        return y;
 
-    if (fabsy > fabsx) return x;
+    if( fabsy > fabsx )
+        return x;
 
-    return reference_fmin(x, y);
+    return reference_fmin( x, y );
 }
 
-double reference_relaxed_mad(double a, double b, double c)
+//double my_nextafter( double x, double y ){  return (double) nextafterf( (float) x, (float) y ); }
+
+double reference_relaxed_mad( double a, double b, double c)
 {
-    return ((float)a) * ((float)b) + (float)c;
+  return ((float) a )* ((float) b) + (float) c;
 }
 
-double reference_mad(double a, double b, double c) { return a * b + c; }
+double reference_mad( double a, double b, double c )
+{
+    return a * b + c;
+}
 
-double reference_recip(double x) { return 1.0 / x; }
-double reference_rootn(double x, int i)
+double reference_recip( double x) {   return 1.0 / x; }
+double reference_rootn( double x, int i )
 {
 
-    // rootn ( x, 0 )  returns a NaN.
-    if (0 == i) return cl_make_nan();
+    //rootn ( x, 0 )  returns a NaN.
+    if( 0 == i )
+        return cl_make_nan();
 
-    // rootn ( x, n )  returns a NaN for x < 0 and n is even.
-    if (x < 0 && 0 == (i & 1)) return cl_make_nan();
+    //rootn ( x, n )  returns a NaN for x < 0 and n is even.
+    if( x < 0 && 0 == (i&1) )
+        return cl_make_nan();
 
-    if (x == 0.0)
+    if( x == 0.0 )
     {
-        switch (i & 0x80000001)
+        switch( i & 0x80000001 )
         {
-            // rootn ( +-0,  n ) is +0 for even n > 0.
-            case 0: return 0.0f;
+            //rootn ( +-0,  n ) is +0 for even n > 0.
+            case 0:
+                return 0.0f;
 
-            // rootn ( +-0,  n ) is +-0 for odd n > 0.
-            case 1: return x;
+            //rootn ( +-0,  n ) is +-0 for odd n > 0.
+            case 1:
+                return x;
 
-            // rootn ( +-0,  n ) is +inf for even n < 0.
-            case 0x80000000: return INFINITY;
+            //rootn ( +-0,  n ) is +inf for even n < 0.
+            case 0x80000000:
+                return INFINITY;
 
-            // rootn ( +-0,  n ) is +-inf for odd n < 0.
-            case 0x80000001: return copysign(INFINITY, x);
+            //rootn ( +-0,  n ) is +-inf for odd n < 0.
+            case 0x80000001:
+                return copysign(INFINITY, x);
         }
     }
 
     double sign = x;
     x = reference_fabs(x);
-    x = reference_exp2(reference_log2(x) / (double)i);
-    return reference_copysignd(x, sign);
+    x = reference_exp2( reference_log2(x) / (double) i );
+    return reference_copysignd( x, sign );
 }
 
-double reference_rsqrt(double x) { return 1.0 / reference_sqrt(x); }
-
-double reference_sinpi(double x)
+double reference_rsqrt( double x) {   return 1.0 / reference_sqrt(x);   }
+//double reference_sincos( double x, double *c ){ *c = cos(x); return sin(x); }
+double reference_sinpi( double x)
 {
     double r = reduce1(x);
 
     // reduce to [-0.5, 0.5]
-    if (r < -0.5)
+    if( r < -0.5 )
         r = -1 - r;
-    else if (r > 0.5)
+    else if ( r > 0.5 )
         r = 1 - r;
 
     // sinPi zeros have the same sign as x
-    if (r == 0.0) return reference_copysignd(0.0, x);
+    if( r == 0.0 )
+        return reference_copysignd(0.0, x);
 
-    return reference_sin(r * M_PI);
+    return reference_sin( r * M_PI );
 }
 
-double reference_relaxed_sinpi(double x) { return reference_sinpi(x); }
-
-double reference_tanpi(double x)
+double reference_tanpi( double x)
 {
     // set aside the sign  (allows us to preserve sign of -0)
-    double sign = reference_copysignd(1.0, x);
+    double sign = reference_copysignd( 1.0, x);
     double z = reference_fabs(x);
 
     // if big and even  -- caution: only works if x only has single precision
-    if (z >= HEX_DBL(+, 1, 0, +, 24))
+    if( z >= HEX_DBL( +, 1, 0, +, 24 ) )
     {
-        if (z == INFINITY) return x - x; // nan
+        if( z == INFINITY )
+            return x - x;       // nan
 
-        return reference_copysignd(
-            0.0, x); // tanpi ( n ) is copysign( 0.0, n)  for even integers n.
+        return reference_copysignd( 0.0, x);   // tanpi ( n ) is copysign( 0.0, n)  for even integers n.
     }
 
     // reduce to the range [ -0.5, 0.5 ]
-    double nearest = reference_rint(z); // round to nearest even places n + 0.5
-                                        // values in the right place for us
-    int i = (int)nearest; // test above against 0x1.0p24 avoids overflow here
+    double nearest = reference_rint( z );     // round to nearest even places n + 0.5 values in the right place for us
+    int i = (int) nearest;          // test above against 0x1.0p24 avoids overflow here
     z -= nearest;
 
-    // correction for odd integer x for the right sign of zero
-    if ((i & 1) && z == 0.0) sign = -sign;
+    //correction for odd integer x for the right sign of zero
+    if( (i&1) && z == 0.0 )
+        sign = -sign;
 
     // track changes to the sign
-    sign *= reference_copysignd(1.0, z); // really should just be an xor
-    z = reference_fabs(z); // remove the sign again
+    sign *= reference_copysignd(1.0, z);       // really should just be an xor
+    z = reference_fabs(z);                    // remove the sign again
 
     // reduce once more
-    // If we don't do this, rounding error in z * M_PI will cause us not to
-    // return infinities properly
-    if (z > 0.25)
+    // If we don't do this, rounding error in z * M_PI will cause us not to return infinities properly
+    if( z > 0.25 )
     {
         z = 0.5 - z;
-        return sign
-            / reference_tan(z * M_PI); // use system tan to get the right result
+        return sign / reference_tan( z * M_PI );      // use system tan to get the right result
     }
 
     //
-    return sign
-        * reference_tan(z * M_PI); // use system tan to get the right result
+    return sign * reference_tan( z * M_PI );          // use system tan to get the right result
 }
 
-double reference_pown(double x, int i) { return reference_pow(x, (double)i); }
-double reference_powr(double x, double y)
+double reference_pown( double x, int i) { return reference_pow( x, (double) i ); }
+double reference_powr( double x, double y )
 {
-    // powr ( x, y ) returns NaN for x < 0.
-    if (x < 0.0) return cl_make_nan();
+    //powr ( x, y ) returns NaN for x < 0.
+    if( x < 0.0 )
+        return cl_make_nan();
 
-    // powr ( x, NaN ) returns the NaN for x >= 0.
-    // powr ( NaN, y ) returns the NaN.
-    if (isnan(x) || isnan(y))
-        return x + y; // Note: behavior different here than for pow(1,NaN),
-                      // pow(NaN, 0)
+    //powr ( x, NaN ) returns the NaN for x >= 0.
+    //powr ( NaN, y ) returns the NaN.
+    if( isnan(x) || isnan(y) )
+        return x + y;       // Note: behavior different here than for pow(1,NaN), pow(NaN, 0)
 
-    if (x == 1.0)
+    if( x == 1.0 )
     {
-        // powr ( +1, +-inf ) returns NaN.
-        if (reference_fabs(y) == INFINITY) return cl_make_nan();
+        //powr ( +1, +-inf ) returns NaN.
+        if( reference_fabs(y) == INFINITY )
+            return cl_make_nan();
 
-        // powr ( +1, y ) is 1 for finite y.    (NaN handled above)
+        //powr ( +1, y ) is 1 for finite y.    (NaN handled above)
         return 1.0;
     }
 
-    if (y == 0.0)
+    if( y == 0.0 )
     {
-        // powr ( +inf, +-0 ) returns NaN.
-        // powr ( +-0, +-0 ) returns NaN.
-        if (x == 0.0 || x == INFINITY) return cl_make_nan();
+        //powr ( +inf, +-0 ) returns NaN.
+        //powr ( +-0, +-0 ) returns NaN.
+        if( x == 0.0 || x == INFINITY )
+            return cl_make_nan();
 
-        // powr ( x, +-0 ) is 1 for finite x > 0.  (x <= 0, NaN, INF already
-        // handled above)
+        //powr ( x, +-0 ) is 1 for finite x > 0.  (x <= 0, NaN, INF already handled above)
         return 1.0;
     }
 
-    if (x == 0.0)
+    if( x == 0.0 )
     {
-        // powr ( +-0, -inf) is +inf.
-        // powr ( +-0, y ) is +inf for finite y < 0.
-        if (y < 0.0) return INFINITY;
+        //powr ( +-0, -inf) is +inf.
+        //powr ( +-0, y ) is +inf for finite y < 0.
+        if( y < 0.0 )
+            return INFINITY;
 
-        // powr ( +-0, y ) is +0 for y > 0.    (NaN, y==0 handled above)
+        //powr ( +-0, y ) is +0 for y > 0.    (NaN, y==0 handled above)
         return 0.0;
     }
 
     // x = +inf
-    if (isinf(x))
+    if( isinf(x) )
     {
-        if (y < 0) return 0;
+        if( y < 0 )
+            return 0;
         return INFINITY;
     }
 
     double fabsx = reference_fabs(x);
     double fabsy = reference_fabs(y);
 
-    // y = +-inf cases
-    if (isinf(fabsy))
+    //y = +-inf cases
+    if( isinf(fabsy) )
     {
-        if (y < 0)
+        if( y < 0 )
         {
-            if (fabsx < 1) return INFINITY;
+            if( fabsx < 1 )
+                return INFINITY;
             return 0;
         }
-        if (fabsx < 1) return 0;
+        if( fabsx < 1 )
+            return 0;
         return INFINITY;
     }
 
@@ -829,209 +836,169 @@
     return result;
 }
 
-double reference_fract(double x, double *ip)
+double reference_fract( double x, double *ip )
 {
-    if (isnan(x))
-    {
+    if(isnan(x)) {
         *ip = cl_make_nan();
         return cl_make_nan();
     }
 
     float i;
-    float f = modff((float)x, &i);
-    if (f < 0.0)
+    float f = modff((float) x, &i );
+    if( f < 0.0 )
     {
         f = 1.0f + f;
         i -= 1.0f;
-        if (f == 1.0f) f = HEX_FLT(+, 1, fffffe, -, 1);
+        if( f == 1.0f )
+            f = HEX_FLT( +, 1, fffffe, -, 1 );
     }
     *ip = i;
     return f;
 }
 
 
-double reference_add(double x, double y)
+//double my_fdim( double x, double y){ return fdimf( (float) x, (float) y ); }
+double reference_add( double x, double y )
 {
-    volatile float a = (float)x;
-    volatile float b = (float)y;
+    volatile float a = (float) x;
+    volatile float b = (float) y;
 
-#if defined(__SSE__)                                                           \
-    || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
+#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64)))
     // defeat x87
-    __m128 va = _mm_set_ss((float)a);
-    __m128 vb = _mm_set_ss((float)b);
-    va = _mm_add_ss(va, vb);
-    _mm_store_ss((float *)&a, va);
+    __m128 va = _mm_set_ss( (float) a );
+    __m128 vb = _mm_set_ss( (float) b );
+    va = _mm_add_ss( va, vb );
+    _mm_store_ss( (float*) &a, va );
 #elif defined(__PPC__)
-    // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
-    // denorm's to zero. As such, the reference add with FTZ must be emulated in
-    // sw.
-    if (fpu_control & _FPU_MASK_NI)
-    {
-        union {
-            cl_uint u;
-            cl_float d;
-        } ua;
-        ua.d = a;
-        union {
-            cl_uint u;
-            cl_float d;
-        } ub;
-        ub.d = b;
-        cl_uint mantA, mantB;
-        cl_ulong addendA, addendB, sum;
-        int expA = extractf(a, &mantA);
-        int expB = extractf(b, &mantB);
-        cl_uint signA = ua.u & 0x80000000U;
-        cl_uint signB = ub.u & 0x80000000U;
+    // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes denorm's to zero.
+    // As such, the reference add with FTZ must be emulated in sw.
+    if (fpu_control & _FPU_MASK_NI) {
+      union{ cl_uint u; cl_float d; } ua; ua.d = a;
+      union{ cl_uint u; cl_float d; } ub; ub.d = b;
+      cl_uint mantA, mantB;
+      cl_ulong addendA, addendB, sum;
+      int expA = extractf( a, &mantA );
+      int expB = extractf( b, &mantB );
+      cl_uint signA = ua.u & 0x80000000U;
+      cl_uint signB = ub.u & 0x80000000U;
 
-        // Force matching exponents if an operand is 0
-        if (a == 0.0f)
-        {
-            expA = expB;
-        }
-        else if (b == 0.0f)
-        {
-            expB = expA;
-        }
+      // Force matching exponents if an operand is 0
+      if (a == 0.0f) {
+    expA = expB;
+      } else if (b == 0.0f) {
+    expB = expA;
+      }
 
-        addendA = (cl_ulong)mantA << 32;
-        addendB = (cl_ulong)mantB << 32;
+      addendA = (cl_ulong)mantA << 32;
+      addendB = (cl_ulong)mantB << 32;
 
-        if (expA >= expB)
-        {
-            // Shift B relative to the A so that their exponents match
-            if (expA > expB) shift_right_sticky_64(&addendB, expA - expB);
+      if (expA >= expB) {
+        // Shift B relative to the A so that their exponents match
+        if( expA > expB )
+      shift_right_sticky_64( &addendB, expA - expB );
 
-            // add
-            if (signA ^ signB)
-                sub64(&addendA, addendB, &signA, &expA);
-            else
-                add64(&addendA, addendB, &expA);
-        }
+        // add
+        if( signA ^ signB )
+      sub64( &addendA, addendB, &signA, &expA );
         else
-        {
-            // Shift the A relative to B so that their exponents match
-            shift_right_sticky_64(&addendA, expB - expA);
+      add64( &addendA, addendB, &expA );
+      } else  {
+        // Shift the A relative to B so that their exponents match
+        shift_right_sticky_64( &addendA, expB - expA );
 
-            // add
-            if (signA ^ signB)
-                sub64(&addendB, addendA, &signB, &expB);
-            else
-                add64(&addendB, addendA, &expB);
-
-            addendA = addendB;
-            expA = expB;
-            signA = signB;
-        }
-
-        // round to IEEE result
-        if (gIsInRTZMode)
-        {
-            ua.d = round_toward_zero_float_ftz(addendA, expA);
-        }
+        // add
+        if( signA ^ signB )
+      sub64( &addendB, addendA, &signB, &expB );
         else
-        {
-            ua.d = round_to_nearest_even_float_ftz(addendA, expA);
-        }
-        // Set the sign
-        ua.u |= signA;
-        a = ua.d;
-    }
-    else
-    {
-        a += b;
+      add64( &addendB, addendA, &expB );
+
+        addendA = addendB;
+        expA = expB;
+        signA = signB;
+      }
+
+      // round to IEEE result
+      if (gIsInRTZMode)    {
+    ua.d = round_toward_zero_float_ftz( addendA, expA );
+      } else {
+    ua.d = round_to_nearest_even_float_ftz( addendA, expA );
+      }
+      // Set the sign
+      ua.u |= signA;
+      a = ua.d;
+    } else {
+      a += b;
     }
 #else
     a += b;
 #endif
-    return (double)a;
-}
+    return (double) a;
+ }
 
 
-double reference_subtract(double x, double y)
+double reference_subtract( double x, double y )
 {
-    volatile float a = (float)x;
-    volatile float b = (float)y;
-#if defined(__SSE__)                                                           \
-    || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
+    volatile float a = (float) x;
+    volatile float b = (float) y;
+#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64)))
     // defeat x87
-    __m128 va = _mm_set_ss((float)a);
-    __m128 vb = _mm_set_ss((float)b);
-    va = _mm_sub_ss(va, vb);
-    _mm_store_ss((float *)&a, va);
+    __m128 va = _mm_set_ss( (float) a );
+    __m128 vb = _mm_set_ss( (float) b );
+    va = _mm_sub_ss( va, vb );
+    _mm_store_ss( (float*) &a, va );
 #else
     a -= b;
 #endif
     return a;
 }
 
-double reference_multiply(double x, double y)
+//double reference_divide( double x, double y ){ return (float) x / (float) y; }
+double reference_multiply( double x, double y)
 {
-    volatile float a = (float)x;
-    volatile float b = (float)y;
-#if defined(__SSE__)                                                           \
-    || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
+    volatile float a = (float) x;
+    volatile float b = (float) y;
+#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64)))
     // defeat x87
-    __m128 va = _mm_set_ss((float)a);
-    __m128 vb = _mm_set_ss((float)b);
-    va = _mm_mul_ss(va, vb);
-    _mm_store_ss((float *)&a, va);
+    __m128 va = _mm_set_ss( (float) a );
+    __m128 vb = _mm_set_ss( (float) b );
+    va = _mm_mul_ss( va, vb );
+    _mm_store_ss( (float*) &a, va );
 #elif defined(__PPC__)
-    // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
-    // denorm's to zero. As such, the reference multiply with FTZ must be
-    // emulated in sw.
-    if (fpu_control & _FPU_MASK_NI)
-    {
-        // extract exponent and mantissa
-        //   exponent is a standard unbiased signed integer
-        //   mantissa is a cl_uint, with leading non-zero bit positioned at the
-        //   MSB
-        union {
-            cl_uint u;
-            cl_float d;
-        } ua;
-        ua.d = a;
-        union {
-            cl_uint u;
-            cl_float d;
-        } ub;
-        ub.d = b;
-        cl_uint mantA, mantB;
-        int expA = extractf(a, &mantA);
-        int expB = extractf(b, &mantB);
+    // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes denorm's to zero.
+    // As such, the reference multiply with FTZ must be emulated in sw.
+    if (fpu_control & _FPU_MASK_NI) {
+      // extract exponent and mantissa
+      //   exponent is a standard unbiased signed integer
+      //   mantissa is a cl_uint, with leading non-zero bit positioned at the MSB
+      union{ cl_uint u; cl_float d; } ua; ua.d = a;
+      union{ cl_uint u; cl_float d; } ub; ub.d = b;
+      cl_uint mantA, mantB;
+      int expA = extractf( a, &mantA );
+      int expB = extractf( b, &mantB );
 
-        // exact product of A and B
-        int exponent = expA + expB;
-        cl_uint sign = (ua.u ^ ub.u) & 0x80000000U;
-        cl_ulong product = (cl_ulong)mantA * (cl_ulong)mantB;
+      // exact product of A and B
+      int exponent = expA + expB;
+      cl_uint sign = (ua.u ^ ub.u) & 0x80000000U;
+      cl_ulong product = (cl_ulong) mantA * (cl_ulong) mantB;
 
-        // renormalize -- 1.m * 1.n yields a number between 1.0 and 3.99999..
-        //  The MSB might not be set. If so, fix that. Otherwise, reflect the
-        //  fact that we got another power of two from the multiplication
-        if (0 == (0x8000000000000000ULL & product))
-            product <<= 1;
-        else
-            exponent++; // 2**31 * 2**31 gives 2**62. If the MSB was set, then
-                        // our exponent increased.
+      // renormalize -- 1.m * 1.n yields a number between 1.0 and 3.99999..
+      //  The MSB might not be set. If so, fix that. Otherwise, reflect the fact that we got another power of two from the multiplication
+      if( 0 == (0x8000000000000000ULL & product) )
+        product <<= 1;
+      else
+        exponent++;         // 2**31 * 2**31 gives 2**62. If the MSB was set, then our exponent increased.
 
-        // round to IEEE result -- we do not do flushing to zero here. That part
-        // is handled manually in ternary.c.
-        if (gIsInRTZMode)
-        {
-            ua.d = round_toward_zero_float_ftz(product, exponent);
-        }
-        else
-        {
-            ua.d = round_to_nearest_even_float_ftz(product, exponent);
-        }
-        // Set the sign
-        ua.u |= sign;
-        a = ua.d;
-    }
-    else
-    {
-        a *= b;
+      // round to IEEE result -- we do not do flushing to zero here. That part is handled manually in ternary.c.
+      if (gIsInRTZMode)    {
+    ua.d = round_toward_zero_float_ftz( product, exponent);
+      } else {
+    ua.d = round_to_nearest_even_float_ftz( product, exponent);
+      }
+      // Set the sign
+      ua.u |= sign;
+      a = ua.d;
+    } else {
+      a *= b;
     }
 #else
     a *= b;
@@ -1039,7 +1006,19 @@
     return a;
 }
 
-double reference_lgamma_r(double x, int *signp)
+/*double my_remquo( double x, double y, int *iptr )
+{
+    if( isnan(x) || isnan(y) ||
+        fabs(x) == INFINITY  ||
+        y == 0.0 )
+    {
+        *iptr = 0;
+        return NAN;
+    }
+
+    return (double) remquof( (float) x, (float) y, iptr );
+}*/
+double reference_lgamma_r( double x, int *signp )
 {
     // This is not currently tested
     *signp = 0;
@@ -1047,276 +1026,264 @@
 }
 
 
-int reference_isequal(double x, double y) { return x == y; }
-int reference_isfinite(double x) { return 0 != isfinite(x); }
-int reference_isgreater(double x, double y) { return x > y; }
-int reference_isgreaterequal(double x, double y) { return x >= y; }
-int reference_isinf(double x) { return 0 != isinf(x); }
-int reference_isless(double x, double y) { return x < y; }
-int reference_islessequal(double x, double y) { return x <= y; }
-int reference_islessgreater(double x, double y)
-{
-    return 0 != islessgreater(x, y);
-}
-int reference_isnan(double x) { return 0 != isnan(x); }
-int reference_isnormal(double x) { return 0 != isnormal((float)x); }
-int reference_isnotequal(double x, double y) { return x != y; }
-int reference_isordered(double x, double y) { return x == x && y == y; }
-int reference_isunordered(double x, double y) { return isnan(x) || isnan(y); }
-int reference_signbit(float x) { return 0 != signbit(x); }
+int reference_isequal( double x, double y ){ return x == y; }
+int reference_isfinite( double x ){ return 0 != isfinite(x); }
+int reference_isgreater( double x, double y ){ return x > y; }
+int reference_isgreaterequal( double x, double y ){ return x >= y; }
+int reference_isinf( double x ){ return 0 != isinf(x); }
+int reference_isless( double x, double y ){ return x < y; }
+int reference_islessequal( double x, double y ){ return x <= y; }
+int reference_islessgreater( double x, double y ){  return 0 != islessgreater( x, y ); }
+int reference_isnan( double x ){ return 0 != isnan( x ); }
+int reference_isnormal( double x ){ return 0 != isnormal( (float) x ); }
+int reference_isnotequal( double x, double y ){ return x != y; }
+int reference_isordered( double x, double y){ return x == x && y == y; }
+int reference_isunordered( double x, double y ){ return isnan(x) || isnan( y ); }
+int reference_signbit( float x ){ return 0 != signbit( x ); }
 
 #if 1 // defined( _MSC_VER )
 
-// Missing functions for win32
+//Missing functions for win32
 
 
-float reference_copysign(float x, float y)
+float reference_copysign( float x, float y )
 {
-    union {
-        float f;
-        cl_uint u;
-    } ux, uy;
-    ux.f = x;
-    uy.f = y;
+    union { float f; cl_uint u;} ux, uy;
+    ux.f = x; uy.f = y;
     ux.u &= 0x7fffffffU;
     ux.u |= uy.u & 0x80000000U;
     return ux.f;
 }
 
 
-double reference_copysignd(double x, double y)
+double reference_copysignd( double x, double y )
 {
-    union {
-        double f;
-        cl_ulong u;
-    } ux, uy;
-    ux.f = x;
-    uy.f = y;
+    union { double f; cl_ulong u;} ux, uy;
+    ux.f = x; uy.f = y;
     ux.u &= 0x7fffffffffffffffULL;
     ux.u |= uy.u & 0x8000000000000000ULL;
     return ux.f;
 }
 
 
-double reference_round(double x)
+double reference_round( double x )
 {
     double absx = reference_fabs(x);
-    if (absx < 0.5) return reference_copysignd(0.0, x);
+    if( absx < 0.5 )
+        return reference_copysignd( 0.0, x );
 
-    if (absx < HEX_DBL(+, 1, 0, +, 53))
-        x = reference_trunc(x + reference_copysignd(0.5, x));
+    if( absx < HEX_DBL( +, 1, 0, +, 53 ) )
+        x = reference_trunc( x + reference_copysignd( 0.5, x ) );
 
     return x;
 }
 
-double reference_trunc(double x)
+double reference_trunc( double x )
 {
-    if (fabs(x) < HEX_DBL(+, 1, 0, +, 53))
+    if( fabs(x) < HEX_DBL( +, 1, 0, +, 53 ) )
     {
-        cl_long l = (cl_long)x;
+        cl_long l = (cl_long) x;
 
-        return reference_copysignd((double)l, x);
+        return reference_copysignd( (double) l, x );
     }
 
     return x;
 }
 
 #ifndef FP_ILOGB0
-#define FP_ILOGB0 INT_MIN
+    #define FP_ILOGB0   INT_MIN
 #endif
 
 #ifndef FP_ILOGBNAN
-#define FP_ILOGBNAN INT_MAX
+    #define FP_ILOGBNAN   INT_MAX
 #endif
 
 
-double reference_cbrt(double x)
-{
-    return reference_copysignd(reference_pow(reference_fabs(x), 1.0 / 3.0), x);
-}
 
-double reference_rint(double x)
+double reference_cbrt(double x){ return reference_copysignd( reference_pow( reference_fabs(x), 1.0/3.0 ), x ); }
+
+/*
+double reference_scalbn(double x, int i)
+{ // suitable for checking single precision scalbnf only
+
+    if( i > 300 )
+        return copysign( INFINITY, x);
+    if( i < -300 )
+        return copysign( 0.0, x);
+
+    union{ cl_ulong u; double d;} u;
+    u.u = ((cl_ulong) i + 1023) << 52;
+
+    return x * u.d;
+}
+*/
+
+double reference_rint( double x )
 {
-    if (reference_fabs(x) < HEX_DBL(+, 1, 0, +, 52))
+    if( reference_fabs(x) < HEX_DBL( +, 1, 0, +, 52 )  )
     {
-        double magic = reference_copysignd(HEX_DBL(+, 1, 0, +, 52), x);
+        double magic = reference_copysignd( HEX_DBL( +, 1, 0, +, 52 ), x );
         double rounded = (x + magic) - magic;
-        x = reference_copysignd(rounded, x);
+        x = reference_copysignd( rounded, x );
     }
 
     return x;
 }
 
-double reference_acosh(double x)
+double reference_acosh( double x )
 { // not full precision. Sufficient precision to cover float
-    if (isnan(x)) return x + x;
+    if( isnan(x) )
+        return x + x;
 
-    if (x < 1.0) return cl_make_nan();
+    if( x < 1.0 )
+        return cl_make_nan();
 
-    return reference_log(x + reference_sqrt(x + 1) * reference_sqrt(x - 1));
+    return reference_log( x + reference_sqrt(x + 1) * reference_sqrt(x-1) );
 }
 
-double reference_asinh(double x)
+double reference_asinh( double x )
 {
-    /*
-     * ====================================================
-     * This function is from fdlibm: http://www.netlib.org
-     *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * ====================================================
-     */
-    if (isnan(x) || isinf(x)) return x + x;
+/*
+ * ====================================================
+ * This function is from fdlibm: http://www.netlib.org
+ *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+    if( isnan(x) || isinf(x) )
+        return x + x;
 
     double absx = reference_fabs(x);
-    if (absx < HEX_DBL(+, 1, 0, -, 28)) return x;
+    if( absx < HEX_DBL( +, 1, 0, -, 28 ) )
+        return x;
 
     double sign = reference_copysignd(1.0, x);
 
-    if (absx > HEX_DBL(+, 1, 0, +, 28))
-        return sign
-            * (reference_log(absx)
-               + 0.693147180559945309417232121458176568); // log(2)
+    if( absx > HEX_DBL( +, 1, 0, +, 28 ) )
+        return sign * (reference_log( absx ) + 0.693147180559945309417232121458176568);    // log(2)
 
-    if (absx > 2.0)
-        return sign
-            * reference_log(2.0 * absx
-                            + 1.0 / (reference_sqrt(x * x + 1.0) + absx));
+    if( absx > 2.0 )
+        return sign * reference_log( 2.0 * absx + 1.0 / (reference_sqrt( x * x + 1.0 ) + absx));
 
-    return sign
-        * reference_log1p(absx + x * x / (1.0 + reference_sqrt(1.0 + x * x)));
+    return sign * reference_log1p( absx + x*x / (1.0 + reference_sqrt(1.0 + x*x)));
 }
 
 
-double reference_atanh(double x)
+double reference_atanh( double x )
 {
-    /*
-     * ====================================================
-     * This function is from fdlibm: http://www.netlib.org
-     *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * ====================================================
-     */
-    if (isnan(x)) return x + x;
+/*
+ * ====================================================
+ * This function is from fdlibm: http://www.netlib.org
+ *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+    if( isnan(x)  )
+        return x + x;
 
-    double signed_half = reference_copysignd(0.5, x);
+    double signed_half = reference_copysignd( 0.5, x );
     x = reference_fabs(x);
-    if (x > 1.0) return cl_make_nan();
+    if( x > 1.0 )
+        return cl_make_nan();
 
-    if (x < 0.5)
-        return signed_half * reference_log1p(2.0 * (x + x * x / (1 - x)));
+    if( x < 0.5 )
+        return signed_half * reference_log1p( 2.0 * ( x + x*x / (1-x) ) );
 
-    return signed_half * reference_log1p(2.0 * x / (1 - x));
+    return signed_half * reference_log1p(2.0 * x / (1-x));
 }
 
-double reference_relaxed_atan(double x) { return reference_atan(x); }
+double reference_relaxed_exp2( double x )
+{
+  return reference_exp2(x);
+}
 
-double reference_relaxed_exp2(double x) { return reference_exp2(x); }
-
-double reference_exp2(double x)
-{ // Note: only suitable for verifying single precision. Doesn't have range of a
-  // full double exp2 implementation.
-    if (x == 0.0) return 1.0;
+double reference_exp2( double x )
+{ // Note: only suitable for verifying single precision. Doesn't have range of a full double exp2 implementation.
+    if( x == 0.0 )
+        return 1.0;
 
     // separate x into fractional and integer parts
-    double i = reference_rint(x); // round to nearest integer
+    double i = reference_rint( x );        // round to nearest integer
 
-    if (i < -150) return 0.0;
+    if( i < -150 )
+        return 0.0;
 
-    if (i > 129) return INFINITY;
+    if( i > 129 )
+        return INFINITY;
 
-    double f = x - i; // -0.5 <= f <= 0.5
+    double f = x - i;            // -0.5 <= f <= 0.5
 
     // find exp2(f)
     // calculate as p(f) = (exp2(f)-1)/f
     //              exp2(f) = f * p(f) + 1
     // p(f) is a minimax polynomial with error within 0x1.c1fd80f0d1ab7p-50
 
-    double p = 0.693147180560184539289
-        + (0.240226506955902863183
-           + (0.055504108656833424373
-              + (0.009618129212846484796
-                 + (0.001333355902958566035
-                    + (0.000154034191902497930
-                       + (0.000015252317761038105
-                          + (0.000001326283129417092
-                             + 0.000000102593187638680 * f)
-                              * f)
-                           * f)
-                        * f)
-                     * f)
-                  * f)
-               * f)
-            * f;
+    double p = 0.693147180560184539289 +
+               (0.240226506955902863183 +
+               (0.055504108656833424373 +
+               (0.009618129212846484796 +
+               (0.001333355902958566035 +
+               (0.000154034191902497930 +
+               (0.000015252317761038105 +
+               (0.000001326283129417092 + 0.000000102593187638680 * f)*f)*f)*f)*f)*f)*f)*f;
     f *= p;
     f += 1.0;
 
     // scale by 2 ** i
-    union {
-        cl_ulong u;
-        double d;
-    } u;
-    int exponent = (int)i + 1023;
-    u.u = (cl_ulong)exponent << 52;
+    union{ cl_ulong u; double d; } u;
+    int exponent = (int) i + 1023;
+    u.u = (cl_ulong) exponent << 52;
 
     return f * u.d;
 }
 
 
-double reference_expm1(double x)
-{ // Note: only suitable for verifying single precision. Doesn't have range of a
-  // full double expm1 implementation. It is only accurate to 47 bits or less.
+double reference_expm1( double x )
+{ // Note: only suitable for verifying single precision. Doesn't have range of a full double expm1 implementation. It is only accurate to 47 bits or less.
 
     // early out for small numbers and NaNs
-    if (!(reference_fabs(x) > HEX_DBL(+, 1, 0, -, 24))) return x;
+    if( ! (reference_fabs(x) > HEX_DBL( +, 1, 0, -, 24 )) )
+        return x;
 
     // early out for large negative numbers
-    if (x < -130.0) return -1.0;
+    if( x < -130.0 )
+        return -1.0;
 
     // early out for large positive numbers
-    if (x > 100.0) return INFINITY;
+    if( x > 100.0 )
+        return INFINITY;
 
     // separate x into fractional and integer parts
-    double i = reference_rint(x); // round to nearest integer
-    double f = x - i; // -0.5 <= f <= 0.5
+    double i = reference_rint( x );        // round to nearest integer
+    double f = x - i;            // -0.5 <= f <= 0.5
 
     // reduce f to the range -0.0625 .. f.. 0.0625
-    int index = (int)(f * 16.0) + 8; // 0...16
+    int index = (int) (f * 16.0) + 8;       // 0...16
 
-    static const double reduction[17] = { -0.5,  -0.4375, -0.375, -0.3125,
-                                          -0.25, -0.1875, -0.125, -0.0625,
-                                          0.0,   +0.0625, +0.125, +0.1875,
-                                          +0.25, +0.3125, +0.375, +0.4375,
-                                          +0.5 };
+    static const double reduction[17] = { -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625,
+                                           0.0,
+                                          +0.0625, +0.125, +0.1875, +0.25, +0.3125, +0.375, +0.4375, +0.5  };
 
 
     // exponentials[i] = expm1(reduction[i])
-    static const double exponentials[17] = {
-        HEX_DBL(-, 1, 92e9a0720d3ec, -, 2),
-        HEX_DBL(-, 1, 6adb1cd9205ee, -, 2),
-        HEX_DBL(-, 1, 40373d42ce2e3, -, 2),
-        HEX_DBL(-, 1, 12d35a41ba104, -, 2),
-        HEX_DBL(-, 1, c5041854df7d4, -, 3),
-        HEX_DBL(-, 1, 5e25fb4fde211, -, 3),
-        HEX_DBL(-, 1, e14aed893eef4, -, 4),
-        HEX_DBL(-, 1, f0540438fd5c3, -, 5),
-        HEX_DBL(+, 0, 0, +, 0),
-        HEX_DBL(+, 1, 082b577d34ed8, -, 4),
-        HEX_DBL(+, 1, 10b022db7ae68, -, 3),
-        HEX_DBL(+, 1, a65c0b85ac1a9, -, 3),
-        HEX_DBL(+, 1, 22d78f0fa061a, -, 2),
-        HEX_DBL(+, 1, 77a45d8117fd5, -, 2),
-        HEX_DBL(+, 1, d1e944f6fbdaa, -, 2),
-        HEX_DBL(+, 1, 190048ef6002, -, 1),
-        HEX_DBL(+, 1, 4c2531c3c0d38, -, 1),
-    };
+    static const double exponentials[17] = {    HEX_DBL( -, 1, 92e9a0720d3ec, -, 2 ),    HEX_DBL( -, 1, 6adb1cd9205ee, -, 2 ),
+                                                HEX_DBL( -, 1, 40373d42ce2e3, -, 2 ),    HEX_DBL( -, 1, 12d35a41ba104, -, 2 ),
+                                                HEX_DBL( -, 1, c5041854df7d4, -, 3 ),    HEX_DBL( -, 1, 5e25fb4fde211, -, 3 ),
+                                                HEX_DBL( -, 1, e14aed893eef4, -, 4 ),    HEX_DBL( -, 1, f0540438fd5c3, -, 5 ),
+                                                HEX_DBL( +, 0, 0,             +, 0 ),
+                                                HEX_DBL( +, 1, 082b577d34ed8, -, 4 ),    HEX_DBL( +, 1, 10b022db7ae68, -, 3 ),
+                                                HEX_DBL( +, 1, a65c0b85ac1a9, -, 3 ),    HEX_DBL( +, 1, 22d78f0fa061a, -, 2 ),
+                                                HEX_DBL( +, 1, 77a45d8117fd5, -, 2 ),    HEX_DBL( +, 1, d1e944f6fbdaa, -, 2 ),
+                                                HEX_DBL( +, 1, 190048ef6002,  -, 1 ),    HEX_DBL( +, 1, 4c2531c3c0d38, -, 1 ),
+                                            };
 
 
     f -= reduction[index];
@@ -1324,368 +1291,223 @@
     // find expm1(f)
     // calculate as p(f) = (exp(f)-1)/f
     //              expm1(f) = f * p(f)
-    // p(f) is a minimax polynomial with error within 0x1.1d7693618d001p-48 over
-    // the range +- 0.0625
-    double p = 0.999999999999998001599
-        + (0.499999999999839628284
-           + (0.166666666672817459505
-              + (0.041666666612283048687
-                 + (0.008333330214567431435
-                    + (0.001389005319303770070 + 0.000198833381525156667 * f)
-                        * f)
-                     * f)
-                  * f)
-               * f)
-            * f;
+    // p(f) is a minimax polynomial with error within 0x1.1d7693618d001p-48 over the range +- 0.0625
+    double p = 0.999999999999998001599 +
+               (0.499999999999839628284 +
+               (0.166666666672817459505 +
+               (0.041666666612283048687 +
+               (0.008333330214567431435 +
+               (0.001389005319303770070 + 0.000198833381525156667 * f)*f)*f)*f)*f)*f;
     f *= p; // expm1( reduced f )
 
     // expm1(f) = (exmp1( reduced_f) + 1.0) * ( exponentials[index] + 1 ) - 1
-    //          =  exmp1( reduced_f) * exponentials[index] + exmp1( reduced_f) +
-    //          exponentials[index] + 1 -1 =  exmp1( reduced_f) *
-    //          exponentials[index] + exmp1( reduced_f) + exponentials[index]
-    f += exponentials[index] + f * exponentials[index];
+    //          =  exmp1( reduced_f) * exponentials[index] + exmp1( reduced_f) + exponentials[index] + 1 -1
+    //          =  exmp1( reduced_f) * exponentials[index] + exmp1( reduced_f) + exponentials[index]
+    f +=  exponentials[index] + f * exponentials[index];
 
     // scale by e ** i
-    int exponent = (int)i;
-    if (0 == exponent) return f; // precise answer for x near 1
+    int exponent = (int) i;
+    if( 0 == exponent )
+        return f;       // precise answer for x near 1
 
     // table of e**(i-150)
-    static const double exp_table[128 + 150 + 1] = {
-        HEX_DBL(+, 1, 82e16284f5ec5, -, 217),
-        HEX_DBL(+, 1, 06e9996332ba1, -, 215),
-        HEX_DBL(+, 1, 6555cb289e44b, -, 214),
-        HEX_DBL(+, 1, e5ab364643354, -, 213),
-        HEX_DBL(+, 1, 4a0bd18e64df7, -, 211),
-        HEX_DBL(+, 1, c094499cc578e, -, 210),
-        HEX_DBL(+, 1, 30d759323998c, -, 208),
-        HEX_DBL(+, 1, 9e5278ab1d4cf, -, 207),
-        HEX_DBL(+, 1, 198fa3f30be25, -, 205),
-        HEX_DBL(+, 1, 7eae636d6144e, -, 204),
-        HEX_DBL(+, 1, 040f1036f4863, -, 202),
-        HEX_DBL(+, 1, 6174e477a895f, -, 201),
-        HEX_DBL(+, 1, e065b82dd95a, -, 200),
-        HEX_DBL(+, 1, 4676be491d129, -, 198),
-        HEX_DBL(+, 1, bbb5da5f7c823, -, 197),
-        HEX_DBL(+, 1, 2d884eef5fdcb, -, 195),
-        HEX_DBL(+, 1, 99d3397ab8371, -, 194),
-        HEX_DBL(+, 1, 1681497ed15b3, -, 192),
-        HEX_DBL(+, 1, 7a870f597fdbd, -, 191),
-        HEX_DBL(+, 1, 013c74edba307, -, 189),
-        HEX_DBL(+, 1, 5d9ec4ada7938, -, 188),
-        HEX_DBL(+, 1, db2edfd20fa7c, -, 187),
-        HEX_DBL(+, 1, 42eb9f39afb0b, -, 185),
-        HEX_DBL(+, 1, b6e4f282b43f4, -, 184),
-        HEX_DBL(+, 1, 2a42764857b19, -, 182),
-        HEX_DBL(+, 1, 9560792d19314, -, 181),
-        HEX_DBL(+, 1, 137b6ce8e052c, -, 179),
-        HEX_DBL(+, 1, 766b45dd84f18, -, 178),
-        HEX_DBL(+, 1, fce362fe6e7d, -, 177),
-        HEX_DBL(+, 1, 59d34dd8a5473, -, 175),
-        HEX_DBL(+, 1, d606847fc727a, -, 174),
-        HEX_DBL(+, 1, 3f6a58b795de3, -, 172),
-        HEX_DBL(+, 1, b2216c6efdac1, -, 171),
-        HEX_DBL(+, 1, 2705b5b153fb8, -, 169),
-        HEX_DBL(+, 1, 90fa1509bd50d, -, 168),
-        HEX_DBL(+, 1, 107df698da211, -, 166),
-        HEX_DBL(+, 1, 725ae6e7b9d35, -, 165),
-        HEX_DBL(+, 1, f75d6040aeff6, -, 164),
-        HEX_DBL(+, 1, 56126259e093c, -, 162),
-        HEX_DBL(+, 1, d0ec7df4f7bd4, -, 161),
-        HEX_DBL(+, 1, 3bf2cf6722e46, -, 159),
-        HEX_DBL(+, 1, ad6b22f55db42, -, 158),
-        HEX_DBL(+, 1, 23d1f3e5834a, -, 156),
-        HEX_DBL(+, 1, 8c9feab89b876, -, 155),
-        HEX_DBL(+, 1, 0d88cf37f00dd, -, 153),
-        HEX_DBL(+, 1, 6e55d2bf838a7, -, 152),
-        HEX_DBL(+, 1, f1e6b68529e33, -, 151),
-        HEX_DBL(+, 1, 525be4e4e601d, -, 149),
-        HEX_DBL(+, 1, cbe0a45f75eb1, -, 148),
-        HEX_DBL(+, 1, 3884e838aea68, -, 146),
-        HEX_DBL(+, 1, a8c1f14e2af5d, -, 145),
-        HEX_DBL(+, 1, 20a717e64a9bd, -, 143),
-        HEX_DBL(+, 1, 8851d84118908, -, 142),
-        HEX_DBL(+, 1, 0a9bdfb02d24, -, 140),
-        HEX_DBL(+, 1, 6a5bea046b42e, -, 139),
-        HEX_DBL(+, 1, ec7f3b269efa8, -, 138),
-        HEX_DBL(+, 1, 4eafb87eab0f2, -, 136),
-        HEX_DBL(+, 1, c6e2d05bbc, -, 135),
-        HEX_DBL(+, 1, 35208867c2683, -, 133),
-        HEX_DBL(+, 1, a425b317eeacd, -, 132),
-        HEX_DBL(+, 1, 1d8508fa8246a, -, 130),
-        HEX_DBL(+, 1, 840fbc08fdc8a, -, 129),
-        HEX_DBL(+, 1, 07b7112bc1ffe, -, 127),
-        HEX_DBL(+, 1, 666d0dad2961d, -, 126),
-        HEX_DBL(+, 1, e726c3f64d0fe, -, 125),
-        HEX_DBL(+, 1, 4b0dc07cabf98, -, 123),
-        HEX_DBL(+, 1, c1f2daf3b6a46, -, 122),
-        HEX_DBL(+, 1, 31c5957a47de2, -, 120),
-        HEX_DBL(+, 1, 9f96445648b9f, -, 119),
-        HEX_DBL(+, 1, 1a6baeadb4fd1, -, 117),
-        HEX_DBL(+, 1, 7fd974d372e45, -, 116),
-        HEX_DBL(+, 1, 04da4d1452919, -, 114),
-        HEX_DBL(+, 1, 62891f06b345, -, 113),
-        HEX_DBL(+, 1, e1dd273aa8a4a, -, 112),
-        HEX_DBL(+, 1, 4775e0840bfdd, -, 110),
-        HEX_DBL(+, 1, bd109d9d94bda, -, 109),
-        HEX_DBL(+, 1, 2e73f53fba844, -, 107),
-        HEX_DBL(+, 1, 9b138170d6bfe, -, 106),
-        HEX_DBL(+, 1, 175af0cf60ec5, -, 104),
-        HEX_DBL(+, 1, 7baee1bffa80b, -, 103),
-        HEX_DBL(+, 1, 02057d1245ceb, -, 101),
-        HEX_DBL(+, 1, 5eafffb34ba31, -, 100),
-        HEX_DBL(+, 1, dca23bae16424, -, 99),
-        HEX_DBL(+, 1, 43e7fc88b8056, -, 97),
-        HEX_DBL(+, 1, b83bf23a9a9eb, -, 96),
-        HEX_DBL(+, 1, 2b2b8dd05b318, -, 94),
-        HEX_DBL(+, 1, 969d47321e4cc, -, 93),
-        HEX_DBL(+, 1, 1452b7723aed2, -, 91),
-        HEX_DBL(+, 1, 778fe2497184c, -, 90),
-        HEX_DBL(+, 1, fe7116182e9cc, -, 89),
-        HEX_DBL(+, 1, 5ae191a99585a, -, 87),
-        HEX_DBL(+, 1, d775d87da854d, -, 86),
-        HEX_DBL(+, 1, 4063f8cc8bb98, -, 84),
-        HEX_DBL(+, 1, b374b315f87c1, -, 83),
-        HEX_DBL(+, 1, 27ec458c65e3c, -, 81),
-        HEX_DBL(+, 1, 923372c67a074, -, 80),
-        HEX_DBL(+, 1, 1152eaeb73c08, -, 78),
-        HEX_DBL(+, 1, 737c5645114b5, -, 77),
-        HEX_DBL(+, 1, f8e6c24b5592e, -, 76),
-        HEX_DBL(+, 1, 571db733a9d61, -, 74),
-        HEX_DBL(+, 1, d257d547e083f, -, 73),
-        HEX_DBL(+, 1, 3ce9b9de78f85, -, 71),
-        HEX_DBL(+, 1, aebabae3a41b5, -, 70),
-        HEX_DBL(+, 1, 24b6031b49bda, -, 68),
-        HEX_DBL(+, 1, 8dd5e1bb09d7e, -, 67),
-        HEX_DBL(+, 1, 0e5b73d1ff53d, -, 65),
-        HEX_DBL(+, 1, 6f741de1748ec, -, 64),
-        HEX_DBL(+, 1, f36bd37f42f3e, -, 63),
-        HEX_DBL(+, 1, 536452ee2f75c, -, 61),
-        HEX_DBL(+, 1, cd480a1b7482, -, 60),
-        HEX_DBL(+, 1, 39792499b1a24, -, 58),
-        HEX_DBL(+, 1, aa0de4bf35b38, -, 57),
-        HEX_DBL(+, 1, 2188ad6ae3303, -, 55),
-        HEX_DBL(+, 1, 898471fca6055, -, 54),
-        HEX_DBL(+, 1, 0b6c3afdde064, -, 52),
-        HEX_DBL(+, 1, 6b7719a59f0e, -, 51),
-        HEX_DBL(+, 1, ee001eed62aa, -, 50),
-        HEX_DBL(+, 1, 4fb547c775da8, -, 48),
-        HEX_DBL(+, 1, c8464f7616468, -, 47),
-        HEX_DBL(+, 1, 36121e24d3bba, -, 45),
-        HEX_DBL(+, 1, a56e0c2ac7f75, -, 44),
-        HEX_DBL(+, 1, 1e642baeb84a, -, 42),
-        HEX_DBL(+, 1, 853f01d6d53ba, -, 41),
-        HEX_DBL(+, 1, 0885298767e9a, -, 39),
-        HEX_DBL(+, 1, 67852a7007e42, -, 38),
-        HEX_DBL(+, 1, e8a37a45fc32e, -, 37),
-        HEX_DBL(+, 1, 4c1078fe9228a, -, 35),
-        HEX_DBL(+, 1, c3527e433fab1, -, 34),
-        HEX_DBL(+, 1, 32b48bf117da2, -, 32),
-        HEX_DBL(+, 1, a0db0d0ddb3ec, -, 31),
-        HEX_DBL(+, 1, 1b48655f37267, -, 29),
-        HEX_DBL(+, 1, 81056ff2c5772, -, 28),
-        HEX_DBL(+, 1, 05a628c699fa1, -, 26),
-        HEX_DBL(+, 1, 639e3175a689d, -, 25),
-        HEX_DBL(+, 1, e355bbaee85cb, -, 24),
-        HEX_DBL(+, 1, 4875ca227ec38, -, 22),
-        HEX_DBL(+, 1, be6c6fdb01612, -, 21),
-        HEX_DBL(+, 1, 2f6053b981d98, -, 19),
-        HEX_DBL(+, 1, 9c54c3b43bc8b, -, 18),
-        HEX_DBL(+, 1, 18354238f6764, -, 16),
-        HEX_DBL(+, 1, 7cd79b5647c9b, -, 15),
-        HEX_DBL(+, 1, 02cf22526545a, -, 13),
-        HEX_DBL(+, 1, 5fc21041027ad, -, 12),
-        HEX_DBL(+, 1, de16b9c24a98f, -, 11),
-        HEX_DBL(+, 1, 44e51f113d4d6, -, 9),
-        HEX_DBL(+, 1, b993fe00d5376, -, 8),
-        HEX_DBL(+, 1, 2c155b8213cf4, -, 6),
-        HEX_DBL(+, 1, 97db0ccceb0af, -, 5),
-        HEX_DBL(+, 1, 152aaa3bf81cc, -, 3),
-        HEX_DBL(+, 1, 78b56362cef38, -, 2),
-        HEX_DBL(+, 1, 0, +, 0),
-        HEX_DBL(+, 1, 5bf0a8b145769, +, 1),
-        HEX_DBL(+, 1, d8e64b8d4ddae, +, 2),
-        HEX_DBL(+, 1, 415e5bf6fb106, +, 4),
-        HEX_DBL(+, 1, b4c902e273a58, +, 5),
-        HEX_DBL(+, 1, 28d389970338f, +, 7),
-        HEX_DBL(+, 1, 936dc5690c08f, +, 8),
-        HEX_DBL(+, 1, 122885aaeddaa, +, 10),
-        HEX_DBL(+, 1, 749ea7d470c6e, +, 11),
-        HEX_DBL(+, 1, fa7157c470f82, +, 12),
-        HEX_DBL(+, 1, 5829dcf95056, +, 14),
-        HEX_DBL(+, 1, d3c4488ee4f7f, +, 15),
-        HEX_DBL(+, 1, 3de1654d37c9a, +, 17),
-        HEX_DBL(+, 1, b00b5916ac955, +, 18),
-        HEX_DBL(+, 1, 259ac48bf05d7, +, 20),
-        HEX_DBL(+, 1, 8f0ccafad2a87, +, 21),
-        HEX_DBL(+, 1, 0f2ebd0a8002, +, 23),
-        HEX_DBL(+, 1, 709348c0ea4f9, +, 24),
-        HEX_DBL(+, 1, f4f22091940bd, +, 25),
-        HEX_DBL(+, 1, 546d8f9ed26e1, +, 27),
-        HEX_DBL(+, 1, ceb088b68e804, +, 28),
-        HEX_DBL(+, 1, 3a6e1fd9eecfd, +, 30),
-        HEX_DBL(+, 1, ab5adb9c436, +, 31),
-        HEX_DBL(+, 1, 226af33b1fdc1, +, 33),
-        HEX_DBL(+, 1, 8ab7fb5475fb7, +, 34),
-        HEX_DBL(+, 1, 0c3d3920962c9, +, 36),
-        HEX_DBL(+, 1, 6c932696a6b5d, +, 37),
-        HEX_DBL(+, 1, ef822f7f6731d, +, 38),
-        HEX_DBL(+, 1, 50bba3796379a, +, 40),
-        HEX_DBL(+, 1, c9aae4631c056, +, 41),
-        HEX_DBL(+, 1, 370470aec28ed, +, 43),
-        HEX_DBL(+, 1, a6b765d8cdf6d, +, 44),
-        HEX_DBL(+, 1, 1f43fcc4b662c, +, 46),
-        HEX_DBL(+, 1, 866f34a725782, +, 47),
-        HEX_DBL(+, 1, 0953e2f3a1ef7, +, 49),
-        HEX_DBL(+, 1, 689e221bc8d5b, +, 50),
-        HEX_DBL(+, 1, ea215a1d20d76, +, 51),
-        HEX_DBL(+, 1, 4d13fbb1a001a, +, 53),
-        HEX_DBL(+, 1, c4b334617cc67, +, 54),
-        HEX_DBL(+, 1, 33a43d282a519, +, 56),
-        HEX_DBL(+, 1, a220d397972eb, +, 57),
-        HEX_DBL(+, 1, 1c25c88df6862, +, 59),
-        HEX_DBL(+, 1, 8232558201159, +, 60),
-        HEX_DBL(+, 1, 0672a3c9eb871, +, 62),
-        HEX_DBL(+, 1, 64b41c6d37832, +, 63),
-        HEX_DBL(+, 1, e4cf766fe49be, +, 64),
-        HEX_DBL(+, 1, 49767bc0483e3, +, 66),
-        HEX_DBL(+, 1, bfc951eb8bb76, +, 67),
-        HEX_DBL(+, 1, 304d6aeca254b, +, 69),
-        HEX_DBL(+, 1, 9d97010884251, +, 70),
-        HEX_DBL(+, 1, 19103e4080b45, +, 72),
-        HEX_DBL(+, 1, 7e013cd114461, +, 73),
-        HEX_DBL(+, 1, 03996528e074c, +, 75),
-        HEX_DBL(+, 1, 60d4f6fdac731, +, 76),
-        HEX_DBL(+, 1, df8c5af17ba3b, +, 77),
-        HEX_DBL(+, 1, 45e3076d61699, +, 79),
-        HEX_DBL(+, 1, baed16a6e0da7, +, 80),
-        HEX_DBL(+, 1, 2cffdfebde1a1, +, 82),
-        HEX_DBL(+, 1, 9919cabefcb69, +, 83),
-        HEX_DBL(+, 1, 160345c9953e3, +, 85),
-        HEX_DBL(+, 1, 79dbc9dc53c66, +, 86),
-        HEX_DBL(+, 1, 00c810d464097, +, 88),
-        HEX_DBL(+, 1, 5d009394c5c27, +, 89),
-        HEX_DBL(+, 1, da57de8f107a8, +, 90),
-        HEX_DBL(+, 1, 425982cf597cd, +, 92),
-        HEX_DBL(+, 1, b61e5ca3a5e31, +, 93),
-        HEX_DBL(+, 1, 29bb825dfcf87, +, 95),
-        HEX_DBL(+, 1, 94a90db0d6fe2, +, 96),
-        HEX_DBL(+, 1, 12fec759586fd, +, 98),
-        HEX_DBL(+, 1, 75c1dc469e3af, +, 99),
-        HEX_DBL(+, 1, fbfd219c43b04, +, 100),
-        HEX_DBL(+, 1, 5936d44e1a146, +, 102),
-        HEX_DBL(+, 1, d531d8a7ee79c, +, 103),
-        HEX_DBL(+, 1, 3ed9d24a2d51b, +, 105),
-        HEX_DBL(+, 1, b15cfe5b6e17b, +, 106),
-        HEX_DBL(+, 1, 268038c2c0e, +, 108),
-        HEX_DBL(+, 1, 9044a73545d48, +, 109),
-        HEX_DBL(+, 1, 1002ab6218b38, +, 111),
-        HEX_DBL(+, 1, 71b3540cbf921, +, 112),
-        HEX_DBL(+, 1, f6799ea9c414a, +, 113),
-        HEX_DBL(+, 1, 55779b984f3eb, +, 115),
-        HEX_DBL(+, 1, d01a210c44aa4, +, 116),
-        HEX_DBL(+, 1, 3b63da8e9121, +, 118),
-        HEX_DBL(+, 1, aca8d6b0116b8, +, 119),
-        HEX_DBL(+, 1, 234de9e0c74e9, +, 121),
-        HEX_DBL(+, 1, 8bec7503ca477, +, 122),
-        HEX_DBL(+, 1, 0d0eda9796b9, +, 124),
-        HEX_DBL(+, 1, 6db0118477245, +, 125),
-        HEX_DBL(+, 1, f1056dc7bf22d, +, 126),
-        HEX_DBL(+, 1, 51c2cc3433801, +, 128),
-        HEX_DBL(+, 1, cb108ffbec164, +, 129),
-        HEX_DBL(+, 1, 37f780991b584, +, 131),
-        HEX_DBL(+, 1, a801c0ea8ac4d, +, 132),
-        HEX_DBL(+, 1, 20247cc4c46c1, +, 134),
-        HEX_DBL(+, 1, 87a0553328015, +, 135),
-        HEX_DBL(+, 1, 0a233dee4f9bb, +, 137),
-        HEX_DBL(+, 1, 69b7f55b808ba, +, 138),
-        HEX_DBL(+, 1, eba064644060a, +, 139),
-        HEX_DBL(+, 1, 4e184933d9364, +, 141),
-        HEX_DBL(+, 1, c614fe2531841, +, 142),
-        HEX_DBL(+, 1, 3494a9b171bf5, +, 144),
-        HEX_DBL(+, 1, a36798b9d969b, +, 145),
-        HEX_DBL(+, 1, 1d03d8c0c04af, +, 147),
-        HEX_DBL(+, 1, 836026385c974, +, 148),
-        HEX_DBL(+, 1, 073fbe9ac901d, +, 150),
-        HEX_DBL(+, 1, 65cae0969f286, +, 151),
-        HEX_DBL(+, 1, e64a58639cae8, +, 152),
-        HEX_DBL(+, 1, 4a77f5f9b50f9, +, 154),
-        HEX_DBL(+, 1, c12744a3a28e3, +, 155),
-        HEX_DBL(+, 1, 313b3b6978e85, +, 157),
-        HEX_DBL(+, 1, 9eda3a31e587e, +, 158),
-        HEX_DBL(+, 1, 19ebe56b56453, +, 160),
-        HEX_DBL(+, 1, 7f2bc6e599b7e, +, 161),
-        HEX_DBL(+, 1, 04644610df2ff, +, 163),
-        HEX_DBL(+, 1, 61e8b490ac4e6, +, 164),
-        HEX_DBL(+, 1, e103201f299b3, +, 165),
-        HEX_DBL(+, 1, 46e1b637beaf5, +, 167),
-        HEX_DBL(+, 1, bc473cfede104, +, 168),
-        HEX_DBL(+, 1, 2deb1b9c85e2d, +, 170),
-        HEX_DBL(+, 1, 9a5981ca67d1, +, 171),
-        HEX_DBL(+, 1, 16dc8a9ef670b, +, 173),
-        HEX_DBL(+, 1, 7b03166942309, +, 174),
-        HEX_DBL(+, 1, 0190be03150a7, +, 176),
-        HEX_DBL(+, 1, 5e1152f9a8119, +, 177),
-        HEX_DBL(+, 1, dbca9263f8487, +, 178),
-        HEX_DBL(+, 1, 43556dee93bee, +, 180),
-        HEX_DBL(+, 1, b774c12967dfa, +, 181),
-        HEX_DBL(+, 1, 2aa4306e922c2, +, 183),
-        HEX_DBL(+, 1, 95e54c5dd4217, +, 184)
-    };
+    static const double exp_table[128+150+1] =
+    {
+        HEX_DBL( +, 1, 82e16284f5ec5, -, 217 ),    HEX_DBL( +, 1, 06e9996332ba1, -, 215 ),
+        HEX_DBL( +, 1, 6555cb289e44b, -, 214 ),    HEX_DBL( +, 1, e5ab364643354, -, 213 ),
+        HEX_DBL( +, 1, 4a0bd18e64df7, -, 211 ),    HEX_DBL( +, 1, c094499cc578e, -, 210 ),
+        HEX_DBL( +, 1, 30d759323998c, -, 208 ),    HEX_DBL( +, 1, 9e5278ab1d4cf, -, 207 ),
+        HEX_DBL( +, 1, 198fa3f30be25, -, 205 ),    HEX_DBL( +, 1, 7eae636d6144e, -, 204 ),
+        HEX_DBL( +, 1, 040f1036f4863, -, 202 ),    HEX_DBL( +, 1, 6174e477a895f, -, 201 ),
+        HEX_DBL( +, 1, e065b82dd95a,  -, 200 ),    HEX_DBL( +, 1, 4676be491d129, -, 198 ),
+        HEX_DBL( +, 1, bbb5da5f7c823, -, 197 ),    HEX_DBL( +, 1, 2d884eef5fdcb, -, 195 ),
+        HEX_DBL( +, 1, 99d3397ab8371, -, 194 ),    HEX_DBL( +, 1, 1681497ed15b3, -, 192 ),
+        HEX_DBL( +, 1, 7a870f597fdbd, -, 191 ),    HEX_DBL( +, 1, 013c74edba307, -, 189 ),
+        HEX_DBL( +, 1, 5d9ec4ada7938, -, 188 ),    HEX_DBL( +, 1, db2edfd20fa7c, -, 187 ),
+        HEX_DBL( +, 1, 42eb9f39afb0b, -, 185 ),    HEX_DBL( +, 1, b6e4f282b43f4, -, 184 ),
+        HEX_DBL( +, 1, 2a42764857b19, -, 182 ),    HEX_DBL( +, 1, 9560792d19314, -, 181 ),
+        HEX_DBL( +, 1, 137b6ce8e052c, -, 179 ),    HEX_DBL( +, 1, 766b45dd84f18, -, 178 ),
+        HEX_DBL( +, 1, fce362fe6e7d,  -, 177 ),    HEX_DBL( +, 1, 59d34dd8a5473, -, 175 ),
+        HEX_DBL( +, 1, d606847fc727a, -, 174 ),    HEX_DBL( +, 1, 3f6a58b795de3, -, 172 ),
+        HEX_DBL( +, 1, b2216c6efdac1, -, 171 ),    HEX_DBL( +, 1, 2705b5b153fb8, -, 169 ),
+        HEX_DBL( +, 1, 90fa1509bd50d, -, 168 ),    HEX_DBL( +, 1, 107df698da211, -, 166 ),
+        HEX_DBL( +, 1, 725ae6e7b9d35, -, 165 ),    HEX_DBL( +, 1, f75d6040aeff6, -, 164 ),
+        HEX_DBL( +, 1, 56126259e093c, -, 162 ),    HEX_DBL( +, 1, d0ec7df4f7bd4, -, 161 ),
+        HEX_DBL( +, 1, 3bf2cf6722e46, -, 159 ),    HEX_DBL( +, 1, ad6b22f55db42, -, 158 ),
+        HEX_DBL( +, 1, 23d1f3e5834a,  -, 156 ),    HEX_DBL( +, 1, 8c9feab89b876, -, 155 ),
+        HEX_DBL( +, 1, 0d88cf37f00dd, -, 153 ),    HEX_DBL( +, 1, 6e55d2bf838a7, -, 152 ),
+        HEX_DBL( +, 1, f1e6b68529e33, -, 151 ),    HEX_DBL( +, 1, 525be4e4e601d, -, 149 ),
+        HEX_DBL( +, 1, cbe0a45f75eb1, -, 148 ),    HEX_DBL( +, 1, 3884e838aea68, -, 146 ),
+        HEX_DBL( +, 1, a8c1f14e2af5d, -, 145 ),    HEX_DBL( +, 1, 20a717e64a9bd, -, 143 ),
+        HEX_DBL( +, 1, 8851d84118908, -, 142 ),    HEX_DBL( +, 1, 0a9bdfb02d24,  -, 140 ),
+        HEX_DBL( +, 1, 6a5bea046b42e, -, 139 ),    HEX_DBL( +, 1, ec7f3b269efa8, -, 138 ),
+        HEX_DBL( +, 1, 4eafb87eab0f2, -, 136 ),    HEX_DBL( +, 1, c6e2d05bbc,    -, 135 ),
+        HEX_DBL( +, 1, 35208867c2683, -, 133 ),    HEX_DBL( +, 1, a425b317eeacd, -, 132 ),
+        HEX_DBL( +, 1, 1d8508fa8246a, -, 130 ),    HEX_DBL( +, 1, 840fbc08fdc8a, -, 129 ),
+        HEX_DBL( +, 1, 07b7112bc1ffe, -, 127 ),    HEX_DBL( +, 1, 666d0dad2961d, -, 126 ),
+        HEX_DBL( +, 1, e726c3f64d0fe, -, 125 ),    HEX_DBL( +, 1, 4b0dc07cabf98, -, 123 ),
+        HEX_DBL( +, 1, c1f2daf3b6a46, -, 122 ),    HEX_DBL( +, 1, 31c5957a47de2, -, 120 ),
+        HEX_DBL( +, 1, 9f96445648b9f, -, 119 ),    HEX_DBL( +, 1, 1a6baeadb4fd1, -, 117 ),
+        HEX_DBL( +, 1, 7fd974d372e45, -, 116 ),    HEX_DBL( +, 1, 04da4d1452919, -, 114 ),
+        HEX_DBL( +, 1, 62891f06b345,  -, 113 ),    HEX_DBL( +, 1, e1dd273aa8a4a, -, 112 ),
+        HEX_DBL( +, 1, 4775e0840bfdd, -, 110 ),    HEX_DBL( +, 1, bd109d9d94bda, -, 109 ),
+        HEX_DBL( +, 1, 2e73f53fba844, -, 107 ),    HEX_DBL( +, 1, 9b138170d6bfe, -, 106 ),
+        HEX_DBL( +, 1, 175af0cf60ec5, -, 104 ),    HEX_DBL( +, 1, 7baee1bffa80b, -, 103 ),
+        HEX_DBL( +, 1, 02057d1245ceb, -, 101 ),    HEX_DBL( +, 1, 5eafffb34ba31, -, 100 ),
+        HEX_DBL( +, 1, dca23bae16424, -, 99 ),    HEX_DBL( +, 1, 43e7fc88b8056, -, 97 ),
+        HEX_DBL( +, 1, b83bf23a9a9eb, -, 96 ),    HEX_DBL( +, 1, 2b2b8dd05b318, -, 94 ),
+        HEX_DBL( +, 1, 969d47321e4cc, -, 93 ),    HEX_DBL( +, 1, 1452b7723aed2, -, 91 ),
+        HEX_DBL( +, 1, 778fe2497184c, -, 90 ),    HEX_DBL( +, 1, fe7116182e9cc, -, 89 ),
+        HEX_DBL( +, 1, 5ae191a99585a, -, 87 ),    HEX_DBL( +, 1, d775d87da854d, -, 86 ),
+        HEX_DBL( +, 1, 4063f8cc8bb98, -, 84 ),    HEX_DBL( +, 1, b374b315f87c1, -, 83 ),
+        HEX_DBL( +, 1, 27ec458c65e3c, -, 81 ),    HEX_DBL( +, 1, 923372c67a074, -, 80 ),
+        HEX_DBL( +, 1, 1152eaeb73c08, -, 78 ),    HEX_DBL( +, 1, 737c5645114b5, -, 77 ),
+        HEX_DBL( +, 1, f8e6c24b5592e, -, 76 ),    HEX_DBL( +, 1, 571db733a9d61, -, 74 ),
+        HEX_DBL( +, 1, d257d547e083f, -, 73 ),    HEX_DBL( +, 1, 3ce9b9de78f85, -, 71 ),
+        HEX_DBL( +, 1, aebabae3a41b5, -, 70 ),    HEX_DBL( +, 1, 24b6031b49bda, -, 68 ),
+        HEX_DBL( +, 1, 8dd5e1bb09d7e, -, 67 ),    HEX_DBL( +, 1, 0e5b73d1ff53d, -, 65 ),
+        HEX_DBL( +, 1, 6f741de1748ec, -, 64 ),    HEX_DBL( +, 1, f36bd37f42f3e, -, 63 ),
+        HEX_DBL( +, 1, 536452ee2f75c, -, 61 ),    HEX_DBL( +, 1, cd480a1b7482,  -, 60 ),
+        HEX_DBL( +, 1, 39792499b1a24, -, 58 ),    HEX_DBL( +, 1, aa0de4bf35b38, -, 57 ),
+        HEX_DBL( +, 1, 2188ad6ae3303, -, 55 ),    HEX_DBL( +, 1, 898471fca6055, -, 54 ),
+        HEX_DBL( +, 1, 0b6c3afdde064, -, 52 ),    HEX_DBL( +, 1, 6b7719a59f0e,  -, 51 ),
+        HEX_DBL( +, 1, ee001eed62aa, -, 50 ),    HEX_DBL( +, 1, 4fb547c775da8, -, 48 ),
+        HEX_DBL( +, 1, c8464f7616468, -, 47 ),    HEX_DBL( +, 1, 36121e24d3bba, -, 45 ),
+        HEX_DBL( +, 1, a56e0c2ac7f75, -, 44 ),    HEX_DBL( +, 1, 1e642baeb84a,  -, 42 ),
+        HEX_DBL( +, 1, 853f01d6d53ba, -, 41 ),    HEX_DBL( +, 1, 0885298767e9a, -, 39 ),
+        HEX_DBL( +, 1, 67852a7007e42, -, 38 ),    HEX_DBL( +, 1, e8a37a45fc32e, -, 37 ),
+        HEX_DBL( +, 1, 4c1078fe9228a, -, 35 ),    HEX_DBL( +, 1, c3527e433fab1, -, 34 ),
+        HEX_DBL( +, 1, 32b48bf117da2, -, 32 ),    HEX_DBL( +, 1, a0db0d0ddb3ec, -, 31 ),
+        HEX_DBL( +, 1, 1b48655f37267, -, 29 ),    HEX_DBL( +, 1, 81056ff2c5772, -, 28 ),
+        HEX_DBL( +, 1, 05a628c699fa1, -, 26 ),    HEX_DBL( +, 1, 639e3175a689d, -, 25 ),
+        HEX_DBL( +, 1, e355bbaee85cb, -, 24 ),    HEX_DBL( +, 1, 4875ca227ec38, -, 22 ),
+        HEX_DBL( +, 1, be6c6fdb01612, -, 21 ),    HEX_DBL( +, 1, 2f6053b981d98, -, 19 ),
+        HEX_DBL( +, 1, 9c54c3b43bc8b, -, 18 ),    HEX_DBL( +, 1, 18354238f6764, -, 16 ),
+        HEX_DBL( +, 1, 7cd79b5647c9b, -, 15 ),    HEX_DBL( +, 1, 02cf22526545a, -, 13 ),
+        HEX_DBL( +, 1, 5fc21041027ad, -, 12 ),    HEX_DBL( +, 1, de16b9c24a98f, -, 11 ),
+        HEX_DBL( +, 1, 44e51f113d4d6, -, 9 ),    HEX_DBL( +, 1, b993fe00d5376, -, 8 ),
+        HEX_DBL( +, 1, 2c155b8213cf4, -, 6 ),    HEX_DBL( +, 1, 97db0ccceb0af, -, 5 ),
+        HEX_DBL( +, 1, 152aaa3bf81cc, -, 3 ),    HEX_DBL( +, 1, 78b56362cef38, -, 2 ),
+        HEX_DBL( +, 1, 0, +, 0 ),                HEX_DBL( +, 1, 5bf0a8b145769, +, 1 ),
+        HEX_DBL( +, 1, d8e64b8d4ddae, +, 2 ),    HEX_DBL( +, 1, 415e5bf6fb106, +, 4 ),
+        HEX_DBL( +, 1, b4c902e273a58, +, 5 ),    HEX_DBL( +, 1, 28d389970338f, +, 7 ),
+        HEX_DBL( +, 1, 936dc5690c08f, +, 8 ),    HEX_DBL( +, 1, 122885aaeddaa, +, 10 ),
+        HEX_DBL( +, 1, 749ea7d470c6e, +, 11 ),    HEX_DBL( +, 1, fa7157c470f82, +, 12 ),
+        HEX_DBL( +, 1, 5829dcf95056,  +, 14 ),    HEX_DBL( +, 1, d3c4488ee4f7f, +, 15 ),
+        HEX_DBL( +, 1, 3de1654d37c9a, +, 17 ),    HEX_DBL( +, 1, b00b5916ac955, +, 18 ),
+        HEX_DBL( +, 1, 259ac48bf05d7, +, 20 ),    HEX_DBL( +, 1, 8f0ccafad2a87, +, 21 ),
+        HEX_DBL( +, 1, 0f2ebd0a8002,  +, 23 ),    HEX_DBL( +, 1, 709348c0ea4f9, +, 24 ),
+        HEX_DBL( +, 1, f4f22091940bd, +, 25 ),    HEX_DBL( +, 1, 546d8f9ed26e1, +, 27 ),
+        HEX_DBL( +, 1, ceb088b68e804, +, 28 ),    HEX_DBL( +, 1, 3a6e1fd9eecfd, +, 30 ),
+        HEX_DBL( +, 1, ab5adb9c436,   +, 31 ),    HEX_DBL( +, 1, 226af33b1fdc1, +, 33 ),
+        HEX_DBL( +, 1, 8ab7fb5475fb7, +, 34 ),    HEX_DBL( +, 1, 0c3d3920962c9, +, 36 ),
+        HEX_DBL( +, 1, 6c932696a6b5d, +, 37 ),    HEX_DBL( +, 1, ef822f7f6731d, +, 38 ),
+        HEX_DBL( +, 1, 50bba3796379a, +, 40 ),    HEX_DBL( +, 1, c9aae4631c056, +, 41 ),
+        HEX_DBL( +, 1, 370470aec28ed, +, 43 ),    HEX_DBL( +, 1, a6b765d8cdf6d, +, 44 ),
+        HEX_DBL( +, 1, 1f43fcc4b662c, +, 46 ),    HEX_DBL( +, 1, 866f34a725782, +, 47 ),
+        HEX_DBL( +, 1, 0953e2f3a1ef7, +, 49 ),    HEX_DBL( +, 1, 689e221bc8d5b, +, 50 ),
+        HEX_DBL( +, 1, ea215a1d20d76, +, 51 ),    HEX_DBL( +, 1, 4d13fbb1a001a, +, 53 ),
+        HEX_DBL( +, 1, c4b334617cc67, +, 54 ),    HEX_DBL( +, 1, 33a43d282a519, +, 56 ),
+        HEX_DBL( +, 1, a220d397972eb, +, 57 ),    HEX_DBL( +, 1, 1c25c88df6862, +, 59 ),
+        HEX_DBL( +, 1, 8232558201159, +, 60 ),    HEX_DBL( +, 1, 0672a3c9eb871, +, 62 ),
+        HEX_DBL( +, 1, 64b41c6d37832, +, 63 ),    HEX_DBL( +, 1, e4cf766fe49be, +, 64 ),
+        HEX_DBL( +, 1, 49767bc0483e3, +, 66 ),    HEX_DBL( +, 1, bfc951eb8bb76, +, 67 ),
+        HEX_DBL( +, 1, 304d6aeca254b, +, 69 ),    HEX_DBL( +, 1, 9d97010884251, +, 70 ),
+        HEX_DBL( +, 1, 19103e4080b45, +, 72 ),    HEX_DBL( +, 1, 7e013cd114461, +, 73 ),
+        HEX_DBL( +, 1, 03996528e074c, +, 75 ),    HEX_DBL( +, 1, 60d4f6fdac731, +, 76 ),
+        HEX_DBL( +, 1, df8c5af17ba3b, +, 77 ),    HEX_DBL( +, 1, 45e3076d61699, +, 79 ),
+        HEX_DBL( +, 1, baed16a6e0da7, +, 80 ),    HEX_DBL( +, 1, 2cffdfebde1a1, +, 82 ),
+        HEX_DBL( +, 1, 9919cabefcb69, +, 83 ),    HEX_DBL( +, 1, 160345c9953e3, +, 85 ),
+        HEX_DBL( +, 1, 79dbc9dc53c66, +, 86 ),    HEX_DBL( +, 1, 00c810d464097, +, 88 ),
+        HEX_DBL( +, 1, 5d009394c5c27, +, 89 ),    HEX_DBL( +, 1, da57de8f107a8, +, 90 ),
+        HEX_DBL( +, 1, 425982cf597cd, +, 92 ),    HEX_DBL( +, 1, b61e5ca3a5e31, +, 93 ),
+        HEX_DBL( +, 1, 29bb825dfcf87, +, 95 ),    HEX_DBL( +, 1, 94a90db0d6fe2, +, 96 ),
+        HEX_DBL( +, 1, 12fec759586fd, +, 98 ),    HEX_DBL( +, 1, 75c1dc469e3af, +, 99 ),
+        HEX_DBL( +, 1, fbfd219c43b04, +, 100 ),    HEX_DBL( +, 1, 5936d44e1a146, +, 102 ),
+        HEX_DBL( +, 1, d531d8a7ee79c, +, 103 ),    HEX_DBL( +, 1, 3ed9d24a2d51b, +, 105 ),
+        HEX_DBL( +, 1, b15cfe5b6e17b, +, 106 ),    HEX_DBL( +, 1, 268038c2c0e,   +, 108 ),
+        HEX_DBL( +, 1, 9044a73545d48, +, 109 ),    HEX_DBL( +, 1, 1002ab6218b38, +, 111 ),
+        HEX_DBL( +, 1, 71b3540cbf921, +, 112 ),    HEX_DBL( +, 1, f6799ea9c414a, +, 113 ),
+        HEX_DBL( +, 1, 55779b984f3eb, +, 115 ),    HEX_DBL( +, 1, d01a210c44aa4, +, 116 ),
+        HEX_DBL( +, 1, 3b63da8e9121,  +, 118 ),    HEX_DBL( +, 1, aca8d6b0116b8, +, 119 ),
+        HEX_DBL( +, 1, 234de9e0c74e9, +, 121 ),    HEX_DBL( +, 1, 8bec7503ca477, +, 122 ),
+        HEX_DBL( +, 1, 0d0eda9796b9,  +, 124 ),    HEX_DBL( +, 1, 6db0118477245, +, 125 ),
+        HEX_DBL( +, 1, f1056dc7bf22d, +, 126 ),    HEX_DBL( +, 1, 51c2cc3433801, +, 128 ),
+        HEX_DBL( +, 1, cb108ffbec164, +, 129 ),    HEX_DBL( +, 1, 37f780991b584, +, 131 ),
+        HEX_DBL( +, 1, a801c0ea8ac4d, +, 132 ),    HEX_DBL( +, 1, 20247cc4c46c1, +, 134 ),
+        HEX_DBL( +, 1, 87a0553328015, +, 135 ),    HEX_DBL( +, 1, 0a233dee4f9bb, +, 137 ),
+        HEX_DBL( +, 1, 69b7f55b808ba, +, 138 ),    HEX_DBL( +, 1, eba064644060a, +, 139 ),
+        HEX_DBL( +, 1, 4e184933d9364, +, 141 ),    HEX_DBL( +, 1, c614fe2531841, +, 142 ),
+        HEX_DBL( +, 1, 3494a9b171bf5, +, 144 ),    HEX_DBL( +, 1, a36798b9d969b, +, 145 ),
+        HEX_DBL( +, 1, 1d03d8c0c04af, +, 147 ),    HEX_DBL( +, 1, 836026385c974, +, 148 ),
+        HEX_DBL( +, 1, 073fbe9ac901d, +, 150 ),    HEX_DBL( +, 1, 65cae0969f286, +, 151 ),
+        HEX_DBL( +, 1, e64a58639cae8, +, 152 ),    HEX_DBL( +, 1, 4a77f5f9b50f9, +, 154 ),
+        HEX_DBL( +, 1, c12744a3a28e3, +, 155 ),    HEX_DBL( +, 1, 313b3b6978e85, +, 157 ),
+        HEX_DBL( +, 1, 9eda3a31e587e, +, 158 ),    HEX_DBL( +, 1, 19ebe56b56453, +, 160 ),
+        HEX_DBL( +, 1, 7f2bc6e599b7e, +, 161 ),    HEX_DBL( +, 1, 04644610df2ff, +, 163 ),
+        HEX_DBL( +, 1, 61e8b490ac4e6, +, 164 ),    HEX_DBL( +, 1, e103201f299b3, +, 165 ),
+        HEX_DBL( +, 1, 46e1b637beaf5, +, 167 ),    HEX_DBL( +, 1, bc473cfede104, +, 168 ),
+        HEX_DBL( +, 1, 2deb1b9c85e2d, +, 170 ),    HEX_DBL( +, 1, 9a5981ca67d1,  +, 171 ),
+        HEX_DBL( +, 1, 16dc8a9ef670b, +, 173 ),    HEX_DBL( +, 1, 7b03166942309, +, 174 ),
+        HEX_DBL( +, 1, 0190be03150a7, +, 176 ),    HEX_DBL( +, 1, 5e1152f9a8119, +, 177 ),
+        HEX_DBL( +, 1, dbca9263f8487, +, 178 ),    HEX_DBL( +, 1, 43556dee93bee, +, 180 ),
+        HEX_DBL( +, 1, b774c12967dfa, +, 181 ),    HEX_DBL( +, 1, 2aa4306e922c2, +, 183 ),
+        HEX_DBL( +, 1, 95e54c5dd4217, +, 184 )    };
 
-    // scale by e**i --  (expm1(f) + 1)*e**i - 1  = expm1(f) * e**i + e**i - 1 =
-    // e**i
-    return exp_table[exponent + 150] + (f * exp_table[exponent + 150] - 1.0);
+    // scale by e**i --  (expm1(f) + 1)*e**i - 1  = expm1(f) * e**i + e**i - 1 = e**i
+    return exp_table[exponent+150] + (f * exp_table[exponent+150] - 1.0);
 }
 
 
-double reference_fmax(double x, double y)
+double reference_fmax( double x, double y )
 {
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
 
     return x >= y ? x : y;
 }
 
-double reference_fmin(double x, double y)
+double reference_fmin( double x, double y )
 {
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
 
     return x <= y ? x : y;
 }
 
-double reference_hypot(double x, double y)
+double reference_hypot( double x, double y )
 {
-    // Since the inputs are actually floats, we don't have to worry about range
-    // here
-    if (isinf(x) || isinf(y)) return INFINITY;
+    // Since the inputs are actually floats, we don't have to worry about range here
+    if( isinf(x) || isinf(y) )
+        return INFINITY;
 
-    return sqrt(x * x + y * y);
+    return sqrt( x * x + y * y );
 }
 
-int reference_ilogbl(long double x)
+int    reference_ilogbl( long double x)
 {
     extern int gDeviceILogb0, gDeviceILogbNaN;
 
     // Since we are just using this to verify double precision, we can
     // use the double precision ilogb here
-    union {
-        double f;
-        cl_ulong u;
-    } u;
-    u.f = (double)x;
+    union { double f; cl_ulong u;} u;
+    u.f = (double) x;
 
     int exponent = (int)(u.u >> 52) & 0x7ff;
-    if (exponent == 0x7ff)
+    if( exponent == 0x7ff )
     {
-        if (u.u & 0x000fffffffffffffULL) return gDeviceILogbNaN;
+        if( u.u & 0x000fffffffffffffULL )
+            return gDeviceILogbNaN;
 
         return CL_INT_MAX;
     }
 
-    if (exponent == 0)
-    { // deal with denormals
-        u.f = x * HEX_DBL(+, 1, 0, +, 64);
+    if( exponent == 0 )
+    {   // deal with denormals
+        u.f =  x * HEX_DBL( +, 1, 0, +, 64 );
         exponent = (cl_uint)(u.u >> 52) & 0x7ff;
-        if (exponent == 0) return gDeviceILogb0;
+        if( exponent == 0 )
+            return gDeviceILogb0;
 
         exponent -= 1023 + 64;
         return exponent;
@@ -1694,105 +1516,84 @@
     return exponent - 1023;
 }
 
-double reference_relaxed_log2(double x) { return reference_log2(x); }
+//double reference_log2( double x )
+//{
+//    return log( x ) * 1.44269504088896340735992468100189214;
+//}
 
-double reference_log2(double x)
+
+double reference_relaxed_log2( double x )
 {
-    if (isnan(x) || x < 0.0 || x == -INFINITY) return cl_make_nan();
+  return reference_log2(x);
+}
 
-    if (x == 0.0f) return -INFINITY;
+double reference_log2( double x )
+{
+    if( isnan(x) || x < 0.0 || x == -INFINITY)
+        return cl_make_nan();
 
-    if (x == INFINITY) return INFINITY;
+    if( x == 0.0f)
+        return -INFINITY;
+
+    if( x == INFINITY )
+        return INFINITY;
 
     double hi, lo;
-    __log2_ep(&hi, &lo, x);
+    __log2_ep( &hi, &lo, x );
     return hi;
 }
 
-double reference_log1p(double x)
-{ // This function is suitable only for verifying log1pf(). It produces several
-  // double precision ulps of error.
+double reference_log1p( double x )
+{   // This function is suitable only for verifying log1pf(). It produces several double precision ulps of error.
 
     // Handle small and NaN
-    if (!(reference_fabs(x) > HEX_DBL(+, 1, 0, -, 53))) return x;
+    if( ! ( reference_fabs(x) > HEX_DBL( +, 1, 0, -, 53 ) ) )
+        return x;
 
     // deal with special values
-    if (x <= -1.0)
+    if( x <= -1.0 )
     {
-        if (x < -1.0) return cl_make_nan();
+        if( x < -1.0 )
+            return cl_make_nan();
         return -INFINITY;
     }
 
     // infinity
-    if (x == INFINITY) return INFINITY;
+    if( x == INFINITY )
+        return INFINITY;
 
-    // High precision result for when near 0, to avoid problems with the
-    // reference result falling in the wrong binade.
-    if (reference_fabs(x) < HEX_DBL(+, 1, 0, -, 28)) return (1.0 - 0.5 * x) * x;
+    // High precision result for when near 0, to avoid problems with the reference result falling in the wrong binade.
+    if( reference_fabs(x) < HEX_DBL( +, 1, 0, -, 28 ) )
+        return (1.0 - 0.5 * x) * x;
 
     // Our polynomial is only good in the region +-2**-4.
     // If we aren't in that range then we need to reduce to be in that range
-    double correctionLo =
-        -0.0; // correction down stream to compensate for the reduction, if any
-    double correctionHi =
-        -0.0; // correction down stream to compensate for the exponent, if any
-    if (reference_fabs(x) > HEX_DBL(+, 1, 0, -, 4))
+    double correctionLo = -0.0;           // correction down stream to compensate for the reduction, if any
+    double correctionHi = -0.0;           // correction down stream to compensate for the exponent, if any
+    if( reference_fabs(x) > HEX_DBL( +, 1, 0, -, 4 ) )
     {
-        x += 1.0; // double should cover any loss of precision here
+        x += 1.0;   // double should cover any loss of precision here
 
         // separate x into (1+f) * 2**i
-        union {
-            double d;
-            cl_ulong u;
-        } u;
-        u.d = x;
-        int i = (int)((u.u >> 52) & 0x7ff) - 1023;
+        union{ double d; cl_ulong u;} u;        u.d = x;
+        int i = (int) ((u.u >> 52) & 0x7ff) - 1023;
         u.u &= 0x000fffffffffffffULL;
-        int index = (int)(u.u >> 48);
+        int index = (int) (u.u >> 48 );
         u.u |= 0x3ff0000000000000ULL;
         double f = u.d;
 
         // further reduce f to be within 1/16 of 1.0
-        static const double scale_table[16] = {
-            1.0,
-            HEX_DBL(+, 1, d2d2d2d6e3f79, -, 1),
-            HEX_DBL(+, 1, b8e38e42737a1, -, 1),
-            HEX_DBL(+, 1, a1af28711adf3, -, 1),
-            HEX_DBL(+, 1, 8cccccd88dd65, -, 1),
-            HEX_DBL(+, 1, 79e79e810ec8f, -, 1),
-            HEX_DBL(+, 1, 68ba2e94df404, -, 1),
-            HEX_DBL(+, 1, 590b216defb29, -, 1),
-            HEX_DBL(+, 1, 4aaaaab1500ed, -, 1),
-            HEX_DBL(+, 1, 3d70a3e0d6f73, -, 1),
-            HEX_DBL(+, 1, 313b13bb39f4f, -, 1),
-            HEX_DBL(+, 1, 25ed09823f1cc, -, 1),
-            HEX_DBL(+, 1, 1b6db6e77457b, -, 1),
-            HEX_DBL(+, 1, 11a7b96a3a34f, -, 1),
-            HEX_DBL(+, 1, 0888888e46fea, -, 1),
-            HEX_DBL(+, 1, 00000038e9862, -, 1)
-        };
+        static const double scale_table[16] = {                  1.0, HEX_DBL( +, 1, d2d2d2d6e3f79, -, 1 ), HEX_DBL( +, 1, b8e38e42737a1, -, 1 ), HEX_DBL( +, 1, a1af28711adf3, -, 1 ),
+                                                HEX_DBL( +, 1, 8cccccd88dd65, -, 1 ), HEX_DBL( +, 1, 79e79e810ec8f, -, 1 ), HEX_DBL( +, 1, 68ba2e94df404, -, 1 ), HEX_DBL( +, 1, 590b216defb29, -, 1 ),
+                                                HEX_DBL( +, 1, 4aaaaab1500ed, -, 1 ), HEX_DBL( +, 1, 3d70a3e0d6f73, -, 1 ), HEX_DBL( +, 1, 313b13bb39f4f, -, 1 ), HEX_DBL( +, 1, 25ed09823f1cc, -, 1 ),
+                                                HEX_DBL( +, 1, 1b6db6e77457b, -, 1 ), HEX_DBL( +, 1, 11a7b96a3a34f, -, 1 ), HEX_DBL( +, 1, 0888888e46fea, -, 1 ), HEX_DBL( +, 1, 00000038e9862, -, 1 ) };
 
         // correction_table[i] = -log( scale_table[i] )
-        // All entries have >= 64 bits of precision (rather than the expected
-        // 53)
-        static const double correction_table[16] = {
-            -0.0,
-            HEX_DBL(+, 1, 7a5c722c16058, -, 4),
-            HEX_DBL(+, 1, 323db16c89ab1, -, 3),
-            HEX_DBL(+, 1, a0f87d180629, -, 3),
-            HEX_DBL(+, 1, 050279324e17c, -, 2),
-            HEX_DBL(+, 1, 36f885bb270b0, -, 2),
-            HEX_DBL(+, 1, 669b771b5cc69, -, 2),
-            HEX_DBL(+, 1, 94203a6292a05, -, 2),
-            HEX_DBL(+, 1, bfb4f9cb333a4, -, 2),
-            HEX_DBL(+, 1, e982376ddb80e, -, 2),
-            HEX_DBL(+, 1, 08d5d8769b2b2, -, 1),
-            HEX_DBL(+, 1, 1c288bc00e0cf, -, 1),
-            HEX_DBL(+, 1, 2ec7535b31ecb, -, 1),
-            HEX_DBL(+, 1, 40bed0adc63fb, -, 1),
-            HEX_DBL(+, 1, 521a5c0330615, -, 1),
-            HEX_DBL(+, 1, 62e42f7dd092c, -, 1)
-        };
+        // All entries have >= 64 bits of precision (rather than the expected 53)
+        static const double correction_table[16] = {                   -0.0, HEX_DBL( +, 1, 7a5c722c16058, -, 4 ), HEX_DBL( +, 1, 323db16c89ab1, -, 3 ), HEX_DBL( +, 1, a0f87d180629, -, 3 ),
+                                                       HEX_DBL( +, 1, 050279324e17c, -, 2 ), HEX_DBL( +, 1, 36f885bb270b0, -, 2 ), HEX_DBL( +, 1, 669b771b5cc69, -, 2 ), HEX_DBL( +, 1, 94203a6292a05, -, 2 ),
+                                                       HEX_DBL( +, 1, bfb4f9cb333a4, -, 2 ), HEX_DBL( +, 1, e982376ddb80e, -, 2 ), HEX_DBL( +, 1, 08d5d8769b2b2, -, 1 ), HEX_DBL( +, 1, 1c288bc00e0cf, -, 1 ),
+                                                       HEX_DBL( +, 1, 2ec7535b31ecb, -, 1 ), HEX_DBL( +, 1, 40bed0adc63fb, -, 1 ), HEX_DBL( +, 1, 521a5c0330615, -, 1 ), HEX_DBL( +, 1, 62e42f7dd092c, -, 1 ) };
 
         f *= scale_table[index];
         correctionLo = correction_table[index];
@@ -1804,25 +1605,17 @@
     }
 
 
-    // minmax polynomial for p(x) = (log(x+1) - x)/x valid over the range x =
-    // [-1/16, 1/16]
+    // minmax polynomial for p(x) = (log(x+1) - x)/x valid over the range x = [-1/16, 1/16]
     //          max error HEX_DBL( +, 1, 048f61f9a5eca, -, 52 )
-    double p = HEX_DBL(-, 1, cc33de97a9d7b, -, 46)
-        + (HEX_DBL(-, 1, fffffffff3eb7, -, 2)
-           + (HEX_DBL(+, 1, 5555555633ef7, -, 2)
-              + (HEX_DBL(-, 1, 00000062c78, -, 2)
-                 + (HEX_DBL(+, 1, 9999958a3321, -, 3)
-                    + (HEX_DBL(-, 1, 55534ce65c347, -, 3)
-                       + (HEX_DBL(+, 1, 24957208391a5, -, 3)
-                          + (HEX_DBL(-, 1, 02287b9a5b4a1, -, 3)
-                             + HEX_DBL(+, 1, c757d922180ed, -, 4) * x)
-                              * x)
-                           * x)
-                        * x)
-                     * x)
-                  * x)
-               * x)
-            * x;
+    double p = HEX_DBL( -, 1, cc33de97a9d7b,  -, 46 ) +
+               (HEX_DBL( -, 1, fffffffff3eb7, -, 2 ) +
+               (HEX_DBL( +, 1, 5555555633ef7, -, 2 ) +
+               (HEX_DBL( -, 1, 00000062c78,   -, 2 ) +
+               (HEX_DBL( +, 1, 9999958a3321,  -, 3 ) +
+               (HEX_DBL( -, 1, 55534ce65c347, -, 3 ) +
+               (HEX_DBL( +, 1, 24957208391a5, -, 3 ) +
+               (HEX_DBL( -, 1, 02287b9a5b4a1, -, 3 ) +
+                HEX_DBL( +, 1, c757d922180ed, -, 4 ) * x)*x)*x)*x)*x)*x)*x)*x;
 
     // log(x+1) = x * p(x) + x
     x += x * p;
@@ -1830,23 +1623,22 @@
     return correctionHi + (correctionLo + x);
 }
 
-double reference_logb(double x)
+double reference_logb( double x )
 {
-    union {
-        float f;
-        cl_uint u;
-    } u;
-    u.f = (float)x;
+    union { float f; cl_uint u;} u;
+    u.f = (float) x;
 
     cl_int exponent = (u.u >> 23) & 0xff;
-    if (exponent == 0xff) return x * x;
+    if( exponent == 0xff )
+        return x * x;
 
-    if (exponent == 0)
-    { // deal with denormals
+    if( exponent == 0 )
+    {   // deal with denormals
         u.u = (u.u & 0x007fffff) | 0x3f800000;
         u.f -= 1.0f;
         exponent = (u.u >> 23) & 0xff;
-        if (exponent == 0) return -INFINITY;
+        if( exponent == 0 )
+            return -INFINITY;
 
         return exponent - (127 + 126);
     }
@@ -1854,271 +1646,219 @@
     return exponent - 127;
 }
 
-double reference_relaxed_reciprocal(double x) { return 1.0f / ((float)x); }
-
-double reference_reciprocal(double x) { return 1.0 / x; }
-
-double reference_remainder(double x, double y)
+double reference_relaxed_reciprocal(double x)
 {
-    int i;
-    return reference_remquo(x, y, &i);
+  return 1.0f / ((float) x);
 }
 
-double reference_lgamma(double x)
+double reference_reciprocal( double x )
 {
-    /*
-     * ====================================================
-     * This function is from fdlibm. http://www.netlib.org
-     * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * ====================================================
-     *
-     */
+  return 1.0 / x;
+}
 
-    static const double // two52 = 4.50359962737049600000e+15, /* 0x43300000,
-                        // 0x00000000 */
-        half = 5.00000000000000000000e-01, /* 0x3FE00000,
-                                              0x00000000 */
-        one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
-        pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
-        a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
-        a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
-        a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
-        a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
-        a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
-        a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
-        a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
-        a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
-        a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
-        a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
-        a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
-        a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
-        tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
-        tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
-        /* tt = -(tail of tf) */
-        tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
-        t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
-        t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
-        t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
-        t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
-        t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
-        t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
-        t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
-        t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
-        t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
-        t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
-        t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
-        t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
-        t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
-        t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
-        t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
-        u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
-        u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
-        u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
-        u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
-        u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
-        u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
-        v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
-        v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
-        v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
-        v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
-        v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
-        s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
-        s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
-        s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
-        s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
-        s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
-        s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
-        s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
-        r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
-        r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
-        r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
-        r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
-        r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
-        r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
-        w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
-        w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
-        w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
-        w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
-        w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
-        w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
-        w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
+double reference_remainder( double x, double y )
+{
+    int i;
+    return reference_remquo( x, y, &i );
+}
 
-    static const double zero = 0.00000000000000000000e+00;
-    double t, y, z, nadj, p, p1, p2, p3, q, r, w;
-    cl_int i, hx, lx, ix;
+double reference_lgamma( double x)
+{
+/*
+ * ====================================================
+ * This function is from fdlibm. http://www.netlib.org
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ *
+ */
 
-    union {
-        double d;
-        cl_ulong u;
-    } u;
-    u.d = x;
+static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
+                    half=  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
+                    one =  1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
+                    pi  =  3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
+                    a0  =  7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
+                    a1  =  3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
+                    a2  =  6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
+                    a3  =  2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
+                    a4  =  7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
+                    a5  =  2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
+                    a6  =  1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
+                    a7  =  5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
+                    a8  =  2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
+                    a9  =  1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
+                    a10 =  2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
+                    a11 =  4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
+                    tc  =  1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
+                    tf  = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
+                    /* tt = -(tail of tf) */
+                    tt  = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
+                    t0  =  4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
+                    t1  = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
+                    t2  =  6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
+                    t3  = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
+                    t4  =  1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
+                    t5  = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
+                    t6  =  6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
+                    t7  = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
+                    t8  =  2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
+                    t9  = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
+                    t10 =  8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
+                    t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
+                    t12 =  3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
+                    t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
+                    t14 =  3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
+                    u0  = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
+                    u1  =  6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
+                    u2  =  1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
+                    u3  =  9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
+                    u4  =  2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
+                    u5  =  1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
+                    v1  =  2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
+                    v2  =  2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
+                    v3  =  7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
+                    v4  =  1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
+                    v5  =  3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
+                    s0  = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
+                    s1  =  2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
+                    s2  =  3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
+                    s3  =  1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
+                    s4  =  2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
+                    s5  =  1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
+                    s6  =  3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
+                    r1  =  1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
+                    r2  =  7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
+                    r3  =  1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
+                    r4  =  1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
+                    r5  =  7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
+                    r6  =  7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
+                    w0  =  4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
+                    w1  =  8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
+                    w2  = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
+                    w3  =  7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
+                    w4  = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
+                    w5  =  8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
+                    w6  = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
 
-    hx = (cl_int)(u.u >> 32);
-    lx = (cl_int)(u.u & 0xffffffffULL);
+    static const double zero=  0.00000000000000000000e+00;
+    double t,y,z,nadj,p,p1,p2,p3,q,r,w;
+    cl_int i,hx,lx,ix;
+
+    union{ double d; cl_ulong u;}u; u.d = x;
+
+    hx = (cl_int) (u.u >> 32);
+    lx = (cl_int) (u.u & 0xffffffffULL);
 
     /* purge off +-inf, NaN, +-0, and negative arguments */
-    //    *signgamp = 1;
-    ix = hx & 0x7fffffff;
-    if (ix >= 0x7ff00000) return x * x;
-    if ((ix | lx) == 0) return INFINITY;
-    if (ix < 0x3b900000)
-    { /* |x|<2**-70, return -log(|x|) */
-        if (hx < 0)
-        {
-            //            *signgamp = -1;
+//    *signgamp = 1;
+    ix = hx&0x7fffffff;
+    if(ix>=0x7ff00000) return x*x;
+    if((ix|lx)==0) return INFINITY;
+    if(ix<0x3b900000) {    /* |x|<2**-70, return -log(|x|) */
+        if(hx<0) {
+//            *signgamp = -1;
             return -reference_log(-x);
-        }
-        else
-            return -reference_log(x);
+        } else return -reference_log(x);
     }
-    if (hx < 0)
-    {
-        if (ix >= 0x43300000) /* |x|>=2**52, must be -integer */
-            return INFINITY;
+    if(hx<0) {
+        if(ix>=0x43300000)     /* |x|>=2**52, must be -integer */
+        return INFINITY;
         t = reference_sinpi(x);
-        if (t == zero) return INFINITY; /* -integer */
-        nadj = reference_log(pi / reference_fabs(t * x));
-        //        if(t<zero) *signgamp = -1;
+        if(t==zero) return INFINITY; /* -integer */
+        nadj = reference_log(pi/reference_fabs(t*x));
+//        if(t<zero) *signgamp = -1;
         x = -x;
     }
 
     /* purge off 1 and 2 */
-    if ((((ix - 0x3ff00000) | lx) == 0) || (((ix - 0x40000000) | lx) == 0))
-        r = 0;
+    if((((ix-0x3ff00000)|lx)==0)||(((ix-0x40000000)|lx)==0)) r = 0;
     /* for x < 2.0 */
-    else if (ix < 0x40000000)
-    {
-        if (ix <= 0x3feccccc)
-        { /* lgamma(x) = lgamma(x+1)-log(x) */
-            r = -reference_log(x);
-            if (ix >= 0x3FE76944)
-            {
-                y = 1.0 - x;
-                i = 0;
-            }
-            else if (ix >= 0x3FCDA661)
-            {
-                y = x - (tc - one);
-                i = 1;
-            }
-            else
-            {
-                y = x;
-                i = 2;
-            }
+    else if(ix<0x40000000) {
+        if(ix<=0x3feccccc) {     /* lgamma(x) = lgamma(x+1)-log(x) */
+        r = -reference_log(x);
+        if(ix>=0x3FE76944) {y = 1.0-x; i= 0;}
+        else if(ix>=0x3FCDA661) {y= x-(tc-one); i=1;}
+          else {y = x; i=2;}
+        } else {
+          r = zero;
+            if(ix>=0x3FFBB4C3) {y=2.0-x;i=0;} /* [1.7316,2] */
+            else if(ix>=0x3FF3B4C4) {y=x-tc;i=1;} /* [1.23,1.73] */
+        else {y=x-one;i=2;}
         }
-        else
-        {
-            r = zero;
-            if (ix >= 0x3FFBB4C3)
-            {
-                y = 2.0 - x;
-                i = 0;
-            } /* [1.7316,2] */
-            else if (ix >= 0x3FF3B4C4)
-            {
-                y = x - tc;
-                i = 1;
-            } /* [1.23,1.73] */
-            else
-            {
-                y = x - one;
-                i = 2;
-            }
-        }
-        switch (i)
-        {
-            case 0:
-                z = y * y;
-                p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
-                p2 = z
-                    * (a1
-                       + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
-                p = y * p1 + p2;
-                r += (p - 0.5 * y);
-                break;
-            case 1:
-                z = y * y;
-                w = z * y;
-                p1 = t0
-                    + w
-                        * (t3
-                           + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
-                p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
-                p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
-                p = z * p1 - (tt - w * (p2 + y * p3));
-                r += (tf + p);
-                break;
-            case 2:
-                p1 = y
-                    * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
-                p2 = one + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
-                r += (-0.5 * y + p1 / p2);
+        switch(i) {
+          case 0:
+        z = y*y;
+        p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
+        p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
+        p  = y*p1+p2;
+        r  += (p-0.5*y); break;
+          case 1:
+        z = y*y;
+        w = z*y;
+        p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12)));    /* parallel comp */
+        p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
+        p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
+        p  = z*p1-(tt-w*(p2+y*p3));
+        r += (tf + p); break;
+          case 2:
+        p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
+        p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
+        r += (-0.5*y + p1/p2);
         }
     }
-    else if (ix < 0x40200000)
-    { /* x < 8.0 */
+    else if(ix<0x40200000) {             /* x < 8.0 */
         i = (int)x;
         t = zero;
-        y = x - (double)i;
-        p = y
-            * (s0
-               + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
-        q = one + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
-        r = half * y + p / q;
-        z = one; /* lgamma(1+s) = log(s) + lgamma(s) */
-        switch (i)
-        {
-            case 7: z *= (y + 6.0); /* FALLTHRU */
-            case 6: z *= (y + 5.0); /* FALLTHRU */
-            case 5: z *= (y + 4.0); /* FALLTHRU */
-            case 4: z *= (y + 3.0); /* FALLTHRU */
-            case 3:
-                z *= (y + 2.0); /* FALLTHRU */
-                r += reference_log(z);
-                break;
+        y = x-(double)i;
+        p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
+        q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
+        r = half*y+p/q;
+        z = one;    /* lgamma(1+s) = log(s) + lgamma(s) */
+        switch(i) {
+        case 7: z *= (y+6.0);    /* FALLTHRU */
+        case 6: z *= (y+5.0);    /* FALLTHRU */
+        case 5: z *= (y+4.0);    /* FALLTHRU */
+        case 4: z *= (y+3.0);    /* FALLTHRU */
+        case 3: z *= (y+2.0);    /* FALLTHRU */
+            r += reference_log(z); break;
         }
-        /* 8.0 <= x < 2**58 */
-    }
-    else if (ix < 0x43900000)
-    {
+    /* 8.0 <= x < 2**58 */
+    } else if (ix < 0x43900000) {
         t = reference_log(x);
-        z = one / x;
-        y = z * z;
-        w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
-        r = (x - half) * (t - one) + w;
-    }
-    else
-        /* 2**58 <= x <= inf */
-        r = x * (reference_log(x) - one);
-    if (hx < 0) r = nadj - r;
+        z = one/x;
+        y = z*z;
+        w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
+        r = (x-half)*(t-one)+w;
+    } else
+    /* 2**58 <= x <= inf */
+        r =  x*(reference_log(x)-one);
+    if(hx<0) r = nadj - r;
     return r;
+
 }
 
 #endif // _MSC_VER
 
-double reference_assignment(double x) { return x; }
+double reference_assignment( double x ){ return x; }
 
-int reference_not(double x)
+int reference_not( double x )
 {
-    int r = !x;
-    return r;
+  int r = !x;
+  return r;
 }
 
 #pragma mark -
 #pragma mark Double testing
 
 #ifndef M_PIL
-#define M_PIL                                                                  \
-    3.14159265358979323846264338327950288419716939937510582097494459230781640628620899L
+    #define M_PIL        3.14159265358979323846264338327950288419716939937510582097494459230781640628620899L
 #endif
 
-static long double reduce1l(long double x);
+static long double reduce1l( long double x );
 
 #ifdef __PPC__
 // Since long double on PPC is really extended precision double arithmetic
@@ -2127,35 +1867,36 @@
 // such that reduction algorithm used for other architectures will not work.
 // Instead and alternate reduction method is used.
 
-static long double reduce1l(long double x)
+static long double reduce1l( long double x )
 {
-    union {
-        long double ld;
-        double d[2];
-    } u;
+  union {
+    long double ld;
+    double d[2];
+  } u;
 
-    // Reduce the high and low halfs separately.
-    u.ld = x;
-    return ((long double)reduce1(u.d[0]) + reduce1(u.d[1]));
+  // Reduce the high and low halfs separately.
+  u.ld = x;
+  return ((long double)reduce1(u.d[0]) + reduce1(u.d[1]));
 }
 
 #else // !__PPC__
 
-static long double reduce1l(long double x)
+static long double reduce1l( long double x )
 {
     static long double unit_exp = 0;
-    if (0.0L == unit_exp) unit_exp = scalbnl(1.0L, LDBL_MANT_DIG);
+    if( 0.0L == unit_exp )
+        unit_exp = scalbnl( 1.0L, LDBL_MANT_DIG);
 
-    if (reference_fabsl(x) >= unit_exp)
+    if( reference_fabsl(x) >= unit_exp )
     {
-        if (reference_fabsl(x) == INFINITY) return cl_make_nan();
+        if( reference_fabsl(x) == INFINITY )
+            return cl_make_nan();
 
-        return 0.0L; // we patch up the sign for sinPi and cosPi later, since
-                     // they need different signs
+        return 0.0L; //we patch up the sign for sinPi and cosPi later, since they need different signs
     }
 
     // Find the nearest multiple of 2
-    const long double r = reference_copysignl(unit_exp, x);
+    const long double r = reference_copysignl( unit_exp, x );
     long double z = x + r;
     z -= r;
 
@@ -2164,31 +1905,19 @@
 }
 #endif // __PPC__
 
-long double reference_acospil(long double x)
+long double reference_acospil( long double x){  return reference_acosl( x ) / M_PIL;    }
+long double reference_asinpil( long double x){  return reference_asinl( x ) / M_PIL;    }
+long double reference_atanpil( long double x){  return reference_atanl( x ) / M_PIL;    }
+long double reference_atan2pil( long double y, long double x){ return reference_atan2l( y, x) / M_PIL; }
+long double reference_cospil( long double x)
 {
-    return reference_acosl(x) / M_PIL;
-}
-long double reference_asinpil(long double x)
-{
-    return reference_asinl(x) / M_PIL;
-}
-long double reference_atanpil(long double x)
-{
-    return reference_atanl(x) / M_PIL;
-}
-long double reference_atan2pil(long double y, long double x)
-{
-    return reference_atan2l(y, x) / M_PIL;
-}
-long double reference_cospil(long double x)
-{
-    if (reference_fabsl(x) >= HEX_LDBL(+, 1, 0, +, 54))
+    if( reference_fabsl(x) >= HEX_LDBL( +, 1, 0, +, 54 ) )
     {
-        if (reference_fabsl(x) == INFINITY) return cl_make_nan();
+        if( reference_fabsl(x) == INFINITY )
+            return cl_make_nan();
 
-        // Note this probably fails for odd values between 0x1.0p52 and
-        // 0x1.0p53. However, when starting with single precision inputs, there
-        // will be no odd values.
+        //Note this probably fails for odd values between 0x1.0p52 and 0x1.0p53.
+        //However, when starting with single precision inputs, there will be no odd values.
 
         return 1.0L;
     }
@@ -2200,9 +1929,9 @@
     // phase adjust
     double xhi = 0.0;
     double xlo = 0.0;
-    xhi = (double)x + 0.5;
+    xhi = (double) x + 0.5;
 
-    if (reference_fabsl(x) > 0.5L)
+    if(reference_fabsl(x) > 0.5L)
     {
         xlo = xhi - x;
         xlo = 0.5 - xlo;
@@ -2214,69 +1943,61 @@
     }
 
     // reduce to [-0.5, 0.5]
-    if (xhi < -0.5)
+    if( xhi < -0.5 )
     {
         xhi = -1.0 - xhi;
         xlo = -xlo;
     }
-    else if (xhi > 0.5)
+    else if ( xhi > 0.5 )
     {
         xhi = 1.0 - xhi;
         xlo = -xlo;
     }
 
     // cosPi zeros are all +0
-    if (xhi == 0.0 && xlo == 0.0) return 0.0;
+    if( xhi == 0.0 && xlo == 0.0 )
+        return 0.0;
 
     xhi *= M_PI;
     xlo *= M_PI;
 
     xhi += xlo;
 
-    return reference_sinl(xhi);
+    return reference_sinl( xhi );
 
 #else
     // phase adjust
     x += 0.5L;
 
     // reduce to [-0.5, 0.5]
-    if (x < -0.5L)
+    if( x < -0.5L )
         x = -1.0L - x;
-    else if (x > 0.5L)
+    else if ( x > 0.5L )
         x = 1.0L - x;
 
     // cosPi zeros are all +0
-    if (x == 0.0L) return 0.0L;
+    if( x == 0.0L )
+        return 0.0L;
 
-    return reference_sinl(x * M_PIL);
+    return reference_sinl( x * M_PIL );
 #endif
 }
 
-long double reference_dividel(long double x, long double y)
+long double reference_dividel( long double x, long double y)
 {
     double dx = x;
     double dy = y;
-    return dx / dy;
+    return dx/dy;
 }
 
-typedef struct
-{
-    double hi, lo;
-} double_double;
+typedef struct{ double hi, lo; } double_double;
 
-// Split doubles_double into a series of consecutive 26-bit precise doubles and
-// a remainder. Note for later -- for multiplication, it might be better to
-// split each double into a power of two and two 26 bit portions
-//                      multiplication of a double double by a known power of
-//                      two is cheap. The current approach causes some inexact
-//                      arithmetic in mul_dd.
-static inline void split_dd(double_double x, double_double *hi,
-                            double_double *lo)
+// Split doubles_double into a series of consecutive 26-bit precise doubles and a remainder.
+// Note for later -- for multiplication, it might be better to split each double into a power of two and two 26 bit portions
+//                      multiplication of a double double by a known power of two is cheap. The current approach causes some inexact arithmetic in mul_dd.
+static inline void split_dd( double_double x, double_double *hi, double_double *lo )
 {
-    union {
-        double d;
-        cl_ulong u;
-    } u;
+    union{ double d; cl_ulong u;}u;
     u.d = x.hi;
     u.u &= 0xFFFFFFFFF8000000ULL;
     hi->hi = u.d;
@@ -2298,10 +2019,10 @@
     lo->lo = x.hi + x.lo;
 }
 
-static inline double_double accum_d(double_double a, double b)
+static inline double_double accum_d( double_double a, double b )
 {
     double temp;
-    if (fabs(b) > fabs(a.hi))
+    if( fabs(b) > fabs(a.hi) )
     {
         temp = a.hi;
         a.hi += b;
@@ -2314,45 +2035,47 @@
         a.lo += b - (a.hi - temp);
     }
 
-    if (isnan(a.lo)) a.lo = 0.0;
+    if( isnan( a.lo ) )
+        a.lo = 0.0;
 
     return a;
 }
 
-static inline double_double add_dd(double_double a, double_double b)
+static inline double_double add_dd( double_double a, double_double b )
 {
-    double_double r = { -0.0 - 0.0 };
+    double_double r = {-0.0 -0.0 };
 
-    if (isinf(a.hi) || isinf(b.hi) || isnan(a.hi) || isnan(b.hi) || 0.0 == a.hi
-        || 0.0 == b.hi)
+    if( isinf(a.hi) || isinf( b.hi )  ||
+       isnan(a.hi) || isnan( b.hi )  ||
+       0.0 == a.hi || 0.0 == b.hi )
     {
         r.hi = a.hi + b.hi;
         r.lo = a.lo + b.lo;
-        if (isnan(r.lo)) r.lo = 0.0;
+        if( isnan( r.lo ) )
+            r.lo = 0.0;
         return r;
     }
 
-    // merge sort terms by magnitude -- here we assume that |a.hi| > |a.lo|,
-    // |b.hi| > |b.lo|, so we don't have to do the first merge pass
+    //merge sort terms by magnitude -- here we assume that |a.hi| > |a.lo|, |b.hi| > |b.lo|, so we don't have to do the first merge pass
     double terms[4] = { a.hi, b.hi, a.lo, b.lo };
     double temp;
 
-    // Sort hi terms
-    if (fabs(terms[0]) < fabs(terms[1]))
+    //Sort hi terms
+    if( fabs(terms[0]) < fabs(terms[1]) )
     {
         temp = terms[0];
         terms[0] = terms[1];
         terms[1] = temp;
     }
-    // sort lo terms
-    if (fabs(terms[2]) < fabs(terms[3]))
+    //sort lo terms
+    if( fabs(terms[2]) < fabs(terms[3]) )
     {
         temp = terms[2];
         terms[2] = terms[3];
         terms[3] = temp;
     }
     // Fix case where small high term is less than large low term
-    if (fabs(terms[1]) < fabs(terms[2]))
+    if( fabs(terms[1]) < fabs(terms[2]) )
     {
         temp = terms[1];
         terms[1] = terms[2];
@@ -2375,96 +2098,111 @@
     temp = r.hi;
     r.hi += r.lo;
     r.lo = r.lo - (r.hi - temp);
-    if (isnan(r.lo)) r.lo = 0.0;
+    if( isnan( r.lo ) )
+        r.lo = 0.0;
 
     return r;
 }
 
-static inline double_double mul_dd(double_double a, double_double b)
+static inline double_double mul_dd( double_double a, double_double b )
 {
-    double_double result = { -0.0, -0.0 };
+    double_double result = {-0.0,-0.0};
 
     // Inf, nan and 0
-    if (isnan(a.hi) || isnan(b.hi) || isinf(a.hi) || isinf(b.hi) || 0.0 == a.hi
-        || 0.0 == b.hi)
+    if( isnan( a.hi ) || isnan( b.hi ) ||
+       isinf( a.hi ) || isinf( b.hi ) ||
+       0.0 == a.hi || 0.0 == b.hi )
     {
         result.hi = a.hi * b.hi;
         return result;
     }
 
     double_double ah, al, bh, bl;
-    split_dd(a, &ah, &al);
-    split_dd(b, &bh, &bl);
+    split_dd( a, &ah, &al );
+    split_dd( b, &bh, &bl );
 
-    double p0 = ah.hi * bh.hi; // exact    (52 bits in product) 0
-    double p1 = ah.hi * bh.lo; // exact    (52 bits in product) 26
-    double p2 = ah.lo * bh.hi; // exact    (52 bits in product) 26
-    double p3 = ah.lo * bh.lo; // exact    (52 bits in product) 52
-    double p4 = al.hi * bh.hi; // exact    (52 bits in product) 52
-    double p5 = al.hi * bh.lo; // exact    (52 bits in product) 78
-    double p6 = al.lo * bh.hi; // inexact  (54 bits in product) 78
-    double p7 = al.lo * bh.lo; // inexact  (54 bits in product) 104
-    double p8 = ah.hi * bl.hi; // exact    (52 bits in product) 52
-    double p9 = ah.hi * bl.lo; // inexact  (54 bits in product) 78
-    double pA = ah.lo * bl.hi; // exact    (52 bits in product) 78
-    double pB = ah.lo * bl.lo; // inexact  (54 bits in product) 104
-    double pC = al.hi * bl.hi; // exact    (52 bits in product) 104
+    double p0 = ah.hi * bh.hi;        // exact    (52 bits in product) 0
+    double p1 = ah.hi * bh.lo;        // exact    (52 bits in product) 26
+    double p2 = ah.lo * bh.hi;        // exact    (52 bits in product) 26
+    double p3 = ah.lo * bh.lo;        // exact    (52 bits in product) 52
+    double p4 = al.hi * bh.hi;        // exact    (52 bits in product) 52
+    double p5 = al.hi * bh.lo;        // exact    (52 bits in product) 78
+    double p6 = al.lo * bh.hi;        // inexact  (54 bits in product) 78
+    double p7 = al.lo * bh.lo;        // inexact  (54 bits in product) 104
+    double p8 = ah.hi * bl.hi;        // exact    (52 bits in product) 52
+    double p9 = ah.hi * bl.lo;        // inexact  (54 bits in product) 78
+    double pA = ah.lo * bl.hi;        // exact    (52 bits in product) 78
+    double pB = ah.lo * bl.lo;        // inexact  (54 bits in product) 104
+    double pC = al.hi * bl.hi;        // exact    (52 bits in product) 104
     // the last 3 terms are two low to appear in the result
 
 
-    // take advantage of the known relative magnitudes of the partial products
-    // to avoid some sorting Combine 2**-78 and 2**-104 terms. Here we are a bit
-    // sloppy about canonicalizing the double_doubles
+    // accumulate from bottom up
+#if 0
+    // works but slow
+    result.hi = pC;
+    result = accum_d( result, pB );
+    result = accum_d( result, p7 );
+    result = accum_d( result, pA );
+    result = accum_d( result, p9 );
+    result = accum_d( result, p6 );
+    result = accum_d( result, p5 );
+    result = accum_d( result, p8 );
+    result = accum_d( result, p4 );
+    result = accum_d( result, p3 );
+    result = accum_d( result, p2 );
+    result = accum_d( result, p1 );
+    result = accum_d( result, p0 );
+
+    // canonicalize the result
+    double temp = result.hi;
+    result.hi += result.lo;
+    result.lo -= (result.hi - temp);
+    if( isnan( result.lo ) )
+        result.lo = 0.0;
+
+    return result;
+#else
+    // take advantage of the known relative magnitudes of the partial products to avoid some sorting
+    // Combine 2**-78 and 2**-104 terms. Here we are a bit sloppy about canonicalizing the double_doubles
     double_double t0 = { pA, pC };
     double_double t1 = { p9, pB };
     double_double t2 = { p6, p7 };
     double temp0, temp1, temp2;
 
-    t0 = accum_d(t0, p5); // there is an extra 2**-78 term to deal with
+    t0 = accum_d( t0, p5 );  // there is an extra 2**-78 term to deal with
 
-    // Add in 2**-52 terms. Here we are a bit sloppy about canonicalizing the
-    // double_doubles
-    temp0 = t0.hi;
-    temp1 = t1.hi;
-    temp2 = t2.hi;
-    t0.hi += p3;
-    t1.hi += p4;
-    t2.hi += p8;
-    temp0 -= t0.hi - p3;
-    temp1 -= t1.hi - p4;
-    temp2 -= t2.hi - p8;
-    t0.lo += temp0;
-    t1.lo += temp1;
-    t2.lo += temp2;
+    // Add in 2**-52 terms. Here we are a bit sloppy about canonicalizing the double_doubles
+    temp0 = t0.hi;      temp1 = t1.hi;      temp2 = t2.hi;
+    t0.hi += p3;        t1.hi += p4;        t2.hi += p8;
+    temp0 -= t0.hi-p3;  temp1 -= t1.hi-p4;  temp2 -= t2.hi - p8;
+    t0.lo += temp0;     t1.lo += temp1;     t2.lo += temp2;
 
-    // Add in 2**-26 terms. Here we are a bit sloppy about canonicalizing the
-    // double_doubles
-    temp1 = t1.hi;
-    temp2 = t2.hi;
-    t1.hi += p1;
-    t2.hi += p2;
-    temp1 -= t1.hi - p1;
-    temp2 -= t2.hi - p2;
-    t1.lo += temp1;
-    t2.lo += temp2;
+    // Add in 2**-26 terms. Here we are a bit sloppy about canonicalizing the double_doubles
+    temp1 = t1.hi;      temp2 = t2.hi;
+    t1.hi += p1;        t2.hi += p2;
+    temp1 -= t1.hi-p1;  temp2 -= t2.hi - p2;
+    t1.lo += temp1;     t2.lo += temp2;
 
     // Combine accumulators to get the low bits of result
-    t1 = add_dd(t1, add_dd(t2, t0));
+    t1 = add_dd( t1, add_dd( t2, t0 ) );
 
     // Add in MSB's, and round to precision
-    return accum_d(t1, p0); // canonicalizes
+    return accum_d( t1, p0 );  // canonicalizes
+#endif
+
 }
 
 
-long double reference_exp10l(long double z)
+long double reference_exp10l( long double z )
 {
-    const double_double log2_10 = { HEX_DBL(+, 1, a934f0979a371, +, 1),
-                                    HEX_DBL(+, 1, 7f2495fb7fa6d, -, 53) };
+    const double_double log2_10 = { HEX_DBL( +, 1, a934f0979a371, +, 1 ), HEX_DBL( +, 1, 7f2495fb7fa6d, -, 53 ) };
     double_double x;
     int j;
 
     // Handle NaNs
-    if (isnan(z)) return z;
+    if( isnan(z) )
+        return z;
 
     // init x
     x.hi = z;
@@ -2473,193 +2211,172 @@
 
     // 10**x = exp2( x * log2(10) )
 
-    x = mul_dd(x, log2_10); // x * log2(10)
+    x = mul_dd( x, log2_10);    // x * log2(10)
 
-    // Deal with overflow and underflow for exp2(x) stage next
-    if (x.hi >= 1025) return INFINITY;
+    //Deal with overflow and underflow for exp2(x) stage next
+    if( x.hi >= 1025 )
+        return INFINITY;
 
-    if (x.hi < -1075 - 24) return +0.0;
+    if( x.hi < -1075-24 )
+        return +0.0;
 
     // find nearest integer to x
-    int i = (int)rint(x.hi);
+    int i = (int) rint(x.hi);
 
     // x now holds fractional part.  The result would be then 2**i  * exp2( x )
     x.hi -= i;
 
-    // We could attempt to find a minimax polynomial for exp2(x) over the range
-    // x = [-0.5, 0.5]. However, this would converge very slowly near the
-    // extrema, where 0.5**n is not a lot different from 0.5**(n+1), thereby
-    // requiring something like a 20th order polynomial to get 53 + 24 bits of
-    // precision. Instead we further reduce the range to [-1/32, 1/32] by
-    // observing that
+    // We could attempt to find a minimax polynomial for exp2(x) over the range x = [-0.5, 0.5].
+    // However, this would converge very slowly near the extrema, where 0.5**n is not a lot different
+    // from 0.5**(n+1), thereby requiring something like a 20th order polynomial to get 53 + 24 bits
+    // of precision. Instead we further reduce the range to [-1/32, 1/32] by observing that
     //
     //  2**(a+b) = 2**a * 2**b
     //
-    // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and
-    // reduce the range of x to [-1/32, 1/32] by subtracting away the nearest
-    // value of n/16 from x.
-    const double_double corrections[17] = {
-        { HEX_DBL(+, 1, 6a09e667f3bcd, -, 1),
-          HEX_DBL(-, 1, bdd3413b26456, -, 55) },
-        { HEX_DBL(+, 1, 7a11473eb0187, -, 1),
-          HEX_DBL(-, 1, 41577ee04992f, -, 56) },
-        { HEX_DBL(+, 1, 8ace5422aa0db, -, 1),
-          HEX_DBL(+, 1, 6e9f156864b27, -, 55) },
-        { HEX_DBL(+, 1, 9c49182a3f09, -, 1),
-          HEX_DBL(+, 1, c7c46b071f2be, -, 57) },
-        { HEX_DBL(+, 1, ae89f995ad3ad, -, 1),
-          HEX_DBL(+, 1, 7a1cd345dcc81, -, 55) },
-        { HEX_DBL(+, 1, c199bdd85529c, -, 1),
-          HEX_DBL(+, 1, 11065895048dd, -, 56) },
-        { HEX_DBL(+, 1, d5818dcfba487, -, 1),
-          HEX_DBL(+, 1, 2ed02d75b3707, -, 56) },
-        { HEX_DBL(+, 1, ea4afa2a490da, -, 1),
-          HEX_DBL(-, 1, e9c23179c2893, -, 55) },
-        { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-        { HEX_DBL(+, 1, 0b5586cf9890f, +, 0),
-          HEX_DBL(+, 1, 8a62e4adc610b, -, 54) },
-        { HEX_DBL(+, 1, 172b83c7d517b, +, 0),
-          HEX_DBL(-, 1, 19041b9d78a76, -, 55) },
-        { HEX_DBL(+, 1, 2387a6e756238, +, 0),
-          HEX_DBL(+, 1, 9b07eb6c70573, -, 54) },
-        { HEX_DBL(+, 1, 306fe0a31b715, +, 0),
-          HEX_DBL(+, 1, 6f46ad23182e4, -, 55) },
-        { HEX_DBL(+, 1, 3dea64c123422, +, 0),
-          HEX_DBL(+, 1, ada0911f09ebc, -, 55) },
-        { HEX_DBL(+, 1, 4bfdad5362a27, +, 0),
-          HEX_DBL(+, 1, d4397afec42e2, -, 56) },
-        { HEX_DBL(+, 1, 5ab07dd485429, +, 0),
-          HEX_DBL(+, 1, 6324c054647ad, -, 54) },
-        { HEX_DBL(+, 1, 6a09e667f3bcd, +, 0),
-          HEX_DBL(-, 1, bdd3413b26456, -, 54) }
+    // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and reduce the range
+    // of x to [-1/32, 1/32] by subtracting away the nearest value of n/16 from x.
+    const double_double corrections[17] =
+    {
+        { HEX_DBL( +, 1, 6a09e667f3bcd, -, 1 ), HEX_DBL( -, 1, bdd3413b26456, -, 55 ) },
+        { HEX_DBL( +, 1, 7a11473eb0187, -, 1 ), HEX_DBL( -, 1, 41577ee04992f, -, 56 ) },
+        { HEX_DBL( +, 1, 8ace5422aa0db, -, 1 ), HEX_DBL( +, 1, 6e9f156864b27, -, 55 ) },
+        { HEX_DBL( +, 1, 9c49182a3f09,  -, 1 ), HEX_DBL( +, 1, c7c46b071f2be, -, 57 ) },
+        { HEX_DBL( +, 1, ae89f995ad3ad, -, 1 ), HEX_DBL( +, 1, 7a1cd345dcc81, -, 55 ) },
+        { HEX_DBL( +, 1, c199bdd85529c, -, 1 ), HEX_DBL( +, 1, 11065895048dd, -, 56 ) },
+        { HEX_DBL( +, 1, d5818dcfba487, -, 1 ), HEX_DBL( +, 1, 2ed02d75b3707, -, 56 ) },
+        { HEX_DBL( +, 1, ea4afa2a490da, -, 1 ), HEX_DBL( -, 1, e9c23179c2893, -, 55 ) },
+        { HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,             +,  0 ) },
+        { HEX_DBL( +, 1, 0b5586cf9890f, +, 0 ), HEX_DBL( +, 1, 8a62e4adc610b, -, 54 ) },
+        { HEX_DBL( +, 1, 172b83c7d517b, +, 0 ), HEX_DBL( -, 1, 19041b9d78a76, -, 55 ) },
+        { HEX_DBL( +, 1, 2387a6e756238, +, 0 ), HEX_DBL( +, 1, 9b07eb6c70573, -, 54 ) },
+        { HEX_DBL( +, 1, 306fe0a31b715, +, 0 ), HEX_DBL( +, 1, 6f46ad23182e4, -, 55 ) },
+        { HEX_DBL( +, 1, 3dea64c123422, +, 0 ), HEX_DBL( +, 1, ada0911f09ebc, -, 55 ) },
+        { HEX_DBL( +, 1, 4bfdad5362a27, +, 0 ), HEX_DBL( +, 1, d4397afec42e2, -, 56 ) },
+        { HEX_DBL( +, 1, 5ab07dd485429, +, 0 ), HEX_DBL( +, 1, 6324c054647ad, -, 54 ) },
+        { HEX_DBL( +, 1, 6a09e667f3bcd, +, 0 ), HEX_DBL( -, 1, bdd3413b26456, -, 54 ) }
 
     };
-    int index = (int)rint(x.hi * 16.0);
-    x.hi -= (double)index * 0.0625;
+    int index = (int) rint( x.hi * 16.0 );
+    x.hi -= (double) index * 0.0625;
 
     // canonicalize x
     double temp = x.hi;
     x.hi += x.lo;
     x.lo -= x.hi - temp;
 
-    // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32].  Max
-    // Error: 2 * 0x1.e112p-87
-    const double_double c[] = { { HEX_DBL(+, 1, 62e42fefa39ef, -, 1),
-                                  HEX_DBL(+, 1, abc9e3ac1d244, -, 56) },
-                                { HEX_DBL(+, 1, ebfbdff82c58f, -, 3),
-                                  HEX_DBL(-, 1, 5e4987a631846, -, 57) },
-                                { HEX_DBL(+, 1, c6b08d704a0c, -, 5),
-                                  HEX_DBL(-, 1, d323200a05713, -, 59) },
-                                { HEX_DBL(+, 1, 3b2ab6fba4e7a, -, 7),
-                                  HEX_DBL(+, 1, c5ee8f8b9f0c1, -, 63) },
-                                { HEX_DBL(+, 1, 5d87fe78a672a, -, 10),
-                                  HEX_DBL(+, 1, 884e5e5cc7ecc, -, 64) },
-                                { HEX_DBL(+, 1, 430912f7e8373, -, 13),
-                                  HEX_DBL(+, 1, 4f1b59514a326, -, 67) },
-                                { HEX_DBL(+, 1, ffcbfc5985e71, -, 17),
-                                  HEX_DBL(-, 1, db7d6a0953b78, -, 71) },
-                                { HEX_DBL(+, 1, 62c150eb16465, -, 20),
-                                  HEX_DBL(+, 1, e0767c2d7abf5, -, 80) },
-                                { HEX_DBL(+, 1, b52502b5e953, -, 24),
-                                  HEX_DBL(+, 1, 6797523f944bc, -, 78) } };
-    size_t count = sizeof(c) / sizeof(c[0]);
+    // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32].  Max Error: 2 * 0x1.e112p-87
+    const double_double c[] = {
+        {HEX_DBL( +, 1, 62e42fefa39ef, -,  1 ), HEX_DBL( +, 1, abc9e3ac1d244, -, 56 )},
+        {HEX_DBL( +, 1, ebfbdff82c58f, -,  3 ), HEX_DBL( -, 1, 5e4987a631846, -, 57 )},
+        {HEX_DBL( +, 1, c6b08d704a0c,  -,  5 ), HEX_DBL( -, 1, d323200a05713, -, 59 )},
+        {HEX_DBL( +, 1, 3b2ab6fba4e7a, -,  7 ), HEX_DBL( +, 1, c5ee8f8b9f0c1, -, 63 )},
+        {HEX_DBL( +, 1, 5d87fe78a672a, -, 10 ), HEX_DBL( +, 1, 884e5e5cc7ecc, -, 64 )},
+        {HEX_DBL( +, 1, 430912f7e8373, -, 13 ), HEX_DBL( +, 1, 4f1b59514a326, -, 67 )},
+        {HEX_DBL( +, 1, ffcbfc5985e71, -, 17 ), HEX_DBL( -, 1, db7d6a0953b78, -, 71 )},
+        {HEX_DBL( +, 1, 62c150eb16465, -, 20 ), HEX_DBL( +, 1, e0767c2d7abf5, -, 80 )},
+        {HEX_DBL( +, 1, b52502b5e953,  -, 24 ), HEX_DBL( +, 1, 6797523f944bc, -, 78 )}
+    };
+    size_t count = sizeof( c ) / sizeof( c[0] );
 
     // Do polynomial
-    double_double r = c[count - 1];
-    for (j = (int)count - 2; j >= 0; j--) r = add_dd(c[j], mul_dd(r, x));
+    double_double r = c[count-1];
+    for( j = (int) count-2; j >= 0; j-- )
+        r = add_dd( c[j], mul_dd( r, x ) );
 
     // unwind approximation
-    r = mul_dd(r, x); // before: r =(exp2(x)-1)/x;   after: r = exp2(x) - 1
+    r = mul_dd( r, x );     // before: r =(exp2(x)-1)/x;   after: r = exp2(x) - 1
 
     // correct for [-0.5, 0.5] -> [-1/32, 1/32] reduction above
     //  exp2(x) = (r + 1) * correction = r * correction + correction
-    r = mul_dd(r, corrections[index + 8]);
-    r = add_dd(r, corrections[index + 8]);
+    r = mul_dd( r, corrections[index+8] );
+    r = add_dd( r, corrections[index+8] );
 
-    // Format result for output:
+// Format result for output:
 
     // Get mantissa
-    long double m = ((long double)r.hi + (long double)r.lo);
+    long double m = ((long double) r.hi + (long double) r.lo );
 
     // Handle a pesky overflow cases when long double = double
-    if (i > 512)
+    if( i > 512 )
     {
-        m *= HEX_DBL(+, 1, 0, +, 512);
+        m *=  HEX_DBL( +, 1, 0, +, 512 );
         i -= 512;
     }
-    else if (i < -512)
+    else if( i < -512 )
     {
-        m *= HEX_DBL(+, 1, 0, -, 512);
+        m *= HEX_DBL( +, 1, 0, -, 512 );
         i += 512;
     }
 
-    return m * ldexpl(1.0L, i);
+    return m * ldexpl( 1.0L, i );
 }
 
 
-static double fallback_frexp(double x, int *iptr)
+static double fallback_frexp( double x, int *iptr )
 {
     cl_ulong u, v;
     double fu, fv;
 
-    memcpy(&u, &x, sizeof(u));
+    memcpy( &u, &x, sizeof(u));
 
-    cl_ulong exponent = u & 0x7ff0000000000000ULL;
+    cl_ulong exponent = u &  0x7ff0000000000000ULL;
     cl_ulong mantissa = u & ~0x7ff0000000000000ULL;
 
     // add 1 to the exponent
     exponent += 0x0010000000000000ULL;
 
-    if ((cl_long)exponent < (cl_long)0x0020000000000000LL)
+    if( (cl_long) exponent < (cl_long) 0x0020000000000000LL )
     { // subnormal, NaN, Inf
         mantissa |= 0x3fe0000000000000ULL;
 
         v = mantissa & 0xfff0000000000000ULL;
         u = mantissa;
-        memcpy(&fv, &v, sizeof(v));
-        memcpy(&fu, &u, sizeof(u));
+        memcpy( &fv, &v, sizeof(v));
+        memcpy( &fu, &u, sizeof(u));
 
         fu -= fv;
 
-        memcpy(&v, &fv, sizeof(v));
-        memcpy(&u, &fu, sizeof(u));
+        memcpy( &v, &fv, sizeof(v));
+        memcpy( &u, &fu, sizeof(u));
 
-        exponent = u & 0x7ff0000000000000ULL;
+        exponent = u &  0x7ff0000000000000ULL;
         mantissa = u & ~0x7ff0000000000000ULL;
 
-        *iptr = (exponent >> 52) + (-1022 + 1 - 1022);
+        *iptr = (exponent >> 52) + (-1022 + 1 -1022);
         u = mantissa | 0x3fe0000000000000ULL;
-        memcpy(&fu, &u, sizeof(u));
+        memcpy( &fu, &u, sizeof(u));
         return fu;
     }
 
     *iptr = (exponent >> 52) - 1023;
     u = mantissa | 0x3fe0000000000000ULL;
-    memcpy(&fu, &u, sizeof(u));
+    memcpy( &fu, &u, sizeof(u));
     return fu;
 }
 
 // Assumes zeros, infinities and NaNs handed elsewhere
-static inline int extract(double x, cl_ulong *mant)
+static inline int extract( double x, cl_ulong *mant );
+static inline int extract( double x, cl_ulong *mant )
 {
-    static double (*frexpp)(double, int *) = NULL;
+    static double (*frexpp)(double, int*) = NULL;
     int e;
 
     // verify that frexp works properly
-    if (NULL == frexpp)
+    if( NULL == frexpp )
     {
-        if (0.5 == frexp(HEX_DBL(+, 1, 0, -, 1030), &e) && e == -1029)
+        if( 0.5 == frexp( HEX_DBL( +, 1, 0, -, 1030 ), &e ) && e == -1029 )
             frexpp = frexp;
         else
             frexpp = fallback_frexp;
     }
 
-    *mant = (cl_ulong)(HEX_DBL(+, 1, 0, +, 64) * fabs(frexpp(x, &e)));
+    *mant = (cl_ulong) (HEX_DBL( +, 1, 0, +, 64 ) * fabs( frexpp( x, &e )));
     return e - 1;
 }
 
 // Return 128-bit product of a*b  as (hi << 64) + lo
-static inline void mul128(cl_ulong a, cl_ulong b, cl_ulong *hi, cl_ulong *lo)
+static inline void mul128( cl_ulong a, cl_ulong b, cl_ulong *hi, cl_ulong *lo );
+static inline void mul128( cl_ulong a, cl_ulong b, cl_ulong *hi, cl_ulong *lo )
 {
     cl_ulong alo = a & 0xffffffffULL;
     cl_ulong ahi = a >> 32;
@@ -2670,90 +2387,92 @@
     cl_ulong ahiblo = ahi * blo;
     cl_ulong ahibhi = ahi * bhi;
 
-    alobhi += (aloblo >> 32)
-        + (ahiblo
-           & 0xffffffffULL); // cannot overflow: (2^32-1)^2 + 2 * (2^32-1)   =
-                             // (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1
-    *hi = ahibhi + (alobhi >> 32)
-        + (ahiblo >> 32); // cannot overflow: (2^32-1)^2 + 2 * (2^32-1)   =
-                          // (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1
+    alobhi += (aloblo >> 32) + (ahiblo & 0xffffffffULL);  // cannot overflow: (2^32-1)^2 + 2 * (2^32-1)   = (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1
+    *hi = ahibhi + (alobhi >> 32) + (ahiblo >> 32);       // cannot overflow: (2^32-1)^2 + 2 * (2^32-1)   = (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1
     *lo = (aloblo & 0xffffffffULL) | (alobhi << 32);
 }
 
-static double round_to_nearest_even_double(cl_ulong hi, cl_ulong lo,
-                                           int exponent)
+// Move the most significant non-zero bit to the MSB
+// Note: not general. Only works if the most significant non-zero bit is at MSB-1
+static inline void renormalize( cl_ulong *hi, cl_ulong *lo, int *exponent )
 {
-    union {
-        cl_ulong u;
-        cl_double d;
-    } u;
+    if( 0 == (0x8000000000000000ULL & *hi ))
+    {
+        *hi <<= 1;
+        *hi |= *lo >> 63;
+        *lo <<= 1;
+        *exponent -= 1;
+    }
+}
+
+static double round_to_nearest_even_double( cl_ulong hi, cl_ulong lo, int exponent );
+static double round_to_nearest_even_double( cl_ulong hi, cl_ulong lo, int exponent )
+{
+    union{ cl_ulong u; cl_double d;} u;
 
     // edges
-    if (exponent > 1023) return INFINITY;
-    if (exponent == -1075 && (hi | (lo != 0)) > 0x8000000000000000ULL)
-        return HEX_DBL(+, 1, 0, -, 1074);
-    if (exponent <= -1075) return 0.0;
+    if( exponent > 1023 )        return INFINITY;
+    if( exponent == -1075 && (hi | (lo!=0)) > 0x8000000000000000ULL )
+        return HEX_DBL( +, 1, 0, -, 1074 );
+    if( exponent <= -1075 )       return 0.0;
 
-    // Figure out which bits go where
+    //Figure out which bits go where
     int shift = 11;
-    if (exponent < -1022)
+    if( exponent < -1022 )
     {
-        shift -= 1022 + exponent; // subnormal: shift is not 52
-        exponent = -1023; //              set exponent to 0
+        shift -= 1022 + exponent;               // subnormal: shift is not 52
+        exponent = -1023;                       //              set exponent to 0
     }
     else
-        hi &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove
-                                     // it.
+        hi &= 0x7fffffffffffffffULL;           // normal: leading bit is implicit. Remove it.
 
     // Assemble the double (round toward zero)
-    u.u = (hi >> shift) | ((cl_ulong)(exponent + 1023) << 52);
+    u.u = (hi >> shift) | ((cl_ulong) (exponent + 1023) << 52);
 
     // put a representation of the residual bits into hi
-    hi <<= (64 - shift);
+    hi <<= (64-shift);
     hi |= lo >> shift;
-    lo <<= (64 - shift);
+    lo <<= (64-shift );
     hi |= lo != 0;
 
-    // round to nearest, ties to even
-    if (hi < 0x8000000000000000ULL) return u.d;
-    if (hi == 0x8000000000000000ULL)
-        u.u += u.u & 1ULL;
-    else
-        u.u++;
+    //round to nearest, ties to even
+    if( hi < 0x8000000000000000ULL )    return u.d;
+    if( hi == 0x8000000000000000ULL )   u.u += u.u & 1ULL;
+    else                                u.u++;
 
     return u.d;
 }
 
-// Shift right.  Bits lost on the right will be OR'd together and OR'd with the
-// LSB
-static inline void shift_right_sticky_128(cl_ulong *hi, cl_ulong *lo, int shift)
+// Shift right.  Bits lost on the right will be OR'd together and OR'd with the LSB
+static inline void shift_right_sticky_128( cl_ulong *hi, cl_ulong *lo, int shift );
+static inline void shift_right_sticky_128( cl_ulong *hi, cl_ulong *lo, int shift )
 {
     cl_ulong sticky = 0;
     cl_ulong h = *hi;
     cl_ulong l = *lo;
 
-    if (shift >= 64)
+    if( shift >= 64 )
     {
         shift -= 64;
         sticky = 0 != lo;
         l = h;
         h = 0;
-        if (shift >= 64)
+        if( shift >= 64 )
         {
             sticky |= (0 != l);
             l = 0;
         }
         else
         {
-            sticky |= (0 != (l << (64 - shift)));
+            sticky |= (0 != (l << (64-shift)));
             l >>= shift;
         }
     }
     else
     {
-        sticky |= (0 != (l << (64 - shift)));
+        sticky |= (0 != (l << (64-shift)));
         l >>= shift;
-        l |= h << (64 - shift);
+        l |=  h << (64-shift);
         h >>= shift;
     }
 
@@ -2762,10 +2481,9 @@
 }
 
 // 128-bit add  of ((*hi << 64) + *lo) + ((chi << 64) + clo)
-// If the 129 bit result doesn't fit, bits lost off the right end will be OR'd
-// with the LSB
-static inline void add128(cl_ulong *hi, cl_ulong *lo, cl_ulong chi,
-                          cl_ulong clo, int *exponent)
+// If the 129 bit result doesn't fit, bits lost off the right end will be OR'd with the LSB
+static inline void add128( cl_ulong *hi, cl_ulong *lo, cl_ulong chi, cl_ulong clo, int *exp );
+static inline void add128( cl_ulong *hi, cl_ulong *lo, cl_ulong chi, cl_ulong clo, int *exponent )
 {
     cl_ulong carry, carry2;
     // extended precision add
@@ -2773,16 +2491,15 @@
     chi = add_carry(*hi, chi, &carry2);
     chi = add_carry(chi, carry, &carry);
 
-    // If we overflowed the 128 bit result
-    if (carry || carry2)
+    //If we overflowed the 128 bit result
+    if( carry || carry2 )
     {
-        carry = clo & 1; // set aside low bit
-        clo >>= 1; // right shift low 1
-        clo |= carry; // or back in the low bit, so we don't come to believe
-                      // this is an exact half way case for rounding
-        clo |= chi << 63; // move lowest high bit into highest bit of lo
-        chi >>= 1; // right shift hi
-        chi |= 0x8000000000000000ULL; // move the carry bit into hi.
+        carry = clo & 1;                        // set aside low bit
+        clo >>= 1;                              // right shift low 1
+        clo |= carry;                           // or back in the low bit, so we don't come to believe this is an exact half way case for rounding
+        clo |= chi << 63;                       // move lowest high bit into highest bit of lo
+        chi >>= 1;                              // right shift hi
+        chi |= 0x8000000000000000ULL;           // move the carry bit into hi.
         *exponent = *exponent + 1;
     }
 
@@ -2791,49 +2508,48 @@
 }
 
 // 128-bit subtract  of ((chi << 64) + clo)  - ((*hi << 64) + *lo)
-static inline void sub128(cl_ulong *chi, cl_ulong *clo, cl_ulong hi,
-                          cl_ulong lo, cl_ulong *signC, int *expC)
+static inline void sub128( cl_ulong *chi, cl_ulong *clo, cl_ulong hi, cl_ulong lo, cl_ulong *signC, int *expC );
+static inline void sub128( cl_ulong *chi, cl_ulong *clo, cl_ulong hi, cl_ulong lo, cl_ulong *signC, int *expC )
 {
     cl_ulong rHi = *chi;
     cl_ulong rLo = *clo;
     cl_ulong carry, carry2;
 
-    // extended precision subtract
+    //extended precision subtract
     rLo = sub_carry(rLo, lo, &carry);
     rHi = sub_carry(rHi, hi, &carry2);
     rHi = sub_carry(rHi, carry, &carry);
 
     // Check for sign flip
-    if (carry || carry2)
+    if( carry || carry2 )
     {
         *signC ^= 0x8000000000000000ULL;
 
-        // negate rLo, rHi:   -x = (x ^ -1) + 1
+        //negate rLo, rHi:   -x = (x ^ -1) + 1
         rLo ^= -1ULL;
         rHi ^= -1ULL;
         rLo++;
         rHi += 0 == rLo;
     }
 
-    // normalize -- move the most significant non-zero bit to the MSB, and
-    // adjust exponent accordingly
-    if (rHi == 0)
+    // normalize -- move the most significant non-zero bit to the MSB, and adjust exponent accordingly
+    if( rHi == 0 )
     {
         rHi = rLo;
         *expC = *expC - 64;
         rLo = 0;
     }
 
-    if (rHi)
+    if( rHi )
     {
         int shift = 32;
         cl_ulong test = 1ULL << 32;
-        while (0 == (rHi & 0x8000000000000000ULL))
+        while( 0 == (rHi & 0x8000000000000000ULL))
         {
-            if (rHi < test)
+            if( rHi < test )
             {
                 rHi <<= shift;
-                rHi |= rLo >> (64 - shift);
+                rHi |= rLo >> (64-shift);
                 rLo <<= shift;
                 *expC = *expC - shift;
             }
@@ -2843,7 +2559,7 @@
     }
     else
     {
-        // zero
+        //zero
         *expC = INT_MIN;
         *signC = 0;
     }
@@ -2853,7 +2569,7 @@
     *clo = rLo;
 }
 
-long double reference_fmal(long double x, long double y, long double z)
+long double reference_fmal( long double x, long double y, long double z)
 {
     static const cl_ulong kMSB = 0x8000000000000000ULL;
 
@@ -2863,91 +2579,75 @@
     double c = z;
 
     // Make bits accessible
-    union {
-        cl_ulong u;
-        cl_double d;
-    } ua;
-    ua.d = a;
-    union {
-        cl_ulong u;
-        cl_double d;
-    } ub;
-    ub.d = b;
-    union {
-        cl_ulong u;
-        cl_double d;
-    } uc;
-    uc.d = c;
+    union{ cl_ulong u; cl_double d; } ua; ua.d = a;
+    union{ cl_ulong u; cl_double d; } ub; ub.d = b;
+    union{ cl_ulong u; cl_double d; } uc; uc.d = c;
 
     // deal with Nans, infinities and zeros
-    if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b) || isinf(c)
-        || 0 == (ua.u & ~kMSB) || // a == 0, defeat host FTZ behavior
-        0 == (ub.u & ~kMSB) || // b == 0, defeat host FTZ behavior
-        0 == (uc.u & ~kMSB)) // c == 0, defeat host FTZ behavior
+    if( isnan( a ) || isnan( b ) || isnan(c)    ||
+        isinf( a ) || isinf( b ) || isinf(c)    ||
+        0 == ( ua.u & ~kMSB)                ||  // a == 0, defeat host FTZ behavior
+        0 == ( ub.u & ~kMSB)                ||  // b == 0, defeat host FTZ behavior
+        0 == ( uc.u & ~kMSB)                )   // c == 0, defeat host FTZ behavior
     {
-        if (isinf(c) && !isinf(a) && !isinf(b)) return (c + a) + b;
+        if( isinf( c ) && !isinf(a) && !isinf(b) )
+            return (c + a) + b;
 
-        a = (double)reference_multiplyl(
-            a, b); // some risk that the compiler will insert a non-compliant
-                   // fma here on some platforms.
-        return reference_addl(
-            a,
-            c); // We use STDC FP_CONTRACT OFF above to attempt to defeat that.
+        a = (double) reference_multiplyl( a, b );   // some risk that the compiler will insert a non-compliant fma here on some platforms.
+        return reference_addl(a, c);                // We use STDC FP_CONTRACT OFF above to attempt to defeat that.
     }
 
     // extract exponent and mantissa
     //   exponent is a standard unbiased signed integer
     //   mantissa is a cl_uint, with leading non-zero bit positioned at the MSB
     cl_ulong mantA, mantB, mantC;
-    int expA = extract(a, &mantA);
-    int expB = extract(b, &mantB);
-    int expC = extract(c, &mantC);
-    cl_ulong signC = uc.u & kMSB; // We'll need the sign bit of C later to
-                                  // decide if we are adding or subtracting
+    int expA = extract( a, &mantA );
+    int expB = extract( b, &mantB );
+    int expC = extract( c, &mantC );
+    cl_ulong signC = uc.u & kMSB;               // We'll need the sign bit of C later to decide if we are adding or subtracting
 
-    // exact product of A and B
+// exact product of A and B
     int exponent = expA + expB;
     cl_ulong sign = (ua.u ^ ub.u) & kMSB;
     cl_ulong hi, lo;
-    mul128(mantA, mantB, &hi, &lo);
+    mul128( mantA, mantB, &hi, &lo );
 
     // renormalize
-    if (0 == (kMSB & hi))
+    if( 0 == (kMSB & hi) )
     {
         hi <<= 1;
         hi |= lo >> 63;
         lo <<= 1;
     }
     else
-        exponent++; // 2**63 * 2**63 gives 2**126. If the MSB was set, then our
-                    // exponent increased.
+        exponent++;         // 2**63 * 2**63 gives 2**126. If the MSB was set, then our exponent increased.
 
-    // infinite precision add
+//infinite precision add
     cl_ulong chi = mantC;
     cl_ulong clo = 0;
 
-    if (exponent >= expC)
+    if( exponent >= expC )
     {
         // Normalize C relative to the product
-        if (exponent > expC)
-            shift_right_sticky_128(&chi, &clo, exponent - expC);
+        if( exponent > expC )
+            shift_right_sticky_128( &chi, &clo, exponent - expC );
 
         // Add
-        if (sign ^ signC)
-            sub128(&hi, &lo, chi, clo, &sign, &exponent);
+        if( sign ^ signC )
+            sub128( &hi, &lo, chi, clo, &sign, &exponent );
         else
-            add128(&hi, &lo, chi, clo, &exponent);
+            add128( &hi, &lo, chi, clo, &exponent );
     }
     else
     {
         // Shift the product relative to C so that their exponents match
-        shift_right_sticky_128(&hi, &lo, expC - exponent);
+        shift_right_sticky_128( &hi, &lo, expC - exponent );
 
         // add
-        if (sign ^ signC)
-            sub128(&chi, &clo, hi, lo, &signC, &expC);
+        if( sign ^ signC )
+            sub128( &chi, &clo, hi, lo, &signC, &expC );
         else
-            add128(&chi, &clo, hi, lo, &expC);
+            add128( &chi, &clo, hi, lo, &expC );
 
         hi = chi;
         lo = clo;
@@ -2965,49 +2665,61 @@
 }
 
 
-long double reference_madl(long double a, long double b, long double c)
+
+
+long double reference_madl( long double a, long double b, long double c) { return a * b + c; }
+
+//long double my_nextafterl(long double x, long double y){  return (long double) nextafter( (double) x, (double) y ); }
+
+long double reference_recipl( long double x){ return 1.0L / x; }
+
+long double reference_rootnl( long double x, int i)
 {
-    return a * b + c;
-}
+    double hi,  lo;
+    long double l;
+    //rootn ( x, 0 )  returns a NaN.
+    if( 0 == i )
+        return cl_make_nan();
 
-long double reference_recipl(long double x) { return 1.0L / x; }
+    //rootn ( x, n )  returns a NaN for x < 0 and n is even.
+    if( x < 0.0L && 0 == (i&1) )
+        return cl_make_nan();
 
-long double reference_rootnl(long double x, int i)
-{
-    // rootn ( x, 0 )  returns a NaN.
-    if (0 == i) return cl_make_nan();
-
-    // rootn ( x, n )  returns a NaN for x < 0 and n is even.
-    if (x < 0.0L && 0 == (i & 1)) return cl_make_nan();
-
-    if (isinf(x))
+    if( isinf(x) )
     {
-        if (i < 0) return reference_copysignl(0.0L, x);
+        if( i < 0 )
+            return reference_copysignl(0.0L, x);
 
         return x;
     }
 
-    if (x == 0.0)
+    if( x == 0.0 )
     {
-        switch (i & 0x80000001)
+        switch( i & 0x80000001 )
         {
-            // rootn ( +-0,  n ) is +0 for even n > 0.
-            case 0: return 0.0L;
+            //rootn ( +-0,  n ) is +0 for even n > 0.
+            case 0:
+                return 0.0L;
 
-            // rootn ( +-0,  n ) is +-0 for odd n > 0.
-            case 1: return x;
+            //rootn ( +-0,  n ) is +-0 for odd n > 0.
+            case 1:
+                return x;
 
-            // rootn ( +-0,  n ) is +inf for even n < 0.
-            case 0x80000000: return INFINITY;
+            //rootn ( +-0,  n ) is +inf for even n < 0.
+            case 0x80000000:
+                return INFINITY;
 
-            // rootn ( +-0,  n ) is +-inf for odd n < 0.
-            case 0x80000001: return copysign(INFINITY, x);
+            //rootn ( +-0,  n ) is +-inf for odd n < 0.
+            case 0x80000001:
+                return copysign(INFINITY, x);
         }
     }
 
-    if (i == 1) return x;
+    if( i == 1 )
+        return x;
 
-    if (i == -1) return 1.0 / x;
+    if( i == -1 )
+        return 1.0 / x;
 
     long double sign = x;
     x = reference_fabsl(x);
@@ -3015,267 +2727,261 @@
     DivideDD(&iHi, &iLo, 1.0, i);
     x = reference_powl(x, iHi) * reference_powl(x, iLo);
 
-    return reference_copysignl(x, sign);
+    return reference_copysignl( x, sign );
+
 }
 
-long double reference_rsqrtl(long double x) { return 1.0L / sqrtl(x); }
-
-long double reference_sinpil(long double x)
+long double reference_rsqrtl( long double x){ return 1.0L / sqrtl(x); }
+//long double reference_sincosl( long double x, long double *c ){ *c = reference_cosl(x); return reference_sinl(x); }
+long double reference_sinpil( long double x)
 {
     double r = reduce1l(x);
 
     // reduce to [-0.5, 0.5]
-    if (r < -0.5L)
+    if( r < -0.5L )
         r = -1.0L - r;
-    else if (r > 0.5L)
+    else if ( r > 0.5L )
         r = 1.0L - r;
 
     // sinPi zeros have the same sign as x
-    if (r == 0.0L) return reference_copysignl(0.0L, x);
+    if( r == 0.0L )
+        return reference_copysignl(0.0L, x);
 
-    return reference_sinl(r * M_PIL);
+    return reference_sinl( r * M_PIL );
 }
 
-long double reference_tanpil(long double x)
+long double reference_tanpil( long double x)
 {
     // set aside the sign  (allows us to preserve sign of -0)
-    long double sign = reference_copysignl(1.0L, x);
+    long double sign = reference_copysignl( 1.0L, x);
     long double z = reference_fabsl(x);
 
     // if big and even  -- caution: only works if x only has single precision
-    if (z >= HEX_LDBL(+, 1, 0, +, 53))
+    if( z >= HEX_LDBL( +, 1, 0, +, 53 ) )
     {
-        if (z == INFINITY) return x - x; // nan
+        if( z == INFINITY )
+            return x - x;       // nan
 
-        return reference_copysignl(
-            0.0L, x); // tanpi ( n ) is copysign( 0.0, n)  for even integers n.
+        return reference_copysignl( 0.0L, x);   // tanpi ( n ) is copysign( 0.0, n)  for even integers n.
     }
 
     // reduce to the range [ -0.5, 0.5 ]
-    long double nearest =
-        reference_rintl(z); // round to nearest even places n + 0.5 values in
-                            // the right place for us
-    int64_t i =
-        (int64_t)nearest; // test above against 0x1.0p53 avoids overflow here
+    long double nearest = reference_rintl( z );     // round to nearest even places n + 0.5 values in the right place for us
+    int64_t i = (int64_t) nearest;          // test above against 0x1.0p53 avoids overflow here
     z -= nearest;
 
-    // correction for odd integer x for the right sign of zero
-    if ((i & 1) && z == 0.0L) sign = -sign;
+    //correction for odd integer x for the right sign of zero
+    if( (i&1) && z == 0.0L )
+        sign = -sign;
 
     // track changes to the sign
-    sign *= reference_copysignl(1.0L, z); // really should just be an xor
-    z = reference_fabsl(z); // remove the sign again
+    sign *= reference_copysignl(1.0L, z);       // really should just be an xor
+    z = reference_fabsl(z);                    // remove the sign again
 
     // reduce once more
-    // If we don't do this, rounding error in z * M_PI will cause us not to
-    // return infinities properly
-    if (z > 0.25L)
+    // If we don't do this, rounding error in z * M_PI will cause us not to return infinities properly
+    if( z > 0.25L )
     {
         z = 0.5L - z;
-        return sign
-            / reference_tanl(z
-                             * M_PIL); // use system tan to get the right result
+        return sign / reference_tanl( z * M_PIL );      // use system tan to get the right result
     }
 
     //
-    return sign
-        * reference_tanl(z * M_PIL); // use system tan to get the right result
+    return sign * reference_tanl( z * M_PIL );          // use system tan to get the right result
 }
 
-long double reference_pownl(long double x, int i)
+long double reference_pownl( long double x, int i ){ return reference_powl( x, (long double) i ); }
+
+long double reference_powrl( long double x, long double y )
 {
-    return reference_powl(x, (long double)i);
-}
+    //powr ( x, y ) returns NaN for x < 0.
+    if( x < 0.0L )
+        return cl_make_nan();
 
-long double reference_powrl(long double x, long double y)
-{
-    // powr ( x, y ) returns NaN for x < 0.
-    if (x < 0.0L) return cl_make_nan();
+    //powr ( x, NaN ) returns the NaN for x >= 0.
+    //powr ( NaN, y ) returns the NaN.
+    if( isnan(x) || isnan(y) )
+        return x + y;   // Note: behavior different here than for pow(1,NaN), pow(NaN, 0)
 
-    // powr ( x, NaN ) returns the NaN for x >= 0.
-    // powr ( NaN, y ) returns the NaN.
-    if (isnan(x) || isnan(y))
-        return x + y; // Note: behavior different here than for pow(1,NaN),
-                      // pow(NaN, 0)
-
-    if (x == 1.0L)
+    if( x == 1.0L )
     {
-        // powr ( +1, +-inf ) returns NaN.
-        if (reference_fabsl(y) == INFINITY) return cl_make_nan();
+        //powr ( +1, +-inf ) returns NaN.
+        if( reference_fabsl(y) == INFINITY )
+            return cl_make_nan();
 
-        // powr ( +1, y ) is 1 for finite y.    (NaN handled above)
+        //powr ( +1, y ) is 1 for finite y.    (NaN handled above)
         return 1.0L;
     }
 
-    if (y == 0.0L)
+    if( y == 0.0L )
     {
-        // powr ( +inf, +-0 ) returns NaN.
-        // powr ( +-0, +-0 ) returns NaN.
-        if (x == 0.0L || x == INFINITY) return cl_make_nan();
+        //powr ( +inf, +-0 ) returns NaN.
+        //powr ( +-0, +-0 ) returns NaN.
+        if( x == 0.0L || x == INFINITY )
+            return cl_make_nan();
 
-        // powr ( x, +-0 ) is 1 for finite x > 0.  (x <= 0, NaN, INF already
-        // handled above)
+        //powr ( x, +-0 ) is 1 for finite x > 0.  (x <= 0, NaN, INF already handled above)
         return 1.0L;
     }
 
-    if (x == 0.0L)
+    if( x == 0.0L )
     {
-        // powr ( +-0, -inf) is +inf.
-        // powr ( +-0, y ) is +inf for finite y < 0.
-        if (y < 0.0L) return INFINITY;
+        //powr ( +-0, -inf) is +inf.
+        //powr ( +-0, y ) is +inf for finite y < 0.
+        if( y < 0.0L )
+            return INFINITY;
 
-        // powr ( +-0, y ) is +0 for y > 0.    (NaN, y==0 handled above)
+        //powr ( +-0, y ) is +0 for y > 0.    (NaN, y==0 handled above)
         return 0.0L;
     }
 
-    return reference_powl(x, y);
+    return reference_powl( x, y );
 }
 
-long double reference_addl(long double x, long double y)
+//long double my_fdiml( long double x, long double y){ return fdim( (double) x, (double) y ); }
+long double reference_addl( long double x, long double y)
 {
-    volatile double a = (double)x;
-    volatile double b = (double)y;
+    volatile double a = (double) x;
+    volatile double b = (double) y;
 
-#if defined(__SSE2__)
+#if defined( __SSE2__ )
     // defeat x87
-    __m128d va = _mm_set_sd((double)a);
-    __m128d vb = _mm_set_sd((double)b);
-    va = _mm_add_sd(va, vb);
-    _mm_store_sd((double *)&a, va);
+    __m128d va = _mm_set_sd( (double) a );
+    __m128d vb = _mm_set_sd( (double) b );
+    va = _mm_add_sd( va, vb );
+    _mm_store_sd( (double*) &a, va );
 #else
     a += b;
 #endif
-    return (long double)a;
+    return (long double) a;
 }
 
-long double reference_subtractl(long double x, long double y)
+long double reference_subtractl( long double x, long double y)
 {
-    volatile double a = (double)x;
-    volatile double b = (double)y;
+    volatile double a = (double) x;
+    volatile double b = (double) y;
 
-#if defined(__SSE2__)
+#if defined( __SSE2__ )
     // defeat x87
-    __m128d va = _mm_set_sd((double)a);
-    __m128d vb = _mm_set_sd((double)b);
-    va = _mm_sub_sd(va, vb);
-    _mm_store_sd((double *)&a, va);
+    __m128d va = _mm_set_sd( (double) a );
+    __m128d vb = _mm_set_sd( (double) b );
+    va = _mm_sub_sd( va, vb );
+    _mm_store_sd( (double*) &a, va );
 #else
     a -= b;
 #endif
-    return (long double)a;
+    return (long double) a;
 }
 
-long double reference_multiplyl(long double x, long double y)
+long double reference_multiplyl( long double x, long double y)
 {
-    volatile double a = (double)x;
-    volatile double b = (double)y;
+    volatile double a = (double) x;
+    volatile double b = (double) y;
 
-#if defined(__SSE2__)
+#if defined( __SSE2__ )
     // defeat x87
-    __m128d va = _mm_set_sd((double)a);
-    __m128d vb = _mm_set_sd((double)b);
-    va = _mm_mul_sd(va, vb);
-    _mm_store_sd((double *)&a, va);
+    __m128d va = _mm_set_sd( (double) a );
+    __m128d vb = _mm_set_sd( (double) b );
+    va = _mm_mul_sd( va, vb );
+    _mm_store_sd( (double*) &a, va );
 #else
     a *= b;
 #endif
-    return (long double)a;
+    return (long double) a;
 }
 
-long double reference_lgamma_rl(long double x, int *signp)
+/*long double my_remquol( long double x, long double y, int *iptr )
 {
+    if( isnan(x) || isnan(y) ||
+        fabs(x) == INFINITY  ||
+        y == 0.0 )
+    {
+        *iptr = 0;
+        return NAN;
+    }
+
+    return remquo( (double) x, (double) y, iptr );
+}*/
+long double reference_lgamma_rl( long double x, int *signp )
+{
+//    long double lgamma_val = (long double)reference_lgamma( (double)x );
+//    *signp = signgam;
     *signp = 0;
     return x;
 }
 
-int reference_isequall(long double x, long double y) { return x == y; }
-int reference_isfinitel(long double x) { return 0 != isfinite(x); }
-int reference_isgreaterl(long double x, long double y) { return x > y; }
-int reference_isgreaterequall(long double x, long double y) { return x >= y; }
-int reference_isinfl(long double x) { return 0 != isinf(x); }
-int reference_islessl(long double x, long double y) { return x < y; }
-int reference_islessequall(long double x, long double y) { return x <= y; }
-#if defined(__INTEL_COMPILER)
-int reference_islessgreaterl(long double x, long double y)
-{
-    return 0 != islessgreaterl(x, y);
-}
+
+int reference_isequall( long double x, long double y){ return x == y; }
+int reference_isfinitel( long double x){ return 0 != isfinite(x); }
+int reference_isgreaterl( long double x, long double y){ return x > y; }
+int reference_isgreaterequall( long double x, long double y){ return x >= y; }
+int reference_isinfl( long double x){ return 0 != isinf(x); }
+int reference_islessl( long double x, long double y){ return x < y; }
+int reference_islessequall( long double x, long double y){ return x <= y; }
+int reference_islessgreaterl( long double x, long double y){  return 0 != islessgreater( x, y ); }
+int reference_isnanl( long double x){ return 0 != isnan( x ); }
+int reference_isnormall( long double x){ return 0 != isnormal( (double) x ); }
+int reference_isnotequall( long double x, long double y){ return x != y; }
+int reference_isorderedl( long double x, long double y){ return x == x && y == y; }
+int reference_isunorderedl( long double x, long double y){ return isnan(x) || isnan( y ); }
+#if defined( __INTEL_COMPILER )
+int reference_signbitl( long double x){ return 0 != signbitl( x ); }
 #else
-int reference_islessgreaterl(long double x, long double y)
-{
-    return 0 != islessgreater(x, y);
-}
+int reference_signbitl( long double x){ return 0 != signbit( x ); }
 #endif
-int reference_isnanl(long double x) { return 0 != isnan(x); }
-int reference_isnormall(long double x) { return 0 != isnormal((double)x); }
-int reference_isnotequall(long double x, long double y) { return x != y; }
-int reference_isorderedl(long double x, long double y)
-{
-    return x == x && y == y;
-}
-int reference_isunorderedl(long double x, long double y)
-{
-    return isnan(x) || isnan(y);
-}
-#if defined(__INTEL_COMPILER)
-int reference_signbitl(long double x) { return 0 != signbitl(x); }
-#else
-int reference_signbitl(long double x) { return 0 != signbit(x); }
-#endif
-long double reference_copysignl(long double x, long double y);
-long double reference_roundl(long double x);
+long double reference_copysignl( long double x, long double y);
+long double reference_roundl( long double x );
 long double reference_cbrtl(long double x);
 
-long double reference_copysignl(long double x, long double y)
+long double reference_copysignl( long double x, long double y )
 {
-    // We hope that the long double to double conversion proceeds with sign
-    // fidelity, even for zeros and NaNs
-    union {
-        double d;
-        cl_ulong u;
-    } u;
-    u.d = (double)y;
+    // We hope that the long double to double conversion proceeds with sign fidelity,
+    // even for zeros and NaNs
+    union{ double d; cl_ulong u;}u; u.d = (double) y;
 
     x = reference_fabsl(x);
-    if (u.u >> 63) x = -x;
+    if( u.u >> 63 )
+        x = -x;
 
     return x;
 }
 
-long double reference_roundl(long double x)
+long double reference_roundl( long double x )
 {
     // Since we are just using this to verify double precision, we can
     // use the double precision copysign here
 
 #if defined(__MINGW32__) && defined(__x86_64__)
     long double absx = reference_fabsl(x);
-    if (absx < 0.5L) return reference_copysignl(0.0L, x);
+    if (absx < 0.5L)
+    return reference_copysignl(0.0L, x);
 #endif
-    return round((double)x);
+    return round( (double) x );
 }
 
-long double reference_truncl(long double x)
+long double reference_truncl( long double x )
 {
     // Since we are just using this to verify double precision, we can
     // use the double precision copysign here
-    return trunc((double)x);
+    return trunc( (double) x );
 }
 
 static long double reference_scalblnl(long double x, long n);
 
 long double reference_cbrtl(long double x)
 {
-    double yhi = HEX_DBL(+, 1, 5555555555555, -, 2);
-    double ylo = HEX_DBL(+, 1, 558, -, 56);
+    double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 );
+    double ylo = HEX_DBL( +, 1, 558, -, 56 );
 
-    double fabsx = reference_fabs(x);
+    double fabsx = reference_fabs( x );
 
-    if (isnan(x) || fabsx == 1.0 || fabsx == 0.0 || isinf(x)) return x;
+    if( isnan(x) || fabsx == 1.0 || fabsx == 0.0 || isinf(x) )
+        return x;
 
+    double iy = 0.0;
     double log2x_hi, log2x_lo;
 
-    // extended precision log .... accurate to at least 64-bits + couple of
-    // guard bits
+    // extended precision log .... accurate to at least 64-bits + couple of guard bits
     __log2_ep(&log2x_hi, &log2x_lo, fabsx);
 
     double ylog2x_hi, ylog2x_lo;
@@ -3287,44 +2993,79 @@
     MulDD(&ylog2x_hi, &ylog2x_lo, log2x_hi, log2x_lo, y_hi, y_lo);
 
     long double powxy;
-    if (isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200))
-    {
-        powxy =
-            reference_signbit(ylog2x_hi) ? HEX_DBL(+, 0, 0, +, 0) : INFINITY;
-    }
-    else
-    {
+    if(isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200)) {
+        powxy = reference_signbit(ylog2x_hi) ? HEX_DBL( +, 0, 0, +, 0 ) : INFINITY;
+    } else {
         // separate integer + fractional part
         long int m = lrint(ylog2x_hi);
         AddDD(&ylog2x_hi, &ylog2x_lo, ylog2x_hi, ylog2x_lo, -m, 0.0);
 
         // revert to long double arithemtic
-        long double ylog2x = (long double)ylog2x_hi + (long double)ylog2x_lo;
-        powxy = reference_exp2l(ylog2x);
+        long double ylog2x = (long double) ylog2x_hi + (long double) ylog2x_lo;
+        powxy = reference_exp2l( ylog2x );
         powxy = reference_scalblnl(powxy, m);
     }
 
-    return reference_copysignl(powxy, x);
+    return reference_copysignl( powxy, x );
 }
 
-long double reference_rintl(long double x)
+/*
+long double scalbnl( long double x, int i )
+{
+    //suitable for checking double precision scalbn only
+
+    if( i > 3000 )
+        return copysignl( INFINITY, x);
+    if( i < -3000 )
+        return copysignl( 0.0L, x);
+
+    if( i > 0 )
+    {
+        while( i >= 1000 )
+        {
+            x *= HEX_LDBL( +, 1, 0, +, 1000 );
+            i -= 1000;
+        }
+
+        union{ cl_ulong u; double d;}u;
+        u.u = (cl_ulong)( i + 1023 ) << 52;
+        x *= (long double) u.d;
+    }
+    else if( i < 0 )
+    {
+        while( i <= -1000 )
+        {
+            x *= HEX_LDBL( +, 1, 0, -, 1000 );
+            i += 1000;
+        }
+
+        union{ cl_ulong u; double d;}u;
+        u.u = (cl_ulong)( i + 1023 ) << 52;
+        x *= (long double) u.d;
+    }
+
+    return x;
+}
+*/
+
+long double reference_rintl( long double x )
 {
 #if defined(__PPC__)
-    // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
-    // mantissa can represent more than LDBL_MANT_DIG binary digits.
-    x = rintl(x);
+  // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
+  // mantissa can represent more than LDBL_MANT_DIG binary digits.
+  x = rintl(x);
 #else
-    static long double magic[2] = { 0.0L, 0.0L };
+    static long double magic[2] = { 0.0L, 0.0L};
 
-    if (0.0L == magic[0])
+    if( 0.0L == magic[0] )
     {
         magic[0] = scalbnl(0.5L, LDBL_MANT_DIG);
         magic[1] = scalbnl(-0.5L, LDBL_MANT_DIG);
     }
 
-    if (reference_fabsl(x) < magic[0] && x != 0.0L)
+    if( reference_fabsl(x) < magic[0] && x != 0.0L )
     {
-        long double m = magic[x < 0];
+        long double m = magic[ x < 0 ];
         x += m;
         x -= m;
     }
@@ -3337,7 +3078,7 @@
 static void __sqrt_ep(double *rhi, double *rlo, double xhi, double xlo)
 {
     // approximate reciprocal sqrt
-    double thi = 1.0 / sqrt(xhi);
+    double thi = 1.0 / sqrt( xhi );
     double tlo = 0.0;
 
     // One newton iteration in double-double
@@ -3351,31 +3092,34 @@
     MulDD(rhi, rlo, yhi, ylo, xhi, xlo);
 }
 
-long double reference_acoshl(long double x)
+long double reference_acoshl( long double x )
 {
-    /*
-     * ====================================================
-     * This function derived from fdlibm http://www.netlib.org
-     * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * ====================================================
-     *
-     */
-    if (isnan(x) || isinf(x)) return x + fabsl(x);
+/*
+ * ====================================================
+ * This function derived from fdlibm http://www.netlib.org
+ * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ *
+ */
+    if( isnan(x) || isinf(x))
+        return x + fabsl(x);
 
-    if (x < 1.0L) return cl_make_nan();
+    if( x < 1.0L )
+        return cl_make_nan();
 
-    if (x == 1.0L) return 0.0L;
+    if( x == 1.0L )
+        return 0.0L;
 
-    if (x > HEX_LDBL(+, 1, 0, +, 60))
+    if( x > HEX_LDBL( +, 1, 0, +, 60 ) )
         return reference_logl(x) + 0.693147180559945309417232121458176568L;
 
-    if (x > 2.0L)
-        return reference_logl(2.0L * x - 1.0L / (x + sqrtl(x * x - 1.0L)));
+    if( x > 2.0L )
+        return reference_logl(2.0L * x - 1.0L / (x + sqrtl(x*x - 1.0L)));
 
     double hi, lo;
     MulD(&hi, &lo, x, x);
@@ -3384,290 +3128,286 @@
     AddDD(&hi, &lo, hi, lo, x, 0.0);
     double correction = lo / hi;
     __log2_ep(&hi, &lo, hi);
-    double log2Hi = HEX_DBL(+, 1, 62e42fefa39ef, -, 1);
-    double log2Lo = HEX_DBL(+, 1, abc9e3b39803f, -, 56);
+    double log2Hi = HEX_DBL( +, 1, 62e42fefa39ef, -, 1 );
+    double log2Lo = HEX_DBL( +, 1, abc9e3b39803f, -, 56 );
     MulDD(&hi, &lo, hi, lo, log2Hi, log2Lo);
     AddDD(&hi, &lo, hi, lo, correction, 0.0);
 
     return hi + lo;
 }
 
-long double reference_asinhl(long double x)
+long double reference_asinhl( long double x )
 {
     long double cutoff = 0.0L;
-    const long double ln2 = HEX_LDBL(+, b, 17217f7d1cf79ab, -, 4);
+    const long double ln2 = HEX_LDBL( +, b, 17217f7d1cf79ab, -, 4 );
 
-    if (cutoff == 0.0L) cutoff = reference_ldexpl(1.0L, -LDBL_MANT_DIG);
+    if( cutoff == 0.0L )
+        cutoff = reference_ldexpl(1.0L, -LDBL_MANT_DIG);
 
-    if (isnan(x) || isinf(x)) return x + x;
+    if( isnan(x) || isinf(x) )
+        return x + x;
 
     long double absx = reference_fabsl(x);
-    if (absx < cutoff) return x;
+    if( absx < cutoff )
+        return x;
 
     long double sign = reference_copysignl(1.0L, x);
 
-    if (absx <= 4.0 / 3.0)
-    {
-        return sign
-            * reference_log1pl(absx + x * x / (1.0 + sqrtl(1.0 + x * x)));
+    if( absx <= 4.0/3.0 ) {
+        return sign * reference_log1pl( absx + x*x / (1.0 + sqrtl(1.0 + x*x)));
     }
-    else if (absx <= HEX_LDBL(+, 1, 0, +, 27))
-    {
-        return sign
-            * reference_logl(2.0L * absx + 1.0L / (sqrtl(x * x + 1.0) + absx));
+    else if( absx <= HEX_LDBL( +, 1, 0, +, 27 ) ) {
+        return sign * reference_logl( 2.0L * absx + 1.0L / (sqrtl( x * x + 1.0 ) + absx));
     }
-    else
-    {
-        return sign * (reference_logl(absx) + ln2);
+    else {
+        return sign * ( reference_logl( absx ) + ln2 );
     }
 }
 
-long double reference_atanhl(long double x)
+long double reference_atanhl( long double x )
 {
-    /*
-     * ====================================================
-     * This function is from fdlibm: http://www.netlib.org
-     *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * ====================================================
-     */
-    if (isnan(x)) return x + x;
+/*
+ * ====================================================
+ * This function is from fdlibm: http://www.netlib.org
+ *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+    if( isnan(x)  )
+        return x + x;
 
-    long double signed_half = reference_copysignl(0.5L, x);
+    long double signed_half = reference_copysignl( 0.5L, x );
     x = reference_fabsl(x);
-    if (x > 1.0L) return cl_make_nan();
+    if( x > 1.0L )
+        return cl_make_nan();
 
-    if (x < 0.5L)
-        return signed_half * reference_log1pl(2.0L * (x + x * x / (1 - x)));
+    if( x < 0.5L )
+        return signed_half * reference_log1pl( 2.0L * ( x + x*x / (1-x) ) );
 
-    return signed_half * reference_log1pl(2.0L * x / (1 - x));
+    return signed_half * reference_log1pl(2.0L * x / (1-x));
 }
 
-long double reference_exp2l(long double z)
+long double reference_exp2l(  long double z)
 {
     double_double x;
     int j;
 
     // Handle NaNs
-    if (isnan(z)) return z;
+    if( isnan(z) )
+        return z;
 
     // init x
     x.hi = z;
     x.lo = z - x.hi;
 
-    // Deal with overflow and underflow for exp2(x) stage next
-    if (x.hi >= 1025) return INFINITY;
+    //Deal with overflow and underflow for exp2(x) stage next
+    if( x.hi >= 1025 )
+        return INFINITY;
 
-    if (x.hi < -1075 - 24) return +0.0;
+    if( x.hi < -1075-24 )
+        return +0.0;
 
     // find nearest integer to x
-    int i = (int)rint(x.hi);
+    int i = (int) rint(x.hi);
 
     // x now holds fractional part.  The result would be then 2**i  * exp2( x )
     x.hi -= i;
 
-    // We could attempt to find a minimax polynomial for exp2(x) over the range
-    // x = [-0.5, 0.5]. However, this would converge very slowly near the
-    // extrema, where 0.5**n is not a lot different from 0.5**(n+1), thereby
-    // requiring something like a 20th order polynomial to get 53 + 24 bits of
-    // precision. Instead we further reduce the range to [-1/32, 1/32] by
-    // observing that
+    // We could attempt to find a minimax polynomial for exp2(x) over the range x = [-0.5, 0.5].
+    // However, this would converge very slowly near the extrema, where 0.5**n is not a lot different
+    // from 0.5**(n+1), thereby requiring something like a 20th order polynomial to get 53 + 24 bits
+    // of precision. Instead we further reduce the range to [-1/32, 1/32] by observing that
     //
     //  2**(a+b) = 2**a * 2**b
     //
-    // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and
-    // reduce the range of x to [-1/32, 1/32] by subtracting away the nearest
-    // value of n/16 from x.
-    const double_double corrections[17] = {
-        { HEX_DBL(+, 1, 6a09e667f3bcd, -, 1),
-          HEX_DBL(-, 1, bdd3413b26456, -, 55) },
-        { HEX_DBL(+, 1, 7a11473eb0187, -, 1),
-          HEX_DBL(-, 1, 41577ee04992f, -, 56) },
-        { HEX_DBL(+, 1, 8ace5422aa0db, -, 1),
-          HEX_DBL(+, 1, 6e9f156864b27, -, 55) },
-        { HEX_DBL(+, 1, 9c49182a3f09, -, 1),
-          HEX_DBL(+, 1, c7c46b071f2be, -, 57) },
-        { HEX_DBL(+, 1, ae89f995ad3ad, -, 1),
-          HEX_DBL(+, 1, 7a1cd345dcc81, -, 55) },
-        { HEX_DBL(+, 1, c199bdd85529c, -, 1),
-          HEX_DBL(+, 1, 11065895048dd, -, 56) },
-        { HEX_DBL(+, 1, d5818dcfba487, -, 1),
-          HEX_DBL(+, 1, 2ed02d75b3707, -, 56) },
-        { HEX_DBL(+, 1, ea4afa2a490da, -, 1),
-          HEX_DBL(-, 1, e9c23179c2893, -, 55) },
-        { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-        { HEX_DBL(+, 1, 0b5586cf9890f, +, 0),
-          HEX_DBL(+, 1, 8a62e4adc610b, -, 54) },
-        { HEX_DBL(+, 1, 172b83c7d517b, +, 0),
-          HEX_DBL(-, 1, 19041b9d78a76, -, 55) },
-        { HEX_DBL(+, 1, 2387a6e756238, +, 0),
-          HEX_DBL(+, 1, 9b07eb6c70573, -, 54) },
-        { HEX_DBL(+, 1, 306fe0a31b715, +, 0),
-          HEX_DBL(+, 1, 6f46ad23182e4, -, 55) },
-        { HEX_DBL(+, 1, 3dea64c123422, +, 0),
-          HEX_DBL(+, 1, ada0911f09ebc, -, 55) },
-        { HEX_DBL(+, 1, 4bfdad5362a27, +, 0),
-          HEX_DBL(+, 1, d4397afec42e2, -, 56) },
-        { HEX_DBL(+, 1, 5ab07dd485429, +, 0),
-          HEX_DBL(+, 1, 6324c054647ad, -, 54) },
-        { HEX_DBL(+, 1, 6a09e667f3bcd, +, 0),
-          HEX_DBL(-, 1, bdd3413b26456, -, 54) }
+    // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and reduce the range
+    // of x to [-1/32, 1/32] by subtracting away the nearest value of n/16 from x.
+    const double_double corrections[17] =
+    {
+        { HEX_DBL( +, 1, 6a09e667f3bcd, -, 1 ), HEX_DBL( -, 1, bdd3413b26456, -, 55 ) },
+        { HEX_DBL( +, 1, 7a11473eb0187, -, 1 ), HEX_DBL( -, 1, 41577ee04992f, -, 56 ) },
+        { HEX_DBL( +, 1, 8ace5422aa0db, -, 1 ), HEX_DBL( +, 1, 6e9f156864b27, -, 55 ) },
+        { HEX_DBL( +, 1, 9c49182a3f09,  -, 1 ), HEX_DBL( +, 1, c7c46b071f2be, -, 57 ) },
+        { HEX_DBL( +, 1, ae89f995ad3ad, -, 1 ), HEX_DBL( +, 1, 7a1cd345dcc81, -, 55 ) },
+        { HEX_DBL( +, 1, c199bdd85529c, -, 1 ), HEX_DBL( +, 1, 11065895048dd, -, 56 ) },
+        { HEX_DBL( +, 1, d5818dcfba487, -, 1 ), HEX_DBL( +, 1, 2ed02d75b3707, -, 56 ) },
+        { HEX_DBL( +, 1, ea4afa2a490da, -, 1 ), HEX_DBL( -, 1, e9c23179c2893, -, 55 ) },
+        { HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,             +,  0 ) },
+        { HEX_DBL( +, 1, 0b5586cf9890f, +, 0 ), HEX_DBL( +, 1, 8a62e4adc610b, -, 54 ) },
+        { HEX_DBL( +, 1, 172b83c7d517b, +, 0 ), HEX_DBL( -, 1, 19041b9d78a76, -, 55 ) },
+        { HEX_DBL( +, 1, 2387a6e756238, +, 0 ), HEX_DBL( +, 1, 9b07eb6c70573, -, 54 ) },
+        { HEX_DBL( +, 1, 306fe0a31b715, +, 0 ), HEX_DBL( +, 1, 6f46ad23182e4, -, 55 ) },
+        { HEX_DBL( +, 1, 3dea64c123422, +, 0 ), HEX_DBL( +, 1, ada0911f09ebc, -, 55 ) },
+        { HEX_DBL( +, 1, 4bfdad5362a27, +, 0 ), HEX_DBL( +, 1, d4397afec42e2, -, 56 ) },
+        { HEX_DBL( +, 1, 5ab07dd485429, +, 0 ), HEX_DBL( +, 1, 6324c054647ad, -, 54 ) },
+        { HEX_DBL( +, 1, 6a09e667f3bcd, +, 0 ), HEX_DBL( -, 1, bdd3413b26456, -, 54 ) }
     };
-    int index = (int)rint(x.hi * 16.0);
-    x.hi -= (double)index * 0.0625;
+    int index = (int) rint( x.hi * 16.0 );
+    x.hi -= (double) index * 0.0625;
 
     // canonicalize x
     double temp = x.hi;
     x.hi += x.lo;
     x.lo -= x.hi - temp;
 
-    // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32].  Max
-    // Error: 2 * 0x1.e112p-87
-    const double_double c[] = { { HEX_DBL(+, 1, 62e42fefa39ef, -, 1),
-                                  HEX_DBL(+, 1, abc9e3ac1d244, -, 56) },
-                                { HEX_DBL(+, 1, ebfbdff82c58f, -, 3),
-                                  HEX_DBL(-, 1, 5e4987a631846, -, 57) },
-                                { HEX_DBL(+, 1, c6b08d704a0c, -, 5),
-                                  HEX_DBL(-, 1, d323200a05713, -, 59) },
-                                { HEX_DBL(+, 1, 3b2ab6fba4e7a, -, 7),
-                                  HEX_DBL(+, 1, c5ee8f8b9f0c1, -, 63) },
-                                { HEX_DBL(+, 1, 5d87fe78a672a, -, 10),
-                                  HEX_DBL(+, 1, 884e5e5cc7ecc, -, 64) },
-                                { HEX_DBL(+, 1, 430912f7e8373, -, 13),
-                                  HEX_DBL(+, 1, 4f1b59514a326, -, 67) },
-                                { HEX_DBL(+, 1, ffcbfc5985e71, -, 17),
-                                  HEX_DBL(-, 1, db7d6a0953b78, -, 71) },
-                                { HEX_DBL(+, 1, 62c150eb16465, -, 20),
-                                  HEX_DBL(+, 1, e0767c2d7abf5, -, 80) },
-                                { HEX_DBL(+, 1, b52502b5e953, -, 24),
-                                  HEX_DBL(+, 1, 6797523f944bc, -, 78) } };
-    size_t count = sizeof(c) / sizeof(c[0]);
+    // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32].  Max Error: 2 * 0x1.e112p-87
+    const double_double c[] = {
+        {HEX_DBL( +, 1, 62e42fefa39ef, -,  1 ), HEX_DBL( +, 1, abc9e3ac1d244, -, 56 )},
+        {HEX_DBL( +, 1, ebfbdff82c58f, -,  3 ), HEX_DBL( -, 1, 5e4987a631846, -, 57 )},
+        {HEX_DBL( +, 1, c6b08d704a0c,  -,  5 ), HEX_DBL( -, 1, d323200a05713, -, 59 )},
+        {HEX_DBL( +, 1, 3b2ab6fba4e7a, -,  7 ), HEX_DBL( +, 1, c5ee8f8b9f0c1, -, 63 )},
+        {HEX_DBL( +, 1, 5d87fe78a672a, -, 10 ), HEX_DBL( +, 1, 884e5e5cc7ecc, -, 64 )},
+        {HEX_DBL( +, 1, 430912f7e8373, -, 13 ), HEX_DBL( +, 1, 4f1b59514a326, -, 67 )},
+        {HEX_DBL( +, 1, ffcbfc5985e71, -, 17 ), HEX_DBL( -, 1, db7d6a0953b78, -, 71 )},
+        {HEX_DBL( +, 1, 62c150eb16465, -, 20 ), HEX_DBL( +, 1, e0767c2d7abf5, -, 80 )},
+        {HEX_DBL( +, 1, b52502b5e953,  -, 24 ), HEX_DBL( +, 1, 6797523f944bc, -, 78 )}
+    };
+    size_t count = sizeof( c ) / sizeof( c[0] );
 
     // Do polynomial
-    double_double r = c[count - 1];
-    for (j = (int)count - 2; j >= 0; j--) r = add_dd(c[j], mul_dd(r, x));
+    double_double r = c[count-1];
+    for( j = (int) count-2; j >= 0; j-- )
+        r = add_dd( c[j], mul_dd( r, x ) );
 
     // unwind approximation
-    r = mul_dd(r, x); // before: r =(exp2(x)-1)/x;   after: r = exp2(x) - 1
+    r = mul_dd( r, x );     // before: r =(exp2(x)-1)/x;   after: r = exp2(x) - 1
 
     // correct for [-0.5, 0.5] -> [-1/32, 1/32] reduction above
     //  exp2(x) = (r + 1) * correction = r * correction + correction
-    r = mul_dd(r, corrections[index + 8]);
-    r = add_dd(r, corrections[index + 8]);
+    r = mul_dd( r, corrections[index+8] );
+    r = add_dd( r, corrections[index+8] );
 
-    // Format result for output:
+// Format result for output:
 
     // Get mantissa
-    long double m = ((long double)r.hi + (long double)r.lo);
+    long double m = ((long double) r.hi + (long double) r.lo );
 
     // Handle a pesky overflow cases when long double = double
-    if (i > 512)
+    if( i > 512 )
     {
-        m *= HEX_DBL(+, 1, 0, +, 512);
+        m *= HEX_DBL( +, 1, 0, +, 512 );
         i -= 512;
     }
-    else if (i < -512)
+    else if( i < -512 )
     {
-        m *= HEX_DBL(+, 1, 0, -, 512);
+        m *= HEX_DBL( +, 1, 0, -, 512 );
         i += 512;
     }
 
-    return m * ldexpl(1.0L, i);
+    return m * ldexpl( 1.0L, i );
 }
 
-long double reference_expm1l(long double x)
+long double reference_expm1l(  long double x)
 {
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-    // unimplemented
+#if defined( _MSC_VER ) && ! defined( __INTEL_COMPILER )
+    //unimplemented
     return x;
 #else
-    if (reference_isnanl(x)) return x;
+    union { double f; cl_ulong u;} u;
+    u.f = (double) x;
 
-    if (x > 710) return INFINITY;
+    if (reference_isnanl(x))
+        return x;
+
+    if ( x > 710 )
+        return INFINITY;
 
     long double y = expm1l(x);
 
     // Range of expm1l is -1.0L to +inf. Negative inf
     // on a few Linux platforms is clearly the wrong sign.
-    if (reference_isinfl(y)) y = INFINITY;
+    if (reference_isinfl(y))
+        y = INFINITY;
 
     return y;
 #endif
 }
 
-long double reference_fmaxl(long double x, long double y)
+long double reference_fmaxl( long double x, long double y )
 {
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
 
     return x >= y ? x : y;
 }
 
-long double reference_fminl(long double x, long double y)
+long double reference_fminl( long double x, long double y )
 {
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
 
     return x <= y ? x : y;
 }
 
-long double reference_hypotl(long double x, long double y)
+long double reference_hypotl( long double x, long double y )
 {
-    static const double tobig = HEX_DBL(+, 1, 0, +, 511);
-    static const double big = HEX_DBL(+, 1, 0, +, 513);
-    static const double rbig = HEX_DBL(+, 1, 0, -, 513);
-    static const double tosmall = HEX_DBL(+, 1, 0, -, 511);
-    static const double smalll = HEX_DBL(+, 1, 0, -, 607);
-    static const double rsmall = HEX_DBL(+, 1, 0, +, 607);
+  static const double tobig = HEX_DBL( +, 1, 0, +, 511 );
+  static const double big = HEX_DBL( +, 1, 0, +, 513 );
+  static const double rbig = HEX_DBL( +, 1, 0, -, 513 );
+  static const double tosmall = HEX_DBL( +, 1, 0, -, 511 );
+  static const double smalll = HEX_DBL( +, 1, 0, -, 607 );
+  static const double rsmall = HEX_DBL( +, 1, 0, +, 607 );
 
     long double max, min;
 
-    if (isinf(x) || isinf(y)) return INFINITY;
+    if( isinf(x) || isinf(y) )
+        return INFINITY;
 
-    if (isnan(x) || isnan(y)) return x + y;
+    if( isnan(x) || isnan(y) )
+        return x + y;
 
     x = reference_fabsl(x);
     y = reference_fabsl(y);
 
-    max = reference_fmaxl(x, y);
-    min = reference_fminl(x, y);
+    max = reference_fmaxl( x, y );
+    min = reference_fminl( x, y );
 
-    if (max > tobig)
+  if( max > tobig )
     {
         max *= rbig;
         min *= rbig;
-        return big * sqrtl(max * max + min * min);
+        return big * sqrtl( max * max + min * min );
     }
 
-    if (max < tosmall)
+  if( max < tosmall )
     {
         max *= rsmall;
         min *= rsmall;
-        return smalll * sqrtl(max * max + min * min);
+      return smalll * sqrtl( max * max + min * min );
     }
-    return sqrtl(x * x + y * y);
+    return sqrtl( x * x + y * y );
 }
 
-long double reference_log2l(long double x)
+//long double reference_log2l( long double x )
+//{
+//    return log( x ) * 1.44269504088896340735992468100189214L;
+//}
+
+long double reference_log2l( long double x )
 {
-    if (isnan(x) || x < 0.0 || x == -INFINITY) return NAN;
+    if( isnan(x) || x < 0.0 || x == -INFINITY)
+        return NAN;
 
-    if (x == 0.0f) return -INFINITY;
+    if( x == 0.0f)
+        return -INFINITY;
 
-    if (x == INFINITY) return INFINITY;
+    if( x == INFINITY )
+        return INFINITY;
 
     double hi, lo;
-    __log2_ep(&hi, &lo, x);
+    __log2_ep( &hi, &lo, x);
 
-    return (long double)hi + (long double)lo;
+    return (long double) hi + (long double) lo;
 }
 
-long double reference_log1pl(long double x)
+long double reference_log1pl(  long double x)
 {
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-    // unimplemented
+#if defined( _MSC_VER ) && ! defined( __INTEL_COMPILER )
+    //unimplemented
     return x;
 #elif defined(__PPC__)
     // log1pl on PPC inadvertantly returns NaN for very large values. Work
@@ -3678,24 +3418,23 @@
 #endif
 }
 
-long double reference_logbl(long double x)
+long double reference_logbl( long double x )
 {
     // Since we are just using this to verify double precision, we can
     // use the double precision copysign here
-    union {
-        double f;
-        cl_ulong u;
-    } u;
-    u.f = (double)x;
+    union { double f; cl_ulong u;} u;
+    u.f = (double) x;
 
     cl_int exponent = (cl_uint)(u.u >> 52) & 0x7ff;
-    if (exponent == 0x7ff) return x * x;
+    if( exponent == 0x7ff )
+        return x * x;
 
-    if (exponent == 0)
-    { // deal with denormals
-        u.f = x * HEX_DBL(+, 1, 0, +, 64);
+    if( exponent == 0 )
+    {   // deal with denormals
+        u.f =  x * HEX_DBL( +, 1, 0, +, 64 );
         exponent = (cl_int)(u.u >> 52) & 0x7ff;
-        if (exponent == 0) return -INFINITY;
+        if( exponent == 0 )
+            return -INFINITY;
 
         return exponent - (1023 + 64);
     }
@@ -3703,82 +3442,84 @@
     return exponent - 1023;
 }
 
-long double reference_maxmagl(long double x, long double y)
+long double reference_maxmagl( long double x, long double y )
 {
     long double fabsx = fabsl(x);
     long double fabsy = fabsl(y);
 
-    if (fabsx < fabsy) return y;
+    if( fabsx < fabsy )
+        return y;
 
-    if (fabsy < fabsx) return x;
+    if( fabsy < fabsx )
+        return x;
 
     return reference_fmaxl(x, y);
 }
 
-long double reference_minmagl(long double x, long double y)
+long double reference_minmagl( long double x, long double y )
 {
     long double fabsx = fabsl(x);
     long double fabsy = fabsl(y);
 
-    if (fabsx > fabsy) return y;
+    if( fabsx > fabsy )
+        return y;
 
-    if (fabsy > fabsx) return x;
+    if( fabsy > fabsx )
+        return x;
 
     return reference_fminl(x, y);
 }
 
-long double reference_nanl(cl_ulong x)
+long double reference_nanl( cl_ulong x )
 {
-    union {
-        cl_ulong u;
-        cl_double f;
-    } u;
+    union{ cl_ulong u; cl_double f; }u;
     u.u = x | 0x7ff8000000000000ULL;
-    return (long double)u.f;
+    return (long double) u.f;
 }
 
 
-long double reference_reciprocall(long double x) { return 1.0L / x; }
+long double reference_reciprocall( long double x )
+{
+    return 1.0L / x;
+}
 
-long double reference_remainderl(long double x, long double y)
+long double reference_remainderl( long double x, long double y );
+long double reference_remainderl( long double x, long double y )
 {
     int i;
-    return reference_remquol(x, y, &i);
+    return reference_remquol( x, y, &i );
 }
 
-long double reference_lgammal(long double x)
+long double reference_lgammal( long double x);
+long double reference_lgammal( long double x)
 {
     // lgamma is currently not tested
-    return reference_lgamma(x);
+    return reference_lgamma( x );
 }
 
-static uint32_t two_over_pi[] = {
-    0x0,        0x28be60db, 0x24e44152, 0x27f09d5f, 0x11f534dd, 0x3036d8a5,
-    0x1993c439, 0x107f945,  0x23abdebb, 0x31586dc9, 0x6e3a424,  0x374b8019,
-    0x92eea09,  0x3464873f, 0x21deb1cb, 0x4a69cfb,  0x288235f5, 0xbaed121,
-    0xe99c702,  0x1ad17df9, 0x13991d6,  0xe60d4ce,  0x1f49c845, 0x3e2ef7e4,
-    0x283b1ff8, 0x25fff781, 0x1980fef2, 0x3c462d68, 0xa6d1f6d,  0xd9fb3c9,
-    0x3cb09b74, 0x3d18fd9a, 0x1e5fea2d, 0x1d49eeb1, 0x3ebe5f17, 0x2cf41ce7,
-    0x378a5292, 0x3a9afed7, 0x3b11f8d5, 0x3421580c, 0x3046fc7b, 0x1aeafc33,
-    0x3bc209af, 0x10d876a7, 0x2391615e, 0x3986c219, 0x199855f1, 0x1281a102,
-    0xdffd880,  0x135cc9cc, 0x10606155
+static uint32_t two_over_pi[] = { 0x0, 0x28be60db, 0x24e44152, 0x27f09d5f, 0x11f534dd, 0x3036d8a5, 0x1993c439, 0x107f945, 0x23abdebb, 0x31586dc9,
+0x6e3a424, 0x374b8019, 0x92eea09, 0x3464873f, 0x21deb1cb, 0x4a69cfb, 0x288235f5, 0xbaed121, 0xe99c702, 0x1ad17df9,
+0x13991d6, 0xe60d4ce, 0x1f49c845, 0x3e2ef7e4, 0x283b1ff8, 0x25fff781, 0x1980fef2, 0x3c462d68, 0xa6d1f6d, 0xd9fb3c9,
+0x3cb09b74, 0x3d18fd9a, 0x1e5fea2d, 0x1d49eeb1, 0x3ebe5f17, 0x2cf41ce7, 0x378a5292, 0x3a9afed7, 0x3b11f8d5, 0x3421580c,
+0x3046fc7b, 0x1aeafc33, 0x3bc209af, 0x10d876a7, 0x2391615e, 0x3986c219, 0x199855f1, 0x1281a102, 0xdffd880, 0x135cc9cc,
+0x10606155
 };
 
-static uint32_t pi_over_two[] = { 0x1,        0x2487ed51, 0x42d1846,
-                                  0x26263314, 0x1701b839, 0x28948127 };
+static uint32_t pi_over_two[] = { 0x1, 0x2487ed51, 0x42d1846, 0x26263314, 0x1701b839, 0x28948127 };
 
-typedef union {
-    uint64_t u;
-    double d;
-} d_ui64_t;
+typedef union
+    {
+        uint64_t u;
+        double   d;
+    }d_ui64_t;
 
 // radix or base of representation
 #define RADIX (30)
 #define DIGITS 6
 
-d_ui64_t two_pow_pradix = { (uint64_t)(1023 + RADIX) << 52 };
-d_ui64_t two_pow_mradix = { (uint64_t)(1023 - RADIX) << 52 };
-d_ui64_t two_pow_two_mradix = { (uint64_t)(1023 - 2 * RADIX) << 52 };
+d_ui64_t two_pow_pradix = { (uint64_t) (1023 + RADIX) << 52 };
+d_ui64_t two_pow_mradix = { (uint64_t) (1023 - RADIX) << 52 };
+d_ui64_t two_pow_two_mradix = { (uint64_t) (1023-2*RADIX) << 52 };
 
 #define tp_pradix two_pow_pradix.d
 #define tp_mradix two_pow_mradix.d
@@ -3787,28 +3528,27 @@
 // floating point number.
 // x = sign * [ sum_{i = 0 to 2} ( X[i] * 2^(index - i)*RADIX ) ]
 typedef struct
-{
-    uint32_t X[3]; // three 32 bit integers are sufficient to represnt double in
-                   // base_30
-    int index; // exponent bias
-    int sign; // sign of double
-} eprep_t;
+    {
+        uint32_t X[3];        // three 32 bit integers are sufficient to represnt double in base_30
+        int index;            // exponent bias
+        int sign;            // sign of double
+    }eprep_t;
+
+static eprep_t double_to_eprep(double x);
 
 static eprep_t double_to_eprep(double x)
 {
     eprep_t result;
 
-    result.sign = (signbit(x) == 0) ? 1 : -1;
-    x = fabs(x);
+    result.sign = (signbit( x ) == 0) ? 1 : -1;
+    x = fabs( x );
 
     int index = 0;
-    while (x > tp_pradix)
-    {
+    while( x > tp_pradix ) {
         index++;
         x *= tp_mradix;
     }
-    while (x < 1)
-    {
+    while( x < 1 ) {
         index--;
         x *= tp_pradix;
     }
@@ -3816,125 +3556,190 @@
     result.index = index;
     int i = 0;
     result.X[0] = result.X[1] = result.X[2] = 0;
-    while (x != 0.0)
-    {
-        result.X[i] = (uint32_t)x;
-        x = (x - (double)result.X[i]) * tp_pradix;
+    while( x != 0.0 ) {
+        result.X[i] = (uint32_t) x;
+        x = (x - (double) result.X[i]) * tp_pradix;
         i++;
     }
     return result;
 }
 
-static double eprep_to_double(eprep_t epx)
+/*
+ double eprep_to_double( uint32_t *R, int digits, int index, int sgn )
+ {
+ d_ui64_t nb, rndcorr;
+ uint64_t lowpart, roundbits, t1;
+ int expo, expofinal, shift;
+ double res;
+
+ nb.d = (double) R[0];
+
+ t1   = R[1];
+ lowpart  = (t1 << RADIX) + R[2];
+ expo = ((nb.u & 0x7ff0000000000000ULL) >> 52) - 1023;
+
+ expofinal = expo + RADIX*index;
+
+ if (expofinal >  1023) {
+ d_ui64_t inf = { 0x7ff0000000000000ULL };
+ res = inf.d;
+ }
+
+ else if (expofinal >= -1022){
+ shift = expo + 2*RADIX - 53;
+ roundbits = lowpart << (64-shift);
+ lowpart = lowpart >> shift;
+ if (lowpart & 0x0000000000000001ULL) {
+ if(roundbits == 0) {
+ int i;
+ for (i=3; i < digits; i++)
+ roundbits = roundbits | R[i];
+ }
+ if(roundbits == 0) {
+ if (lowpart & 0x0000000000000002ULL)
+ rndcorr.u = (uint64_t) (expo - 52 + 1023) << 52;
+ else
+ rndcorr.d = 0.0;
+ }
+ else
+ rndcorr.u = (uint64_t) (expo - 52 + 1023) << 52;
+ }
+ else{
+ rndcorr.d = 0.0;
+ }
+
+ lowpart = lowpart >> 1;
+ nb.u = nb.u | lowpart;
+ res  = nb.d + rndcorr.d;
+
+ if(index*RADIX + 1023 > 0) {
+ nb.u = 0;
+ nb.u = (uint64_t) (index*RADIX + 1023) << 52;
+ res *= nb.d;
+ }
+ else {
+ nb.u = 0;
+ nb.u = (uint64_t) (index*RADIX + 1023 + 2*RADIX) << 52;
+ res *= two_pow_two_mradix.d;
+ res *= nb.d;
+ }
+ }
+ else {
+ if (expofinal < -1022 - 53 ) {
+ res = 0.0;
+ }
+ else {
+ lowpart = lowpart >> (expo + (2*RADIX) - 52);
+ nb.u = nb.u | lowpart;
+ nb.u = (nb.u & 0x000FFFFFFFFFFFFFULL) | 0x0010000000000000ULL;
+ nb.u = nb.u >> (-1023 - expofinal);
+ if(nb.u & 0x0000000000000001ULL)
+ rndcorr.u = 1;
+ else
+ rndcorr.d = 0.0;
+ res  = 0.5*(nb.d + rndcorr.d);
+ }
+ }
+
+ return sgn*res;
+ }
+ */
+static double eprep_to_double( eprep_t epx );
+
+static double eprep_to_double( eprep_t epx )
 {
     double res = 0.0;
 
-    res += ldexp((double)epx.X[0], (epx.index - 0) * RADIX);
-    res += ldexp((double)epx.X[1], (epx.index - 1) * RADIX);
-    res += ldexp((double)epx.X[2], (epx.index - 2) * RADIX);
+    res += ldexp((double) epx.X[0], (epx.index - 0)*RADIX);
+    res += ldexp((double) epx.X[1], (epx.index - 1)*RADIX);
+    res += ldexp((double) epx.X[2], (epx.index - 2)*RADIX);
 
     return copysign(res, epx.sign);
 }
 
-static int payne_hanek(double *y, int *exception)
+static int payne_hanek( double *y, int *exception );
+
+static int payne_hanek( double *y, int *exception )
 {
     double x = *y;
 
     // exception cases .. no reduction required
-    if (isnan(x) || isinf(x) || (fabs(x) <= M_PI_4))
-    {
+    if( isnan( x ) || isinf( x ) || (fabs( x ) <= M_PI_4) ) {
         *exception = 1;
         return 0;
     }
 
     *exception = 0;
 
-    // After computation result[0] contains integer part while
-    // result[1]....result[DIGITS-1] contain fractional part. So we are doing
-    // computation with (DIGITS-1)*RADIX precision. Default DIGITS=6 and
-    // RADIX=30 so default precision is 150 bits. Kahan-McDonald algorithm shows
-    // that a double precision x, closest to pi/2 is 6381956970095103 x 2^797
-    // which can cause 61 digits of cancellation in computation of f = x*2/pi -
-    // floor(x*2/pi) ... thus we need at least 114 bits (61 leading zeros + 53
-    // bits of mentissa of f) of precision to accurately compute f in double
-    // precision. Since we are using 150 bits (still an overkill), we should be
-    // safe. Extra bits can act as guard bits for correct rounding.
-    uint64_t result[DIGITS + 2];
+    // After computation result[0] contains integer part while result[1]....result[DIGITS-1]
+    // contain fractional part. So we are doing computation with (DIGITS-1)*RADIX precision.
+    // Default DIGITS=6 and RADIX=30 so default precision is 150 bits. Kahan-McDonald algorithm
+    // shows that a double precision x, closest to pi/2 is 6381956970095103 x 2^797 which can
+    // cause 61 digits of cancellation in computation of f = x*2/pi - floor(x*2/pi) ... thus we need
+    // at least 114 bits (61 leading zeros + 53 bits of mentissa of f) of precision to accurately compute
+    // f in double precision. Since we are using 150 bits (still an overkill), we should be safe. Extra
+    // bits can act as guard bits for correct rounding.
+    uint64_t result[DIGITS+2];
 
     // compute extended precision representation of x
-    eprep_t epx = double_to_eprep(x);
+    eprep_t epx = double_to_eprep( x );
     int index = epx.index;
     int i, j;
-    // extended precision multiplication of 2/pi*x .... we will loose at max two
-    // RADIX=30 bit digits in the worst case
-    for (i = 0; i < (DIGITS + 2); i++)
-    {
+    // extended precision multiplication of 2/pi*x .... we will loose at max two RADIX=30 bit digits in
+    // the worst case
+    for(i = 0; i < (DIGITS+2); i++) {
         result[i] = 0;
-        result[i] += ((index + i - 0) >= 0)
-            ? ((uint64_t)two_over_pi[index + i - 0] * (uint64_t)epx.X[0])
-            : 0;
-        result[i] += ((index + i - 1) >= 0)
-            ? ((uint64_t)two_over_pi[index + i - 1] * (uint64_t)epx.X[1])
-            : 0;
-        result[i] += ((index + i - 2) >= 0)
-            ? ((uint64_t)two_over_pi[index + i - 2] * (uint64_t)epx.X[2])
-            : 0;
+        result[i] += ((index + i - 0) >= 0) ? ((uint64_t) two_over_pi[index + i - 0] * (uint64_t) epx.X[0]) : 0;
+        result[i] += ((index + i - 1) >= 0) ? ((uint64_t) two_over_pi[index + i - 1] * (uint64_t) epx.X[1]) : 0;
+        result[i] += ((index + i - 2) >= 0) ? ((uint64_t) two_over_pi[index + i - 2] * (uint64_t) epx.X[2]) : 0;
     }
 
     // Carry propagation.
     uint64_t tmp;
-    for (i = DIGITS + 2 - 1; i > 0; i--)
-    {
+    for(i = DIGITS+2-1; i > 0; i--) {
         tmp = result[i] >> RADIX;
         result[i - 1] += tmp;
         result[i] -= (tmp << RADIX);
     }
 
-    // we dont ned to normalize the integer part since only last two bits of
-    // this will be used subsequently algorithm which remain unaltered by this
-    // normalization. tmp = result[0] >> RADIX; result[0] -= (tmp << RADIX);
-    unsigned int N = (unsigned int)result[0];
+    // we dont ned to normalize the integer part since only last two bits of this will be used
+    // subsequently algorithm which remain unaltered by this normalization.
+    // tmp = result[0] >> RADIX;
+    // result[0] -= (tmp << RADIX);
+    unsigned int N = (unsigned int) result[0];
 
-    // if the result is > pi/4, bring it to (-pi/4, pi/4] range. Note that
-    // testing if the final x_star = pi/2*(x*2/pi - k) > pi/4 is equivalent to
-    // testing, at this stage, if r[1] (the first fractional digit) is greater
-    // than (2^RADIX)/2 and substracting pi/4 from x_star to bring it to
-    // mentioned range is equivalent to substracting fractional part at this
-    // stage from one and changing the sign.
+    // if the result is > pi/4, bring it to (-pi/4, pi/4] range. Note that testing if the final
+    // x_star = pi/2*(x*2/pi - k) > pi/4 is equivalent to testing, at this stage, if r[1] (the first fractional
+    // digit) is greater than (2^RADIX)/2 and substracting pi/4 from x_star to bring it to mentioned
+    // range is equivalent to substracting fractional part at this stage from one and changing the sign.
     int sign = 1;
-    if (result[1] > (uint64_t)(1 << (RADIX - 1)))
-    {
-        for (i = 1; i < (DIGITS + 2); i++)
+    if(result[1] > (uint64_t)(1 << (RADIX - 1))) {
+        for(i = 1; i < (DIGITS + 2); i++)
             result[i] = (~((unsigned int)result[i]) & 0x3fffffff);
         N += 1;
         sign = -1;
     }
 
-    // Again as per Kahan-McDonald algorithim there may be 61 leading zeros in
-    // the worst case (when x is multiple of 2/pi very close to an integer) so
-    // we need to get rid of these zeros and adjust the index of final result.
-    // So in the worst case, precision of comupted result is 90 bits (150 bits
-    // original bits - 60 lost in cancellation).
+    // Again as per Kahan-McDonald algorithim there may be 61 leading zeros in the worst case
+    // (when x is multiple of 2/pi very close to an integer) so we need to get rid of these zeros
+    // and adjust the index of final result. So in the worst case, precision of comupted result is
+    // 90 bits (150 bits original bits - 60 lost in cancellation).
     int ind = 1;
-    for (i = 1; i < (DIGITS + 2); i++)
-    {
-        if (result[i] != 0)
+    for(i = 1; i < (DIGITS+2); i++) {
+        if(result[i] != 0)
             break;
         else
             ind++;
     }
 
-    uint64_t r[DIGITS - 1];
-    for (i = 0; i < (DIGITS - 1); i++)
-    {
+    uint64_t r[DIGITS-1];
+    for(i = 0; i < (DIGITS-1); i++) {
         r[i] = 0;
-        for (j = 0; j <= i; j++)
-        {
-            r[i] += (result[ind + i - j] * (uint64_t)pi_over_two[j]);
+        for(j = 0; j <= i; j++) {
+            r[i] += (result[ind+i-j] * (uint64_t) pi_over_two[j]);
         }
     }
-    for (i = (DIGITS - 2); i > 0; i--)
-    {
+    for(i = (DIGITS-2); i > 0; i--) {
         tmp = r[i] >> RADIX;
         r[i - 1] += tmp;
         r[i] -= (tmp << RADIX);
@@ -3943,127 +3748,147 @@
     r[0] -= (tmp << RADIX);
 
     eprep_t epr;
-    epr.sign = epx.sign * sign;
-    if (tmp != 0)
-    {
+    epr.sign = epx.sign*sign;
+    if(tmp != 0) {
         epr.index = -ind + 1;
-        epr.X[0] = (uint32_t)tmp;
-        epr.X[1] = (uint32_t)r[0];
-        epr.X[2] = (uint32_t)r[1];
+        epr.X[0] = (uint32_t) tmp;
+        epr.X[1] = (uint32_t) r[0];
+        epr.X[2] = (uint32_t) r[1];
     }
-    else
-    {
+    else {
         epr.index = -ind;
-        epr.X[0] = (uint32_t)r[0];
-        epr.X[1] = (uint32_t)r[1];
-        epr.X[2] = (uint32_t)r[2];
+        epr.X[0] = (uint32_t) r[0];
+        epr.X[1] = (uint32_t) r[1];
+        epr.X[2] = (uint32_t) r[2];
     }
 
-    *y = eprep_to_double(epr);
-    return epx.sign * N;
+    *y = eprep_to_double( epr );
+    return epx.sign*N;
 }
 
 double reference_relaxed_cos(double x)
 {
-    if (isnan(x)) return NAN;
-    return (float)cos((float)x);
+  if(isnan(x))
+    return NAN;
+  return (float)cos((float)x);
 }
 
 double reference_cos(double x)
 {
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception) return cos(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception )
+        return cos( x );
     unsigned int c = N & 3;
-    switch (c)
-    {
-        case 0: return cos(x);
-        case 1: return -sin(x);
-        case 2: return -cos(x);
-        case 3: return sin(x);
+    switch ( c ) {
+        case 0:
+            return  cos( x );
+        case 1:
+            return -sin( x );
+        case 2:
+            return -cos( x );
+        case 3:
+            return  sin( x );
     }
     return 0.0;
 }
 
-double reference_relaxed_sin(double x) { return (float)sin((float)x); }
+double reference_relaxed_sin(double x){
+  return (float)sin((float)x);
+}
 
 double reference_sin(double x)
 {
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception) return sin(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception )
+        return sin( x );
     int c = N & 3;
-    switch (c)
-    {
-        case 0: return sin(x);
-        case 1: return cos(x);
-        case 2: return -sin(x);
-        case 3: return -cos(x);
+    switch ( c ) {
+        case 0:
+            return  sin( x );
+        case 1:
+            return  cos( x );
+        case 2:
+            return -sin( x );
+        case 3:
+            return -cos( x );
     }
     return 0.0;
 }
 
-double reference_relaxed_sincos(double x, double *y)
-{
-    *y = reference_relaxed_cos(x);
-    return reference_relaxed_sin(x);
+double reference_relaxed_sincos(double x, double * y){
+  *y = reference_relaxed_cos(x);
+  return reference_relaxed_sin(x);
 }
 
 double reference_sincos(double x, double *y)
 {
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception)
-    {
-        *y = cos(x);
-        return sin(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception ) {
+        *y = cos( x );
+        return sin( x );
     }
     int c = N & 3;
-    switch (c)
-    {
-        case 0: *y = cos(x); return sin(x);
-        case 1: *y = -sin(x); return cos(x);
-        case 2: *y = -cos(x); return -sin(x);
-        case 3: *y = sin(x); return -cos(x);
+    switch ( c ) {
+        case 0:
+            *y = cos( x );
+            return  sin( x );
+        case 1:
+            *y = -sin( x );
+            return  cos( x );
+        case 2:
+            *y = -cos( x );
+            return -sin( x );
+        case 3:
+            *y = sin( x );
+            return -cos( x );
     }
     return 0.0;
 }
 
-double reference_relaxed_tan(double x)
-{
-    return ((float)reference_relaxed_sin((float)x))
-        / ((float)reference_relaxed_cos((float)x));
+double reference_relaxed_tan(double x){
+  return ((float) reference_relaxed_sin((float)x))/((float) reference_relaxed_cos((float)x));
 }
 
 double reference_tan(double x)
 {
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception) return tan(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception )
+        return tan( x );
     int c = N & 3;
-    switch (c)
-    {
-        case 0: return tan(x);
-        case 1: return -1.0 / tan(x);
-        case 2: return tan(x);
-        case 3: return -1.0 / tan(x);
+    switch ( c ) {
+        case 0:
+            return  tan( x );
+        case 1:
+            return -1.0 / tan( x );
+        case 2:
+            return tan( x );
+        case 3:
+            return -1.0 / tan( x );
     }
     return 0.0;
 }
 
 long double reference_cosl(long double xx)
 {
-    double x = (double)xx;
+    double x = (double) xx;
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception) return cosl(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception )
+        return cosl( x );
     unsigned int c = N & 3;
-    switch (c)
-    {
-        case 0: return cosl(x);
-        case 1: return -sinl(x);
-        case 2: return -cosl(x);
-        case 3: return sinl(x);
+    switch ( c ) {
+        case 0:
+            return  cosl( x );
+        case 1:
+            return -sinl( x );
+        case 2:
+            return -cosl( x );
+        case 3:
+            return  sinl( x );
     }
     return 0.0;
 }
@@ -4072,20 +3897,25 @@
 {
     // we use system tanl after reduction which
     // can flush denorm input to zero so
-    // take care of it here.
-    if (reference_fabsl(xx) < HEX_DBL(+, 1, 0, -, 1022)) return xx;
+    //take care of it here.
+    if(reference_fabsl(xx) < HEX_DBL( +, 1, 0, -, 1022 ))
+        return xx;
 
-    double x = (double)xx;
+    double x = (double) xx;
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception) return sinl(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception )
+        return sinl( x );
     int c = N & 3;
-    switch (c)
-    {
-        case 0: return sinl(x);
-        case 1: return cosl(x);
-        case 2: return -sinl(x);
-        case 3: return -cosl(x);
+    switch ( c ) {
+        case 0:
+            return  sinl( x );
+        case 1:
+            return  cosl( x );
+        case 2:
+            return -sinl( x );
+        case 3:
+            return -cosl( x );
     }
     return 0.0;
 }
@@ -4094,28 +3924,34 @@
 {
     // we use system tanl after reduction which
     // can flush denorm input to zero so
-    // take care of it here.
-    if (reference_fabsl(xx) < HEX_DBL(+, 1, 0, -, 1022))
+    //take care of it here.
+    if(reference_fabsl(xx) < HEX_DBL( +, 1, 0, -, 1022 ))
     {
         *y = cosl(xx);
         return xx;
     }
 
-    double x = (double)xx;
+    double x = (double) xx;
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception)
-    {
-        *y = cosl(x);
-        return sinl(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception ) {
+        *y = cosl( x );
+        return sinl( x );
     }
     int c = N & 3;
-    switch (c)
-    {
-        case 0: *y = cosl(x); return sinl(x);
-        case 1: *y = -sinl(x); return cosl(x);
-        case 2: *y = -cosl(x); return -sinl(x);
-        case 3: *y = sinl(x); return -cosl(x);
+    switch ( c ) {
+        case 0:
+            *y = cosl( x );
+            return  sinl( x );
+        case 1:
+            *y = -sinl( x );
+            return  cosl( x );
+        case 2:
+            *y = -cosl( x );
+            return -sinl( x );
+        case 3:
+            *y = sinl( x );
+            return -cosl( x );
     }
     return 0.0;
 }
@@ -4124,337 +3960,205 @@
 {
     // we use system tanl after reduction which
     // can flush denorm input to zero so
-    // take care of it here.
-    if (reference_fabsl(xx) < HEX_DBL(+, 1, 0, -, 1022)) return xx;
+    //take care of it here.
+    if(reference_fabsl(xx) < HEX_DBL( +, 1, 0, -, 1022 ))
+        return xx;
 
-    double x = (double)xx;
+    double x = (double) xx;
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception) return tanl(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception )
+        return tanl( x );
     int c = N & 3;
-    switch (c)
-    {
-        case 0: return tanl(x);
-        case 1: return -1.0 / tanl(x);
-        case 2: return tanl(x);
-        case 3: return -1.0 / tanl(x);
+    switch ( c ) {
+        case 0:
+            return  tanl( x );
+        case 1:
+            return -1.0 / tanl( x );
+        case 2:
+            return tanl( x );
+        case 3:
+            return -1.0 / tanl( x );
     }
     return 0.0;
 }
 
 static double __loglTable1[64][3] = {
-    { HEX_DBL(+, 1, 5390948f40fea, +, 0), HEX_DBL(-, 1, a152f142a, -, 2),
-      HEX_DBL(+, 1, f93e27b43bd2c, -, 40) },
-    { HEX_DBL(+, 1, 5015015015015, +, 0), HEX_DBL(-, 1, 921800925, -, 2),
-      HEX_DBL(+, 1, 162432a1b8df7, -, 41) },
-    { HEX_DBL(+, 1, 4cab88725af6e, +, 0), HEX_DBL(-, 1, 8304d90c18, -, 2),
-      HEX_DBL(+, 1, 80bb749056fe7, -, 40) },
-    { HEX_DBL(+, 1, 49539e3b2d066, +, 0), HEX_DBL(-, 1, 7418acebc, -, 2),
-      HEX_DBL(+, 1, ceac7f0607711, -, 43) },
-    { HEX_DBL(+, 1, 460cbc7f5cf9a, +, 0), HEX_DBL(-, 1, 6552b49988, -, 2),
-      HEX_DBL(+, 1, d8913d0e89fa, -, 42) },
-    { HEX_DBL(+, 1, 42d6625d51f86, +, 0), HEX_DBL(-, 1, 56b22e6b58, -, 2),
-      HEX_DBL(+, 1, c7eaf515033a1, -, 44) },
-    { HEX_DBL(+, 1, 3fb013fb013fb, +, 0), HEX_DBL(-, 1, 48365e696, -, 2),
-      HEX_DBL(+, 1, 434adcde7edc7, -, 41) },
-    { HEX_DBL(+, 1, 3c995a47babe7, +, 0), HEX_DBL(-, 1, 39de8e156, -, 2),
-      HEX_DBL(+, 1, 8246f8e527754, -, 40) },
-    { HEX_DBL(+, 1, 3991c2c187f63, +, 0), HEX_DBL(-, 1, 2baa0c34c, -, 2),
-      HEX_DBL(+, 1, e1513c28e180d, -, 42) },
-    { HEX_DBL(+, 1, 3698df3de0747, +, 0), HEX_DBL(-, 1, 1d982c9d58, -, 2),
-      HEX_DBL(+, 1, 63ea3fed4b8a2, -, 40) },
-    { HEX_DBL(+, 1, 33ae45b57bcb1, +, 0), HEX_DBL(-, 1, 0fa848045, -, 2),
-      HEX_DBL(+, 1, 32ccbacf1779b, -, 40) },
-    { HEX_DBL(+, 1, 30d190130d19, +, 0), HEX_DBL(-, 1, 01d9bbcfa8, -, 2),
-      HEX_DBL(+, 1, e2bfeb2b884aa, -, 42) },
-    { HEX_DBL(+, 1, 2e025c04b8097, +, 0), HEX_DBL(-, 1, e857d3d37, -, 3),
-      HEX_DBL(+, 1, d9309b4d2ea85, -, 40) },
-    { HEX_DBL(+, 1, 2b404ad012b4, +, 0), HEX_DBL(-, 1, cd3c712d4, -, 3),
-      HEX_DBL(+, 1, ddf360962d7ab, -, 40) },
-    { HEX_DBL(+, 1, 288b01288b012, +, 0), HEX_DBL(-, 1, b2602497e, -, 3),
-      HEX_DBL(+, 1, 597f8a121640f, -, 40) },
-    { HEX_DBL(+, 1, 25e22708092f1, +, 0), HEX_DBL(-, 1, 97c1cb13d, -, 3),
-      HEX_DBL(+, 1, 02807d15580dc, -, 40) },
-    { HEX_DBL(+, 1, 23456789abcdf, +, 0), HEX_DBL(-, 1, 7d60496d, -, 3),
-      HEX_DBL(+, 1, 12ce913d7a827, -, 41) },
-    { HEX_DBL(+, 1, 20b470c67c0d8, +, 0), HEX_DBL(-, 1, 633a8bf44, -, 3),
-      HEX_DBL(+, 1, 0648bca9c96bd, -, 40) },
-    { HEX_DBL(+, 1, 1e2ef3b3fb874, +, 0), HEX_DBL(-, 1, 494f863b9, -, 3),
-      HEX_DBL(+, 1, 066fceb89b0eb, -, 42) },
-    { HEX_DBL(+, 1, 1bb4a4046ed29, +, 0), HEX_DBL(-, 1, 2f9e32d5c, -, 3),
-      HEX_DBL(+, 1, 17b8b6c4f846b, -, 46) },
-    { HEX_DBL(+, 1, 19453808ca29c, +, 0), HEX_DBL(-, 1, 162593187, -, 3),
-      HEX_DBL(+, 1, 2c83506452154, -, 42) },
-    { HEX_DBL(+, 1, 16e0689427378, +, 0), HEX_DBL(-, 1, f9c95dc1e, -, 4),
-      HEX_DBL(+, 1, dd5d2183150f3, -, 41) },
-    { HEX_DBL(+, 1, 1485f0e0acd3b, +, 0), HEX_DBL(-, 1, c7b528b72, -, 4),
-      HEX_DBL(+, 1, 0e43c4f4e619d, -, 40) },
-    { HEX_DBL(+, 1, 12358e75d3033, +, 0), HEX_DBL(-, 1, 960caf9ac, -, 4),
-      HEX_DBL(+, 1, 20fbfd5902a1e, -, 42) },
-    { HEX_DBL(+, 1, 0fef010fef01, +, 0), HEX_DBL(-, 1, 64ce26c08, -, 4),
-      HEX_DBL(+, 1, 8ebeefb4ac467, -, 40) },
-    { HEX_DBL(+, 1, 0db20a88f4695, +, 0), HEX_DBL(-, 1, 33f7cde16, -, 4),
-      HEX_DBL(+, 1, 30b3312da7a7d, -, 40) },
-    { HEX_DBL(+, 1, 0b7e6ec259dc7, +, 0), HEX_DBL(-, 1, 0387efbcc, -, 4),
-      HEX_DBL(+, 1, 796f1632949c3, -, 40) },
-    { HEX_DBL(+, 1, 0953f39010953, +, 0), HEX_DBL(-, 1, a6f9c378, -, 5),
-      HEX_DBL(+, 1, 1687e151172cc, -, 40) },
-    { HEX_DBL(+, 1, 073260a47f7c6, +, 0), HEX_DBL(-, 1, 47aa07358, -, 5),
-      HEX_DBL(+, 1, 1f87e4a9cc778, -, 42) },
-    { HEX_DBL(+, 1, 05197f7d73404, +, 0), HEX_DBL(-, 1, d23afc498, -, 6),
-      HEX_DBL(+, 1, b183a6b628487, -, 40) },
-    { HEX_DBL(+, 1, 03091b51f5e1a, +, 0), HEX_DBL(-, 1, 16a21e21, -, 6),
-      HEX_DBL(+, 1, 7d75c58973ce5, -, 40) },
-    { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-    { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-    { HEX_DBL(+, 1, f44659e4a4271, -, 1), HEX_DBL(+, 1, 11cd1d51, -, 5),
-      HEX_DBL(+, 1, 9a0d857e2f4b2, -, 40) },
-    { HEX_DBL(+, 1, ecc07b301ecc, -, 1), HEX_DBL(+, 1, c4dfab908, -, 5),
-      HEX_DBL(+, 1, 55b53fce557fd, -, 40) },
-    { HEX_DBL(+, 1, e573ac901e573, -, 1), HEX_DBL(+, 1, 3aa2fdd26, -, 4),
-      HEX_DBL(+, 1, f1cb0c9532089, -, 40) },
-    { HEX_DBL(+, 1, de5d6e3f8868a, -, 1), HEX_DBL(+, 1, 918a16e46, -, 4),
-      HEX_DBL(+, 1, 9af0dcd65a6e1, -, 43) },
-    { HEX_DBL(+, 1, d77b654b82c33, -, 1), HEX_DBL(+, 1, e72ec117e, -, 4),
-      HEX_DBL(+, 1, a5b93c4ebe124, -, 40) },
-    { HEX_DBL(+, 1, d0cb58f6ec074, -, 1), HEX_DBL(+, 1, 1dcd19755, -, 3),
-      HEX_DBL(+, 1, 5be50e71ddc6c, -, 42) },
-    { HEX_DBL(+, 1, ca4b3055ee191, -, 1), HEX_DBL(+, 1, 476a9f983, -, 3),
-      HEX_DBL(+, 1, ee9a798719e7f, -, 40) },
-    { HEX_DBL(+, 1, c3f8f01c3f8f, -, 1), HEX_DBL(+, 1, 70742d4ef, -, 3),
-      HEX_DBL(+, 1, 3ff1352c1219c, -, 46) },
-    { HEX_DBL(+, 1, bdd2b899406f7, -, 1), HEX_DBL(+, 1, 98edd077e, -, 3),
-      HEX_DBL(+, 1, c383cd11362f4, -, 41) },
-    { HEX_DBL(+, 1, b7d6c3dda338b, -, 1), HEX_DBL(+, 1, c0db6cdd9, -, 3),
-      HEX_DBL(+, 1, 37bd85b1a824e, -, 41) },
-    { HEX_DBL(+, 1, b2036406c80d9, -, 1), HEX_DBL(+, 1, e840be74e, -, 3),
-      HEX_DBL(+, 1, a9334d525e1ec, -, 41) },
-    { HEX_DBL(+, 1, ac5701ac5701a, -, 1), HEX_DBL(+, 1, 0790adbb, -, 2),
-      HEX_DBL(+, 1, 8060bfb6a491, -, 41) },
-    { HEX_DBL(+, 1, a6d01a6d01a6d, -, 1), HEX_DBL(+, 1, 1ac05b2918, -, 2),
-      HEX_DBL(+, 1, c1c161471580a, -, 40) },
-    { HEX_DBL(+, 1, a16d3f97a4b01, -, 1), HEX_DBL(+, 1, 2db10fc4d8, -, 2),
-      HEX_DBL(+, 1, ab1aa62214581, -, 42) },
-    { HEX_DBL(+, 1, 9c2d14ee4a101, -, 1), HEX_DBL(+, 1, 406463b1b, -, 2),
-      HEX_DBL(+, 1, 12e95dbda6611, -, 44) },
-    { HEX_DBL(+, 1, 970e4f80cb872, -, 1), HEX_DBL(+, 1, 52dbdfc4c8, -, 2),
-      HEX_DBL(+, 1, 6b53fee511af, -, 42) },
-    { HEX_DBL(+, 1, 920fb49d0e228, -, 1), HEX_DBL(+, 1, 6518fe467, -, 2),
-      HEX_DBL(+, 1, eea7d7d7d1764, -, 40) },
-    { HEX_DBL(+, 1, 8d3018d3018d3, -, 1), HEX_DBL(+, 1, 771d2ba7e8, -, 2),
-      HEX_DBL(+, 1, ecefa8d4fab97, -, 40) },
-    { HEX_DBL(+, 1, 886e5f0abb049, -, 1), HEX_DBL(+, 1, 88e9c72e08, -, 2),
-      HEX_DBL(+, 1, 913ea3d33fd14, -, 41) },
-    { HEX_DBL(+, 1, 83c977ab2bedd, -, 1), HEX_DBL(+, 1, 9a802391e, -, 2),
-      HEX_DBL(+, 1, 197e845877c94, -, 41) },
-    { HEX_DBL(+, 1, 7f405fd017f4, -, 1), HEX_DBL(+, 1, abe18797f, -, 2),
-      HEX_DBL(+, 1, f4a52f8e8a81, -, 42) },
-    { HEX_DBL(+, 1, 7ad2208e0ecc3, -, 1), HEX_DBL(+, 1, bd0f2e9e78, -, 2),
-      HEX_DBL(+, 1, 031f4336644cc, -, 42) },
-    { HEX_DBL(+, 1, 767dce434a9b1, -, 1), HEX_DBL(+, 1, ce0a4923a, -, 2),
-      HEX_DBL(+, 1, 61f33c897020c, -, 40) },
-    { HEX_DBL(+, 1, 724287f46debc, -, 1), HEX_DBL(+, 1, ded3fd442, -, 2),
-      HEX_DBL(+, 1, b2632e830632, -, 41) },
-    { HEX_DBL(+, 1, 6e1f76b4337c6, -, 1), HEX_DBL(+, 1, ef6d673288, -, 2),
-      HEX_DBL(+, 1, 888ec245a0bf, -, 40) },
-    { HEX_DBL(+, 1, 6a13cd153729, -, 1), HEX_DBL(+, 1, ffd799a838, -, 2),
-      HEX_DBL(+, 1, fe6f3b2f5fc8e, -, 40) },
-    { HEX_DBL(+, 1, 661ec6a5122f9, -, 1), HEX_DBL(+, 1, 0809cf27f4, -, 1),
-      HEX_DBL(+, 1, 81eaa9ef284dd, -, 40) },
-    { HEX_DBL(+, 1, 623fa7701623f, -, 1), HEX_DBL(+, 1, 10113b153c, -, 1),
-      HEX_DBL(+, 1, 1d7b07d6b1143, -, 42) },
-    { HEX_DBL(+, 1, 5e75bb8d015e7, -, 1), HEX_DBL(+, 1, 18028cf728, -, 1),
-      HEX_DBL(+, 1, 76b100b1f6c6, -, 41) },
-    { HEX_DBL(+, 1, 5ac056b015ac, -, 1), HEX_DBL(+, 1, 1fde3d30e8, -, 1),
-      HEX_DBL(+, 1, 26faeb9870945, -, 45) },
-    { HEX_DBL(+, 1, 571ed3c506b39, -, 1), HEX_DBL(+, 1, 27a4c0585c, -, 1),
-      HEX_DBL(+, 1, 7f2c5344d762b, -, 42) }
+{HEX_DBL( +, 1, 5390948f40fea, +, 0 ), HEX_DBL( -, 1, a152f142a,  -, 2 ), HEX_DBL( +, 1, f93e27b43bd2c, -, 40 )},
+{HEX_DBL( +, 1, 5015015015015, +, 0 ), HEX_DBL( -, 1, 921800925,  -, 2 ), HEX_DBL( +, 1, 162432a1b8df7, -, 41 )},
+{HEX_DBL( +, 1, 4cab88725af6e, +, 0 ), HEX_DBL( -, 1, 8304d90c18, -, 2 ), HEX_DBL( +, 1, 80bb749056fe7, -, 40 )},
+{HEX_DBL( +, 1, 49539e3b2d066, +, 0 ), HEX_DBL( -, 1, 7418acebc,  -, 2 ), HEX_DBL( +, 1, ceac7f0607711, -, 43 )},
+{HEX_DBL( +, 1, 460cbc7f5cf9a, +, 0 ), HEX_DBL( -, 1, 6552b49988, -, 2 ), HEX_DBL( +, 1, d8913d0e89fa,  -, 42 )},
+{HEX_DBL( +, 1, 42d6625d51f86, +, 0 ), HEX_DBL( -, 1, 56b22e6b58, -, 2 ), HEX_DBL( +, 1, c7eaf515033a1, -, 44 )},
+{HEX_DBL( +, 1, 3fb013fb013fb, +, 0 ), HEX_DBL( -, 1, 48365e696,  -, 2 ), HEX_DBL( +, 1, 434adcde7edc7, -, 41 )},
+{HEX_DBL( +, 1, 3c995a47babe7, +, 0 ), HEX_DBL( -, 1, 39de8e156,  -, 2 ), HEX_DBL( +, 1, 8246f8e527754, -, 40 )},
+{HEX_DBL( +, 1, 3991c2c187f63, +, 0 ), HEX_DBL( -, 1, 2baa0c34c,  -, 2 ), HEX_DBL( +, 1, e1513c28e180d, -, 42 )},
+{HEX_DBL( +, 1, 3698df3de0747, +, 0 ), HEX_DBL( -, 1, 1d982c9d58, -, 2 ), HEX_DBL( +, 1, 63ea3fed4b8a2, -, 40 )},
+{HEX_DBL( +, 1, 33ae45b57bcb1, +, 0 ), HEX_DBL( -, 1, 0fa848045,  -, 2 ), HEX_DBL( +, 1, 32ccbacf1779b, -, 40 )},
+{HEX_DBL( +, 1, 30d190130d19,  +, 0 ), HEX_DBL( -, 1, 01d9bbcfa8, -, 2 ), HEX_DBL( +, 1, e2bfeb2b884aa, -, 42 )},
+{HEX_DBL( +, 1, 2e025c04b8097, +, 0 ), HEX_DBL( -, 1, e857d3d37,  -, 3 ), HEX_DBL( +, 1, d9309b4d2ea85, -, 40 )},
+{HEX_DBL( +, 1, 2b404ad012b4,  +, 0 ), HEX_DBL( -, 1, cd3c712d4,  -, 3 ), HEX_DBL( +, 1, ddf360962d7ab, -, 40 )},
+{HEX_DBL( +, 1, 288b01288b012, +, 0 ), HEX_DBL( -, 1, b2602497e,  -, 3 ), HEX_DBL( +, 1, 597f8a121640f, -, 40 )},
+{HEX_DBL( +, 1, 25e22708092f1, +, 0 ), HEX_DBL( -, 1, 97c1cb13d,  -, 3 ), HEX_DBL( +, 1, 02807d15580dc, -, 40 )},
+{HEX_DBL( +, 1, 23456789abcdf, +, 0 ), HEX_DBL( -, 1, 7d60496d,   -, 3 ), HEX_DBL( +, 1, 12ce913d7a827, -, 41 )},
+{HEX_DBL( +, 1, 20b470c67c0d8, +, 0 ), HEX_DBL( -, 1, 633a8bf44,  -, 3 ), HEX_DBL( +, 1, 0648bca9c96bd, -, 40 )},
+{HEX_DBL( +, 1, 1e2ef3b3fb874, +, 0 ), HEX_DBL( -, 1, 494f863b9,  -, 3 ), HEX_DBL( +, 1, 066fceb89b0eb, -, 42 )},
+{HEX_DBL( +, 1, 1bb4a4046ed29, +, 0 ), HEX_DBL( -, 1, 2f9e32d5c,  -, 3 ), HEX_DBL( +, 1, 17b8b6c4f846b, -, 46 )},
+{HEX_DBL( +, 1, 19453808ca29c, +, 0 ), HEX_DBL( -, 1, 162593187,  -, 3 ), HEX_DBL( +, 1, 2c83506452154, -, 42 )},
+{HEX_DBL( +, 1, 16e0689427378, +, 0 ), HEX_DBL( -, 1, f9c95dc1e,  -, 4 ), HEX_DBL( +, 1, dd5d2183150f3, -, 41 )},
+{HEX_DBL( +, 1, 1485f0e0acd3b, +, 0 ), HEX_DBL( -, 1, c7b528b72,  -, 4 ), HEX_DBL( +, 1, 0e43c4f4e619d, -, 40 )},
+{HEX_DBL( +, 1, 12358e75d3033, +, 0 ), HEX_DBL( -, 1, 960caf9ac,  -, 4 ), HEX_DBL( +, 1, 20fbfd5902a1e, -, 42 )},
+{HEX_DBL( +, 1, 0fef010fef01,  +, 0 ), HEX_DBL( -, 1, 64ce26c08,  -, 4 ), HEX_DBL( +, 1, 8ebeefb4ac467, -, 40 )},
+{HEX_DBL( +, 1, 0db20a88f4695, +, 0 ), HEX_DBL( -, 1, 33f7cde16,  -, 4 ), HEX_DBL( +, 1, 30b3312da7a7d, -, 40 )},
+{HEX_DBL( +, 1, 0b7e6ec259dc7, +, 0 ), HEX_DBL( -, 1, 0387efbcc,  -, 4 ), HEX_DBL( +, 1, 796f1632949c3, -, 40 )},
+{HEX_DBL( +, 1, 0953f39010953, +, 0 ), HEX_DBL( -, 1, a6f9c378,   -, 5 ), HEX_DBL( +, 1, 1687e151172cc, -, 40 )},
+{HEX_DBL( +, 1, 073260a47f7c6, +, 0 ), HEX_DBL( -, 1, 47aa07358,  -, 5 ), HEX_DBL( +, 1, 1f87e4a9cc778, -, 42 )},
+{HEX_DBL( +, 1, 05197f7d73404, +, 0 ), HEX_DBL( -, 1, d23afc498,  -, 6 ), HEX_DBL( +, 1, b183a6b628487, -, 40 )},
+{HEX_DBL( +, 1, 03091b51f5e1a, +, 0 ), HEX_DBL( -, 1, 16a21e21,   -, 6 ), HEX_DBL( +, 1, 7d75c58973ce5, -, 40 )},
+{HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,          +, 0 ), HEX_DBL( +, 0, 0,             +,  0 )},
+{HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,          +, 0 ), HEX_DBL( +, 0, 0,             +,  0 )},
+{HEX_DBL( +, 1, f44659e4a4271, -, 1 ), HEX_DBL( +, 1, 11cd1d51,   -, 5 ), HEX_DBL( +, 1, 9a0d857e2f4b2, -, 40 )},
+{HEX_DBL( +, 1, ecc07b301ecc,  -, 1 ), HEX_DBL( +, 1, c4dfab908,  -, 5 ), HEX_DBL( +, 1, 55b53fce557fd, -, 40 )},
+{HEX_DBL( +, 1, e573ac901e573, -, 1 ), HEX_DBL( +, 1, 3aa2fdd26,  -, 4 ), HEX_DBL( +, 1, f1cb0c9532089, -, 40 )},
+{HEX_DBL( +, 1, de5d6e3f8868a, -, 1 ), HEX_DBL( +, 1, 918a16e46,  -, 4 ), HEX_DBL( +, 1, 9af0dcd65a6e1, -, 43 )},
+{HEX_DBL( +, 1, d77b654b82c33, -, 1 ), HEX_DBL( +, 1, e72ec117e,  -, 4 ), HEX_DBL( +, 1, a5b93c4ebe124, -, 40 )},
+{HEX_DBL( +, 1, d0cb58f6ec074, -, 1 ), HEX_DBL( +, 1, 1dcd19755,  -, 3 ), HEX_DBL( +, 1, 5be50e71ddc6c, -, 42 )},
+{HEX_DBL( +, 1, ca4b3055ee191, -, 1 ), HEX_DBL( +, 1, 476a9f983,  -, 3 ), HEX_DBL( +, 1, ee9a798719e7f, -, 40 )},
+{HEX_DBL( +, 1, c3f8f01c3f8f,  -, 1 ), HEX_DBL( +, 1, 70742d4ef,  -, 3 ), HEX_DBL( +, 1, 3ff1352c1219c, -, 46 )},
+{HEX_DBL( +, 1, bdd2b899406f7, -, 1 ), HEX_DBL( +, 1, 98edd077e,  -, 3 ), HEX_DBL( +, 1, c383cd11362f4, -, 41 )},
+{HEX_DBL( +, 1, b7d6c3dda338b, -, 1 ), HEX_DBL( +, 1, c0db6cdd9,  -, 3 ), HEX_DBL( +, 1, 37bd85b1a824e, -, 41 )},
+{HEX_DBL( +, 1, b2036406c80d9, -, 1 ), HEX_DBL( +, 1, e840be74e,  -, 3 ), HEX_DBL( +, 1, a9334d525e1ec, -, 41 )},
+{HEX_DBL( +, 1, ac5701ac5701a, -, 1 ), HEX_DBL( +, 1, 0790adbb,   -, 2 ), HEX_DBL( +, 1, 8060bfb6a491,  -, 41 )},
+{HEX_DBL( +, 1, a6d01a6d01a6d, -, 1 ), HEX_DBL( +, 1, 1ac05b2918, -, 2 ), HEX_DBL( +, 1, c1c161471580a, -, 40 )},
+{HEX_DBL( +, 1, a16d3f97a4b01, -, 1 ), HEX_DBL( +, 1, 2db10fc4d8, -, 2 ), HEX_DBL( +, 1, ab1aa62214581, -, 42 )},
+{HEX_DBL( +, 1, 9c2d14ee4a101, -, 1 ), HEX_DBL( +, 1, 406463b1b,  -, 2 ), HEX_DBL( +, 1, 12e95dbda6611, -, 44 )},
+{HEX_DBL( +, 1, 970e4f80cb872, -, 1 ), HEX_DBL( +, 1, 52dbdfc4c8, -, 2 ), HEX_DBL( +, 1, 6b53fee511af,  -, 42 )},
+{HEX_DBL( +, 1, 920fb49d0e228, -, 1 ), HEX_DBL( +, 1, 6518fe467,  -, 2 ), HEX_DBL( +, 1, eea7d7d7d1764, -, 40 )},
+{HEX_DBL( +, 1, 8d3018d3018d3, -, 1 ), HEX_DBL( +, 1, 771d2ba7e8, -, 2 ), HEX_DBL( +, 1, ecefa8d4fab97, -, 40 )},
+{HEX_DBL( +, 1, 886e5f0abb049, -, 1 ), HEX_DBL( +, 1, 88e9c72e08, -, 2 ), HEX_DBL( +, 1, 913ea3d33fd14, -, 41 )},
+{HEX_DBL( +, 1, 83c977ab2bedd, -, 1 ), HEX_DBL( +, 1, 9a802391e,  -, 2 ), HEX_DBL( +, 1, 197e845877c94, -, 41 )},
+{HEX_DBL( +, 1, 7f405fd017f4,  -, 1 ), HEX_DBL( +, 1, abe18797f,  -, 2 ), HEX_DBL( +, 1, f4a52f8e8a81,  -, 42 )},
+{HEX_DBL( +, 1, 7ad2208e0ecc3, -, 1 ), HEX_DBL( +, 1, bd0f2e9e78, -, 2 ), HEX_DBL( +, 1, 031f4336644cc, -, 42 )},
+{HEX_DBL( +, 1, 767dce434a9b1, -, 1 ), HEX_DBL( +, 1, ce0a4923a,  -, 2 ), HEX_DBL( +, 1, 61f33c897020c, -, 40 )},
+{HEX_DBL( +, 1, 724287f46debc, -, 1 ), HEX_DBL( +, 1, ded3fd442,  -, 2 ), HEX_DBL( +, 1, b2632e830632,  -, 41 )},
+{HEX_DBL( +, 1, 6e1f76b4337c6, -, 1 ), HEX_DBL( +, 1, ef6d673288, -, 2 ), HEX_DBL( +, 1, 888ec245a0bf,  -, 40 )},
+{HEX_DBL( +, 1, 6a13cd153729,  -, 1 ), HEX_DBL( +, 1, ffd799a838, -, 2 ), HEX_DBL( +, 1, fe6f3b2f5fc8e, -, 40 )},
+{HEX_DBL( +, 1, 661ec6a5122f9, -, 1 ), HEX_DBL( +, 1, 0809cf27f4, -, 1 ), HEX_DBL( +, 1, 81eaa9ef284dd, -, 40 )},
+{HEX_DBL( +, 1, 623fa7701623f, -, 1 ), HEX_DBL( +, 1, 10113b153c, -, 1 ), HEX_DBL( +, 1, 1d7b07d6b1143, -, 42 )},
+{HEX_DBL( +, 1, 5e75bb8d015e7, -, 1 ), HEX_DBL( +, 1, 18028cf728, -, 1 ), HEX_DBL( +, 1, 76b100b1f6c6,  -, 41 )},
+{HEX_DBL( +, 1, 5ac056b015ac,  -, 1 ), HEX_DBL( +, 1, 1fde3d30e8, -, 1 ), HEX_DBL( +, 1, 26faeb9870945, -, 45 )},
+{HEX_DBL( +, 1, 571ed3c506b39, -, 1 ), HEX_DBL( +, 1, 27a4c0585c, -, 1 ), HEX_DBL( +, 1, 7f2c5344d762b, -, 42 )}
 };
 
 static double __loglTable2[64][3] = {
-    { HEX_DBL(+, 1, 01fbe7f0a1be6, +, 0), HEX_DBL(-, 1, 6cf6ddd26112a, -, 7),
-      HEX_DBL(+, 1, 0725e5755e314, -, 60) },
-    { HEX_DBL(+, 1, 01eba93a97b12, +, 0), HEX_DBL(-, 1, 6155b1d99f603, -, 7),
-      HEX_DBL(+, 1, 4bcea073117f4, -, 60) },
-    { HEX_DBL(+, 1, 01db6c9029cd1, +, 0), HEX_DBL(-, 1, 55b54153137ff, -, 7),
-      HEX_DBL(+, 1, 21e8faccad0ec, -, 61) },
-    { HEX_DBL(+, 1, 01cb31f0f534c, +, 0), HEX_DBL(-, 1, 4a158c27245bd, -, 7),
-      HEX_DBL(+, 1, 1a5b7bfbf35d3, -, 60) },
-    { HEX_DBL(+, 1, 01baf95c9723c, +, 0), HEX_DBL(-, 1, 3e76923e3d678, -, 7),
-      HEX_DBL(+, 1, eee400eb5fe34, -, 62) },
-    { HEX_DBL(+, 1, 01aac2d2acee6, +, 0), HEX_DBL(-, 1, 32d85380ce776, -, 7),
-      HEX_DBL(+, 1, cbf7a513937bd, -, 61) },
-    { HEX_DBL(+, 1, 019a8e52d401e, +, 0), HEX_DBL(-, 1, 273acfd74be72, -, 7),
-      HEX_DBL(+, 1, 5c64599efa5e6, -, 60) },
-    { HEX_DBL(+, 1, 018a5bdca9e42, +, 0), HEX_DBL(-, 1, 1b9e072a2e65, -, 7),
-      HEX_DBL(+, 1, 364180e0a5d37, -, 60) },
-    { HEX_DBL(+, 1, 017a2b6fcc33e, +, 0), HEX_DBL(-, 1, 1001f961f3243, -, 7),
-      HEX_DBL(+, 1, 63d795746f216, -, 60) },
-    { HEX_DBL(+, 1, 0169fd0bd8a8a, +, 0), HEX_DBL(-, 1, 0466a6671bca4, -, 7),
-      HEX_DBL(+, 1, 4c99ff1907435, -, 60) },
-    { HEX_DBL(+, 1, 0159d0b06d129, +, 0), HEX_DBL(-, 1, f1981c445cd05, -, 8),
-      HEX_DBL(+, 1, 4bfff6366b723, -, 62) },
-    { HEX_DBL(+, 1, 0149a65d275a6, +, 0), HEX_DBL(-, 1, da6460f76ab8c, -, 8),
-      HEX_DBL(+, 1, 9c5404f47589c, -, 61) },
-    { HEX_DBL(+, 1, 01397e11a581b, +, 0), HEX_DBL(-, 1, c3321ab87f4ef, -, 8),
-      HEX_DBL(+, 1, c0da537429cea, -, 61) },
-    { HEX_DBL(+, 1, 012957cd85a28, +, 0), HEX_DBL(-, 1, ac014958c112c, -, 8),
-      HEX_DBL(+, 1, 000c2a1b595e3, -, 64) },
-    { HEX_DBL(+, 1, 0119339065ef7, +, 0), HEX_DBL(-, 1, 94d1eca95f67a, -, 8),
-      HEX_DBL(+, 1, d8d20b0564d5, -, 61) },
-    { HEX_DBL(+, 1, 01091159e4b3d, +, 0), HEX_DBL(-, 1, 7da4047b92b3e, -, 8),
-      HEX_DBL(+, 1, 6194a5d68cf2, -, 66) },
-    { HEX_DBL(+, 1, 00f8f129a0535, +, 0), HEX_DBL(-, 1, 667790a09bf77, -, 8),
-      HEX_DBL(+, 1, ca230e0bea645, -, 61) },
-    { HEX_DBL(+, 1, 00e8d2ff374a1, +, 0), HEX_DBL(-, 1, 4f4c90e9c4ead, -, 8),
-      HEX_DBL(+, 1, 1de3e7f350c1, -, 61) },
-    { HEX_DBL(+, 1, 00d8b6da482ce, +, 0), HEX_DBL(-, 1, 3823052860649, -, 8),
-      HEX_DBL(+, 1, 5789b4c5891b8, -, 64) },
-    { HEX_DBL(+, 1, 00c89cba71a8c, +, 0), HEX_DBL(-, 1, 20faed2dc9a9e, -, 8),
-      HEX_DBL(+, 1, 9e7c40f9839fd, -, 62) },
-    { HEX_DBL(+, 1, 00b8849f52834, +, 0), HEX_DBL(-, 1, 09d448cb65014, -, 8),
-      HEX_DBL(+, 1, 387e3e9b6d02, -, 62) },
-    { HEX_DBL(+, 1, 00a86e88899a4, +, 0), HEX_DBL(-, 1, e55e2fa53ebf1, -, 9),
-      HEX_DBL(+, 1, cdaa71fddfddf, -, 62) },
-    { HEX_DBL(+, 1, 00985a75b5e3f, +, 0), HEX_DBL(-, 1, b716b429dce0f, -, 9),
-      HEX_DBL(+, 1, 2f2af081367bf, -, 63) },
-    { HEX_DBL(+, 1, 00884866766ee, +, 0), HEX_DBL(-, 1, 88d21ec7a16d7, -, 9),
-      HEX_DBL(+, 1, fb95c228d6f16, -, 62) },
-    { HEX_DBL(+, 1, 0078385a6a61d, +, 0), HEX_DBL(-, 1, 5a906f219a9e8, -, 9),
-      HEX_DBL(+, 1, 18aff10a89f29, -, 64) },
-    { HEX_DBL(+, 1, 00682a5130fbe, +, 0), HEX_DBL(-, 1, 2c51a4dae87f1, -, 9),
-      HEX_DBL(+, 1, bcc7e33ddde3, -, 63) },
-    { HEX_DBL(+, 1, 00581e4a69944, +, 0), HEX_DBL(-, 1, fc2b7f2d782b1, -, 10),
-      HEX_DBL(+, 1, fe3ef3300a9fa, -, 64) },
-    { HEX_DBL(+, 1, 00481445b39a8, +, 0), HEX_DBL(-, 1, 9fb97df0b0b83, -, 10),
-      HEX_DBL(+, 1, 0d9a601f2f324, -, 65) },
-    { HEX_DBL(+, 1, 00380c42ae963, +, 0), HEX_DBL(-, 1, 434d4546227ae, -, 10),
-      HEX_DBL(+, 1, 0b9b6a5868f33, -, 63) },
-    { HEX_DBL(+, 1, 00280640fa271, +, 0), HEX_DBL(-, 1, cdcda8e930c19, -, 11),
-      HEX_DBL(+, 1, 3d424ab39f789, -, 64) },
-    { HEX_DBL(+, 1, 0018024036051, +, 0), HEX_DBL(-, 1, 150c558601261, -, 11),
-      HEX_DBL(+, 1, 285bb90327a0f, -, 64) },
-    { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-    { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-    { HEX_DBL(+, 1, ffa011fca0a1e, -, 1), HEX_DBL(+, 1, 14e5640c4197b, -, 10),
-      HEX_DBL(+, 1, 95728136ae401, -, 63) },
-    { HEX_DBL(+, 1, ff6031f064e07, -, 1), HEX_DBL(+, 1, cd61806bf532d, -, 10),
-      HEX_DBL(+, 1, 568a4f35d8538, -, 63) },
-    { HEX_DBL(+, 1, ff2061d532b9c, -, 1), HEX_DBL(+, 1, 42e34af550eda, -, 9),
-      HEX_DBL(+, 1, 8f69cee55fec, -, 62) },
-    { HEX_DBL(+, 1, fee0a1a513253, -, 1), HEX_DBL(+, 1, 9f0a5523902ea, -, 9),
-      HEX_DBL(+, 1, daec734b11615, -, 63) },
-    { HEX_DBL(+, 1, fea0f15a12139, -, 1), HEX_DBL(+, 1, fb25e19f11b26, -, 9),
-      HEX_DBL(+, 1, 8bafca62941da, -, 62) },
-    { HEX_DBL(+, 1, fe6150ee3e6d4, -, 1), HEX_DBL(+, 1, 2b9af9a28e282, -, 8),
-      HEX_DBL(+, 1, 0fd3674e1dc5b, -, 61) },
-    { HEX_DBL(+, 1, fe21c05baa109, -, 1), HEX_DBL(+, 1, 599d4678f24b9, -, 8),
-      HEX_DBL(+, 1, dafce1f09937b, -, 61) },
-    { HEX_DBL(+, 1, fde23f9c69cf9, -, 1), HEX_DBL(+, 1, 8799d8c046eb, -, 8),
-      HEX_DBL(+, 1, ffa0ce0bdd217, -, 65) },
-    { HEX_DBL(+, 1, fda2ceaa956e8, -, 1), HEX_DBL(+, 1, b590b1e5951ee, -, 8),
-      HEX_DBL(+, 1, 645a769232446, -, 62) },
-    { HEX_DBL(+, 1, fd636d8047a1f, -, 1), HEX_DBL(+, 1, e381d3555dbcf, -, 8),
-      HEX_DBL(+, 1, 882320d368331, -, 61) },
-    { HEX_DBL(+, 1, fd241c179e0cc, -, 1), HEX_DBL(+, 1, 08b69f3dccde, -, 7),
-      HEX_DBL(+, 1, 01ad5065aba9e, -, 61) },
-    { HEX_DBL(+, 1, fce4da6ab93e8, -, 1), HEX_DBL(+, 1, 1fa97a61dd298, -, 7),
-      HEX_DBL(+, 1, 84cd1f931ae34, -, 60) },
-    { HEX_DBL(+, 1, fca5a873bcb19, -, 1), HEX_DBL(+, 1, 36997bcc54a3f, -, 7),
-      HEX_DBL(+, 1, 1485e97eaee03, -, 60) },
-    { HEX_DBL(+, 1, fc66862ccec93, -, 1), HEX_DBL(+, 1, 4d86a43264a4f, -, 7),
-      HEX_DBL(+, 1, c75e63370988b, -, 61) },
-    { HEX_DBL(+, 1, fc27739018cfe, -, 1), HEX_DBL(+, 1, 6470f448fb09d, -, 7),
-      HEX_DBL(+, 1, d7361eeaed0a1, -, 65) },
-    { HEX_DBL(+, 1, fbe87097c6f5a, -, 1), HEX_DBL(+, 1, 7b586cc4c2523, -, 7),
-      HEX_DBL(+, 1, b3df952cc473c, -, 61) },
-    { HEX_DBL(+, 1, fba97d3e084dd, -, 1), HEX_DBL(+, 1, 923d0e5a21e06, -, 7),
-      HEX_DBL(+, 1, cf56c7b64ae5d, -, 62) },
-    { HEX_DBL(+, 1, fb6a997d0ecdc, -, 1), HEX_DBL(+, 1, a91ed9bd3df9a, -, 7),
-      HEX_DBL(+, 1, b957bdcd89e43, -, 61) },
-    { HEX_DBL(+, 1, fb2bc54f0f4ab, -, 1), HEX_DBL(+, 1, bffdcfa1f7fbb, -, 7),
-      HEX_DBL(+, 1, ea8cad9a21771, -, 62) },
-    { HEX_DBL(+, 1, faed00ae41783, -, 1), HEX_DBL(+, 1, d6d9f0bbee6f6, -, 7),
-      HEX_DBL(+, 1, 5762a9af89c82, -, 60) },
-    { HEX_DBL(+, 1, faae4b94dfe64, -, 1), HEX_DBL(+, 1, edb33dbe7d335, -, 7),
-      HEX_DBL(+, 1, 21e24fc245697, -, 62) },
-    { HEX_DBL(+, 1, fa6fa5fd27ff8, -, 1), HEX_DBL(+, 1, 0244dbae5ed05, -, 6),
-      HEX_DBL(+, 1, 12ef51b967102, -, 60) },
-    { HEX_DBL(+, 1, fa310fe15a078, -, 1), HEX_DBL(+, 1, 0daeaf24c3529, -, 6),
-      HEX_DBL(+, 1, 10d3cfca60b45, -, 59) },
-    { HEX_DBL(+, 1, f9f2893bb9192, -, 1), HEX_DBL(+, 1, 1917199bb66bc, -, 6),
-      HEX_DBL(+, 1, 6cf6034c32e19, -, 60) },
-    { HEX_DBL(+, 1, f9b412068b247, -, 1), HEX_DBL(+, 1, 247e1b6c615d5, -, 6),
-      HEX_DBL(+, 1, 42f0fffa229f7, -, 61) },
-    { HEX_DBL(+, 1, f975aa3c18ed6, -, 1), HEX_DBL(+, 1, 2fe3b4efcc5ad, -, 6),
-      HEX_DBL(+, 1, 70106136a8919, -, 60) },
-    { HEX_DBL(+, 1, f93751d6ae09b, -, 1), HEX_DBL(+, 1, 3b47e67edea93, -, 6),
-      HEX_DBL(+, 1, 38dd5a4f6959a, -, 59) },
-    { HEX_DBL(+, 1, f8f908d098df6, -, 1), HEX_DBL(+, 1, 46aab0725ea6c, -, 6),
-      HEX_DBL(+, 1, 821fc1e799e01, -, 60) },
-    { HEX_DBL(+, 1, f8bacf242aa2c, -, 1), HEX_DBL(+, 1, 520c1322f1e4e, -, 6),
-      HEX_DBL(+, 1, 129dcda3ad563, -, 60) },
-    { HEX_DBL(+, 1, f87ca4cbb755, -, 1), HEX_DBL(+, 1, 5d6c0ee91d2ab, -, 6),
-      HEX_DBL(+, 1, c5b190c04606e, -, 62) },
-    { HEX_DBL(+, 1, f83e89c195c25, -, 1), HEX_DBL(+, 1, 68caa41d448c3, -, 6),
-      HEX_DBL(+, 1, 4723441195ac9, -, 59) }
+{HEX_DBL( +, 1, 01fbe7f0a1be6, +, 0 ), HEX_DBL( -, 1, 6cf6ddd26112a, -,  7 ), HEX_DBL( +, 1, 0725e5755e314, -, 60 )},
+{HEX_DBL( +, 1, 01eba93a97b12, +, 0 ), HEX_DBL( -, 1, 6155b1d99f603, -,  7 ), HEX_DBL( +, 1, 4bcea073117f4, -, 60 )},
+{HEX_DBL( +, 1, 01db6c9029cd1, +, 0 ), HEX_DBL( -, 1, 55b54153137ff, -,  7 ), HEX_DBL( +, 1, 21e8faccad0ec, -, 61 )},
+{HEX_DBL( +, 1, 01cb31f0f534c, +, 0 ), HEX_DBL( -, 1, 4a158c27245bd, -,  7 ), HEX_DBL( +, 1, 1a5b7bfbf35d3, -, 60 )},
+{HEX_DBL( +, 1, 01baf95c9723c, +, 0 ), HEX_DBL( -, 1, 3e76923e3d678, -,  7 ), HEX_DBL( +, 1, eee400eb5fe34, -, 62 )},
+{HEX_DBL( +, 1, 01aac2d2acee6, +, 0 ), HEX_DBL( -, 1, 32d85380ce776, -,  7 ), HEX_DBL( +, 1, cbf7a513937bd, -, 61 )},
+{HEX_DBL( +, 1, 019a8e52d401e, +, 0 ), HEX_DBL( -, 1, 273acfd74be72, -,  7 ), HEX_DBL( +, 1, 5c64599efa5e6, -, 60 )},
+{HEX_DBL( +, 1, 018a5bdca9e42, +, 0 ), HEX_DBL( -, 1, 1b9e072a2e65,  -,  7 ), HEX_DBL( +, 1, 364180e0a5d37, -, 60 )},
+{HEX_DBL( +, 1, 017a2b6fcc33e, +, 0 ), HEX_DBL( -, 1, 1001f961f3243, -,  7 ), HEX_DBL( +, 1, 63d795746f216, -, 60 )},
+{HEX_DBL( +, 1, 0169fd0bd8a8a, +, 0 ), HEX_DBL( -, 1, 0466a6671bca4, -,  7 ), HEX_DBL( +, 1, 4c99ff1907435, -, 60 )},
+{HEX_DBL( +, 1, 0159d0b06d129, +, 0 ), HEX_DBL( -, 1, f1981c445cd05, -,  8 ), HEX_DBL( +, 1, 4bfff6366b723, -, 62 )},
+{HEX_DBL( +, 1, 0149a65d275a6, +, 0 ), HEX_DBL( -, 1, da6460f76ab8c, -,  8 ), HEX_DBL( +, 1, 9c5404f47589c, -, 61 )},
+{HEX_DBL( +, 1, 01397e11a581b, +, 0 ), HEX_DBL( -, 1, c3321ab87f4ef, -,  8 ), HEX_DBL( +, 1, c0da537429cea, -, 61 )},
+{HEX_DBL( +, 1, 012957cd85a28, +, 0 ), HEX_DBL( -, 1, ac014958c112c, -,  8 ), HEX_DBL( +, 1, 000c2a1b595e3, -, 64 )},
+{HEX_DBL( +, 1, 0119339065ef7, +, 0 ), HEX_DBL( -, 1, 94d1eca95f67a, -,  8 ), HEX_DBL( +, 1, d8d20b0564d5,  -, 61 )},
+{HEX_DBL( +, 1, 01091159e4b3d, +, 0 ), HEX_DBL( -, 1, 7da4047b92b3e, -,  8 ), HEX_DBL( +, 1, 6194a5d68cf2,  -, 66 )},
+{HEX_DBL( +, 1, 00f8f129a0535, +, 0 ), HEX_DBL( -, 1, 667790a09bf77, -,  8 ), HEX_DBL( +, 1, ca230e0bea645, -, 61 )},
+{HEX_DBL( +, 1, 00e8d2ff374a1, +, 0 ), HEX_DBL( -, 1, 4f4c90e9c4ead, -,  8 ), HEX_DBL( +, 1, 1de3e7f350c1,  -, 61 )},
+{HEX_DBL( +, 1, 00d8b6da482ce, +, 0 ), HEX_DBL( -, 1, 3823052860649, -,  8 ), HEX_DBL( +, 1, 5789b4c5891b8, -, 64 )},
+{HEX_DBL( +, 1, 00c89cba71a8c, +, 0 ), HEX_DBL( -, 1, 20faed2dc9a9e, -,  8 ), HEX_DBL( +, 1, 9e7c40f9839fd, -, 62 )},
+{HEX_DBL( +, 1, 00b8849f52834, +, 0 ), HEX_DBL( -, 1, 09d448cb65014, -,  8 ), HEX_DBL( +, 1, 387e3e9b6d02,  -, 62 )},
+{HEX_DBL( +, 1, 00a86e88899a4, +, 0 ), HEX_DBL( -, 1, e55e2fa53ebf1, -,  9 ), HEX_DBL( +, 1, cdaa71fddfddf, -, 62 )},
+{HEX_DBL( +, 1, 00985a75b5e3f, +, 0 ), HEX_DBL( -, 1, b716b429dce0f, -,  9 ), HEX_DBL( +, 1, 2f2af081367bf, -, 63 )},
+{HEX_DBL( +, 1, 00884866766ee, +, 0 ), HEX_DBL( -, 1, 88d21ec7a16d7, -,  9 ), HEX_DBL( +, 1, fb95c228d6f16, -, 62 )},
+{HEX_DBL( +, 1, 0078385a6a61d, +, 0 ), HEX_DBL( -, 1, 5a906f219a9e8, -,  9 ), HEX_DBL( +, 1, 18aff10a89f29, -, 64 )},
+{HEX_DBL( +, 1, 00682a5130fbe, +, 0 ), HEX_DBL( -, 1, 2c51a4dae87f1, -,  9 ), HEX_DBL( +, 1, bcc7e33ddde3,  -, 63 )},
+{HEX_DBL( +, 1, 00581e4a69944, +, 0 ), HEX_DBL( -, 1, fc2b7f2d782b1, -, 10 ), HEX_DBL( +, 1, fe3ef3300a9fa, -, 64 )},
+{HEX_DBL( +, 1, 00481445b39a8, +, 0 ), HEX_DBL( -, 1, 9fb97df0b0b83, -, 10 ), HEX_DBL( +, 1, 0d9a601f2f324, -, 65 )},
+{HEX_DBL( +, 1, 00380c42ae963, +, 0 ), HEX_DBL( -, 1, 434d4546227ae, -, 10 ), HEX_DBL( +, 1, 0b9b6a5868f33, -, 63 )},
+{HEX_DBL( +, 1, 00280640fa271, +, 0 ), HEX_DBL( -, 1, cdcda8e930c19, -, 11 ), HEX_DBL( +, 1, 3d424ab39f789, -, 64 )},
+{HEX_DBL( +, 1, 0018024036051, +, 0 ), HEX_DBL( -, 1, 150c558601261, -, 11 ), HEX_DBL( +, 1, 285bb90327a0f, -, 64 )},
+{HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,             +,  0 ), HEX_DBL( +, 0, 0,             +,  0 )},
+{HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,             +,  0 ), HEX_DBL( +, 0, 0,             +,  0 )},
+{HEX_DBL( +, 1, ffa011fca0a1e, -, 1 ), HEX_DBL( +, 1, 14e5640c4197b, -, 10 ), HEX_DBL( +, 1, 95728136ae401, -, 63 )},
+{HEX_DBL( +, 1, ff6031f064e07, -, 1 ), HEX_DBL( +, 1, cd61806bf532d, -, 10 ), HEX_DBL( +, 1, 568a4f35d8538, -, 63 )},
+{HEX_DBL( +, 1, ff2061d532b9c, -, 1 ), HEX_DBL( +, 1, 42e34af550eda, -,  9 ), HEX_DBL( +, 1, 8f69cee55fec,  -, 62 )},
+{HEX_DBL( +, 1, fee0a1a513253, -, 1 ), HEX_DBL( +, 1, 9f0a5523902ea, -,  9 ), HEX_DBL( +, 1, daec734b11615, -, 63 )},
+{HEX_DBL( +, 1, fea0f15a12139, -, 1 ), HEX_DBL( +, 1, fb25e19f11b26, -,  9 ), HEX_DBL( +, 1, 8bafca62941da, -, 62 )},
+{HEX_DBL( +, 1, fe6150ee3e6d4, -, 1 ), HEX_DBL( +, 1, 2b9af9a28e282, -,  8 ), HEX_DBL( +, 1, 0fd3674e1dc5b, -, 61 )},
+{HEX_DBL( +, 1, fe21c05baa109, -, 1 ), HEX_DBL( +, 1, 599d4678f24b9, -,  8 ), HEX_DBL( +, 1, dafce1f09937b, -, 61 )},
+{HEX_DBL( +, 1, fde23f9c69cf9, -, 1 ), HEX_DBL( +, 1, 8799d8c046eb,  -,  8 ), HEX_DBL( +, 1, ffa0ce0bdd217, -, 65 )},
+{HEX_DBL( +, 1, fda2ceaa956e8, -, 1 ), HEX_DBL( +, 1, b590b1e5951ee, -,  8 ), HEX_DBL( +, 1, 645a769232446, -, 62 )},
+{HEX_DBL( +, 1, fd636d8047a1f, -, 1 ), HEX_DBL( +, 1, e381d3555dbcf, -,  8 ), HEX_DBL( +, 1, 882320d368331, -, 61 )},
+{HEX_DBL( +, 1, fd241c179e0cc, -, 1 ), HEX_DBL( +, 1, 08b69f3dccde,  -,  7 ), HEX_DBL( +, 1, 01ad5065aba9e, -, 61 )},
+{HEX_DBL( +, 1, fce4da6ab93e8, -, 1 ), HEX_DBL( +, 1, 1fa97a61dd298, -,  7 ), HEX_DBL( +, 1, 84cd1f931ae34, -, 60 )},
+{HEX_DBL( +, 1, fca5a873bcb19, -, 1 ), HEX_DBL( +, 1, 36997bcc54a3f, -,  7 ), HEX_DBL( +, 1, 1485e97eaee03, -, 60 )},
+{HEX_DBL( +, 1, fc66862ccec93, -, 1 ), HEX_DBL( +, 1, 4d86a43264a4f, -,  7 ), HEX_DBL( +, 1, c75e63370988b, -, 61 )},
+{HEX_DBL( +, 1, fc27739018cfe, -, 1 ), HEX_DBL( +, 1, 6470f448fb09d, -,  7 ), HEX_DBL( +, 1, d7361eeaed0a1, -, 65 )},
+{HEX_DBL( +, 1, fbe87097c6f5a, -, 1 ), HEX_DBL( +, 1, 7b586cc4c2523, -,  7 ), HEX_DBL( +, 1, b3df952cc473c, -, 61 )},
+{HEX_DBL( +, 1, fba97d3e084dd, -, 1 ), HEX_DBL( +, 1, 923d0e5a21e06, -,  7 ), HEX_DBL( +, 1, cf56c7b64ae5d, -, 62 )},
+{HEX_DBL( +, 1, fb6a997d0ecdc, -, 1 ), HEX_DBL( +, 1, a91ed9bd3df9a, -,  7 ), HEX_DBL( +, 1, b957bdcd89e43, -, 61 )},
+{HEX_DBL( +, 1, fb2bc54f0f4ab, -, 1 ), HEX_DBL( +, 1, bffdcfa1f7fbb, -,  7 ), HEX_DBL( +, 1, ea8cad9a21771, -, 62 )},
+{HEX_DBL( +, 1, faed00ae41783, -, 1 ), HEX_DBL( +, 1, d6d9f0bbee6f6, -,  7 ), HEX_DBL( +, 1, 5762a9af89c82, -, 60 )},
+{HEX_DBL( +, 1, faae4b94dfe64, -, 1 ), HEX_DBL( +, 1, edb33dbe7d335, -,  7 ), HEX_DBL( +, 1, 21e24fc245697, -, 62 )},
+{HEX_DBL( +, 1, fa6fa5fd27ff8, -, 1 ), HEX_DBL( +, 1, 0244dbae5ed05, -,  6 ), HEX_DBL( +, 1, 12ef51b967102, -, 60 )},
+{HEX_DBL( +, 1, fa310fe15a078, -, 1 ), HEX_DBL( +, 1, 0daeaf24c3529, -,  6 ), HEX_DBL( +, 1, 10d3cfca60b45, -, 59 )},
+{HEX_DBL( +, 1, f9f2893bb9192, -, 1 ), HEX_DBL( +, 1, 1917199bb66bc, -,  6 ), HEX_DBL( +, 1, 6cf6034c32e19, -, 60 )},
+{HEX_DBL( +, 1, f9b412068b247, -, 1 ), HEX_DBL( +, 1, 247e1b6c615d5, -,  6 ), HEX_DBL( +, 1, 42f0fffa229f7, -, 61 )},
+{HEX_DBL( +, 1, f975aa3c18ed6, -, 1 ), HEX_DBL( +, 1, 2fe3b4efcc5ad, -,  6 ), HEX_DBL( +, 1, 70106136a8919, -, 60 )},
+{HEX_DBL( +, 1, f93751d6ae09b, -, 1 ), HEX_DBL( +, 1, 3b47e67edea93, -,  6 ), HEX_DBL( +, 1, 38dd5a4f6959a, -, 59 )},
+{HEX_DBL( +, 1, f8f908d098df6, -, 1 ), HEX_DBL( +, 1, 46aab0725ea6c, -,  6 ), HEX_DBL( +, 1, 821fc1e799e01, -, 60 )},
+{HEX_DBL( +, 1, f8bacf242aa2c, -, 1 ), HEX_DBL( +, 1, 520c1322f1e4e, -,  6 ), HEX_DBL( +, 1, 129dcda3ad563, -, 60 )},
+{HEX_DBL( +, 1, f87ca4cbb755,  -, 1 ), HEX_DBL( +, 1, 5d6c0ee91d2ab, -,  6 ), HEX_DBL( +, 1, c5b190c04606e, -, 62 )},
+{HEX_DBL( +, 1, f83e89c195c25, -, 1 ), HEX_DBL( +, 1, 68caa41d448c3, -,  6 ), HEX_DBL( +, 1, 4723441195ac9, -, 59 )}
 };
 
 static double __loglTable3[8][3] = {
-    { HEX_DBL(+, 1, 000e00c40ab89, +, 0), HEX_DBL(-, 1, 4332be0032168, -, 12),
-      HEX_DBL(+, 1, a1003588d217a, -, 65) },
-    { HEX_DBL(+, 1, 000a006403e82, +, 0), HEX_DBL(-, 1, cdb2987366fcc, -, 13),
-      HEX_DBL(+, 1, 5c86001294bbc, -, 67) },
-    { HEX_DBL(+, 1, 0006002400d8, +, 0), HEX_DBL(-, 1, 150297c90fa6f, -, 13),
-      HEX_DBL(+, 1, 01fb4865fae32, -, 66) },
-    { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-    { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-    { HEX_DBL(+, 1, ffe8011ff280a, -, 1), HEX_DBL(+, 1, 14f8daf5e3d3b, -, 12),
-      HEX_DBL(+, 1, 3c933b4b6b914, -, 68) },
-    { HEX_DBL(+, 1, ffd8031fc184e, -, 1), HEX_DBL(+, 1, cd978c38042bb, -, 12),
-      HEX_DBL(+, 1, 10f8e642e66fd, -, 65) },
-    { HEX_DBL(+, 1, ffc8061f5492b, -, 1), HEX_DBL(+, 1, 43183c878274e, -, 11),
-      HEX_DBL(+, 1, 5885dd1eb6582, -, 65) }
+{HEX_DBL( +, 1, 000e00c40ab89, +, 0 ), HEX_DBL( -, 1, 4332be0032168, -, 12 ), HEX_DBL( +, 1, a1003588d217a, -, 65 )},
+{HEX_DBL( +, 1, 000a006403e82, +, 0 ), HEX_DBL( -, 1, cdb2987366fcc, -, 13 ), HEX_DBL( +, 1, 5c86001294bbc, -, 67 )},
+{HEX_DBL( +, 1, 0006002400d8,  +, 0 ), HEX_DBL( -, 1, 150297c90fa6f, -, 13 ), HEX_DBL( +, 1, 01fb4865fae32, -, 66 )},
+{HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,             +,  0 ), HEX_DBL( +, 0, 0,             +,  0 )},
+{HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,             +,  0 ), HEX_DBL( +, 0, 0,             +,  0 )},
+{HEX_DBL( +, 1, ffe8011ff280a, -, 1 ), HEX_DBL( +, 1, 14f8daf5e3d3b, -, 12 ), HEX_DBL( +, 1, 3c933b4b6b914, -, 68 )},
+{HEX_DBL( +, 1, ffd8031fc184e, -, 1 ), HEX_DBL( +, 1, cd978c38042bb, -, 12 ), HEX_DBL( +, 1, 10f8e642e66fd, -, 65 )},
+{HEX_DBL( +, 1, ffc8061f5492b, -, 1 ), HEX_DBL( +, 1, 43183c878274e, -, 11 ), HEX_DBL( +, 1, 5885dd1eb6582, -, 65 )}
 };
 
 static void __log2_ep(double *hi, double *lo, double x)
 {
-    union {
-        uint64_t i;
-        double d;
-    } uu;
+    union { uint64_t i; double d; } uu;
 
     int m;
     double f = reference_frexp(x, &m);
 
     // bring f in [0.75, 1.5)
-    if (f < 0.75)
-    {
+    if( f < 0.75 ) {
         f *= 2.0;
         m -= 1;
     }
 
     // index first table .... brings down to [1-2^-7, 1+2^6)
     uu.d = f;
-    int index =
-        (int)(((uu.i + ((uint64_t)1 << 51)) & 0x000fc00000000000ULL) >> 46);
+    int index = (int) (((uu.i + ((uint64_t) 1 << 51)) & 0x000fc00000000000ULL) >> 46);
     double r1 = __loglTable1[index][0];
     double logr1hi = __loglTable1[index][1];
     double logr1lo = __loglTable1[index][2];
-    // since log1rhi has 39 bits of precision, we have 14 bit in hand ... since
-    // |m| <= 1023 which needs 10bits at max, we can directly add m to log1hi
-    // without spilling
+    // since logr1hi has 39 bits of precision, we have 14 bits in hand ... since |m| <= 1023
+    // which needs 10 bits at max, we can directly add m to logr1hi without spilling
     logr1hi += m;
 
-    // argument reduction needs to be in double-double since reduced argument
-    // will form the leading term of polynomial approximation which sets the
-    // precision we eventually achieve
+    // argument reduction needs to be in double-double since reduced argument will form the
+    // leading term of polynomial approximation which sets the precision we eventually achieve
     double zhi, zlo;
     MulD(&zhi, &zlo, r1, uu.d);
 
     // second index table .... brings down to [1-2^-12, 1+2^-11)
     uu.d = zhi;
-    index = (int)(((uu.i + ((uint64_t)1 << 46)) & 0x00007e0000000000ULL) >> 41);
+    index = (int) (((uu.i + ((uint64_t) 1 << 46)) & 0x00007e0000000000ULL) >> 41);
     double r2 = __loglTable2[index][0];
     double logr2hi = __loglTable2[index][1];
     double logr2lo = __loglTable2[index][2];
@@ -4466,12 +4170,11 @@
     // Actually reduction to 2^-11 would have been sufficient to calculate
     // second order term in polynomial in double rather than double-double, I
     // reduced it a bit more to make sure other systematic arithmetic errors
-    // are guarded against .... also this allow lower order product of leading
-    // polynomial term i.e. Ao_hi*z_lo + Ao_lo*z_hi to be done in double rather
-    // than double-double ... hence only term that needs to be done in
-    // double-double is Ao_hi*z_hi
+    // are guarded against .... this also allows the lower order product of the leading polynomial
+    // term i.e. Ao_hi*z_lo + Ao_lo*z_hi to be done in double rather than double-double ...
+    // hence the only term that needs to be done in double-double is Ao_hi*z_hi
     uu.d = zhi;
-    index = (int)(((uu.i + ((uint64_t)1 << 41)) & 0x0000038000000000ULL) >> 39);
+    index = (int) (((uu.i + ((uint64_t) 1 << 41)) & 0x0000038000000000ULL) >> 39);
     double r3 = __loglTable3[index][0];
     double logr3hi = __loglTable3[index][1];
     double logr3lo = __loglTable3[index][2];
@@ -4483,36 +4186,34 @@
     AddDD(&log2hi, &log2lo, logr1hi, logr1lo, logr2hi, logr2lo);
     AddDD(&log2hi, &log2lo, logr3hi, logr3lo, log2hi, log2lo);
 
-    // final argument reduction .... zhi will be in [1-2^-14, 1+2^-13) after
-    // this
+    // final argument reduction .... zhi will be in [1-2^-14, 1+2^-13) after this
     MulDD(&zhi, &zlo, zhi, zlo, r3, 0.0);
-    // we dont need to do full double-double substract here. substracting 1.0
-    // for higher term is exact
+    // we don't need to do a full double-double subtract here. subtracting 1.0 from the higher
+    // term is exact
     zhi = zhi - 1.0;
     // normalize
     AddD(&zhi, &zlo, zhi, zlo);
 
     // polynomail fitting to compute log2(1 + z) ... forth order polynomial fit
-    // to log2(1 + z)/z gives minimax absolute error of O(2^-76) with z in
-    // [-2^-14, 2^-13] log2(1 + z)/z = Ao + A1*z + A2*z^2 + A3*z^3 + A4*z^4
+    // to log2(1 + z)/z gives minimax absolute error of O(2^-76) with z in [-2^-14, 2^-13]
+    // log2(1 + z)/z = Ao + A1*z + A2*z^2 + A3*z^3 + A4*z^4
     // => log2(1 + z) = Ao*z + A1*z^2 + A2*z^3 + A3*z^4 + A4*z^5
-    // => log2(1 + z) = (Aohi + Aolo)*(zhi + zlo) + z^2*(A1 + A2*z + A3*z^2 +
-    // A4*z^3) since we are looking for at least 64 digits of precision and z in
-    // [-2^-14, 2^-13], final term can be done in double .... also Aolo*zhi +
-    // Aohi*zlo can be done in double .... Aohi*zhi needs to be done in
-    // double-double
+    // => log2(1 + z) = (Aohi + Aolo)*(zhi + zlo) + z^2*(A1 + A2*z + A3*z^2 + A4*z^3)
+    // since we are looking for at least 64 bits of precision and z is in [-2^-14, 2^-13], the final
+    // term can be done in double .... also Aolo*zhi + Aohi*zlo can be done in double ....
+    // Aohi*zhi needs to be done in double-double
 
-    double Aohi = HEX_DBL(+, 1, 71547652b82fe, +, 0);
-    double Aolo = HEX_DBL(+, 1, 777c9cbb675c, -, 56);
+    double Aohi = HEX_DBL( +, 1, 71547652b82fe, +, 0 );
+    double Aolo = HEX_DBL( +, 1, 777c9cbb675c, -, 56 );
     double y;
-    y = HEX_DBL(+, 1, 276d2736fade7, -, 2);
-    y = HEX_DBL(-, 1, 7154765782df1, -, 2) + y * zhi;
-    y = HEX_DBL(+, 1, ec709dc3a0f67, -, 2) + y * zhi;
-    y = HEX_DBL(-, 1, 71547652b82fe, -, 1) + y * zhi;
-    double zhisq = zhi * zhi;
-    y = y * zhisq;
-    y = y + zhi * Aolo;
-    y = y + zlo * Aohi;
+    y = HEX_DBL( +, 1, 276d2736fade7, -, 2 );
+    y = HEX_DBL( -, 1, 7154765782df1, -, 2 ) + y*zhi;
+    y = HEX_DBL( +, 1, ec709dc3a0f67, -, 2 ) + y*zhi;
+    y = HEX_DBL( -, 1, 71547652b82fe, -, 1 ) + y*zhi;
+    double zhisq = zhi*zhi;
+    y = y*zhisq;
+    y = y + zhi*Aolo;
+    y = y + zlo*Aohi;
 
     MulD(&zhi, &zlo, Aohi, zhi);
     AddDD(&zhi, &zlo, zhi, zlo, y, 0.0);
@@ -4522,8 +4223,10 @@
     *lo = zlo;
 }
 
-long double reference_powl(long double x, long double y)
+long double reference_powl( long double x, long double y )
 {
+
+
     // this will be used for testing doubles i.e. arguments will
     // be doubles so cast the input back to double ... returned
     // result will be long double though .... > 53 bits of precision
@@ -4537,163 +4240,174 @@
     // causes errors. So we need to tread y as long double and convert it
     // to hi, lo doubles when performing y*log2(x).
 
-    static const double neg_epsilon = HEX_DBL(+, 1, 0, +, 53);
+//    double x = (double) xx;
+//    double y = (double) yy;
 
-    // if x = 1, return x for any y, even NaN
-    if (x == 1.0) return x;
+    static const double neg_epsilon = HEX_DBL( +, 1, 0, +, 53 );
 
-    // if y == 0, return 1 for any x, even NaN
-    if (y == 0.0) return 1.0L;
+    //if x = 1, return x for any y, even NaN
+    if( x == 1.0 )
+        return x;
 
-    // get NaNs out of the way
-    if (x != x || y != y) return x + y;
+    //if y == 0, return 1 for any x, even NaN
+    if( y == 0.0 )
+        return 1.0L;
 
-    // do the work required to sort out edge cases
-    double fabsy = reference_fabs(y);
-    double fabsx = reference_fabs(x);
-    double iy = reference_rint(
-        fabsy); // we do round to nearest here so that |fy| <= 0.5
-    if (iy > fabsy) // convert nearbyint to floor
+    //get NaNs out of the way
+    if( x != x  || y != y )
+        return x + y;
+
+    //do the work required to sort out edge cases
+    double fabsy = reference_fabs( y );
+    double fabsx = reference_fabs( x );
+    double iy = reference_rint( fabsy );            //we do round to nearest here so that |fy| <= 0.5
+    if( iy > fabsy )//convert nearbyint to floor
         iy -= 1.0;
     int isOddInt = 0;
-    if (fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon)
-        isOddInt = (int)(iy - 2.0 * rint(0.5 * iy)); // might be 0, -1, or 1
+    if( fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon )
+        isOddInt =     (int) (iy - 2.0 * rint( 0.5 * iy ));        //might be 0, -1, or 1
 
-    /// test a few more edge cases
-    // deal with x == 0 cases
-    if (x == 0.0)
+    ///test a few more edge cases
+    //deal with x == 0 cases
+    if( x == 0.0 )
     {
-        if (!isOddInt) x = 0.0;
+        if( ! isOddInt )
+            x = 0.0;
 
-        if (y < 0) x = 1.0 / x;
+        if( y < 0 )
+            x = 1.0/ x;
 
         return x;
     }
 
-    // x == +-Inf cases
-    if (isinf(fabsx))
+    //x == +-Inf cases
+    if( isinf(fabsx) )
     {
-        if (x < 0)
+        if( x < 0 )
         {
-            if (isOddInt)
+            if( isOddInt )
             {
-                if (y < 0)
+                if( y < 0 )
                     return -0.0;
                 else
                     return -INFINITY;
             }
             else
             {
-                if (y < 0)
+                if( y < 0 )
                     return 0.0;
                 else
                     return INFINITY;
             }
         }
 
-        if (y < 0) return 0;
+        if( y < 0 )
+            return 0;
         return INFINITY;
     }
 
-    // y = +-inf cases
-    if (isinf(fabsy))
+    //y = +-inf cases
+    if( isinf(fabsy) )
     {
-        if (x == -1) return 1;
+        if( x == -1 )
+            return 1;
 
-        if (y < 0)
+        if( y < 0 )
         {
-            if (fabsx < 1) return INFINITY;
+            if( fabsx < 1 )
+                return INFINITY;
             return 0;
         }
-        if (fabsx < 1) return 0;
+        if( fabsx < 1 )
+            return 0;
         return INFINITY;
     }
 
     // x < 0 and y non integer case
-    if (x < 0 && iy != fabsy)
+    if( x < 0 && iy != fabsy )
     {
-        // return nan;
+        //return nan;
         return cl_make_nan();
     }
 
-    // speedy resolution of sqrt and reciprocal sqrt
-    if (fabsy == 0.5)
+    //speedy resolution of sqrt and reciprocal sqrt
+    if( fabsy == 0.5 )
     {
-        long double xl = sqrtl(x);
-        if (y < 0) xl = 1.0 / xl;
+        long double xl = sqrtl( x );
+        if( y < 0 )
+            xl = 1.0/ xl;
         return xl;
     }
 
     double log2x_hi, log2x_lo;
 
-    // extended precision log .... accurate to at least 64-bits + couple of
-    // guard bits
+    // extended precision log .... accurate to at least 64-bits + couple of guard bits
     __log2_ep(&log2x_hi, &log2x_lo, fabsx);
 
     double ylog2x_hi, ylog2x_lo;
 
-    double y_hi = (double)y;
-    double y_lo = (double)(y - (long double)y_hi);
+    double y_hi = (double) y;
+    double y_lo = (double) ( y - (long double) y_hi);
 
     // compute product of y*log2(x)
     // scale to avoid overflow in double-double multiplication
-    if (reference_fabs(y) > HEX_DBL(+, 1, 0, +, 970))
-    {
+    if( reference_fabs( y ) > HEX_DBL( +, 1, 0, +, 970 ) ) {
         y_hi = reference_ldexp(y_hi, -53);
         y_lo = reference_ldexp(y_lo, -53);
     }
     MulDD(&ylog2x_hi, &ylog2x_lo, log2x_hi, log2x_lo, y_hi, y_lo);
-    if (fabs(y) > HEX_DBL(+, 1, 0, +, 970))
-    {
+    if( fabs( y ) > HEX_DBL( +, 1, 0, +, 970 ) ) {
         ylog2x_hi = reference_ldexp(ylog2x_hi, 53);
         ylog2x_lo = reference_ldexp(ylog2x_lo, 53);
     }
 
     long double powxy;
-    if (isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200))
-    {
-        powxy =
-            reference_signbit(ylog2x_hi) ? HEX_DBL(+, 0, 0, +, 0) : INFINITY;
-    }
-    else
-    {
+    if(isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200)) {
+        powxy = reference_signbit(ylog2x_hi) ? HEX_DBL( +, 0, 0, +, 0 ) : INFINITY;
+    } else {
         // separate integer + fractional part
         long int m = lrint(ylog2x_hi);
         AddDD(&ylog2x_hi, &ylog2x_lo, ylog2x_hi, ylog2x_lo, -m, 0.0);
 
         // revert to long double arithemtic
-        long double ylog2x = (long double)ylog2x_hi + (long double)ylog2x_lo;
-        long double tmp = reference_exp2l(ylog2x);
+        long double ylog2x = (long double) ylog2x_hi + (long double) ylog2x_lo;
+        long double tmp = reference_exp2l( ylog2x );
         powxy = reference_scalblnl(tmp, m);
     }
 
     // if y is odd integer and x is negative, reverse sign
-    if (isOddInt & reference_signbit(x)) powxy = -powxy;
+    if( isOddInt & reference_signbit(x))
+        powxy = -powxy;
     return powxy;
 }
 
 double reference_nextafter(double xx, double yy)
 {
-    float x = (float)xx;
-    float y = (float)yy;
+    float x = (float) xx;
+    float y = (float) yy;
 
     // take care of nans
-    if (x != x) return x;
+    if( x != x )
+        return x;
 
-    if (y != y) return y;
+    if( y != y )
+        return y;
 
-    if (x == y) return y;
+    if( x == y )
+        return y;
 
     int32f_t a, b;
 
-    a.f = x;
-    b.f = y;
+    a.f  = x;
+    b.f  = y;
 
-    if (a.i & 0x80000000) a.i = 0x80000000 - a.i;
-    if (b.i & 0x80000000) b.i = 0x80000000 - b.i;
+    if( a.i & 0x80000000 )
+        a.i = 0x80000000 - a.i;
+    if(b.i & 0x80000000 )
+        b.i = 0x80000000 - b.i;
 
     a.i += (a.i < b.i) ? 1 : -1;
-    a.i = (a.i < 0) ? (cl_int)0x80000000 - a.i : a.i;
+    a.i = (a.i < 0) ? (cl_int) 0x80000000 - a.i : a.i;
 
     return a.f;
 }
@@ -4701,28 +4415,33 @@
 
 long double reference_nextafterl(long double xx, long double yy)
 {
-    double x = (double)xx;
-    double y = (double)yy;
+    double x = (double) xx;
+    double y = (double) yy;
 
     // take care of nans
-    if (x != x) return x;
+    if( x != x )
+        return x;
 
-    if (y != y) return y;
+    if( y != y )
+        return y;
 
     int64d_t a, b;
 
-    a.d = x;
-    b.d = y;
+    a.d  = x;
+    b.d  = y;
 
     int64_t tmp = 0x8000000000000000LL;
 
-    if (a.l & tmp) a.l = tmp - a.l;
-    if (b.l & tmp) b.l = tmp - b.l;
+    if( a.l & tmp )
+        a.l = tmp - a.l;
+    if(b.l & tmp )
+        b.l = tmp - b.l;
 
-    // edge case. if (x == y) or (x = 0.0f and y = -0.0f) or (x = -0.0f and y =
-    // 0.0f) test needs to be done using integer rep because subnormals may be
-    // flushed to zero on some platforms
-    if (a.l == b.l) return y;
+    // edge case. if (x == y) or (x = 0.0f and y = -0.0f) or (x = -0.0f and y = 0.0f)
+    // test needs to be done using integer rep because
+    // subnormals may be flushed to zero on some platforms
+    if( a.l == b.l )
+        return y;
 
     a.l += (a.l < b.l) ? 1 : -1;
     a.l = (a.l < 0) ? tmp - a.l : a.l;
@@ -4732,108 +4451,112 @@
 
 double reference_fdim(double xx, double yy)
 {
-    float x = (float)xx;
-    float y = (float)yy;
+    float x = (float) xx;
+    float y = (float) yy;
 
-    if (x != x) return x;
+    if( x != x )
+        return x;
 
-    if (y != y) return y;
+    if( y != y )
+        return y;
 
-    float r = (x > y) ? (float)reference_subtract(x, y) : 0.0f;
+    float r = ( x > y ) ? (float) reference_subtract( x, y) : 0.0f;
     return r;
+
 }
 
 
 long double reference_fdiml(long double xx, long double yy)
 {
-    double x = (double)xx;
-    double y = (double)yy;
+    double x = (double) xx;
+    double y = (double) yy;
 
-    if (x != x) return x;
+    if( x != x )
+        return x;
 
-    if (y != y) return y;
+    if( y != y )
+        return y;
 
-    double r = (x > y) ? (double)reference_subtractl(x, y) : 0.0;
+    double r = ( x > y ) ? (double) reference_subtractl(x, y) : 0.0;
     return r;
 }
 
 double reference_remquo(double xd, double yd, int *n)
 {
-    float xx = (float)xd;
-    float yy = (float)yd;
+    float xx = (float) xd;
+    float yy = (float) yd;
 
-    if (isnan(xx) || isnan(yy) || fabsf(xx) == INFINITY || yy == 0.0)
+    if( isnan(xx) || isnan(yy) ||
+        fabsf(xx) == INFINITY  ||
+        yy == 0.0 )
     {
         *n = 0;
         return cl_make_nan();
     }
 
-    if (fabsf(yy) == INFINITY || xx == 0.0f)
-    {
+    if( fabsf(yy) == INFINITY || xx == 0.0f ) {
         *n = 0;
         return xd;
     }
 
-    if (fabsf(xx) == fabsf(yy))
-    {
+    if( fabsf(xx) == fabsf(yy) ) {
         *n = (xx == yy) ? 1 : -1;
-        return reference_signbit(xx) ? -0.0 : 0.0;
+        return reference_signbit( xx ) ? -0.0 : 0.0;
     }
 
-    int signx = reference_signbit(xx) ? -1 : 1;
-    int signy = reference_signbit(yy) ? -1 : 1;
+    int signx = reference_signbit( xx ) ? -1 : 1;
+    int signy = reference_signbit( yy ) ? -1 : 1;
     int signn = (signx == signy) ? 1 : -1;
     float x = fabsf(xx);
     float y = fabsf(yy);
 
     int ex, ey;
-    ex = reference_ilogb(x);
-    ey = reference_ilogb(y);
+    ex = reference_ilogb( x );
+    ey = reference_ilogb( y );
     float xr = x;
     float yr = y;
     uint32_t q = 0;
 
-    if (ex - ey >= -1)
-    {
+    if(ex-ey >= -1) {
 
-        yr = (float)reference_ldexp(y, -ey);
-        xr = (float)reference_ldexp(x, -ex);
+        yr = (float) reference_ldexp( y, -ey );
+        xr = (float) reference_ldexp( x, -ex );
 
-        if (ex - ey >= 0)
-        {
+        if(ex-ey >= 0) {
+
+
             int i;
-            for (i = ex - ey; i > 0; i--)
-            {
+            for(i = ex-ey; i > 0; i--) {
                 q <<= 1;
-                if (xr >= yr)
-                {
+                if(xr >= yr) {
                     xr -= yr;
                     q += 1;
                 }
                 xr += xr;
             }
             q <<= 1;
-            if (xr > yr)
-            {
+            if( xr > yr ) {
                 xr -= yr;
                 q += 1;
             }
         }
-        else // ex-ey = -1
-            xr = reference_ldexp(xr, ex - ey);
+        else //ex-ey = -1
+            xr = reference_ldexp(xr, ex-ey);
     }
 
-    if ((yr < 2.0f * xr) || ((yr == 2.0f * xr) && (q & 0x00000001)))
-    {
+    if( (yr < 2.0f*xr) || ( (yr == 2.0f*xr) && (q & 0x00000001) ) ) {
         xr -= yr;
         q += 1;
     }
 
-    if (ex - ey >= -1) xr = reference_ldexp(xr, ey);
+    if(ex-ey >= -1)
+        xr = reference_ldexp(xr, ey);
 
     int qout = q & 0x0000007f;
-    if (signn < 0) qout = -qout;
-    if (xx < 0.0) xr = -xr;
+    if( signn < 0)
+        qout = -qout;
+    if( xx < 0.0 )
+        xr = -xr;
 
     *n = qout;
 
@@ -4842,80 +4565,80 @@
 
 long double reference_remquol(long double xd, long double yd, int *n)
 {
-    double xx = (double)xd;
-    double yy = (double)yd;
 
-    if (isnan(xx) || isnan(yy) || fabs(xx) == INFINITY || yy == 0.0)
+    double xx = (double) xd;
+    double yy = (double) yd;
+
+    if( isnan(xx) || isnan(yy) ||
+        fabs(xx) == INFINITY  ||
+        yy == 0.0 )
     {
         *n = 0;
         return cl_make_nan();
     }
 
-    if (reference_fabs(yy) == INFINITY || xx == 0.0)
-    {
+    if( reference_fabs(yy) == INFINITY || xx == 0.0 ) {
         *n = 0;
         return xd;
     }
 
-    if (reference_fabs(xx) == reference_fabs(yy))
-    {
+    if( reference_fabs(xx) == reference_fabs(yy) ) {
         *n = (xx == yy) ? 1 : -1;
-        return reference_signbit(xx) ? -0.0 : 0.0;
+        return reference_signbit( xx ) ? -0.0 : 0.0;
     }
 
-    int signx = reference_signbit(xx) ? -1 : 1;
-    int signy = reference_signbit(yy) ? -1 : 1;
+    int signx = reference_signbit( xx ) ? -1 : 1;
+    int signy = reference_signbit( yy ) ? -1 : 1;
     int signn = (signx == signy) ? 1 : -1;
     double x = reference_fabs(xx);
     double y = reference_fabs(yy);
 
     int ex, ey;
-    ex = reference_ilogbl(x);
-    ey = reference_ilogbl(y);
+    ex = reference_ilogbl( x );
+    ey = reference_ilogbl( y );
     double xr = x;
     double yr = y;
     uint32_t q = 0;
 
-    if (ex - ey >= -1)
-    {
-        yr = reference_ldexp(y, -ey);
-        xr = reference_ldexp(x, -ex);
+    if(ex-ey >= -1) {
+
+        yr = reference_ldexp( y, -ey );
+        xr = reference_ldexp( x, -ex );
         int i;
 
-        if (ex - ey >= 0)
-        {
-            for (i = ex - ey; i > 0; i--)
-            {
+        if(ex-ey >= 0) {
+
+            for(i = ex-ey; i > 0; i--) {
                 q <<= 1;
-                if (xr >= yr)
-                {
+                if(xr >= yr) {
                     xr -= yr;
                     q += 1;
                 }
                 xr += xr;
             }
             q <<= 1;
-            if (xr > yr)
-            {
+            if( xr > yr ) {
                 xr -= yr;
                 q += 1;
             }
         }
         else
-            xr = reference_ldexp(xr, ex - ey);
+            xr = reference_ldexp(xr, ex-ey);
     }
 
-    if ((yr < 2.0 * xr) || ((yr == 2.0 * xr) && (q & 0x00000001)))
-    {
+    if( (yr < 2.0*xr) || ( (yr == 2.0*xr) && (q & 0x00000001) ) ) {
         xr -= yr;
         q += 1;
     }
 
-    if (ex - ey >= -1) xr = reference_ldexp(xr, ey);
+    if(ex-ey >= -1)
+        xr = reference_ldexp(xr, ey);
 
     int qout = q & 0x0000007f;
-    if (signn < 0) qout = -qout;
-    if (xx < 0.0) xr = -xr;
+    if( signn < 0)
+        qout = -qout;
+    if( xx < 0.0 )
+        xr = -xr;
 
     *n = qout;
     return xr;
@@ -4923,27 +4646,27 @@
 
 static double reference_scalbn(double x, int n)
 {
-    if (reference_isinf(x) || reference_isnan(x) || x == 0.0) return x;
+    if(reference_isinf(x) || reference_isnan(x) || x == 0.0)
+        return x;
 
     int bias = 1023;
-    union {
-        double d;
-        cl_long l;
-    } u;
-    u.d = (double)x;
+    union { double d; cl_long l; } u;
+    u.d = (double) x;
     int e = (int)((u.l & 0x7ff0000000000000LL) >> 52);
-    if (e == 0)
+    if(e == 0)
     {
         u.l |= ((cl_long)1023 << 52);
         u.d -= 1.0;
         e = (int)((u.l & 0x7ff0000000000000LL) >> 52) - 1022;
     }
     e += n;
-    if (e >= 2047 || n >= 2098) return reference_copysign(INFINITY, x);
-    if (e < -51 || n < -2097) return reference_copysign(0.0, x);
-    if (e <= 0)
+    if(e >= 2047 || n >= 2098 )
+        return reference_copysign(INFINITY, x);
+    if(e < -51 || n <-2097 )
+        return reference_copysign(0.0, x);
+    if(e <= 0)
     {
-        bias += (e - 1);
+        bias += (e-1);
         e = 1;
     }
     u.l &= 0x800fffffffffffffLL;
@@ -4956,26 +4679,26 @@
 static long double reference_scalblnl(long double x, long n)
 {
 #if defined(__i386__) || defined(__x86_64__) // INTEL
-    union {
+    union
+    {
         long double d;
-        struct
-        {
-            cl_ulong m;
-            cl_ushort sexp;
-        } u;
-    } u;
+        struct{ cl_ulong m; cl_ushort sexp;}u;
+    }u;
     u.u.m = CL_LONG_MIN;
 
-    if (reference_isinf(x)) return x;
+    if ( reference_isinf(x) )
+        return x;
 
-    if (x == 0.0L || n < -2200) return reference_copysignl(0.0L, x);
+    if( x == 0.0L || n < -2200)
+        return reference_copysignl( 0.0L, x );
 
-    if (n > 2200) return reference_copysignl(INFINITY, x);
+    if( n > 2200 )
+        return reference_copysignl( INFINITY, x );
 
-    if (n < 0)
+    if( n < 0 )
     {
         u.u.sexp = 0x3fff - 1022;
-        while (n <= -1022)
+        while( n <= -1022 )
         {
             x *= u.d;
             n += 1022;
@@ -4985,10 +4708,10 @@
         return x;
     }
 
-    if (n > 0)
+    if( n > 0 )
     {
         u.u.sexp = 0x3fff + 1023;
-        while (n >= 1023)
+        while( n >= 1023 )
         {
             x *= u.d;
             n -= 1023;
@@ -5003,27 +4726,27 @@
 #elif defined(__arm__) // ARM .. sizeof(long double) == sizeof(double)
 
 #if __DBL_MAX_EXP__ >= __LDBL_MAX_EXP__
-    if (reference_isinfl(x) || reference_isnanl(x)) return x;
+    if(reference_isinfl(x) || reference_isnanl(x))
+        return x;
 
     int bias = 1023;
-    union {
-        double d;
-        cl_long l;
-    } u;
-    u.d = (double)x;
+    union { double d; cl_long l; } u;
+    u.d = (double) x;
     int e = (int)((u.l & 0x7ff0000000000000LL) >> 52);
-    if (e == 0)
+    if(e == 0)
     {
         u.l |= ((cl_long)1023 << 52);
         u.d -= 1.0;
         e = (int)((u.l & 0x7ff0000000000000LL) >> 52) - 1022;
     }
     e += n;
-    if (e >= 2047) return reference_copysignl(INFINITY, x);
-    if (e < -51) return reference_copysignl(0.0, x);
-    if (e <= 0)
+    if(e >= 2047)
+        return reference_copysignl(INFINITY, x);
+    if(e < -51)
+        return reference_copysignl(0.0, x);
+    if(e <= 0)
     {
-        bias += (e - 1);
+        bias += (e-1);
         e = 1;
     }
     u.l &= 0x800fffffffffffffLL;
@@ -5033,255 +4756,284 @@
     return x * u.d;
 #endif
 
-#else // PPC
+#else  // PPC
     return scalblnl(x, n);
 #endif
 }
 
-double reference_relaxed_exp(double x) { return reference_exp(x); }
+double reference_relaxed_exp( double x )
+{
+  return reference_exp(x);
+}
 
 double reference_exp(double x)
 {
-    return reference_exp2(x * HEX_DBL(+, 1, 71547652b82fe, +, 0));
+  return reference_exp2( x * HEX_DBL( +, 1, 71547652b82fe, +, 0 ) );
 }
 
 long double reference_expl(long double x)
 {
 #if defined(__PPC__)
-    long double scale, bias;
+  long double scale, bias;
 
-    // The PPC double long version of expl fails to produce denorm results
-    // and instead generates a 0.0. Compensate for this limitation by
-    // computing expl as:
-    //     expl(x + 40) * expl(-40)
-    // Likewise, overflows can prematurely produce an infinity, so we
-    // compute expl as:
-    //     expl(x - 40) * expl(40)
-    scale = 1.0L;
-    bias = 0.0L;
-    if (x < -708.0L)
-    {
-        bias = 40.0;
-        scale = expl(-40.0L);
-    }
-    else if (x > 708.0L)
-    {
-        bias = -40.0L;
-        scale = expl(40.0L);
-    }
-    return expl(x + bias) * scale;
+  // The PPC double long version of expl fails to produce denorm results
+  // and instead generates a 0.0. Compensate for this limitation by
+  // computing expl as:
+  //     expl(x + 40) * expl(-40)
+  // Likewise, overflows can prematurely produce an infinity, so we
+  // compute expl as:
+  //     expl(x - 40) * expl(40)
+  scale = 1.0L;
+  bias = 0.0L;
+  if (x < -708.0L) {
+    bias = 40.0;
+    scale = expl(-40.0L);
+  } else if (x > 708.0L) {
+    bias = -40.0L;
+    scale = expl(40.0L);
+  }
+  return expl(x + bias) * scale;
 #else
-    return expl(x);
+    return expl( x );
 #endif
 }
 
-double reference_sinh(double x) { return sinh(x); }
+double reference_sinh(double x)
+{
+    return sinh(x);
+}
 
-long double reference_sinhl(long double x) { return sinhl(x); }
+long double reference_sinhl(long double x)
+{
+    return sinhl(x);
+}
 
 double reference_fmod(double x, double y)
 {
-    if (x == 0.0 && fabs(y) > 0.0) return x;
+    if( x == 0.0 && fabs(y) > 0.0 )
+        return x;
 
-    if (fabs(x) == INFINITY || y == 0) return cl_make_nan();
+    if( fabs(x) == INFINITY || y == 0 )
+        return cl_make_nan();
 
-    if (fabs(y) == INFINITY) // we know x is finite from above
+    if( fabs(y) == INFINITY )    // we know x is finite from above
         return x;
 #if defined(_MSC_VER) && defined(_M_X64)
-    return fmod(x, y);
+    return fmod( x, y );
 #else
-    return fmodf((float)x, (float)y);
+    return fmodf( (float) x, (float) y );
 #endif
 }
 
 long double reference_fmodl(long double x, long double y)
 {
-    if (x == 0.0L && fabsl(y) > 0.0L) return x;
-
-    if (fabsl(x) == INFINITY || y == 0.0L) return cl_make_nan();
-
-    if (fabsl(y) == INFINITY) // we know x is finite from above
+    if( x == 0.0L && fabsl(y) > 0.0L )
         return x;
 
-    return fmod((double)x, (double)y);
+    if( fabsl(x) == INFINITY || y == 0.0L )
+        return cl_make_nan();
+
+    if( fabsl(y) == INFINITY )    // we know x is finite from above
+        return x;
+
+    return fmod( (double) x, (double) y );
 }
 
 double reference_modf(double x, double *n)
 {
-    if (isnan(x))
-    {
+    if(isnan(x)) {
         *n = cl_make_nan();
         return cl_make_nan();
     }
     float nr;
-    float yr = modff((float)x, &nr);
+    float yr = modff((float) x, &nr);
     *n = nr;
     return yr;
 }
 
 long double reference_modfl(long double x, long double *n)
 {
-    if (isnan(x))
-    {
+    if(isnan(x)) {
         *n = cl_make_nan();
         return cl_make_nan();
     }
     double nr;
-    double yr = modf((double)x, &nr);
+    double yr = modf((double) x, &nr);
     *n = nr;
     return yr;
 }
 
-long double reference_fractl(long double x, long double *ip)
+long double reference_fractl(long double x, long double *ip )
 {
-    if (isnan(x))
-    {
+    if(isnan(x)) {
         *ip = cl_make_nan();
         return cl_make_nan();
     }
 
     double i;
-    double f = modf((double)x, &i);
-    if (f < 0.0)
+    double f = modf((double) x, &i );
+    if( f < 0.0 )
     {
         f = 1.0 + f;
         i -= 1.0;
-        if (f == 1.0) f = HEX_DBL(+, 1, fffffffffffff, -, 1);
+        if( f == 1.0 )
+            f = HEX_DBL( +, 1, fffffffffffff, -, 1 );
     }
     *ip = i;
     return f;
 }
 
-long double reference_fabsl(long double x) { return fabsl(x); }
-
-double reference_relaxed_log(double x)
+long double reference_fabsl(long double x)
 {
-    return (float)reference_log((float)x);
+    return fabsl( x );
+}
+
+double reference_relaxed_log( double x )
+{
+  return (float)reference_log((float)x);
 }
 
 double reference_log(double x)
 {
-    if (x == 0.0) return -INFINITY;
+    if( x == 0.0 )
+        return -INFINITY;
 
-    if (x < 0.0) return cl_make_nan();
+    if( x < 0.0 )
+        return cl_make_nan();
 
-    if (isinf(x)) return INFINITY;
+    if( isinf(x) )
+        return INFINITY;
 
-    double log2Hi = HEX_DBL(+, 1, 62e42fefa39ef, -, 1);
+    double log2Hi = HEX_DBL( +, 1, 62e42fefa39ef, -, 1 );
     double logxHi, logxLo;
     __log2_ep(&logxHi, &logxLo, x);
-    return logxHi * log2Hi;
+    return logxHi*log2Hi;
 }
 
 long double reference_logl(long double x)
 {
-    if (x == 0.0) return -INFINITY;
+    if( x == 0.0 )
+        return -INFINITY;
 
-    if (x < 0.0) return cl_make_nan();
+    if( x < 0.0 )
+        return cl_make_nan();
 
-    if (isinf(x)) return INFINITY;
+    if( isinf(x) )
+        return INFINITY;
 
-    double log2Hi = HEX_DBL(+, 1, 62e42fefa39ef, -, 1);
-    double log2Lo = HEX_DBL(+, 1, abc9e3b39803f, -, 56);
+    double log2Hi = HEX_DBL( +, 1, 62e42fefa39ef, -, 1 );
+    double log2Lo = HEX_DBL( +, 1, abc9e3b39803f, -, 56 );
     double logxHi, logxLo;
     __log2_ep(&logxHi, &logxLo, x);
 
-    long double lg2 = (long double)log2Hi + (long double)log2Lo;
-    long double logx = (long double)logxHi + (long double)logxLo;
-    return logx * lg2;
+    //double rhi, rlo;
+    //MulDD(&rhi, &rlo, logxHi, logxLo, log2Hi, log2Lo);
+    //return (long double) rhi + (long double) rlo;
+
+    long double lg2 = (long double) log2Hi + (long double) log2Lo;
+    long double logx = (long double) logxHi + (long double) logxLo;
+    return logx*lg2;
 }
 
-double reference_relaxed_pow(double x, double y)
-{
-    return (float)reference_exp2(((float)y) * (float)reference_log2((float)x));
+double reference_relaxed_pow( double x, double y) {
+  return (float)reference_exp2( ((float)y) * (float)reference_log2((float)x));
 }
 
-double reference_pow(double x, double y)
+double reference_pow( double x, double y )
 {
-    static const double neg_epsilon = HEX_DBL(+, 1, 0, +, 53);
+    static const double neg_epsilon = HEX_DBL( +, 1, 0, +, 53 );
 
-    // if x = 1, return x for any y, even NaN
-    if (x == 1.0) return x;
+    //if x = 1, return x for any y, even NaN
+    if( x == 1.0 )
+        return x;
 
-    // if y == 0, return 1 for any x, even NaN
-    if (y == 0.0) return 1.0;
+    //if y == 0, return 1 for any x, even NaN
+    if( y == 0.0 )
+        return 1.0;
 
-    // get NaNs out of the way
-    if (x != x || y != y) return x + y;
+    //get NaNs out of the way
+    if( x != x  || y != y )
+        return x + y;
 
-    // do the work required to sort out edge cases
-    double fabsy = reference_fabs(y);
-    double fabsx = reference_fabs(x);
-    double iy = reference_rint(
-        fabsy); // we do round to nearest here so that |fy| <= 0.5
-    if (iy > fabsy) // convert nearbyint to floor
+    //do the work required to sort out edge cases
+    double fabsy = reference_fabs( y );
+    double fabsx = reference_fabs( x );
+    double iy = reference_rint( fabsy );            //we do round to nearest here so that |fy| <= 0.5
+    if( iy > fabsy )//convert nearbyint to floor
         iy -= 1.0;
     int isOddInt = 0;
-    if (fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon)
-        isOddInt = (int)(iy - 2.0 * rint(0.5 * iy)); // might be 0, -1, or 1
+    if( fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon )
+        isOddInt =     (int) (iy - 2.0 * rint( 0.5 * iy ));        //might be 0, -1, or 1
 
-    /// test a few more edge cases
-    // deal with x == 0 cases
-    if (x == 0.0)
+    ///test a few more edge cases
+    //deal with x == 0 cases
+    if( x == 0.0 )
     {
-        if (!isOddInt) x = 0.0;
+        if( ! isOddInt )
+            x = 0.0;
 
-        if (y < 0) x = 1.0 / x;
+        if( y < 0 )
+            x = 1.0/ x;
 
         return x;
     }
 
-    // x == +-Inf cases
-    if (isinf(fabsx))
+    //x == +-Inf cases
+    if( isinf(fabsx) )
     {
-        if (x < 0)
+        if( x < 0 )
         {
-            if (isOddInt)
+            if( isOddInt )
             {
-                if (y < 0)
+                if( y < 0 )
                     return -0.0;
                 else
                     return -INFINITY;
             }
             else
             {
-                if (y < 0)
+                if( y < 0 )
                     return 0.0;
                 else
                     return INFINITY;
             }
         }
 
-        if (y < 0) return 0;
+        if( y < 0 )
+            return 0;
         return INFINITY;
     }
 
-    // y = +-inf cases
-    if (isinf(fabsy))
+    //y = +-inf cases
+    if( isinf(fabsy) )
     {
-        if (x == -1) return 1;
+        if( x == -1 )
+            return 1;
 
-        if (y < 0)
+        if( y < 0 )
         {
-            if (fabsx < 1) return INFINITY;
+            if( fabsx < 1 )
+                return INFINITY;
             return 0;
         }
-        if (fabsx < 1) return 0;
+        if( fabsx < 1 )
+            return 0;
         return INFINITY;
     }
 
     // x < 0 and y non integer case
-    if (x < 0 && iy != fabsy)
+    if( x < 0 && iy != fabsy )
     {
-        // return nan;
+        //return nan;
         return cl_make_nan();
     }
 
-    // speedy resolution of sqrt and reciprocal sqrt
-    if (fabsy == 0.5)
+    //speedy resolution of sqrt and reciprocal sqrt
+    if( fabsy == 0.5 )
     {
-        long double xl = reference_sqrt(x);
-        if (y < 0) xl = 1.0 / xl;
+        long double xl = reference_sqrt( x );
+        if( y < 0 )
+            xl = 1.0/ xl;
         return xl;
     }
 
@@ -5292,55 +5044,73 @@
     return isOddInt ? reference_copysignd(result, x) : result;
 }
 
-double reference_sqrt(double x) { return sqrt(x); }
+double reference_sqrt(double x)
+{
+    return sqrt(x);
+}
 
-double reference_floor(double x) { return floorf((float)x); }
+double reference_floor(double x)
+{
+    return floorf((float) x);
+}
 
 double reference_ldexp(double value, int exponent)
 {
 #ifdef __MINGW32__
-    /*
-     * ====================================================
-     * This function is from fdlibm: http://www.netlib.org
-     *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * ====================================================
-     */
-    if (!finite(value) || value == 0.0) return value;
-    return scalbn(value, exponent);
+/*
+ * ====================================================
+ * This function is from fdlibm: http://www.netlib.org
+ *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+    if(!finite(value)||value==0.0) return value;
+    return scalbn(value,exponent);
 #else
     return reference_scalbn(value, exponent);
 #endif
 }
 
-long double reference_ldexpl(long double x, int n) { return ldexpl(x, n); }
+long double reference_ldexpl(long double x, int n)
+{
+    return ldexpl( x, n);
+}
 
-long double reference_coshl(long double x) { return coshl(x); }
+long double reference_coshl(long double x)
+{
+    return coshl(x);
+}
 
-double reference_ceil(double x) { return ceilf((float)x); }
+double reference_ceil(double x)
+{
+    return ceilf((float) x);
+}
 
 long double reference_ceill(long double x)
 {
-    if (x == 0.0 || reference_isinfl(x) || reference_isnanl(x)) return x;
+    if( x == 0.0 || reference_isinfl(x) || reference_isnanl(x) )
+        return x;
 
     long double absx = reference_fabsl(x);
-    if (absx >= HEX_LDBL(+, 1, 0, +, 52)) return x;
+    if( absx >= HEX_LDBL( +, 1, 0, +, 52 ) )
+        return x;
 
-    if (absx < 1.0)
+    if( absx < 1.0 )
     {
-        if (x < 0.0)
+        if( x < 0.0 )
             return 0.0;
         else
             return 1.0;
     }
 
-    long double r = (long double)((cl_long)x);
+    long double r = (long double) ((cl_long) x);
 
-    if (x > 0.0 && r < x) r += 1.0;
+    if( x > 0.0 && r < x )
+        r += 1.0;
 
     return r;
 }
@@ -5351,53 +5121,45 @@
     long double x2 = x * x;
     int i;
 
-    // Prepare a head + tail representation of PI in long double.  A good
-    // compiler should get rid of all of this work.
-    static const cl_ulong pi_bits[2] = {
-        0x3243F6A8885A308DULL, 0x313198A2E0370734ULL
-    }; // first 126 bits of pi
-       // http://www.super-computing.org/pi-hexa_current.html
+    //Prepare a head + tail representation of PI in long double.  A good compiler should get rid of all of this work.
+    static const cl_ulong pi_bits[2] = { 0x3243F6A8885A308DULL, 0x313198A2E0370734ULL};  // first 126 bits of pi http://www.super-computing.org/pi-hexa_current.html
     long double head, tail, temp;
 #if __LDBL_MANT_DIG__ >= 64
     // long double has 64-bits of precision or greater
-    temp = (long double)pi_bits[0] * 0x1.0p64L;
-    head = temp + (long double)pi_bits[1];
-    temp -= head; // rounding err rounding pi_bits[1] into head
-    tail = (long double)pi_bits[1] + temp;
-    head *= HEX_LDBL(+, 1, 0, -, 125);
-    tail *= HEX_LDBL(+, 1, 0, -, 125);
+    temp = (long double) pi_bits[0] * 0x1.0p64L;
+    head = temp + (long double) pi_bits[1];
+    temp -= head;           // rounding err rounding pi_bits[1] into head
+    tail = (long double) pi_bits[1] + temp;
+    head *= HEX_LDBL( +, 1, 0, -, 125 );
+    tail *= HEX_LDBL( +, 1, 0, -, 125 );
 #else
-    head = (long double)pi_bits[0];
-    tail =
-        (long double)((cl_long)pi_bits[0]
-                      - (cl_long)
-                          head); // residual part of pi_bits[0] after rounding
-    tail = tail * HEX_LDBL(+, 1, 0, +, 64) + (long double)pi_bits[1];
-    head *= HEX_LDBL(+, 1, 0, -, 61);
-    tail *= HEX_LDBL(+, 1, 0, -, 125);
+    head = (long double) pi_bits[0];
+    tail = (long double) ((cl_long) pi_bits[0] - (cl_long) head );       // residual part of pi_bits[0] after rounding
+    tail = tail * HEX_LDBL( +, 1, 0, +, 64 ) + (long double) pi_bits[1];
+    head *= HEX_LDBL( +, 1, 0, -, 61 );
+    tail *= HEX_LDBL( +, 1, 0, -, 125 );
 #endif
 
     // oversize values and NaNs go to NaN
-    if (!(x2 <= 1.0)) return sqrtl(1.0L - x2);
+    if( ! (x2 <= 1.0) )
+        return sqrtl(1.0L - x2 );
 
     //
     // deal with large |x|:
     //                                                      sqrt( 1 - x**2)
-    // acos(|x| > sqrt(0.5)) = 2 * atan( z );       z = -------------------- ;
-    // z in [0, sqrt(0.5)/(1+sqrt(0.5) = .4142135...]
+    // acos(|x| > sqrt(0.5)) = 2 * atan( z );       z = -------------------- ;      z in [0, sqrt(0.5)/(1+sqrt(0.5) = .4142135...]
     //                                                          1 + x
-    if (x2 > 0.5)
+    if( x2 > 0.5 )
     {
         // we handle the x < 0 case as pi - acos(|x|)
 
-        long double sign = reference_copysignl(1.0L, x);
-        long double fabsx = reference_fabsl(x);
-        head -= head * sign; // x > 0 ? 0 : pi.hi
-        tail -= tail * sign; // x > 0 ? 0 : pi.low
+        long double sign = reference_copysignl( 1.0L, x );
+        long double fabsx = reference_fabsl( x );
+        head -= head * sign;        // x > 0 ? 0 : pi.hi
+        tail -= tail * sign;        // x > 0 ? 0 : pi.low
 
-        // z = sqrt( 1-x**2 ) / (1+x) = sqrt( (1-x)(1+x) / (1+x)**2 ) = sqrt(
-        // (1-x)/(1+x) )
-        long double z2 = (1.0L - fabsx) / (1.0L + fabsx); // z**2
+        // z = sqrt( 1-x**2 ) / (1+x) = sqrt( (1-x)(1+x) / (1+x)**2 ) = sqrt( (1-x)/(1+x) )
+        long double z2 = (1.0L - fabsx) / (1.0L + fabsx);   // z**2
         long double z = sign * sqrtl(z2);
 
         //                     atan(sqrt(q))
@@ -5407,41 +5169,29 @@
         // Define q = r*r, and solve for atan(r):
         //
         //  atan(r) = (p(r) + 1) * r = rp(r) + r
-        static long double atan_coeffs[] = {
-            HEX_LDBL(-, b, 3f52e0c278293b3, -, 67),
-            HEX_LDBL(-, a, aaaaaaaaaaa95b8, -, 5),
-            HEX_LDBL(+, c, ccccccccc992407, -, 6),
-            HEX_LDBL(-, 9, 24924923024398, -, 6),
-            HEX_LDBL(+, e, 38e38d6f92c98f3, -, 7),
-            HEX_LDBL(-, b, a2e89bfb8393ec6, -, 7),
-            HEX_LDBL(+, 9, d89a9f574d412cb, -, 7),
-            HEX_LDBL(-, 8, 88580517884c547, -, 7),
-            HEX_LDBL(+, f, 0ab6756abdad408, -, 8),
-            HEX_LDBL(-, d, 56a5b07a2f15b49, -, 8),
-            HEX_LDBL(+, b, 72ab587e46d80b2, -, 8),
-            HEX_LDBL(-, 8, 62ea24bb5b2e636, -, 8),
-            HEX_LDBL(+, e, d67c16582123937, -, 10)
-        }; // minimax fit over [ 0x1.0p-52, 0.18]   Max error:
-           // 0x1.67ea5c184e5d9p-64
+        static long double atan_coeffs[] = { HEX_LDBL( -, b, 3f52e0c278293b3, -, 67 ), HEX_LDBL( -, a, aaaaaaaaaaa95b8, -, 5 ),
+                                             HEX_LDBL( +, c, ccccccccc992407, -,  6 ), HEX_LDBL( -, 9, 24924923024398,  -, 6 ),
+                                             HEX_LDBL( +, e, 38e38d6f92c98f3, -,  7 ), HEX_LDBL( -, b, a2e89bfb8393ec6, -, 7 ),
+                                             HEX_LDBL( +, 9, d89a9f574d412cb, -,  7 ), HEX_LDBL( -, 8, 88580517884c547, -, 7 ),
+                                             HEX_LDBL( +, f, 0ab6756abdad408, -,  8 ), HEX_LDBL( -, d, 56a5b07a2f15b49, -, 8 ),
+                                             HEX_LDBL( +, b, 72ab587e46d80b2, -,  8 ), HEX_LDBL( -, 8, 62ea24bb5b2e636, -, 8 ),
+                                             HEX_LDBL( +, e, d67c16582123937, -, 10 ) }; // minimax fit over [ 0x1.0p-52, 0.18]   Max error:  0x1.67ea5c184e5d9p-64
 
         // Calculate y = p(r)
-        const size_t atan_coeff_count =
-            sizeof(atan_coeffs) / sizeof(atan_coeffs[0]);
-        long double y = atan_coeffs[atan_coeff_count - 1];
-        for (i = (int)atan_coeff_count - 2; i >= 0; i--)
+        const size_t atan_coeff_count = sizeof( atan_coeffs ) / sizeof( atan_coeffs[0] );
+        long double y = atan_coeffs[ atan_coeff_count - 1];
+        for( i = (int)atan_coeff_count - 2; i >= 0; i-- )
             y = atan_coeffs[i] + y * z2;
 
-        z *= 2.0L; // fold in 2.0 for 2.0 * atan(z)
-        y *= z; // rp(r)
+        z *= 2.0L;   // fold in 2.0 for 2.0 * atan(z)
+        y *= z;      // rp(r)
 
         return head + ((y + tail) + z);
     }
 
     // do |x| <= sqrt(0.5) here
-    //                                                     acos( sqrt(z) ) -
-    //                                                     PI/2
-    //  Piecewise minimax polynomial fits for p(z) = 1 +
-    //  ------------------------;
+    //                                                     acos( sqrt(z) ) - PI/2
+    //  Piecewise minimax polynomial fits for p(z) = 1 + ------------------------;
     //                                                            sqrt(z)
     //
     //  Define z = x*x, and solve for acos(x) over x in  x >= 0:
@@ -5449,88 +5199,52 @@
     //      acos( sqrt(z) ) = acos(x) = x*(p(z)-1) + PI/2 = xp(x**2) - x + PI/2
     //
     const long double coeffs[4][14] = {
-        { HEX_LDBL(-, a, fa7382e1f347974, -, 10),
-          HEX_LDBL(-, b, 4d5a992de1ac4da, -, 6),
-          HEX_LDBL(-, a, c526184bd558c17, -, 7),
-          HEX_LDBL(-, d, 9ed9b0346ec092a, -, 8),
-          HEX_LDBL(-, 9, dca410c1f04b1f, -, 8),
-          HEX_LDBL(-, f, 76e411ba9581ee5, -, 9),
-          HEX_LDBL(-, c, c71b00479541d8e, -, 9),
-          HEX_LDBL(-, a, f527a3f9745c9de, -, 9),
-          HEX_LDBL(-, 9, a93060051f48d14, -, 9),
-          HEX_LDBL(-, 8, b3d39ad70e06021, -, 9),
-          HEX_LDBL(-, f, f2ab95ab84f79c, -, 10),
-          HEX_LDBL(-, e, d1af5f5301ccfe4, -, 10),
-          HEX_LDBL(-, e, 1b53ba562f0f74a, -, 10),
-          HEX_LDBL(-, d, 6a3851330e15526, -,
-                   10) }, // x - 0.0625 in [ -0x1.fffffffffp-5, 0x1.0p-4 ]
-                          // Error: 0x1.97839bf07024p-76
+                                    { HEX_LDBL( -, a, fa7382e1f347974, -, 10 ), HEX_LDBL( -, b, 4d5a992de1ac4da, -,  6 ),
+                                      HEX_LDBL( -, a, c526184bd558c17, -,  7 ), HEX_LDBL( -, d, 9ed9b0346ec092a, -,  8 ),
+                                      HEX_LDBL( -, 9, dca410c1f04b1f,  -,  8 ), HEX_LDBL( -, f, 76e411ba9581ee5, -,  9 ),
+                                      HEX_LDBL( -, c, c71b00479541d8e, -,  9 ), HEX_LDBL( -, a, f527a3f9745c9de, -,  9 ),
+                                      HEX_LDBL( -, 9, a93060051f48d14, -,  9 ), HEX_LDBL( -, 8, b3d39ad70e06021, -,  9 ),
+                                      HEX_LDBL( -, f, f2ab95ab84f79c,  -, 10 ), HEX_LDBL( -, e, d1af5f5301ccfe4, -, 10 ),
+                                      HEX_LDBL( -, e, 1b53ba562f0f74a, -, 10 ), HEX_LDBL( -, d, 6a3851330e15526, -, 10 ) },  // x - 0.0625 in [ -0x1.fffffffffp-5, 0x1.0p-4 ]    Error: 0x1.97839bf07024p-76
 
-        { HEX_LDBL(-, 8, c2f1d638e4c1b48, -, 8),
-          HEX_LDBL(-, c, d47ac903c311c2c, -, 6),
-          HEX_LDBL(-, d, e020b2dabd5606a, -, 7),
-          HEX_LDBL(-, a, 086fafac220f16b, -, 7),
-          HEX_LDBL(-, 8, 55b5efaf6b86c3e, -, 7),
-          HEX_LDBL(-, f, 05c9774fed2f571, -, 8),
-          HEX_LDBL(-, e, 484a93f7f0fc772, -, 8),
-          HEX_LDBL(-, e, 1a32baef01626e4, -, 8),
-          HEX_LDBL(-, e, 528e525b5c9c73d, -, 8),
-          HEX_LDBL(-, e, ddd5d27ad49b2c8, -, 8),
-          HEX_LDBL(-, f, b3259e7ae10c6f, -, 8),
-          HEX_LDBL(-, 8, 68998170d5b19b7, -, 7),
-          HEX_LDBL(-, 9, 4468907f007727, -, 7),
-          HEX_LDBL(-, a, 2ad5e4906a8e7b3, -,
-                   7) }, // x - 0.1875 in [ -0x1.0p-4, 0x1.0p-4 ]    Error:
-                         // 0x1.647af70073457p-73
+                                    { HEX_LDBL( -, 8, c2f1d638e4c1b48, -,  8 ), HEX_LDBL( -, c, d47ac903c311c2c, -,  6 ),
+                                      HEX_LDBL( -, d, e020b2dabd5606a, -,  7 ), HEX_LDBL( -, a, 086fafac220f16b, -,  7 ),
+                                      HEX_LDBL( -, 8, 55b5efaf6b86c3e, -,  7 ), HEX_LDBL( -, f, 05c9774fed2f571, -,  8 ),
+                                      HEX_LDBL( -, e, 484a93f7f0fc772, -,  8 ), HEX_LDBL( -, e, 1a32baef01626e4, -,  8 ),
+                                      HEX_LDBL( -, e, 528e525b5c9c73d, -,  8 ), HEX_LDBL( -, e, ddd5d27ad49b2c8, -,  8 ),
+                                      HEX_LDBL( -, f, b3259e7ae10c6f,  -,  8 ), HEX_LDBL( -, 8, 68998170d5b19b7, -,  7 ),
+                                      HEX_LDBL( -, 9, 4468907f007727,  -,  7 ), HEX_LDBL( -, a, 2ad5e4906a8e7b3, -,  7 ) },// x - 0.1875 in [ -0x1.0p-4, 0x1.0p-4 ]    Error: 0x1.647af70073457p-73
 
-        { HEX_LDBL(-, f, a76585ad399e7ac, -, 8),
-          HEX_LDBL(-, e, d665b7dd504ca7c, -, 6),
-          HEX_LDBL(-, 9, 4c7c2402bd4bc33, -, 6),
-          HEX_LDBL(-, f, ba76b69074ff71c, -, 7),
-          HEX_LDBL(-, f, 58117784bdb6d5f, -, 7),
-          HEX_LDBL(-, 8, 22ddd8eef53227d, -, 6),
-          HEX_LDBL(-, 9, 1d1d3b57a63cdb4, -, 6),
-          HEX_LDBL(-, a, 9c4bdc40cca848, -, 6),
-          HEX_LDBL(-, c, b673b12794edb24, -, 6),
-          HEX_LDBL(-, f, 9290a06e31575bf, -, 6),
-          HEX_LDBL(-, 9, b4929c16aeb3d1f, -, 5),
-          HEX_LDBL(-, c, 461e725765a7581, -, 5),
-          HEX_LDBL(-, 8, 0a59654c98d9207, -, 4),
-          HEX_LDBL(-, a, 6de6cbd96c80562, -,
-                   4) }, // x - 0.3125 in [ -0x1.0p-4, 0x1.0p-4 ]   Error:
-                         // 0x1.b0246c304ce1ap-70
+                                    { HEX_LDBL( -, f, a76585ad399e7ac, -,  8 ), HEX_LDBL( -, e, d665b7dd504ca7c, -,  6 ),
+                                      HEX_LDBL( -, 9, 4c7c2402bd4bc33, -,  6 ), HEX_LDBL( -, f, ba76b69074ff71c, -,  7 ),
+                                      HEX_LDBL( -, f, 58117784bdb6d5f, -,  7 ), HEX_LDBL( -, 8, 22ddd8eef53227d, -,  6 ),
+                                      HEX_LDBL( -, 9, 1d1d3b57a63cdb4, -,  6 ), HEX_LDBL( -, a, 9c4bdc40cca848,  -,  6 ),
+                                      HEX_LDBL( -, c, b673b12794edb24, -,  6 ), HEX_LDBL( -, f, 9290a06e31575bf, -,  6 ),
+                                      HEX_LDBL( -, 9, b4929c16aeb3d1f, -,  5 ), HEX_LDBL( -, c, 461e725765a7581, -,  5 ),
+                                      HEX_LDBL( -, 8, 0a59654c98d9207, -,  4 ), HEX_LDBL( -, a, 6de6cbd96c80562, -,  4 ) }, // x - 0.3125 in [ -0x1.0p-4, 0x1.0p-4 ]   Error: 0x1.b0246c304ce1ap-70
 
-        { HEX_LDBL(-, b, dca8b0359f96342, -, 7),
-          HEX_LDBL(-, 8, cd2522fcde9823, -, 5),
-          HEX_LDBL(-, d, 2af9397b27ff74d, -, 6),
-          HEX_LDBL(-, d, 723f2c2c2409811, -, 6),
-          HEX_LDBL(-, f, ea8f8481ecc3cd1, -, 6),
-          HEX_LDBL(-, a, 43fd8a7a646b0b2, -, 5),
-          HEX_LDBL(-, e, 01b0bf63a4e8d76, -, 5),
-          HEX_LDBL(-, 9, f0b7096a2a7b4d, -, 4),
-          HEX_LDBL(-, e, 872e7c5a627ab4c, -, 4),
-          HEX_LDBL(-, a, dbd760a1882da48, -, 3),
-          HEX_LDBL(-, 8, 424e4dea31dd273, -, 2),
-          HEX_LDBL(-, c, c05d7730963e793, -, 2),
-          HEX_LDBL(-, a, 523d97197cd124a, -, 1),
-          HEX_LDBL(-, 8, 307ba943978aaee, +,
-                   0) } // x - 0.4375 in [ -0x1.0p-4, 0x1.0p-4 ]  Error:
-                        // 0x1.9ecff73da69c9p-66
-    };
+                                    { HEX_LDBL( -, b, dca8b0359f96342, -,  7 ), HEX_LDBL( -, 8, cd2522fcde9823,  -,  5 ),
+                                      HEX_LDBL( -, d, 2af9397b27ff74d, -,  6 ), HEX_LDBL( -, d, 723f2c2c2409811, -,  6 ),
+                                      HEX_LDBL( -, f, ea8f8481ecc3cd1, -,  6 ), HEX_LDBL( -, a, 43fd8a7a646b0b2, -,  5 ),
+                                      HEX_LDBL( -, e, 01b0bf63a4e8d76, -,  5 ), HEX_LDBL( -, 9, f0b7096a2a7b4d,  -,  4 ),
+                                      HEX_LDBL( -, e, 872e7c5a627ab4c, -,  4 ), HEX_LDBL( -, a, dbd760a1882da48, -,  3 ),
+                                      HEX_LDBL( -, 8, 424e4dea31dd273, -,  2 ), HEX_LDBL( -, c, c05d7730963e793, -,  2 ),
+                                      HEX_LDBL( -, a, 523d97197cd124a, -,  1 ), HEX_LDBL( -, 8, 307ba943978aaee, +,  0 ) } // x - 0.4375 in [ -0x1.0p-4, 0x1.0p-4 ]  Error: 0x1.9ecff73da69c9p-66
+                                 };
 
     const long double offsets[4] = { 0.0625, 0.1875, 0.3125, 0.4375 };
-    const size_t coeff_count = sizeof(coeffs[0]) / sizeof(coeffs[0][0]);
+    const size_t coeff_count = sizeof( coeffs[0] ) / sizeof( coeffs[0][0] );
 
-    // reduce the incoming values a bit so that they are in the range
-    // [-0x1.0p-4, 0x1.0p-4]
+    // reduce the incoming values a bit so that they are in the range [-0x1.0p-4, 0x1.0p-4]
     const long double *c;
     i = x2 * 8.0L;
     c = coeffs[i];
-    x2 -= offsets[i]; // exact
+    x2 -= offsets[i];       // exact
 
     // calcualte p(x2)
-    long double y = c[coeff_count - 1];
-    for (i = (int)coeff_count - 2; i >= 0; i--) y = c[i] + y * x2;
+    long double y = c[ coeff_count - 1];
+    for( i = (int)coeff_count - 2; i >= 0; i-- )
+        y = c[i] + y * x2;
 
     // xp(x2)
     y *= x;
@@ -5539,50 +5253,58 @@
     return head + ((y + tail) - x);
 }
 
-double reference_relaxed_acos(double x) { return reference_acos(x); }
-
 double reference_log10(double x)
 {
-    if (x == 0.0) return -INFINITY;
+    if( x == 0.0 )
+        return -INFINITY;
 
-    if (x < 0.0) return cl_make_nan();
+    if( x < 0.0 )
+        return cl_make_nan();
 
-    if (isinf(x)) return INFINITY;
+    if( isinf(x) )
+        return INFINITY;
 
-    double log2Hi = HEX_DBL(+, 1, 34413509f79fe, -, 2);
+    double log2Hi = HEX_DBL( +, 1, 34413509f79fe, -, 2 );
     double logxHi, logxLo;
     __log2_ep(&logxHi, &logxLo, x);
-    return logxHi * log2Hi;
+    return logxHi*log2Hi;
 }
 
-double reference_relaxed_log10(double x) { return reference_log10(x); }
-
 long double reference_log10l(long double x)
 {
-    if (x == 0.0) return -INFINITY;
+    if( x == 0.0 )
+        return -INFINITY;
 
-    if (x < 0.0) return cl_make_nan();
+    if( x < 0.0 )
+        return cl_make_nan();
 
-    if (isinf(x)) return INFINITY;
+    if( isinf(x) )
+        return INFINITY;
 
-    double log2Hi = HEX_DBL(+, 1, 34413509f79fe, -, 2);
-    double log2Lo = HEX_DBL(+, 1, e623e2566b02d, -, 55);
+    double log2Hi = HEX_DBL( +, 1, 34413509f79fe, -, 2 );
+    double log2Lo = HEX_DBL( +, 1, e623e2566b02d, -, 55 );
     double logxHi, logxLo;
     __log2_ep(&logxHi, &logxLo, x);
 
-    long double lg2 = (long double)log2Hi + (long double)log2Lo;
-    long double logx = (long double)logxHi + (long double)logxLo;
-    return logx * lg2;
+    //double rhi, rlo;
+    //MulDD(&rhi, &rlo, logxHi, logxLo, log2Hi, log2Lo);
+    //return (long double) rhi + (long double) rlo;
+
+    long double lg2 = (long double) log2Hi + (long double) log2Lo;
+    long double logx = (long double) logxHi + (long double) logxLo;
+    return logx*lg2;
 }
 
-double reference_acos(double x) { return acos(x); }
+double reference_acos(double x)
+{
+    return acos( x );
+}
 
 double reference_atan2(double x, double y)
 {
 #if defined(_WIN32)
     // fix edge cases for Windows
-    if (isinf(x) && isinf(y))
-    {
+    if (isinf(x) && isinf(y)) {
         double retval = (y > 0) ? M_PI_4 : 3.f * M_PI_4;
         return (x > 0) ? retval : -retval;
     }
@@ -5594,8 +5316,7 @@
 {
 #if defined(_WIN32)
     // fix edge cases for Windows
-    if (isinf(x) && isinf(y))
-    {
+    if (isinf(x) && isinf(y)) {
         long double retval = (y > 0) ? M_PI_4 : 3.f * M_PI_4;
         return (x > 0) ? retval : -retval;
     }
@@ -5605,7 +5326,7 @@
 
 double reference_frexp(double a, int *exp)
 {
-    if (isnan(a) || isinf(a) || a == 0.0)
+    if(isnan(a) || isinf(a) || a == 0.0)
     {
         *exp = 0;
         return a;
@@ -5623,7 +5344,7 @@
     u.l &= 0x7fffffffffffffffULL;
     int bias = -1022;
 
-    if ((u.l & 0x7ff0000000000000ULL) == 0)
+    if((u.l & 0x7ff0000000000000ULL) == 0)
     {
         double d = u.l;
         u.d = d;
@@ -5642,13 +5363,13 @@
 
 long double reference_frexpl(long double a, int *exp)
 {
-    if (isnan(a) || isinf(a) || a == 0.0)
+    if(isnan(a) || isinf(a) || a == 0.0)
     {
         *exp = 0;
         return a;
     }
 
-    if (sizeof(long double) == sizeof(double))
+    if(sizeof(long double) == sizeof(double))
     {
         return reference_frexp(a, exp);
     }
@@ -5659,64 +5380,90 @@
 }
 
 
-double reference_atan(double x) { return atan(x); }
+double reference_atan(double x)
+{
+    return atan( x );
+}
 
-long double reference_atanl(long double x) { return atanl(x); }
+long double reference_atanl(long double x)
+{
+    return atanl( x );
+}
 
-long double reference_asinl(long double x) { return asinl(x); }
+long double reference_asinl(long double x)
+{
+    return asinl( x );
+}
 
-double reference_asin(double x) { return asin(x); }
+double reference_asin(double x)
+{
+    return asin( x );
+}
 
-double reference_relaxed_asin(double x) { return reference_asin(x); }
+double reference_fabs(double x)
+{
+    return fabs( x);
+}
 
-double reference_fabs(double x) { return fabs(x); }
-
-double reference_cosh(double x) { return cosh(x); }
+double reference_cosh(double x)
+{
+    return cosh( x );
+}
 
 long double reference_sqrtl(long double x)
 {
-#if defined(__SSE2__)                                                          \
-    || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
-    __m128d result128 = _mm_set_sd((double)x);
+#if defined( __SSE2__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64)))
+    __m128d result128 = _mm_set_sd((double) x);
     result128 = _mm_sqrt_sd(result128, result128);
     return _mm_cvtsd_f64(result128);
 #else
     volatile double dx = x;
-    return sqrt(dx);
+    return sqrt( dx );
 #endif
 }
 
-long double reference_tanhl(long double x) { return tanhl(x); }
+long double reference_tanhl(long double x)
+{
+    return tanhl( x );
+}
 
 long double reference_floorl(long double x)
 {
-    if (x == 0.0 || reference_isinfl(x) || reference_isnanl(x)) return x;
+    if( x == 0.0 || reference_isinfl(x) || reference_isnanl(x) )
+        return x;
 
     long double absx = reference_fabsl(x);
-    if (absx >= HEX_LDBL(+, 1, 0, +, 52)) return x;
+    if( absx >= HEX_LDBL( +, 1, 0, +, 52 ) )
+        return x;
 
-    if (absx < 1.0)
+    if( absx < 1.0 )
     {
-        if (x < 0.0)
+        if( x < 0.0 )
             return -1.0;
         else
             return 0.0;
     }
 
-    long double r = (long double)((cl_long)x);
+    long double r = (long double) ((cl_long) x);
 
-    if (x < 0.0 && r > x) r -= 1.0;
+    if( x < 0.0 && r > x )
+        r -= 1.0;
 
     return r;
 }
 
 
-double reference_tanh(double x) { return tanh(x); }
+double reference_tanh(double x)
+{
+    return tanh( x );
+}
 
-long double reference_assignmentl(long double x) { return x; }
+long double reference_assignmentl( long double x ){ return x; }
 
-int reference_notl(long double x)
+int reference_notl( long double x )
 {
     int r = !x;
     return r;
 }
+
+

diff --git a/test_conformance/math_brute_force/reference_math.h b/test_conformance/math_brute_force/reference_math.h
index 78b2451..bcd0df8 100644
--- a/test_conformance/math_brute_force/reference_math.h
+++ b/test_conformance/math_brute_force/reference_math.h

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,221 +16,217 @@
 #ifndef REFERENCE_MATH_H
 #define REFERENCE_MATH_H
 
-#if defined(__APPLE__)
-#include <OpenCL/opencl.h>
+#if defined( __APPLE__ )
+    #include <OpenCL/opencl.h>
 #else
-#include <CL/cl.h>
+    #include <CL/cl.h>
 #endif
 
 // --  for testing float --
-double reference_sinh(double x);
-double reference_sqrt(double x);
-double reference_tanh(double x);
-double reference_acos(double);
-double reference_asin(double);
-double reference_atan(double);
-double reference_atan2(double, double);
-double reference_ceil(double);
-double reference_cosh(double);
-double reference_exp(double);
-double reference_fabs(double);
-double reference_acospi(double);
-double reference_asinpi(double);
-double reference_atanpi(double);
-double reference_atan2pi(double, double);
-double reference_cospi(double);
-double reference_divide(double, double);
-double reference_fract(double, double*);
-float reference_fma(float, float, float, int);
-double reference_mad(double, double, double);
-double reference_nextafter(double, double);
-double reference_recip(double);
-double reference_rootn(double, int);
-double reference_rsqrt(double);
-double reference_sincos(double, double*);
-double reference_sinpi(double);
-double reference_tanpi(double);
+double reference_sinh( double x );
+double reference_sqrt( double x );
+double reference_tanh( double x );
+double reference_acos( double );
+double reference_asin( double );
+double reference_atan( double );
+double reference_atan2( double, double );
+double reference_ceil( double );
+double reference_cosh( double );
+double reference_exp( double );
+double reference_fabs( double );
+double reference_acospi( double );
+double reference_asinpi( double );
+double reference_atanpi( double );
+double reference_atan2pi( double, double );
+double reference_cospi( double );
+double reference_divide( double, double );
+double reference_fract( double, double * );
+float  reference_fma( float, float, float, int );
+double reference_mad( double, double, double );
+double reference_nextafter(double, double );
+double reference_recip( double );
+double reference_rootn( double, int );
+double reference_rsqrt( double );
+double reference_sincos( double, double * );
+double reference_sinpi( double );
+double reference_tanpi( double );
 double reference_pow(double x, double y);
-double reference_pown(double, int);
-double reference_powr(double, double);
-double reference_cos(double);
-double reference_sin(double);
-double reference_tan(double);
-double reference_log(double);
-double reference_log10(double);
-double reference_modf(double, double* n);
+double reference_pown( double, int );
+double reference_powr( double, double );
+double reference_cos( double );
+double reference_sin( double );
+double reference_tan( double );
+double reference_log( double );
+double reference_log10( double );
+double reference_modf( double, double *n );
 
-double reference_fdim(double, double);
-double reference_add(double, double);
-double reference_subtract(double, double);
-double reference_divide(double, double);
-double reference_multiply(double, double);
-double reference_remquo(double, double, int*);
-double reference_lgamma_r(double, int*);
+double reference_fdim( double, double );
+double reference_add( double, double );
+double reference_subtract( double, double );
+double reference_divide( double, double );
+double reference_multiply( double, double );
+double reference_remquo( double, double, int* );
+double reference_lgamma_r( double, int* );
 
-int reference_isequal(double, double);
-int reference_isfinite(double);
-int reference_isgreater(double, double);
-int reference_isgreaterequal(double, double);
-int reference_isinf(double);
-int reference_isless(double, double);
-int reference_islessequal(double, double);
-int reference_islessgreater(double, double);
-int reference_isnan(double);
-int reference_isnormal(double);
-int reference_isnotequal(double, double);
-int reference_isordered(double, double);
-int reference_isunordered(double, double);
-int reference_signbit(float);
+int reference_isequal( double, double );
+int reference_isfinite( double );
+int reference_isgreater( double, double );
+int reference_isgreaterequal( double, double );
+int reference_isinf( double );
+int reference_isless( double, double );
+int reference_islessequal( double, double );
+int reference_islessgreater( double, double );
+int reference_isnan( double );
+int reference_isnormal( double );
+int reference_isnotequal( double, double );
+int reference_isordered( double, double );
+int reference_isunordered( double, double );
+int reference_signbit( float );
 
-double reference_acosh(double x);
-double reference_asinh(double x);
-double reference_atanh(double x);
+double reference_acosh( double x );
+double reference_asinh( double x );
+double reference_atanh( double x );
 double reference_cbrt(double x);
-float reference_copysign(float x, float y);
-double reference_copysignd(double x, double y);
-double reference_exp10(double);
-double reference_exp2(double x);
-double reference_expm1(double x);
-double reference_fmax(double x, double y);
-double reference_fmin(double x, double y);
-double reference_hypot(double x, double y);
-double reference_lgamma(double x);
-int reference_ilogb(double);
-double reference_log2(double x);
-double reference_log1p(double x);
-double reference_logb(double x);
-double reference_maxmag(double x, double y);
-double reference_minmag(double x, double y);
-double reference_nan(cl_uint x);
-double reference_reciprocal(double x);
-double reference_remainder(double x, double y);
-double reference_rint(double x);
-double reference_round(double x);
-double reference_trunc(double x);
-double reference_floor(double x);
-double reference_fmod(double x, double y);
-double reference_frexp(double x, int* n);
-double reference_ldexp(double x, int n);
+float reference_copysign( float x, float y);
+double reference_copysignd( double x, double y);
+double reference_exp10( double );
+double reference_exp2( double x );
+double reference_expm1( double x );
+double reference_fmax( double x, double y );
+double reference_fmin( double x, double y );
+double reference_hypot( double x, double y );
+double reference_lgamma( double x);
+int    reference_ilogb( double );
+double reference_log2( double x );
+double reference_log1p( double x );
+double reference_logb( double x );
+double reference_maxmag( double x, double y );
+double reference_minmag( double x, double y );
+double reference_nan( cl_uint x );
+double reference_reciprocal( double x );
+double reference_remainder( double x, double y );
+double reference_rint( double x );
+double reference_round( double x );
+double reference_trunc( double x );
+double reference_floor( double x );
+double reference_fmod( double x, double y );
+double reference_frexp( double x, int *n );
+double reference_ldexp( double x, int n );
 
-double reference_assignment(double x);
-int reference_not(double x);
+double reference_assignment( double x );
+int    reference_not( double x );
 // -- for testing fast-relaxed
 
-double reference_relaxed_acos(double);
-double reference_relaxed_asin(double);
-double reference_relaxed_atan(double);
-double reference_relaxed_mad(double, double, double);
-double reference_relaxed_divide(double x, double y);
-double reference_relaxed_sin(double x);
-double reference_relaxed_sinpi(double x);
-double reference_relaxed_cos(double x);
-double reference_relaxed_cospi(double x);
-double reference_relaxed_sincos(double x, double* y);
-double reference_relaxed_tan(double x);
-double reference_relaxed_exp(double x);
-double reference_relaxed_exp2(double x);
-double reference_relaxed_exp10(double x);
-double reference_relaxed_log(double x);
-double reference_relaxed_log2(double x);
-double reference_relaxed_log10(double x);
-double reference_relaxed_pow(double x, double y);
-double reference_relaxed_reciprocal(double x);
+double reference_relaxed_mad( double, double, double );
+double reference_relaxed_divide( double x, double y );
+double reference_relaxed_sin( double x );
+double reference_relaxed_cos( double x );
+double reference_relaxed_sincos( double x, double * y);
+double reference_relaxed_tan( double x );
+double reference_relaxed_exp( double x );
+double reference_relaxed_exp2( double x );
+double reference_relaxed_exp10( double x );
+double reference_relaxed_log( double x );
+double reference_relaxed_log2( double x );
+double reference_relaxed_pow( double x, double y);
+double reference_relaxed_reciprocal( double x );
 
 // -- for testing double --
 
-long double reference_sinhl(long double x);
-long double reference_sqrtl(long double x);
-long double reference_tanhl(long double x);
-long double reference_acosl(long double);
-long double reference_asinl(long double);
-long double reference_atanl(long double);
-long double reference_atan2l(long double, long double);
-long double reference_ceill(long double);
-long double reference_coshl(long double);
-long double reference_expl(long double);
-long double reference_fabsl(long double);
-long double reference_acospil(long double);
-long double reference_asinpil(long double);
-long double reference_atanpil(long double);
-long double reference_atan2pil(long double, long double);
-long double reference_cospil(long double);
-long double reference_dividel(long double, long double);
-long double reference_fractl(long double, long double*);
-long double reference_fmal(long double, long double, long double);
-long double reference_madl(long double, long double, long double);
-long double reference_nextafterl(long double, long double);
-long double reference_recipl(long double);
-long double reference_rootnl(long double, int);
-long double reference_rsqrtl(long double);
-long double reference_sincosl(long double, long double*);
-long double reference_sinpil(long double);
-long double reference_tanpil(long double);
+long double reference_sinhl( long double x );
+long double reference_sqrtl( long double x );
+long double reference_tanhl( long double x );
+long double reference_acosl( long double );
+long double reference_asinl( long double );
+long double reference_atanl( long double );
+long double reference_atan2l( long double, long double );
+long double reference_ceill( long double );
+long double reference_coshl( long double );
+long double reference_expl( long double );
+long double reference_fabsl( long double );
+long double reference_acospil( long double );
+long double reference_asinpil( long double );
+long double reference_atanpil( long double );
+long double reference_atan2pil( long double, long double );
+long double reference_cospil( long double );
+long double reference_dividel( long double, long double );
+long double reference_fractl( long double, long double * );
+long double reference_fmal( long double, long double, long double );
+long double reference_madl( long double, long double, long double );
+long double reference_nextafterl(long double, long double );
+long double reference_recipl( long double );
+long double reference_rootnl( long double, int );
+long double reference_rsqrtl( long double );
+long double reference_sincosl( long double, long double * );
+long double reference_sinpil( long double );
+long double reference_tanpil( long double );
 long double reference_powl(long double x, long double y);
-long double reference_pownl(long double, int);
-long double reference_powrl(long double, long double);
-long double reference_cosl(long double);
-long double reference_sinl(long double);
-long double reference_tanl(long double);
-long double reference_logl(long double);
-long double reference_log10l(long double);
-long double reference_modfl(long double, long double* n);
+long double reference_pownl( long double, int );
+long double reference_powrl( long double, long double );
+long double reference_cosl( long double );
+long double reference_sinl(long double );
+long double reference_tanl( long double );
+long double reference_logl( long double );
+long double reference_log10l( long double );
+long double reference_modfl( long double, long double *n );
 
 
-long double reference_fdiml(long double, long double);
-long double reference_addl(long double, long double);
-long double reference_subtractl(long double, long double);
-long double reference_dividel(long double, long double);
-long double reference_multiplyl(long double, long double);
-long double reference_remquol(long double, long double, int*);
-long double reference_lgamma_rl(long double, int*);
+long double reference_fdiml( long double, long double );
+long double reference_addl( long double, long double );
+long double reference_subtractl( long double, long double );
+long double reference_dividel( long double, long double );
+long double reference_multiplyl( long double, long double );
+long double reference_remquol( long double, long double, int* );
+long double reference_lgamma_rl( long double, int* );
 
 
-int reference_isequall(long double, long double);
-int reference_isfinitel(long double);
-int reference_isgreaterl(long double, long double);
-int reference_isgreaterequall(long double, long double);
-int reference_isinfl(long double);
-int reference_islessl(long double, long double);
-int reference_islessequall(long double, long double);
-int reference_islessgreaterl(long double, long double);
-int reference_isnanl(long double);
-int reference_isnormall(long double);
-int reference_isnotequall(long double, long double);
-int reference_isorderedl(long double, long double);
-int reference_isunorderedl(long double, long double);
-int reference_signbitl(long double);
+int reference_isequall( long double, long double );
+int reference_isfinitel( long double );
+int reference_isgreaterl( long double, long double );
+int reference_isgreaterequall( long double, long double );
+int reference_isinfl( long double );
+int reference_islessl( long double, long double );
+int reference_islessequall( long double, long double );
+int reference_islessgreaterl( long double, long double );
+int reference_isnanl( long double );
+int reference_isnormall( long double );
+int reference_isnotequall( long double, long double );
+int reference_isorderedl( long double, long double );
+int reference_isunorderedl( long double, long double );
+int reference_signbitl( long double );
 
-long double reference_acoshl(long double x);
-long double reference_asinhl(long double x);
-long double reference_atanhl(long double x);
+long double reference_acoshl( long double x );
+long double reference_asinhl( long double x );
+long double reference_atanhl( long double x );
 long double reference_cbrtl(long double x);
-long double reference_copysignl(long double x, long double y);
-long double reference_exp10l(long double);
-long double reference_exp2l(long double x);
-long double reference_expm1l(long double x);
-long double reference_fmaxl(long double x, long double y);
-long double reference_fminl(long double x, long double y);
-long double reference_hypotl(long double x, long double y);
-long double reference_lgammal(long double x);
-int reference_ilogbl(long double);
-long double reference_log2l(long double x);
-long double reference_log1pl(long double x);
-long double reference_logbl(long double x);
-long double reference_maxmagl(long double x, long double y);
-long double reference_minmagl(long double x, long double y);
-long double reference_nanl(cl_ulong x);
-long double reference_reciprocall(long double x);
-long double reference_remainderl(long double x, long double y);
-long double reference_rintl(long double x);
-long double reference_roundl(long double x);
-long double reference_truncl(long double x);
-long double reference_floorl(long double x);
-long double reference_fmodl(long double x, long double y);
-long double reference_frexpl(long double x, int* n);
-long double reference_ldexpl(long double x, int n);
+long double reference_copysignl( long double x, long double y);
+long double reference_exp10l( long double );
+long double reference_exp2l( long double x );
+long double reference_expm1l( long double x );
+long double reference_fmaxl( long double x, long double y );
+long double reference_fminl( long double x, long double y );
+long double reference_hypotl( long double x, long double y );
+long double reference_lgammal( long double x);
+int    reference_ilogbl( long double );
+long double reference_log2l( long double x );
+long double reference_log1pl( long double x );
+long double reference_logbl( long double x );
+long double reference_maxmagl( long double x, long double y );
+long double reference_minmagl( long double x, long double y );
+long double reference_nanl( cl_ulong x );
+long double reference_reciprocall( long double x );
+long double reference_remainderl( long double x, long double y );
+long double reference_rintl( long double x );
+long double reference_roundl( long double x );
+long double reference_truncl( long double x );
+long double reference_floorl( long double x );
+long double reference_fmodl( long double x, long double y );
+long double reference_frexpl( long double x, int *n );
+long double reference_ldexpl( long double x, int n );
 
-long double reference_assignmentl(long double x);
-int reference_notl(long double x);
+long double reference_assignmentl( long double x );
+int reference_notl( long double x );
 
 #endif
+
+

diff --git a/test_conformance/math_brute_force/sleep.cpp b/test_conformance/math_brute_force/sleep.cpp
deleted file mode 100644
index c7b1243..0000000
--- a/test_conformance/math_brute_force/sleep.cpp
+++ /dev/null

@@ -1,110 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "sleep.h"
-#include "utility.h"
-
-#if defined(__APPLE__)
-#include <IOKit/pwr_mgt/IOPMLib.h>
-#include <IOKit/IOMessage.h>
-
-struct
-{
-    io_connect_t connection;
-    IONotificationPortRef port;
-    io_object_t iterator;
-} sleepInfo;
-
-void sleepCallback(void* refcon, io_service_t service, natural_t messageType,
-                   void* messageArgument);
-
-void sleepCallback(void* refcon UNUSED, io_service_t service UNUSED,
-                   natural_t messageType, void* messageArgument)
-{
-
-    IOReturn result;
-    /*
-    service -- The IOService whose state has changed.
-    messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the
-    IOService's family. messageArgument -- An argument for the message,
-    dependent on the messageType.
-    */
-    switch (messageType)
-    {
-        case kIOMessageSystemWillSleep:
-            // Handle demand sleep (such as sleep caused by running out of
-            // batteries, closing the lid of a laptop, or selecting
-            // sleep from the Apple menu.
-            IOAllowPowerChange(sleepInfo.connection, (long)messageArgument);
-            vlog("Hard sleep occurred.\n");
-            break;
-        case kIOMessageCanSystemSleep:
-            // In this case, the computer has been idle for several minutes
-            // and will sleep soon so you must either allow or cancel
-            // this notification. Important: if you don’t respond, there will
-            // be a 30-second timeout before the computer sleeps.
-            // IOCancelPowerChange(root_port,(long)messageArgument);
-            result = IOCancelPowerChange(sleepInfo.connection,
-                                         (long)messageArgument);
-            if (kIOReturnSuccess != result)
-                vlog("sleep prevention failed. (%d)\n", result);
-            break;
-        case kIOMessageSystemHasPoweredOn:
-            // Handle wakeup.
-            break;
-    }
-}
-#endif
-
-
-void PreventSleep(void)
-{
-#if defined(__APPLE__)
-    vlog("Disabling sleep... ");
-    sleepInfo.iterator = (io_object_t)0;
-    sleepInfo.port = NULL;
-    sleepInfo.connection = IORegisterForSystemPower(
-        &sleepInfo, // void * refcon,
-        &sleepInfo.port, // IONotificationPortRef * thePortRef,
-        sleepCallback, // IOServiceInterestCallback callback,
-        &sleepInfo.iterator // io_object_t * notifier
-    );
-
-    if ((io_connect_t)0 == sleepInfo.connection)
-        vlog("failed.\n");
-    else
-        vlog("done.\n");
-
-    CFRunLoopAddSource(CFRunLoopGetCurrent(),
-                       IONotificationPortGetRunLoopSource(sleepInfo.port),
-                       kCFRunLoopDefaultMode);
-#else
-    vlog("*** PreventSleep() is not implemented on this platform.\n");
-#endif
-}
-
-void ResumeSleep(void)
-{
-#if defined(__APPLE__)
-    IOReturn result = IODeregisterForSystemPower(&sleepInfo.iterator);
-    if (0 != result)
-        vlog("Got error %d restoring sleep \n", result);
-    else
-        vlog("Sleep restored.\n");
-#else
-    vlog("*** ResumeSleep() is not implemented on this platform.\n");
-#endif
-}

diff --git a/test_conformance/math_brute_force/sleep.h b/test_conformance/math_brute_force/sleep.h
deleted file mode 100644
index ca64395..0000000
--- a/test_conformance/math_brute_force/sleep.h
+++ /dev/null

@@ -1,22 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef SLEEP_H
-#define SLEEP_H
-
-void PreventSleep(void);
-void ResumeSleep(void);
-
-#endif /* SLEEP_H */

diff --git a/test_conformance/math_brute_force/ternary.cpp b/test_conformance/math_brute_force/ternary.cpp
new file mode 100644
index 0000000..1bd7d88
--- /dev/null
+++ b/test_conformance/math_brute_force/ternary.cpp

@@ -0,0 +1,1359 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <string.h>
+#include "FunctionList.h"
+
+#define CORRECTLY_ROUNDED 0
+#define FLUSHED 1
+
+int TestFunc_Float_Float_Float_Float(const Func *f, MTdata);
+int TestFunc_Double_Double_Double_Double(const Func *f, MTdata);
+
+extern const vtbl _ternary = { "ternary", TestFunc_Float_Float_Float_Float,
+                               TestFunc_Double_Double_Double_Double };
+
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+{
+    const char *c[] = {
+        "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2,  __global float", sizeNames[vectorSize], "* in3 )\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name, "( in1[i], in2[i], in3[i] );\n"
+        "}\n"
+    };
+
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2 , __global float* in3)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       float3 f0 = vload3( 0, in + 3 * i );\n"
+        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
+        "       float3 f2 = vload3( 0, in3 + 3 * i );\n"
+        "       f0 = ", name, "( f0, f1, f2 );\n"
+        "       vstore3( f0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       float3 f0, f1, f2;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+        "               f2 = (float3)( in3[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       f0 = ", name, "( f0, f1, f2 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = f0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = f0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);
+
+}
+
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+{
+    const char *c[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2,  __global double", sizeNames[vectorSize], "* in3 )\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name, "( in1[i], in2[i], in3[i] );\n"
+        "}\n"
+    };
+
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2 , __global double* in3)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       double3 d0 = vload3( 0, in + 3 * i );\n"
+        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
+        "       double3 d2 = vload3( 0, in3 + 3 * i );\n"
+        "       d0 = ", name, "( d0, d1, d2 );\n"
+        "       vstore3( d0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       double3 d0, d1, d2;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
+        "               d2 = (double3)( in3[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       d0 = ", name, "( d0, d1, d2 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = d0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = d0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);
+
+}
+
+typedef struct BuildKernelInfo
+{
+    cl_uint     offset;            // the first vector size to build
+    cl_kernel   *kernels;
+    cl_program  *programs;
+    const char  *nameInCode;
+}BuildKernelInfo;
+
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i );
+}
+
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i );
+}
+
+
+// A table of more difficult cases to get right.
+//
+// The first half lists negative values (NaN, infinity, FLT_MAX, values around 2^62..2^64,
+// values around 1..3, values around FLT_MIN, subnormals, -0.0f); the second half repeats
+// the same pattern with positive sign.
+// TestFunc_Float_Float_Float_Float walks this table with three nested indices to fill the
+// start of its three input buffers for the first test pass.
+// NOTE(review): MAKE_HEX_FLOAT is defined elsewhere; presumably its first argument is the
+// hex-float literal and the remaining two are an equivalent integer mantissa / binary
+// exponent pair for compilers without hex-float support -- confirm against the macro.
+static const float specialValuesFloat[] = {
+    -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),  MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
+    -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.75f, -1.5f, -1.25f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), MAKE_HEX_FLOAT(-0x1.003p0f, -0x1003000L, -24), -MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
+    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
+
+    // Positive counterparts of the values above.
+    +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
+    +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.75f, 1.5f, 1.25f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), MAKE_HEX_FLOAT(0x1.003p0f, 0x1003000L, -24), +MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
+    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
+};
+
+// Number of entries in specialValuesFloat, derived from the array size so it tracks the table.
+static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] );
+
+
+// Brute-force test of a three-input single-precision function (the host reference is
+// called through f->func.f_fma).
+//
+// One kernel is built per vector size.  For each pass the three input buffers are filled
+// (special-value combinations on the first pass, random bit patterns otherwise), every
+// kernel is run, and each device result is compared with the host reference.  A result
+// that is not bit-exact is accepted if it is within the ULP tolerance, or if one of the
+// flush-to-zero / Inf-NaN relaxations below applies.  When gMeasureTimes is set the
+// kernels are timed afterwards.
+//
+// f: descriptor of the function under test (names, ULP tolerances, ftz flag, reference).
+// d: random number generator state used to fill the input buffers.
+//
+// Returns 0 on success, the failing OpenCL error code, or -1 when a result is out of
+// tolerance.  All kernels/programs built here are released before returning, except
+// when the build step itself fails.
+int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    float maxErrorVal = 0.0f;
+    float maxErrorVal2 = 0.0f;
+    float maxErrorVal3 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+
+    uint64_t step = bufferSize / sizeof( float );
+    // The Inf/NaN/overflow relaxation is only applied to fma, and only when the device
+    // does not support Inf/NaN.
+    int skipNanInf = (0 == strcmp( "fma", f->nameInCode )) && ! gInfNanSupport;
+    cl_uchar overflow[BUFFER_SIZE / sizeof( float )];
+    float float_ulps;
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+
+    if( gIsEmbedded )
+        float_ulps = f->float_embedded_ulps;
+    else
+        float_ulps = f->float_ulps;
+
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    // NOTE(review): on build failure we return directly instead of jumping to exit,
+    // because kernels[]/programs[] are uninitialized locals and may be only partially
+    // filled at this point -- confirm how BuildKernel_FloatFn behaves on failure.
+    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        return error;
+    /*
+     for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+     if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+     return error;
+     */
+
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        uint32_t *p3 = (uint32_t *)gIn3;
+        j = 0;
+        if( i == 0 )
+        { // test edge cases
+            float *fp = (float *)gIn;
+            float *fp2 = (float *)gIn2;
+            float *fp3 = (float *)gIn3;
+            uint32_t x, y, z;  x = y = z = 0;
+            // Enumerate (x, y, z) combinations of the special-value table, x fastest.
+            for( ; j < bufferSize / sizeof( float ); j++ )
+            {
+                fp[j] = specialValuesFloat[x];
+                fp2[j] = specialValuesFloat[y];
+                fp3[j] = specialValuesFloat[z];
+
+                if( ++x >= specialValuesFloatCount )
+                {
+                    x = 0;
+                    if( ++y >= specialValuesFloatCount )
+                    {
+                        y = 0;
+                        if( ++z >= specialValuesFloatCount )
+                            break;
+                    }
+                }
+            }
+            // The loop ran to the end of the buffer without exhausting the combinations.
+            if( j == bufferSize / sizeof( float ) )
+                vlog_error( "Test Error: not all special cases tested!\n" );
+        }
+
+        // Fill whatever is left of the buffers with random bit patterns.
+        for( ; j < bufferSize / sizeof( float ); j++ )
+        {
+            p[j] = genrand_int32(d);
+            p2[j] = genrand_int32(d);
+            p3[j] = genrand_int32(d);
+        }
+        // Kernels and programs exist from here on, so every failure goes through exit
+        // to release them.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            goto exit;
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        //Calculate the correctly rounded reference result
+        float *r = (float *)gOut_Ref;
+        float *s = (float *)gIn;
+        float *s2 = (float *)gIn2;
+        float *s3 = (float *)gIn3;
+        if( skipNanInf )
+        {
+            // Record per element whether the reference computation overflowed.
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            {
+                feclearexcept(FE_OVERFLOW);
+                r[j] = (float) f->func.f_fma( s[j], s2[j], s3[j], CORRECTLY_ROUNDED );
+                overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+            }
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                r[j] = (float) f->func.f_fma( s[j], s2[j], s3[j], CORRECTLY_ROUNDED );
+        }
+
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        if( gSkipCorrectnessTesting )
+            break;
+
+        //Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t *)(gOut[k]);
+
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    float err;
+                    int fail;
+                    float test = ((float*) q)[j];
+                    float correct = f->func.f_fma( s[j], s2[j], s3[j], CORRECTLY_ROUNDED );
+
+                    // Per section 10 paragraph 6, accept any result if an input or output is an infinity or NaN or overflow
+                    if( skipNanInf )
+                    {
+                        if( overflow[j]                                         ||
+                           IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                           IsFloatInfinity(s[j])    || IsFloatNaN(s[j])        ||
+                           IsFloatInfinity(s2[j])   || IsFloatNaN(s2[j])       ||
+                           IsFloatInfinity(s3[j])   || IsFloatNaN(s3[j])       )
+                            continue;
+                    }
+
+
+                    err = Ulp_Error( test, correct );
+                    fail = ! (fabsf(err) <= float_ulps);
+
+                    if( fail && ftz )
+                    {
+                        float correct2, err2;
+
+                        // retry per section 6.5.3.2  with flushing on
+                        if( 0.0f == test && 0.0f == f->func.f_fma( s[j], s2[j], s3[j], FLUSHED ) )
+                        {
+                            fail = 0;
+                            err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if( fail && IsFloatSubnormal( s[j] ) )
+                        { // first input is subnormal: retry with it replaced by +0 and -0
+                            float err3, correct3;
+
+                            if( skipNanInf )
+                                feclearexcept( FE_OVERFLOW );
+
+                            correct2 = f->func.f_fma( 0.0f, s2[j], s3[j], CORRECTLY_ROUNDED );
+                            correct3 = f->func.f_fma( -0.0f, s2[j], s3[j], CORRECTLY_ROUNDED );
+
+                            if( skipNanInf )
+                            {
+                                if( fetestexcept( FE_OVERFLOW ) )
+                                    continue;
+
+                                // Note: no double rounding here.  Reference functions calculate in single precision.
+                                if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
+                                   IsFloatInfinity(correct3) || IsFloatNaN(correct3)   )
+                                    continue;
+                            }
+
+                            err2 = Ulp_Error( test, correct2  );
+                            err3 = Ulp_Error( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( 0.0f == test &&
+                                ( 0.0f == f->func.f_fma(  0.0f, s2[j], s3[j], FLUSHED )  ||
+                                  0.0f == f->func.f_fma( -0.0f, s2[j], s3[j], FLUSHED ) )
+                              )
+                            {
+                                fail = 0;
+                                err = 0.0f;
+                            }
+
+                            //try with first two args as zero
+                            if( IsFloatSubnormal( s2[j] ) )
+                            { // first and second inputs are both subnormal
+                                double correct4, correct5;
+                                float err4, err5;
+
+                                if( skipNanInf )
+                                    feclearexcept( FE_OVERFLOW );
+
+                                correct2 = f->func.f_fma( 0.0f, 0.0f, s3[j], CORRECTLY_ROUNDED );
+                                correct3 = f->func.f_fma( -0.0f, 0.0f, s3[j], CORRECTLY_ROUNDED );
+                                correct4 = f->func.f_fma( 0.0f, -0.0f, s3[j], CORRECTLY_ROUNDED );
+                                correct5 = f->func.f_fma( -0.0f, -0.0f, s3[j], CORRECTLY_ROUNDED );
+
+                                // Per section 10 paragraph 6, accept any result if an input or output is an infinity or NaN or overflow
+                                if( !gInfNanSupport )
+                                {
+                                    if( fetestexcept(FE_OVERFLOW) )
+                                        continue;
+
+                                    // Note: no double rounding here.  Reference functions calculate in single precision.
+                                    if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
+                                       IsFloatInfinity(correct3) || IsFloatNaN(correct3) ||
+                                       IsFloatInfinity(correct4) || IsFloatNaN(correct4) ||
+                                       IsFloatInfinity(correct5) || IsFloatNaN(correct5) )
+                                        continue;
+                                }
+
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                err4 = Ulp_Error( test, correct4  );
+                                err5 = Ulp_Error( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)) &&
+                                                 (!(fabsf(err4) <= float_ulps)) && (!(fabsf(err5) <= float_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( 0.0f == test &&
+                                    ( 0.0f == f->func.f_fma(  0.0f,  0.0f, s3[j], FLUSHED )  ||
+                                      0.0f == f->func.f_fma( -0.0f,  0.0f, s3[j], FLUSHED )  ||
+                                      0.0f == f->func.f_fma(  0.0f, -0.0f, s3[j], FLUSHED )  ||
+                                      0.0f == f->func.f_fma( -0.0f, -0.0f, s3[j], FLUSHED )  )
+                                )
+                                {
+                                    fail = 0;
+                                    err = 0.0f;
+                                }
+
+                                if( IsFloatSubnormal( s3[j] )  )
+                                {
+                                    if( test == 0.0f )  // 0*0+0 is 0
+                                    {
+                                        fail = 0;
+                                        err = 0.0f;
+                                    }
+                                }
+                            }
+                            else if( IsFloatSubnormal( s3[j] ) )
+                            { // first and third inputs are subnormal, second is not
+                                double correct4, correct5;
+                                float err4, err5;
+
+                                if( skipNanInf )
+                                    feclearexcept( FE_OVERFLOW );
+
+                                correct2 = f->func.f_fma( 0.0f, s2[j], 0.0f, CORRECTLY_ROUNDED );
+                                correct3 = f->func.f_fma( -0.0f, s2[j], 0.0f, CORRECTLY_ROUNDED );
+                                correct4 = f->func.f_fma( 0.0f,  s2[j], -0.0f, CORRECTLY_ROUNDED );
+                                correct5 = f->func.f_fma( -0.0f, s2[j], -0.0f, CORRECTLY_ROUNDED );
+
+                                // Per section 10 paragraph 6, accept any result if an input or output is an infinity or NaN or overflow
+                                if( !gInfNanSupport )
+                                {
+                                    if( fetestexcept(FE_OVERFLOW) )
+                                        continue;
+
+                                    // Note: no double rounding here.  Reference functions calculate in single precision.
+                                    if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
+                                       IsFloatInfinity(correct3) || IsFloatNaN(correct3) ||
+                                       IsFloatInfinity(correct4) || IsFloatNaN(correct4) ||
+                                       IsFloatInfinity(correct5) || IsFloatNaN(correct5) )
+                                        continue;
+                                }
+
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                err4 = Ulp_Error( test, correct4  );
+                                err5 = Ulp_Error( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)) &&
+                                                 (!(fabsf(err4) <= float_ulps)) && (!(fabsf(err5) <= float_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( 0.0f == test &&
+                                    (   0.0f == f->func.f_fma( 0.0f, s2[j], 0.0f, FLUSHED )   ||
+                                        0.0f == f->func.f_fma(-0.0f, s2[j], 0.0f, FLUSHED )   ||
+                                        0.0f == f->func.f_fma( 0.0f, s2[j],-0.0f, FLUSHED )   ||
+                                        0.0f == f->func.f_fma(-0.0f, s2[j],-0.0f, FLUSHED )   )
+                                )
+                                {
+                                    fail = 0;
+                                    err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsFloatSubnormal( s2[j] ) )
+                        { // second input is subnormal (first is not): retry with it as +0 and -0
+                            double correct2, correct3;
+                            float err2, err3;
+
+                            if( skipNanInf )
+                                feclearexcept( FE_OVERFLOW );
+
+                            correct2 = f->func.f_fma( s[j], 0.0f, s3[j], CORRECTLY_ROUNDED );
+                            correct3 = f->func.f_fma( s[j], -0.0f, s3[j], CORRECTLY_ROUNDED );
+
+                            if( skipNanInf )
+                            {
+                                if( fetestexcept( FE_OVERFLOW ) )
+                                    continue;
+
+                                // Note: no double rounding here.  Reference functions calculate in single precision.
+                                if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
+                                   IsFloatInfinity(correct3) || IsFloatNaN(correct3)   )
+                                    continue;
+                            }
+
+                            err2 = Ulp_Error( test, correct2  );
+                            err3 = Ulp_Error( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( 0.0f == test &&
+                                (   0.0f == f->func.f_fma( s[j], 0.0f, s3[j], FLUSHED )  ||
+                                    0.0f == f->func.f_fma( s[j], -0.0f, s3[j], FLUSHED ) )
+                            )
+                            {
+                                fail = 0;
+                                err = 0.0f;
+                            }
+
+                            //try with second two args as zero
+                            if( IsFloatSubnormal( s3[j] ) )
+                            {
+                                double correct4, correct5;
+                                float err4, err5;
+
+                                if( skipNanInf )
+                                    feclearexcept( FE_OVERFLOW );
+
+                                correct2 = f->func.f_fma( s[j], 0.0f, 0.0f, CORRECTLY_ROUNDED );
+                                correct3 = f->func.f_fma( s[j], -0.0f, 0.0f, CORRECTLY_ROUNDED );
+                                correct4 = f->func.f_fma( s[j], 0.0f, -0.0f, CORRECTLY_ROUNDED );
+                                correct5 = f->func.f_fma( s[j], -0.0f, -0.0f, CORRECTLY_ROUNDED );
+
+                                // Per section 10 paragraph 6, accept any result if an input or output is an infinity or NaN or overflow
+                                if( !gInfNanSupport )
+                                {
+                                    if( fetestexcept(FE_OVERFLOW) )
+                                        continue;
+
+                                    // Note: no double rounding here.  Reference functions calculate in single precision.
+                                    if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
+                                       IsFloatInfinity(correct3) || IsFloatNaN(correct3) ||
+                                       IsFloatInfinity(correct4) || IsFloatNaN(correct4) ||
+                                       IsFloatInfinity(correct5) || IsFloatNaN(correct5) )
+                                        continue;
+                                }
+
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                err4 = Ulp_Error( test, correct4  );
+                                err5 = Ulp_Error( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)) &&
+                                                 (!(fabsf(err4) <= float_ulps)) && (!(fabsf(err5) <= float_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( 0.0f == test &&
+                                    (   0.0f == f->func.f_fma( s[j], 0.0f, 0.0f, FLUSHED )    ||
+                                        0.0f == f->func.f_fma( s[j],-0.0f, 0.0f, FLUSHED )    ||
+                                        0.0f == f->func.f_fma( s[j], 0.0f,-0.0f, FLUSHED )    ||
+                                        0.0f == f->func.f_fma( s[j],-0.0f,-0.0f, FLUSHED )    )
+                                )
+                                {
+                                    fail = 0;
+                                    err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsFloatSubnormal(s3[j]) )
+                        { // only the third input is subnormal: retry with it as +0 and -0
+                            double correct2, correct3;
+                            float err2, err3;
+
+                            if( skipNanInf )
+                                feclearexcept( FE_OVERFLOW );
+
+                            correct2 = f->func.f_fma( s[j], s2[j], 0.0f, CORRECTLY_ROUNDED );
+                            correct3 = f->func.f_fma( s[j], s2[j], -0.0f, CORRECTLY_ROUNDED );
+
+                            if( skipNanInf )
+                            {
+                                if( fetestexcept( FE_OVERFLOW ) )
+                                    continue;
+
+                                // Note: no double rounding here.  Reference functions calculate in single precision.
+                                if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
+                                   IsFloatInfinity(correct3) || IsFloatNaN(correct3)   )
+                                    continue;
+                            }
+
+                            err2 = Ulp_Error( test, correct2  );
+                            err3 = Ulp_Error( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( 0.0f == test &&
+                                (   0.0f == f->func.f_fma( s[j], s2[j], 0.0f, FLUSHED ) ||
+                                    0.0f == f->func.f_fma( s[j], s2[j],-0.0f, FLUSHED )  )
+                            )
+                            {
+                                fail = 0;
+                                err = 0.0f;
+                            }
+                        }
+                    }
+
+                    // Track the worst error seen and the inputs that produced it.
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                        maxErrorVal2 = s2[j];
+                        maxErrorVal3 = s3[j];
+                    }
+
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a, %a} ({0x%8.8x, 0x%8.8x, 0x%8.8x}): *%a vs. %a\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((cl_uint*)s)[j], ((cl_uint*)s2)[j], ((cl_uint*)s3)[j],  ((float*) gOut_Ref)[j], test );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               // i and step are 64-bit and bufferSize is size_t; use matching conversions.
+               vlog("base:%14llu step:%10llu bufferSize:%10zu \n", (unsigned long long) i, (unsigned long long) step,  bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        uint32_t *p3 = (uint32_t *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            p[j] = genrand_int32(d);
+            p2[j] = genrand_int32(d);
+            p3[j] = genrand_int32(d);
+        }
+        // As above: go through exit on failure so the kernels/programs are released.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report either the average or the best of the PERF_LOOP_COUNT runs.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 );
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+// A table of more difficult cases to get right.
+//
+// The table is laid out as two mirrored halves: the first half holds the
+// negative values and the second half holds the same magnitudes with a
+// positive sign. Keep the two halves in step when adding entries.
+static const double specialValuesDouble[] = {
+    -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+    -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
+
+    +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
+    +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
+};
+
+// Number of entries in specialValuesDouble.
+static const size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] );
+
+
+// Brute-force test of a three-operand double-precision function.
+//
+// Builds one kernel per vector size in [gMinVectorSizeIndex, gMaxVectorSizeIndex),
+// fills the three input buffers with special-case values (first pass only) and
+// random doubles, runs every kernel, and compares each device result with the
+// host reference f->dfunc.f_fff(). A mismatch is tolerated when its ulp error
+// is within f->double_ulps. When flush-to-zero applies (f->ftz or gForceFTZ)
+// a failing element is re-checked with each subnormal input replaced by +0
+// and -0, and with a subnormal reference result allowed to come back as zero.
+// When gMeasureTimes is set the kernels are additionally timed.
+//
+// Parameters:
+//   f - descriptor of the function under test (name, reference, ulp limit).
+//   d - random number generator state used to produce the inputs.
+//
+// Returns zero when every step succeeded; otherwise the failing OpenCL /
+// thread-pool error code, or -1 when a result exceeded the allowed error.
+int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int ftz = f->ftz || gForceFTZ;
+    double maxErrorVal = 0.0f;
+    double maxErrorVal2 = 0.0f;
+    double maxErrorVal3 = 0.0f;
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( double );
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+
+    Force64BitFPUPrecision();
+
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info ) ))
+    {
+        // kernels[] / programs[] are not known to be fully populated after a
+        // failed build, so return directly instead of running the release loop.
+        return error;
+    }
+    /*
+     for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+     if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+     return error;
+     */
+
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        double *p3 = (double *)gIn3;
+        j = 0;
+        if( i == 0 )
+        { // test edge cases: walk (x, y, z) through all triples of special values, x fastest
+            uint32_t x, y, z;  x = y = z = 0;
+            for( ; j < bufferSize / sizeof( double ); j++ )
+            {
+                p[j] = specialValuesDouble[x];
+                p2[j] = specialValuesDouble[y];
+                p3[j] = specialValuesDouble[z];
+                if( ++x >= specialValuesDoubleCount )
+                {
+                    x = 0;
+                    if( ++y >= specialValuesDoubleCount )
+                    {
+                        y = 0;
+                        if( ++z >= specialValuesDoubleCount )
+                            break;
+                    }
+                }
+            }
+            // Reaching the end of the buffer means the triples did not all fit.
+            if( j == bufferSize / sizeof( double ) )
+                vlog_error( "Test Error: not all special cases tested!\n" );
+        }
+
+        // Fill whatever is left of the buffers with random values
+        for( ; j < bufferSize / sizeof( double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+            p3[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+        // From here on the kernels and programs exist, so every failure path
+        // leaves through exit: to release them.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            goto exit;
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+
+        // Get that moving (a flush failure is only logged; the blocking reads below still run)
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        //Calculate the correctly rounded reference result
+        double *r = (double *)gOut_Ref;
+        double *s = (double *)gIn;
+        double *s2 = (double *)gIn2;
+        double *s3 = (double *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            r[j] = (double) f->dfunc.f_fff( s[j], s2[j], s3[j] );
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        if( gSkipCorrectnessTesting )
+            break;
+
+        //Verify data
+        uint64_t *t = (uint64_t *)gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint64_t *q = (uint64_t *)(gOut[k]);
+
+                // If we aren't getting the correctly rounded result
+                // (bitwise comparison of the device value with the reference)
+                if( t[j] != q[j] )
+                {
+                    double test = ((double*) q)[j];
+                    long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
+                    int fail = ! (fabsf(err) <= f->double_ulps);
+
+                    if( fail && ftz )
+                    {
+                        // retry per section 6.5.3.2
+                        if( IsDoubleSubnormal(correct) )
+                        { // a subnormal reference result may come back as zero
+                            fail = fail && ( test != 0.0f );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if( fail && IsDoubleSubnormal( s[j] ) )
+                        { // first input subnormal: retry with it replaced by +0 and -0
+                            long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] );
+                            long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            { // a subnormal retry result may also come back as zero
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+
+                            //try with first two args as zero
+                            if( IsDoubleSubnormal( s2[j] ) )
+                            { // first and second inputs subnormal: all four sign combinations
+                                correct2 = f->dfunc.f_fff( 0.0, 0.0, s3[j] );
+                                correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] );
+                                long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] );
+                                long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ||
+                                   IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+
+                                if( IsDoubleSubnormal( s3[j] )  )
+                                { // all three inputs subnormal: all eight sign combinations
+                                    correct2 = f->dfunc.f_fff( 0.0, 0.0, 0.0f );
+                                    correct3 = f->dfunc.f_fff( -0.0, 0.0, 0.0f );
+                                    correct4 = f->dfunc.f_fff( 0.0, -0.0, 0.0f );
+                                    correct5 = f->dfunc.f_fff( -0.0, -0.0, 0.0f );
+                                    long double correct6 = f->dfunc.f_fff( 0.0, 0.0, -0.0f );
+                                    long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f );
+                                    long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f );
+                                    long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f );
+                                    err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                    err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                    err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                    err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                    float err6 = Bruteforce_Ulp_Error_Double( test, correct6  );
+                                    float err7 = Bruteforce_Ulp_Error_Double( test, correct7  );
+                                    float err8 = Bruteforce_Ulp_Error_Double( test, correct8  );
+                                    float err9 = Bruteforce_Ulp_Error_Double( test, correct9  );
+                                    // Every one of err2..err9 must be out of tolerance for the
+                                    // element to remain a failure.
+                                    fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                     (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) &&
+                                                     (!(fabsf(err6) <= f->double_ulps)) && (!(fabsf(err7) <= f->double_ulps)) &&
+                                                     (!(fabsf(err8) <= f->double_ulps)) && (!(fabsf(err9) <= f->double_ulps)));
+                                    if( fabsf( err2 ) < fabsf(err ) )
+                                        err = err2;
+                                    if( fabsf( err3 ) < fabsf(err ) )
+                                        err = err3;
+                                    if( fabsf( err4 ) < fabsf(err ) )
+                                        err = err4;
+                                    if( fabsf( err5 ) < fabsf(err ) )
+                                        err = err5;
+                                    if( fabsf( err6 ) < fabsf(err ) )
+                                        err = err6;
+                                    if( fabsf( err7 ) < fabsf(err ) )
+                                        err = err7;
+                                    if( fabsf( err8 ) < fabsf(err ) )
+                                        err = err8;
+                                    if( fabsf( err9 ) < fabsf(err ) )
+                                        err = err9;
+
+                                    // retry per section 6.5.3.4
+                                    if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps )  ||
+                                       IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps )  ||
+                                       IsDoubleResultSubnormal( correct6, f->double_ulps ) || IsDoubleResultSubnormal( correct7, f->double_ulps )  ||
+                                       IsDoubleResultSubnormal( correct8, f->double_ulps ) || IsDoubleResultSubnormal( correct9, f->double_ulps )  )
+                                    {
+                                        fail = fail && ( test != 0.0f);
+                                        if( ! fail )
+                                            err = 0.0f;
+                                    }
+                                }
+                            }
+                            else if( IsDoubleSubnormal( s3[j] ) )
+                            { // first and third inputs subnormal, second left as is
+                                correct2 = f->dfunc.f_fff( 0.0, s2[j], 0.0 );
+                                correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 );
+                                long double correct4 = f->dfunc.f_fff( 0.0,  s2[j], -0.0 );
+                                long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps )  ||
+                                   IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsDoubleSubnormal( s2[j] ) )
+                        { // second input subnormal (first is not): retry with it as +0 and -0
+                            long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] );
+                            long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps )  || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+
+                            //try with second two args as zero
+                            if( IsDoubleSubnormal( s3[j] ) )
+                            {
+                                correct2 = f->dfunc.f_fff( s[j], 0.0, 0.0 );
+                                correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 );
+                                long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 );
+                                long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+
+                                // retry per section 6.5.3.4
+                                if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ||
+                                   IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsDoubleSubnormal(s3[j]) )
+                        { // only the third input subnormal: retry with it as +0 and -0
+                            long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 );
+                            long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+
+                    // Remember the worst error seen and the inputs that produced it
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                        maxErrorVal2 = s2[j];
+                        maxErrorVal3 = s3[j];
+                    }
+
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %sD%s: %f ulp error at {%.13la, %.13la, %.13la}: *%.13la vs. %.13la\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((double*) gOut_Ref)[j], test );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+
+        // Periodic progress output
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               // i and step are uint64_t; cast so the arguments match the conversions
+               vlog("base:%14llu step:%10llu  bufferSize:%10zu \n", (unsigned long long) i, (unsigned long long) step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        double *p3 = (double *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+            p3[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+        // As above, failures leave through exit: so the kernels are released.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report the mean instead of the fastest run when requested
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        // j already equals gMaxVectorSizeIndex here, so this loop prints nothing.
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 );
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+

diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp
deleted file mode 100644
index 606fdc5..0000000
--- a/test_conformance/math_brute_force/ternary_double.cpp
+++ /dev/null

@@ -1,740 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-#define CORRECTLY_ROUNDED 0
-#define FLUSHED 1
-
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2,  __global double",
-                        sizeNames[vectorSize],
-                        "* in3 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], in3[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2, "
-        "__global double* in3)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       double3 d2 = vload3( 0, in3 + 3 * i );\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, d2 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       double3 d2;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               d2 = (double3)( in3[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, d2 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-}
-
-// A table of more difficult cases to get right
-static const double specialValues[] = {
-    -NAN,
-    -INFINITY,
-    -DBL_MAX,
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
-    -3.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
-    -2.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
-    -2.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
-    -1.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    -1.0,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
-    -DBL_MIN,
-    MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
-    -0.0,
-
-    +NAN,
-    +INFINITY,
-    +DBL_MAX,
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
-    +3.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
-    +2.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
-    +2.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
-    +1.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    +1.0,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
-    +DBL_MIN,
-    MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
-    +0.0,
-};
-
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-
-int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
-                                         bool relaxedMode)
-{
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int ftz = f->ftz || gForceFTZ;
-    double maxErrorVal = 0.0f;
-    double maxErrorVal2 = 0.0f;
-    double maxErrorVal3 = 0.0f;
-    uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE);
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    Force64BitFPUPrecision();
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        double *p = (double *)gIn;
-        double *p2 = (double *)gIn2;
-        double *p3 = (double *)gIn3;
-        size_t idx = 0;
-
-        if (i == 0)
-        { // test edge cases
-            uint32_t x, y, z;
-            x = y = z = 0;
-            for (; idx < BUFFER_SIZE / sizeof(double); idx++)
-            {
-                p[idx] = specialValues[x];
-                p2[idx] = specialValues[y];
-                p3[idx] = specialValues[z];
-                if (++x >= specialValuesCount)
-                {
-                    x = 0;
-                    if (++y >= specialValuesCount)
-                    {
-                        y = 0;
-                        if (++z >= specialValuesCount) break;
-                    }
-                }
-            }
-            if (idx == BUFFER_SIZE / sizeof(double))
-                vlog_error("Test Error: not all special cases tested!\n");
-        }
-
-        for (; idx < BUFFER_SIZE / sizeof(double); idx++)
-        {
-            p[idx] = DoubleFromUInt32(genrand_int32(d));
-            p2[idx] = DoubleFromUInt32(genrand_int32(d));
-            p3[idx] = DoubleFromUInt32(genrand_int32(d));
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
-            return error;
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn3, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
-                                        &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        double *r = (double *)gOut_Ref;
-        double *s = (double *)gIn;
-        double *s2 = (double *)gIn2;
-        double *s3 = (double *)gIn3;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-            r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data
-        uint64_t *t = (uint64_t *)gOut_Ref;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint64_t *q = (uint64_t *)(gOut[k]);
-
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    double test = ((double *)q)[j];
-                    long double correct = f->dfunc.f_fff(s[j], s2[j], s3[j]);
-                    float err = Bruteforce_Ulp_Error_Double(test, correct);
-                    int fail = !(fabsf(err) <= f->double_ulps);
-
-                    if (fail && ftz)
-                    {
-                        // retry per section 6.5.3.2
-                        if (IsDoubleSubnormal(correct))
-                        { // look at me,
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-
-                        // retry per section 6.5.3.3
-                        if (fail && IsDoubleSubnormal(s[j]))
-                        { // look at me,
-                            long double correct2 =
-                                f->dfunc.f_fff(0.0, s2[j], s3[j]);
-                            long double correct3 =
-                                f->dfunc.f_fff(-0.0, s2[j], s3[j]);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct2);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= f->double_ulps))
-                                    && (!(fabsf(err3) <= f->double_ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-
-                            // retry per section 6.5.3.4
-                            if (IsDoubleResultSubnormal(correct2,
-                                                        f->double_ulps)
-                                || IsDoubleResultSubnormal(correct3,
-                                                           f->double_ulps))
-                            { // look at me now,
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-
-                            // try with first two args as zero
-                            if (IsDoubleSubnormal(s2[j]))
-                            { // its fun to have fun,
-                                correct2 = f->dfunc.f_fff(0.0, 0.0, s3[j]);
-                                correct3 = f->dfunc.f_fff(-0.0, 0.0, s3[j]);
-                                long double correct4 =
-                                    f->dfunc.f_fff(0.0, -0.0, s3[j]);
-                                long double correct5 =
-                                    f->dfunc.f_fff(-0.0, -0.0, s3[j]);
-                                err2 =
-                                    Bruteforce_Ulp_Error_Double(test, correct2);
-                                err3 =
-                                    Bruteforce_Ulp_Error_Double(test, correct3);
-                                float err4 =
-                                    Bruteforce_Ulp_Error_Double(test, correct4);
-                                float err5 =
-                                    Bruteforce_Ulp_Error_Double(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= f->double_ulps))
-                                        && (!(fabsf(err3) <= f->double_ulps))
-                                        && (!(fabsf(err4) <= f->double_ulps))
-                                        && (!(fabsf(err5) <= f->double_ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-
-                                // retry per section 6.5.3.4
-                                if (IsDoubleResultSubnormal(correct2,
-                                                            f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct3,
-                                                               f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct4,
-                                                               f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct5,
-                                                               f->double_ulps))
-                                {
-                                    fail = fail && (test != 0.0f);
-                                    if (!fail) err = 0.0f;
-                                }
-
-                                if (IsDoubleSubnormal(s3[j]))
-                                { // but you have to know how!
-                                    correct2 = f->dfunc.f_fff(0.0, 0.0, 0.0f);
-                                    correct3 = f->dfunc.f_fff(-0.0, 0.0, 0.0f);
-                                    correct4 = f->dfunc.f_fff(0.0, -0.0, 0.0f);
-                                    correct5 = f->dfunc.f_fff(-0.0, -0.0, 0.0f);
-                                    long double correct6 =
-                                        f->dfunc.f_fff(0.0, 0.0, -0.0f);
-                                    long double correct7 =
-                                        f->dfunc.f_fff(-0.0, 0.0, -0.0f);
-                                    long double correct8 =
-                                        f->dfunc.f_fff(0.0, -0.0, -0.0f);
-                                    long double correct9 =
-                                        f->dfunc.f_fff(-0.0, -0.0, -0.0f);
-                                    err2 = Bruteforce_Ulp_Error_Double(
-                                        test, correct2);
-                                    err3 = Bruteforce_Ulp_Error_Double(
-                                        test, correct3);
-                                    err4 = Bruteforce_Ulp_Error_Double(
-                                        test, correct4);
-                                    err5 = Bruteforce_Ulp_Error_Double(
-                                        test, correct5);
-                                    float err6 = Bruteforce_Ulp_Error_Double(
-                                        test, correct6);
-                                    float err7 = Bruteforce_Ulp_Error_Double(
-                                        test, correct7);
-                                    float err8 = Bruteforce_Ulp_Error_Double(
-                                        test, correct8);
-                                    float err9 = Bruteforce_Ulp_Error_Double(
-                                        test, correct9);
-                                    fail = fail
-                                        && ((!(fabsf(err2) <= f->double_ulps))
-                                            && (!(fabsf(err3)
-                                                  <= f->double_ulps))
-                                            && (!(fabsf(err4)
-                                                  <= f->double_ulps))
-                                            && (!(fabsf(err5)
-                                                  <= f->double_ulps))
-                                            && (!(fabsf(err5)
-                                                  <= f->double_ulps))
-                                            && (!(fabsf(err6)
-                                                  <= f->double_ulps))
-                                            && (!(fabsf(err7)
-                                                  <= f->double_ulps))
-                                            && (!(fabsf(err8)
-                                                  <= f->double_ulps)));
-                                    if (fabsf(err2) < fabsf(err)) err = err2;
-                                    if (fabsf(err3) < fabsf(err)) err = err3;
-                                    if (fabsf(err4) < fabsf(err)) err = err4;
-                                    if (fabsf(err5) < fabsf(err)) err = err5;
-                                    if (fabsf(err6) < fabsf(err)) err = err6;
-                                    if (fabsf(err7) < fabsf(err)) err = err7;
-                                    if (fabsf(err8) < fabsf(err)) err = err8;
-                                    if (fabsf(err9) < fabsf(err)) err = err9;
-
-                                    // retry per section 6.5.3.4
-                                    if (IsDoubleResultSubnormal(correct2,
-                                                                f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct3, f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct4, f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct5, f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct6, f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct7, f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct8, f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct9, f->double_ulps))
-                                    {
-                                        fail = fail && (test != 0.0f);
-                                        if (!fail) err = 0.0f;
-                                    }
-                                }
-                            }
-                            else if (IsDoubleSubnormal(s3[j]))
-                            {
-                                correct2 = f->dfunc.f_fff(0.0, s2[j], 0.0);
-                                correct3 = f->dfunc.f_fff(-0.0, s2[j], 0.0);
-                                long double correct4 =
-                                    f->dfunc.f_fff(0.0, s2[j], -0.0);
-                                long double correct5 =
-                                    f->dfunc.f_fff(-0.0, s2[j], -0.0);
-                                err2 =
-                                    Bruteforce_Ulp_Error_Double(test, correct2);
-                                err3 =
-                                    Bruteforce_Ulp_Error_Double(test, correct3);
-                                float err4 =
-                                    Bruteforce_Ulp_Error_Double(test, correct4);
-                                float err5 =
-                                    Bruteforce_Ulp_Error_Double(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= f->double_ulps))
-                                        && (!(fabsf(err3) <= f->double_ulps))
-                                        && (!(fabsf(err4) <= f->double_ulps))
-                                        && (!(fabsf(err5) <= f->double_ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-
-                                // retry per section 6.5.3.4
-                                if (IsDoubleResultSubnormal(correct2,
-                                                            f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct3,
-                                                               f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct4,
-                                                               f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct5,
-                                                               f->double_ulps))
-                                {
-                                    fail = fail && (test != 0.0f);
-                                    if (!fail) err = 0.0f;
-                                }
-                            }
-                        }
-                        else if (fail && IsDoubleSubnormal(s2[j]))
-                        {
-                            long double correct2 =
-                                f->dfunc.f_fff(s[j], 0.0, s3[j]);
-                            long double correct3 =
-                                f->dfunc.f_fff(s[j], -0.0, s3[j]);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct2);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= f->double_ulps))
-                                    && (!(fabsf(err3) <= f->double_ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-
-                            // retry per section 6.5.3.4
-                            if (IsDoubleResultSubnormal(correct2,
-                                                        f->double_ulps)
-                                || IsDoubleResultSubnormal(correct3,
-                                                           f->double_ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-
-                            // try with second two args as zero
-                            if (IsDoubleSubnormal(s3[j]))
-                            {
-                                correct2 = f->dfunc.f_fff(s[j], 0.0, 0.0);
-                                correct3 = f->dfunc.f_fff(s[j], -0.0, 0.0);
-                                long double correct4 =
-                                    f->dfunc.f_fff(s[j], 0.0, -0.0);
-                                long double correct5 =
-                                    f->dfunc.f_fff(s[j], -0.0, -0.0);
-                                err2 =
-                                    Bruteforce_Ulp_Error_Double(test, correct2);
-                                err3 =
-                                    Bruteforce_Ulp_Error_Double(test, correct3);
-                                float err4 =
-                                    Bruteforce_Ulp_Error_Double(test, correct4);
-                                float err5 =
-                                    Bruteforce_Ulp_Error_Double(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= f->double_ulps))
-                                        && (!(fabsf(err3) <= f->double_ulps))
-                                        && (!(fabsf(err4) <= f->double_ulps))
-                                        && (!(fabsf(err5) <= f->double_ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-
-                                // retry per section 6.5.3.4
-                                if (IsDoubleResultSubnormal(correct2,
-                                                            f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct3,
-                                                               f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct4,
-                                                               f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct5,
-                                                               f->double_ulps))
-                                {
-                                    fail = fail && (test != 0.0f);
-                                    if (!fail) err = 0.0f;
-                                }
-                            }
-                        }
-                        else if (fail && IsDoubleSubnormal(s3[j]))
-                        {
-                            long double correct2 =
-                                f->dfunc.f_fff(s[j], s2[j], 0.0);
-                            long double correct3 =
-                                f->dfunc.f_fff(s[j], s2[j], -0.0);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct2);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= f->double_ulps))
-                                    && (!(fabsf(err3) <= f->double_ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-
-                            // retry per section 6.5.3.4
-                            if (IsDoubleResultSubnormal(correct2,
-                                                        f->double_ulps)
-                                || IsDoubleResultSubnormal(correct3,
-                                                           f->double_ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-
-                    if (fabsf(err) > maxError)
-                    {
-                        maxError = fabsf(err);
-                        maxErrorVal = s[j];
-                        maxErrorVal2 = s2[j];
-                        maxErrorVal3 = s3[j];
-                    }
-
-                    if (fail)
-                    {
-                        vlog_error("\nERROR: %sD%s: %f ulp error at {%.13la, "
-                                   "%.13la, %.13la}: *%.13la vs. %.13la\n",
-                                   f->name, sizeNames[k], err, s[j], s2[j],
-                                   s3[j], ((double *)gOut_Ref)[j], test);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
-             maxErrorVal3);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp
deleted file mode 100644
index e52c0a0..0000000
--- a/test_conformance/math_brute_force/ternary_float.cpp
+++ /dev/null

@@ -1,875 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-#define CORRECTLY_ROUNDED 0
-#define FLUSHED 1
-
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2,  __global float",
-                        sizeNames[vectorSize],
-                        "* in3 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], in3[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2, "
-        "__global float* in3)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       float3 f2 = vload3( 0, in3 + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, f2 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       float3 f2;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               f2 = (float3)( in3[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, f2 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-}
-
-// A table of more difficult cases to get right
-static const float specialValues[] = {
-    -NAN,
-    -INFINITY,
-    -FLT_MAX,
-    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
-    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
-    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
-    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
-    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
-    -3.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
-    -2.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
-    -2.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
-    -1.75f,
-    -1.5f,
-    -1.25f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
-    MAKE_HEX_FLOAT(-0x1.003p0f, -0x1003000L, -24),
-    -MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24),
-    -1.0f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
-    -FLT_MIN,
-    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
-    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
-    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
-    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
-    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
-    -0.0f,
-
-    +NAN,
-    +INFINITY,
-    +FLT_MAX,
-    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
-    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
-    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
-    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
-    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
-    +3.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
-    2.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
-    +2.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
-    1.75f,
-    1.5f,
-    1.25f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
-    MAKE_HEX_FLOAT(0x1.003p0f, 0x1003000L, -24),
-    +MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24),
-    +1.0f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
-    +FLT_MIN,
-    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
-    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
-    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
-    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
-    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
-    +0.0f,
-};
-
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-
-int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    float maxErrorVal = 0.0f;
-    float maxErrorVal2 = 0.0f;
-    float maxErrorVal3 = 0.0f;
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-
-    cl_uchar overflow[BUFFER_SIZE / sizeof(float)];
-
-    float float_ulps;
-    if (gIsEmbedded)
-        float_ulps = f->float_embedded_ulps;
-    else
-        float_ulps = f->float_ulps;
-
-    int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport;
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        cl_uint *p = (cl_uint *)gIn;
-        cl_uint *p2 = (cl_uint *)gIn2;
-        cl_uint *p3 = (cl_uint *)gIn3;
-        size_t idx = 0;
-
-        if (i == 0)
-        { // test edge cases
-            float *fp = (float *)gIn;
-            float *fp2 = (float *)gIn2;
-            float *fp3 = (float *)gIn3;
-            uint32_t x, y, z;
-            x = y = z = 0;
-            for (; idx < BUFFER_SIZE / sizeof(float); idx++)
-            {
-                fp[idx] = specialValues[x];
-                fp2[idx] = specialValues[y];
-                fp3[idx] = specialValues[z];
-
-                if (++x >= specialValuesCount)
-                {
-                    x = 0;
-                    if (++y >= specialValuesCount)
-                    {
-                        y = 0;
-                        if (++z >= specialValuesCount) break;
-                    }
-                }
-            }
-            if (idx == BUFFER_SIZE / sizeof(float))
-                vlog_error("Test Error: not all special cases tested!\n");
-        }
-
-        for (; idx < BUFFER_SIZE / sizeof(float); idx++)
-        {
-            p[idx] = genrand_int32(d);
-            p2[idx] = genrand_int32(d);
-            p3[idx] = genrand_int32(d);
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
-            return error;
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn3, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
-                                        &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        float *r = (float *)gOut_Ref;
-        float *s = (float *)gIn;
-        float *s2 = (float *)gIn2;
-        float *s3 = (float *)gIn3;
-        if (skipNanInf)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            {
-                feclearexcept(FE_OVERFLOW);
-                r[j] =
-                    (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
-                overflow[j] =
-                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
-            }
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                r[j] =
-                    (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
-        }
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)(gOut[k]);
-
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    float err;
-                    int fail;
-                    float test = ((float *)q)[j];
-                    float correct =
-                        f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
-
-                    // Per section 10 paragraph 6, accept any result if an input
-                    // or output is a infinity or NaN or overflow
-                    if (skipNanInf)
-                    {
-                        if (overflow[j] || IsFloatInfinity(correct)
-                            || IsFloatNaN(correct) || IsFloatInfinity(s[j])
-                            || IsFloatNaN(s[j]) || IsFloatInfinity(s2[j])
-                            || IsFloatNaN(s2[j]) || IsFloatInfinity(s3[j])
-                            || IsFloatNaN(s3[j]))
-                            continue;
-                    }
-
-
-                    err = Ulp_Error(test, correct);
-                    fail = !(fabsf(err) <= float_ulps);
-
-                    if (fail && ftz)
-                    {
-                        float correct2, err2;
-
-                        // retry per section 6.5.3.2  with flushing on
-                        if (0.0f == test
-                            && 0.0f
-                                == f->func.f_fma(s[j], s2[j], s3[j], FLUSHED))
-                        {
-                            fail = 0;
-                            err = 0.0f;
-                        }
-
-                        // retry per section 6.5.3.3
-                        if (fail && IsFloatSubnormal(s[j]))
-                        { // look at me,
-                            float err3, correct3;
-
-                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
-
-                            correct2 = f->func.f_fma(0.0f, s2[j], s3[j],
-                                                     CORRECTLY_ROUNDED);
-                            correct3 = f->func.f_fma(-0.0f, s2[j], s3[j],
-                                                     CORRECTLY_ROUNDED);
-
-                            if (skipNanInf)
-                            {
-                                if (fetestexcept(FE_OVERFLOW)) continue;
-
-                                // Note: no double rounding here.  Reference
-                                // functions calculate in single precision.
-                                if (IsFloatInfinity(correct2)
-                                    || IsFloatNaN(correct2)
-                                    || IsFloatInfinity(correct3)
-                                    || IsFloatNaN(correct3))
-                                    continue;
-                            }
-
-                            err2 = Ulp_Error(test, correct2);
-                            err3 = Ulp_Error(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= float_ulps))
-                                    && (!(fabsf(err3) <= float_ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-
-                            // retry per section 6.5.3.4
-                            if (0.0f == test
-                                && (0.0f
-                                        == f->func.f_fma(0.0f, s2[j], s3[j],
-                                                         FLUSHED)
-                                    || 0.0f
-                                        == f->func.f_fma(-0.0f, s2[j], s3[j],
-                                                         FLUSHED)))
-                            {
-                                fail = 0;
-                                err = 0.0f;
-                            }
-
-                            // try with first two args as zero
-                            if (IsFloatSubnormal(s2[j]))
-                            { // its fun to have fun,
-                                double correct4, correct5;
-                                float err4, err5;
-
-                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
-
-                                correct2 = f->func.f_fma(0.0f, 0.0f, s3[j],
-                                                         CORRECTLY_ROUNDED);
-                                correct3 = f->func.f_fma(-0.0f, 0.0f, s3[j],
-                                                         CORRECTLY_ROUNDED);
-                                correct4 = f->func.f_fma(0.0f, -0.0f, s3[j],
-                                                         CORRECTLY_ROUNDED);
-                                correct5 = f->func.f_fma(-0.0f, -0.0f, s3[j],
-                                                         CORRECTLY_ROUNDED);
-
-                                // Per section 10 paragraph 6, accept any result
-                                // if an input or output is a infinity or NaN or
-                                // overflow
-                                if (!gInfNanSupport)
-                                {
-                                    if (fetestexcept(FE_OVERFLOW)) continue;
-
-                                    // Note: no double rounding here.  Reference
-                                    // functions calculate in single precision.
-                                    if (IsFloatInfinity(correct2)
-                                        || IsFloatNaN(correct2)
-                                        || IsFloatInfinity(correct3)
-                                        || IsFloatNaN(correct3)
-                                        || IsFloatInfinity(correct4)
-                                        || IsFloatNaN(correct4)
-                                        || IsFloatInfinity(correct5)
-                                        || IsFloatNaN(correct5))
-                                        continue;
-                                }
-
-                                err2 = Ulp_Error(test, correct2);
-                                err3 = Ulp_Error(test, correct3);
-                                err4 = Ulp_Error(test, correct4);
-                                err5 = Ulp_Error(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= float_ulps))
-                                        && (!(fabsf(err3) <= float_ulps))
-                                        && (!(fabsf(err4) <= float_ulps))
-                                        && (!(fabsf(err5) <= float_ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-
-                                // retry per section 6.5.3.4
-                                if (0.0f == test
-                                    && (0.0f
-                                            == f->func.f_fma(0.0f, 0.0f, s3[j],
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(-0.0f, 0.0f, s3[j],
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(0.0f, -0.0f, s3[j],
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(-0.0f, -0.0f,
-                                                             s3[j], FLUSHED)))
-                                {
-                                    fail = 0;
-                                    err = 0.0f;
-                                }
-
-                                if (IsFloatSubnormal(s3[j]))
-                                {
-                                    if (test == 0.0f) // 0*0+0 is 0
-                                    {
-                                        fail = 0;
-                                        err = 0.0f;
-                                    }
-                                }
-                            }
-                            else if (IsFloatSubnormal(s3[j]))
-                            {
-                                double correct4, correct5;
-                                float err4, err5;
-
-                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
-
-                                correct2 = f->func.f_fma(0.0f, s2[j], 0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                correct3 = f->func.f_fma(-0.0f, s2[j], 0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                correct4 = f->func.f_fma(0.0f, s2[j], -0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                correct5 = f->func.f_fma(-0.0f, s2[j], -0.0f,
-                                                         CORRECTLY_ROUNDED);
-
-                                // Per section 10 paragraph 6, accept any result
-                                // if an input or output is a infinity or NaN or
-                                // overflow
-                                if (!gInfNanSupport)
-                                {
-                                    if (fetestexcept(FE_OVERFLOW)) continue;
-
-                                    // Note: no double rounding here.  Reference
-                                    // functions calculate in single precision.
-                                    if (IsFloatInfinity(correct2)
-                                        || IsFloatNaN(correct2)
-                                        || IsFloatInfinity(correct3)
-                                        || IsFloatNaN(correct3)
-                                        || IsFloatInfinity(correct4)
-                                        || IsFloatNaN(correct4)
-                                        || IsFloatInfinity(correct5)
-                                        || IsFloatNaN(correct5))
-                                        continue;
-                                }
-
-                                err2 = Ulp_Error(test, correct2);
-                                err3 = Ulp_Error(test, correct3);
-                                err4 = Ulp_Error(test, correct4);
-                                err5 = Ulp_Error(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= float_ulps))
-                                        && (!(fabsf(err3) <= float_ulps))
-                                        && (!(fabsf(err4) <= float_ulps))
-                                        && (!(fabsf(err5) <= float_ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-
-                                // retry per section 6.5.3.4
-                                if (0.0f == test
-                                    && (0.0f
-                                            == f->func.f_fma(0.0f, s2[j], 0.0f,
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(-0.0f, s2[j], 0.0f,
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(0.0f, s2[j], -0.0f,
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(-0.0f, s2[j],
-                                                             -0.0f, FLUSHED)))
-                                {
-                                    fail = 0;
-                                    err = 0.0f;
-                                }
-                            }
-                        }
-                        else if (fail && IsFloatSubnormal(s2[j]))
-                        {
-                            double correct2, correct3;
-                            float err2, err3;
-
-                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
-
-                            correct2 = f->func.f_fma(s[j], 0.0f, s3[j],
-                                                     CORRECTLY_ROUNDED);
-                            correct3 = f->func.f_fma(s[j], -0.0f, s3[j],
-                                                     CORRECTLY_ROUNDED);
-
-                            if (skipNanInf)
-                            {
-                                if (fetestexcept(FE_OVERFLOW)) continue;
-
-                                // Note: no double rounding here.  Reference
-                                // functions calculate in single precision.
-                                if (IsFloatInfinity(correct2)
-                                    || IsFloatNaN(correct2)
-                                    || IsFloatInfinity(correct3)
-                                    || IsFloatNaN(correct3))
-                                    continue;
-                            }
-
-                            err2 = Ulp_Error(test, correct2);
-                            err3 = Ulp_Error(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= float_ulps))
-                                    && (!(fabsf(err3) <= float_ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-
-                            // retry per section 6.5.3.4
-                            if (0.0f == test
-                                && (0.0f
-                                        == f->func.f_fma(s[j], 0.0f, s3[j],
-                                                         FLUSHED)
-                                    || 0.0f
-                                        == f->func.f_fma(s[j], -0.0f, s3[j],
-                                                         FLUSHED)))
-                            {
-                                fail = 0;
-                                err = 0.0f;
-                            }
-
-                            // try with second two args as zero
-                            if (IsFloatSubnormal(s3[j]))
-                            {
-                                double correct4, correct5;
-                                float err4, err5;
-
-                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
-
-                                correct2 = f->func.f_fma(s[j], 0.0f, 0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                correct3 = f->func.f_fma(s[j], -0.0f, 0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                correct4 = f->func.f_fma(s[j], 0.0f, -0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                correct5 = f->func.f_fma(s[j], -0.0f, -0.0f,
-                                                         CORRECTLY_ROUNDED);
-
-                                // Per section 10 paragraph 6, accept any result
-                                // if an input or output is a infinity or NaN or
-                                // overflow
-                                if (!gInfNanSupport)
-                                {
-                                    if (fetestexcept(FE_OVERFLOW)) continue;
-
-                                    // Note: no double rounding here.  Reference
-                                    // functions calculate in single precision.
-                                    if (IsFloatInfinity(correct2)
-                                        || IsFloatNaN(correct2)
-                                        || IsFloatInfinity(correct3)
-                                        || IsFloatNaN(correct3)
-                                        || IsFloatInfinity(correct4)
-                                        || IsFloatNaN(correct4)
-                                        || IsFloatInfinity(correct5)
-                                        || IsFloatNaN(correct5))
-                                        continue;
-                                }
-
-                                err2 = Ulp_Error(test, correct2);
-                                err3 = Ulp_Error(test, correct3);
-                                err4 = Ulp_Error(test, correct4);
-                                err5 = Ulp_Error(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= float_ulps))
-                                        && (!(fabsf(err3) <= float_ulps))
-                                        && (!(fabsf(err4) <= float_ulps))
-                                        && (!(fabsf(err5) <= float_ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-
-                                // retry per section 6.5.3.4
-                                if (0.0f == test
-                                    && (0.0f
-                                            == f->func.f_fma(s[j], 0.0f, 0.0f,
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(s[j], -0.0f, 0.0f,
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(s[j], 0.0f, -0.0f,
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(s[j], -0.0f, -0.0f,
-                                                             FLUSHED)))
-                                {
-                                    fail = 0;
-                                    err = 0.0f;
-                                }
-                            }
-                        }
-                        else if (fail && IsFloatSubnormal(s3[j]))
-                        {
-                            double correct2, correct3;
-                            float err2, err3;
-
-                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
-
-                            correct2 = f->func.f_fma(s[j], s2[j], 0.0f,
-                                                     CORRECTLY_ROUNDED);
-                            correct3 = f->func.f_fma(s[j], s2[j], -0.0f,
-                                                     CORRECTLY_ROUNDED);
-
-                            if (skipNanInf)
-                            {
-                                if (fetestexcept(FE_OVERFLOW)) continue;
-
-                                // Note: no double rounding here.  Reference
-                                // functions calculate in single precision.
-                                if (IsFloatInfinity(correct2)
-                                    || IsFloatNaN(correct2)
-                                    || IsFloatInfinity(correct3)
-                                    || IsFloatNaN(correct3))
-                                    continue;
-                            }
-
-                            err2 = Ulp_Error(test, correct2);
-                            err3 = Ulp_Error(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= float_ulps))
-                                    && (!(fabsf(err3) <= float_ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-
-                            // retry per section 6.5.3.4
-                            if (0.0f == test
-                                && (0.0f
-                                        == f->func.f_fma(s[j], s2[j], 0.0f,
-                                                         FLUSHED)
-                                    || 0.0f
-                                        == f->func.f_fma(s[j], s2[j], -0.0f,
-                                                         FLUSHED)))
-                            {
-                                fail = 0;
-                                err = 0.0f;
-                            }
-                        }
-                    }
-
-                    if (fabsf(err) > maxError)
-                    {
-                        maxError = fabsf(err);
-                        maxErrorVal = s[j];
-                        maxErrorVal2 = s2[j];
-                        maxErrorVal3 = s3[j];
-                    }
-
-                    if (fail)
-                    {
-                        vlog_error(
-                            "\nERROR: %s%s: %f ulp error at {%a, %a, %a} "
-                            "({0x%8.8x, 0x%8.8x, 0x%8.8x}): *%a vs. %a\n",
-                            f->name, sizeNames[k], err, s[j], s2[j], s3[j],
-                            ((cl_uint *)s)[j], ((cl_uint *)s2)[j],
-                            ((cl_uint *)s3)[j], ((float *)gOut_Ref)[j], test);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10u bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
-             maxErrorVal3);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/test_functions.h b/test_conformance/math_brute_force/test_functions.h
deleted file mode 100644
index 78aef9c..0000000
--- a/test_conformance/math_brute_force/test_functions.h
+++ /dev/null

@@ -1,118 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TEST_FUNCTIONS_H
-#define TEST_FUNCTIONS_H
-
-#include "function_list.h"
-
-// float foo(float)
-int TestFunc_Float_Float(const Func *f, MTdata, bool relaxedMode);
-
-// double foo(double)
-int TestFunc_Double_Double(const Func *f, MTdata, bool relaxedMode);
-
-// int foo(float)
-int TestFunc_Int_Float(const Func *f, MTdata, bool relaxedMode);
-
-// int foo(double)
-int TestFunc_Int_Double(const Func *f, MTdata, bool relaxedMode);
-
-// float foo(uint)
-int TestFunc_Float_UInt(const Func *f, MTdata, bool relaxedMode);
-
-// double foo(ulong)
-int TestFunc_Double_ULong(const Func *f, MTdata, bool relaxedMode);
-
-// Returns {0, 1} for scalar and {0, -1} for vector.
-// int foo(float)
-int TestMacro_Int_Float(const Func *f, MTdata, bool relaxedMode);
-
-// Returns {0, 1} for scalar and {0, -1} for vector.
-// int foo(double)
-int TestMacro_Int_Double(const Func *f, MTdata, bool relaxedMode);
-
-// float foo(float, float)
-int TestFunc_Float_Float_Float(const Func *f, MTdata, bool relaxedMode);
-
-// double foo(double, double)
-int TestFunc_Double_Double_Double(const Func *f, MTdata, bool relaxedMode);
-
-// Special handling for nextafter.
-// float foo(float, float)
-int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata,
-                                         bool relaxedMode);
-
-// Special handling for nextafter.
-// double foo(double, double)
-int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata,
-                                            bool relaxedMode);
-
-// float op float
-int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata,
-                                        bool relaxedMode);
-
-// double op double
-int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata,
-                                           bool relaxedMode);
-
-// float foo(float, int)
-int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode);
-
-// double foo(double, int)
-int TestFunc_Double_Double_Int(const Func *f, MTdata, bool relaxedMode);
-
-// Returns {0, 1} for scalar and {0, -1} for vector.
-// int foo(float, float)
-int TestMacro_Int_Float_Float(const Func *f, MTdata, bool relaxedMode);
-
-// Returns {0, 1} for scalar and {0, -1} for vector.
-// int foo(double, double)
-int TestMacro_Int_Double_Double(const Func *f, MTdata, bool relaxedMode);
-
-// float foo(float, float, float)
-int TestFunc_Float_Float_Float_Float(const Func *f, MTdata, bool relaxedMode);
-
-// double foo(double, double, double)
-int TestFunc_Double_Double_Double_Double(const Func *f, MTdata,
-                                         bool relaxedMode);
-
-// float foo(float, float*)
-int TestFunc_Float2_Float(const Func *f, MTdata, bool relaxedMode);
-
-// double foo(double, double*)
-int TestFunc_Double2_Double(const Func *f, MTdata, bool relaxedMode);
-
-// float foo(float, int*)
-int TestFunc_FloatI_Float(const Func *f, MTdata, bool relaxedMode);
-
-// double foo(double, int*)
-int TestFunc_DoubleI_Double(const Func *f, MTdata, bool relaxedMode);
-
-// float foo(float, float, int*)
-int TestFunc_FloatI_Float_Float(const Func *f, MTdata, bool relaxedMode);
-
-// double foo(double, double, int*)
-int TestFunc_DoubleI_Double_Double(const Func *f, MTdata, bool relaxedMode);
-
-// Special handling for mad.
-// float mad(float, float, float)
-int TestFunc_mad_Float(const Func *f, MTdata, bool relaxedMode);
-
-// Special handling for mad.
-// double mad(double, double, double)
-int TestFunc_mad_Double(const Func *f, MTdata, bool relaxedMode);
-
-#endif

diff --git a/test_conformance/math_brute_force/unary.cpp b/test_conformance/math_brute_force/unary.cpp
new file mode 100644
index 0000000..a979d07
--- /dev/null
+++ b/test_conformance/math_brute_force/unary.cpp

@@ -0,0 +1,1209 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <string.h>
+#include "FunctionList.h"
+
+#if defined( __APPLE__ )
+    #include <sys/time.h>
+#endif
+
+int TestFunc_Float_Float(const Func *f, MTdata);
+int TestFunc_Double_Double(const Func *f, MTdata);
+// Table entry labelled "unary" that bundles the float and double test drivers declared above.
+extern const vtbl _unary = { "unary", TestFunc_Float_Float,
+                             TestFunc_Double_Double };
+
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+// Assembles OpenCL source for a kernel that applies `name` to float inputs and forwards it to MakeKernels.
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    const char *c[] = {
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       f0 = ", name, "( f0 );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       float3 f0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( f0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    // c3 is used for 3-element vectors: it addresses the buffers as scalars via vload3/vstore3, and the
+    // last work-item handles the one or two leftover elements individually, padding the vector with NAN.
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+}
+// Double-precision counterpart of BuildKernel: same source layout, prefixed with the cl_khr_fp64 pragma.
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    const char *c[] = {     "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+
+    const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in)\n"
+                        "{\n"
+                        "   size_t i = get_global_id(0);\n"
+                        "   if( i + 1 < get_global_size(0) )\n"
+                        "   {\n"
+                        "       double3 f0 = vload3( 0, in + 3 * i );\n"
+                        "       f0 = ", name, "( f0 );\n"
+                        "       vstore3( f0, 0, out + 3*i );\n"
+                        "   }\n"
+                        "   else\n"
+                        "   {\n"
+                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                        "       double3 f0;\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 1:\n"
+                        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
+                        "               break;\n"
+                        "           case 0:\n"
+                        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                        "               break;\n"
+                        "       }\n"
+                        "       f0 = ", name, "( f0 );\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 0:\n"
+                        "               out[3*i+1] = f0.y; \n"
+                        "               // fall through\n"
+                        "           case 1:\n"
+                        "               out[3*i] = f0.x; \n"
+                        "               break;\n"
+                        "       }\n"
+                        "   }\n"
+                        "}\n"
+                    };
+    // c3 serves 3-element vectors: scalar pointers with vload3/vstore3; the last work-item stores only the leftover elements.
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+
+    // The kernel is named "math_kernel" plus the size suffix; MakeKernels' result is returned as-is.
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+}
+// Arguments handed to the BuildKernel_FloatFn / BuildKernel_DoubleFn thread-pool jobs.
+typedef struct BuildKernelInfo
+{
+    cl_uint     offset;            // the first vector size to build
+    cl_uint     kernel_count;      // forwarded unchanged to BuildKernel / BuildKernelDouble
+    cl_kernel   **kernels;         // kernels[i] is the kernel array passed for vector-size index i
+    cl_program  *programs;         // programs + i is the program slot passed for vector-size index i
+    const char  *nameInCode;       // function name inserted into the generated kernel source
+}BuildKernelInfo;
+
+// Thread-pool job: builds the float kernels for vector-size index (offset + job_id) described by *p.
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *buildInfo = (BuildKernelInfo*) p;
+    const cl_uint sizeIndex = buildInfo->offset + job_id;
+    return BuildKernel( buildInfo->nameInCode, sizeIndex, buildInfo->kernel_count, buildInfo->kernels[sizeIndex], buildInfo->programs + sizeIndex );
+}
+
+// Thread-pool job: builds the double kernels for vector-size index (offset + job_id) described by *p.
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *buildInfo = (BuildKernelInfo*) p;
+    const cl_uint sizeIndex = buildInfo->offset + job_id;
+    return BuildKernelDouble( buildInfo->nameInCode, sizeIndex, buildInfo->kernel_count, buildInfo->kernels[sizeIndex], buildInfo->programs + sizeIndex );
+}
+
+// Per-worker-thread state used by the test jobs: its buffers, its command queue and the largest error recorded.
+typedef struct ThreadInfo
+{
+    cl_mem      inBuf;                              // input buffer for the thread
+    cl_mem      outBuf[ VECTOR_SIZE_COUNT ];        // output buffers for the thread
+    float       maxError;                           // max error value. Init to 0.
+    double      maxErrorValue;                      // position of the max error value.  Init to 0.
+    cl_command_queue tQueue;                        // per thread command queue to improve performance
+}ThreadInfo;
+// Shared description of one test run, filled in by the driver and passed to every thread-pool job.
+typedef struct TestInfo
+{
+    size_t      subBufferSize;                      // Size of the sub-buffer in elements
+    const Func  *f;                                 // A pointer to the function info
+    cl_program  programs[ VECTOR_SIZE_COUNT ];      // programs for various vector sizes
+    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
+    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
+    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
+    cl_uint     step;                               // step between each chunk and the next.
+    cl_uint     scale;                              // stride between individual test values
+    float       ulps;                               // max_allowed ulps
+    int         ftz;                                // non-zero if running in flush to zero mode
+
+    int         isRangeLimited;                     // 1 if the function is only to be evaluated over a range
+    float       half_sin_cos_tan_limit;             // bound set for half_sin/half_cos (1 + ulps*FLT_EPSILON/2) or half_tan (INFINITY); 0 otherwise
+}TestInfo;
+
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
+// Runs the single-precision unary test for f: creates per-thread kernels, sub-buffers and queues,
+// dispatches TestFloat over all jobs, optionally times the kernels (drawing inputs from d), then
+// releases everything it created. Returns 0 on success, otherwise the first error encountered.
+int TestFunc_Float_Float(const Func *f, MTdata d)
+{
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    int skipTestingRelaxed = ( gTestFastRelaxed && strcmp(f->name,"tan") == 0 );
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+    }
+
+    // Check for special cases for unary float
+    test_info.isRangeLimited = 0;
+    test_info.half_sin_cos_tan_limit = 0;
+    if( 0 == strcmp( f->name, "half_sin") || 0 == strcmp( f->name, "half_cos") )
+    {
+        test_info.isRangeLimited = 1;
+        test_info.half_sin_cos_tan_limit = 1.0f + test_info.ulps * (FLT_EPSILON/2.0f);             // out of range results from finite inputs must be in [-1,1]
+    }
+    else if( 0 == strcmp( f->name, "half_tan"))
+    {
+        test_info.isRangeLimited = 1;
+        test_info.half_sin_cos_tan_limit = INFINITY;             // out of range result from finite inputs must be numeric
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+
+    if( !gSkipCorrectnessTesting || skipTestingRelaxed)
+    {
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
+
+        // Accumulate the arithmetic errors
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            if( test_info.tinfo[i].maxError > maxError )
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+            }
+        }
+
+        if( error )
+            goto exit;
+
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+
+        if( skipTestingRelaxed )
+        {
+          vlog(" (rlx skip correctness testing)\n");
+          goto exit;
+        }
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        if( strstr( f->name, "exp" ) || strstr( f->name, "sin" ) || strstr( f->name, "cos" ) || strstr( f->name, "tan" ) )
+            for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+                ((float*)p)[j] = (float) genrand_real1(d);
+        else if( strstr( f->name, "log" ) )
+            for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+                p[j] = genrand_int32(d) & 0x7fffffff;
+        else
+            for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+                p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;  // run the cleanup below instead of leaking kernels, programs, sub-buffers and queues
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError( test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double current_time = SubtractTime( endTime, startTime );
+                sum += current_time;
+                if( current_time < bestTime )
+                    bestTime = current_time;
+            }
+
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ %a", maxError, maxErrorVal );
+    vlog( "\n" );
+
+exit:
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+
+// Thread-pool job that checks one sub-range of single-precision inputs for a
+// unary math function.
+//
+//   job_id    - selects the input range: element j of the job is the bit
+//               pattern (job_id * job->step) + j * job->scale.
+//   thread_id - selects this worker's ThreadInfo, its kernels and its slice of
+//               gIn / gOut_Ref.
+//   data      - pointer to the shared TestInfo describing the test.
+//
+// Returns CL_SUCCESS on success (or when correctness testing is skipped), the
+// failing OpenCL error code, or -1 when a device result is outside the
+// permitted error.
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t  buffer_elements = job->subBufferSize;
+    size_t  buffer_size = buffer_elements * sizeof( cl_float );
+    cl_uint scale = job->scale;
+    cl_uint base = job_id * (cl_uint) job->step;
+    ThreadInfo *tinfo = job->tinfo + thread_id;
+    float   ulps = job->ulps;
+    fptr    func = job->f->func;
+    const char * fname = job->f->name;
+    // In fast-relaxed mode use the relaxed error bound and reference function
+    if ( gTestFastRelaxed  )
+    {
+        ulps = job->f->relaxed_error;
+        func = job->f->rfunc;
+    }
+
+    cl_uint j, k;
+    cl_int error;
+
+    int isRangeLimited = job->isRangeLimited;
+    float half_sin_cos_tan_limit = job->half_sin_cos_tan_limit;
+    int ftz = job->ftz;
+
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_uint  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    // Write the new values to the input array
+    cl_uint *p = (cl_uint*) gIn + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+      p[j] = base + j * scale;
+      if( gTestFastRelaxed )
+      {
+        float p_j = *(float *) &p[j];
+        if ( strcmp(fname,"sin")==0 || strcmp(fname,"cos")==0 )  //the domain of the function is [-pi,pi]
+        {
+          // Store NaN through a float pointer: assigning NAN to the cl_uint
+          // element would convert it to an integer instead of writing a NaN
+          // bit pattern.
+          if( fabs(p_j) > M_PI )
+            ((float *) p)[j] = NAN;
+        }
+
+        if ( strcmp( fname, "reciprocal" ) == 0 )
+        {
+          // Compare against the float value 2^126, not the integer 0x7E800000
+          if( fabs(p_j) > MAKE_HEX_FLOAT(0x1.0p126f, 0x1L, 126) ) //the domain of the function is [2^-126,2^126]
+            ((float *) p)[j] = NAN;
+        }
+      }
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        return error;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            return error;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            return error;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            return error;
+        }
+
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            return error;
+        }
+    }
+
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+
+    //Calculate the correctly rounded reference result
+    float *r = (float *)gOut_Ref + thread_id * buffer_elements;
+    float *s = (float *)p;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = (float) func.f_f( s[j] );
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        return error;
+    }
+
+    //Verify data
+    uint32_t *t = (uint32_t *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            uint32_t *q = out[k];
+
+            // If we aren't getting the correctly rounded result
+            if( t[j] != q[j] )
+            {
+                float test = ((float*) q)[j];
+                double correct = func.f_f( s[j] );
+                float err = Ulp_Error( test, correct );
+                float abs_error = Abs_Error( test, correct );
+                int fail = 0;
+                int use_abs_error = 0;
+
+                // it is possible for the output to not match the reference result but for Ulp_Error
+                // to be zero, for example -1.#QNAN vs. 1.#QNAN. In such cases there is no failure
+                if (err == 0.0f)
+                {
+                    fail = 0;
+                }
+                else if( gTestFastRelaxed )
+                {
+                    if ( strcmp(fname,"sin")==0 || strcmp(fname,"cos")==0 )
+                    {
+                        fail = ! (fabsf(abs_error) <= ulps);
+                        use_abs_error = 1;
+                    }
+
+                    if ( strcmp(fname, "reciprocal") == 0 )
+                    {
+                        fail = ! (fabsf(err) <= ulps);
+                    }
+
+                    if ( strcmp(fname, "exp") == 0 || strcmp(fname, "exp2") == 0 )
+                    {
+
+                        float exp_error = 3+floor(fabs(2*s[j]));
+                        fail = ! (fabsf(err) <= exp_error);
+                        ulps = exp_error;
+                    }
+                    if (strcmp(fname, "tan") == 0) {
+
+                        if(  !gFastRelaxedDerived )
+                        {
+                            fail = ! (fabsf(err) <= ulps);
+                        }
+                        // Else fast math derived implementation does not require ULP verification
+                    }
+                    if (strcmp(fname, "exp10") == 0)
+                    {
+                        if(  !gFastRelaxedDerived )
+                        {
+                            fail = ! (fabsf(err) <= ulps);
+                        }
+                        // Else fast math derived implementation does not require ULP verification
+                    }
+                    if ( strcmp(fname,"log") == 0 || strcmp(fname,"log2") == 0 )
+                    {
+                        if( s[j] >= 0.5 && s[j] <= 2 )
+                        {
+                            fail = ! (fabsf(abs_error) <= ulps );
+                        }
+                        else
+                        {
+                            // NOTE(review): ulps is reassigned here and keeps this value for the
+                            // remaining elements of this job -- confirm that is intended.
+                            ulps = gIsEmbedded ? job->f->float_embedded_ulps : job->f->float_ulps;
+                            fail = ! (fabsf(err) <= ulps);
+                        }
+
+                    }
+
+
+                    // fast-relaxed implies finite-only
+                    if( IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                        IsFloatInfinity(s[j])    || IsFloatNaN(s[j])        ) {
+                        fail = 0;
+                        err = 0;
+                    }
+                }
+                else
+                {
+                  fail = ! (fabsf(err) <= ulps);
+                }
+
+                // half_sin/cos/tan are only valid between +-2**16, Inf, NaN
+                if( isRangeLimited && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) && fabsf(s[j]) < INFINITY )
+                {
+                    if( fabsf( test ) <= half_sin_cos_tan_limit )
+                    {
+                        err = 0;
+                        fail = 0;
+                    }
+                }
+
+                if( fail )
+                {
+                    if( ftz )
+                    {
+                        typedef int (*CheckForSubnormal) (double,float); // If we are in fast relaxed math, we have a different calculation for the subnormal threshold.
+                        CheckForSubnormal isFloatResultSubnormalPtr;
+
+                        if ( gTestFastRelaxed )
+                        {
+                          isFloatResultSubnormalPtr = &IsFloatResultSubnormalAbsError;
+                        }
+                        else
+                        {
+                          isFloatResultSubnormalPtr = &IsFloatResultSubnormal;
+                        }
+                        // retry per section 6.5.3.2
+                        if( (*isFloatResultSubnormalPtr)(correct, ulps) )
+                        {
+                            fail = fail && ( test != 0.0f );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if( IsFloatSubnormal( s[j] ) )
+                        {
+                            double correct2 = func.f_f( 0.0 );
+                            double correct3 = func.f_f( -0.0 );
+                            float err2;
+                            float err3;
+                            if( use_abs_error )
+                            {
+                              err2 = Abs_Error( test, correct2  );
+                              err3 = Abs_Error( test, correct3  );
+                            }
+                            else
+                            {
+                              err2 = Ulp_Error( test, correct2  );
+                              err3 = Ulp_Error( test, correct3  );
+                            }
+                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( (*isFloatResultSubnormalPtr)(correct2, ulps ) || (*isFloatResultSubnormalPtr)(correct3, ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                }
+                // Track the largest error seen by this worker and the input that produced it
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at %a (0x%8.8x): *%a vs. %a\n", job->f->name, sizeNames[k], err, ((float*) s)[j], ((uint32_t*) s)[j], ((float*) t)[j], test);
+                    return -1;
+                }
+            }
+        }
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+
+
+    // Emit a progress marker whenever the low 28 bits of base are zero
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+
+    return CL_SUCCESS;
+}
+
+
+
+// Thread-pool job that checks one sub-range of double-precision inputs for a
+// unary math function.
+//
+//   job_id    - selects the input range: element j of the job is
+//               DoubleFromUInt32( (job_id * job->step) + j * job->scale ).
+//   thread_id - selects this worker's ThreadInfo, its kernels and its slice of
+//               gIn / gOut_Ref.
+//   data      - pointer to the shared TestInfo describing the test.
+//
+// Returns CL_SUCCESS on success (or when correctness testing is skipped), the
+// failing OpenCL error code, or -1 when a device result is outside the
+// permitted error.
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t  buffer_elements = job->subBufferSize;
+    size_t  buffer_size = buffer_elements * sizeof( cl_double );
+    cl_uint scale = job->scale;
+    cl_uint base = job_id * (cl_uint) job->step;
+    ThreadInfo *tinfo = job->tinfo + thread_id;
+    float   ulps = job->ulps;
+    dptr    func = job->f->dfunc;
+    cl_uint j, k;
+    cl_int error;
+    int ftz = job->ftz;
+
+    Force64BitFPUPrecision();
+
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_ulong *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    // Write the new values to the input array
+    cl_double *p = (cl_double*) gIn + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        p[j] = DoubleFromUInt32( base + j * scale);
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        return error;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            return error;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            return error;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            return error;
+        }
+
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            return error;
+        }
+    }
+
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+
+    //Calculate the correctly rounded reference result
+    cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
+    cl_double *s = (cl_double *)p;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = (cl_double) func.f_f( s[j] );
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        return error;
+    }
+
+
+    //Verify data
+    cl_ulong *t = (cl_ulong *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_ulong *q = out[k];
+
+            // If we aren't getting the correctly rounded result
+            if( t[j] != q[j] )
+            {
+                cl_double test = ((cl_double*) q)[j];
+                long double correct = func.f_f( s[j] );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
+                int fail = ! (fabsf(err) <= ulps);
+
+                if( fail )
+                {
+                    if( ftz )
+                    {
+                        // retry per section 6.5.3.2
+                        if( IsDoubleResultSubnormal(correct, ulps) )
+                        {
+                            fail = fail && ( test != 0.0f );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if( IsDoubleSubnormal( s[j] ) )
+                        {
+                            long double correct2 = func.f_f( 0.0L );
+                            long double correct3 = func.f_f( -0.0L );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+
+                            // retry per section 6.5.3.4
+                            if( IsDoubleResultSubnormal(correct2, ulps ) || IsDoubleResultSubnormal(correct3, ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                }
+                // Track the largest error seen by this worker and the input that produced it
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                }
+                if( fail )
+                {
+                    // Report through s / r, which already include this thread's offset into gIn / gOut_Ref
+                    vlog_error( "\nERROR: %s%s: %f ulp error at %.13la (0x%16.16llx): *%.13la vs. %.13la\n", job->f->name, sizeNames[k], err, s[j], ((cl_ulong*) s)[j], r[j], test );
+                    return -1;
+                }
+            }
+        }
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+
+
+    // Emit a progress marker whenever the low 28 bits of base are zero
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+
+    return CL_SUCCESS;
+}
+
+// Runs the double-precision brute-force test for unary function f.
+//
+// Sets up per-thread sub-buffers, command queues and kernels, runs TestDouble
+// over test_info.jobCount jobs through the thread pool (unless correctness
+// testing is skipped), optionally times the kernels when gMeasureTimes is set,
+// and releases everything it created before returning.
+//
+//   f - description of the function under test.
+//   d - random number generator state, used to fill the input buffer for the
+//       timing runs.
+//
+// Returns the last OpenCL / test error code recorded in `error`.
+int TestFunc_Double_Double(const Func *f, MTdata d)
+{
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+#if defined( __APPLE__ )
+    struct timeval  time_val;
+    gettimeofday( &time_val, NULL );
+    double start_time = time_val.tv_sec + 1e-6 * time_val.tv_usec;
+    double end_time;
+#endif
+
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = f->double_ulps;
+    test_info.ftz = f->ftz || gForceFTZ;
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    // Give every worker thread its own input/output sub-buffers and command queue
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            /* Qualcomm fix: end */
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+           goto exit;
+    }
+
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
+
+        // Accumulate the arithmetic errors
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            if( test_info.tinfo[i].maxError > maxError )
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+            }
+        }
+
+        if( error )
+            goto exit;
+
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+
+#if defined( __APPLE__ )
+    gettimeofday( &time_val, NULL);
+    end_time = time_val.tv_sec + 1e-6 * time_val.tv_usec;
+#endif
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+
+        if( strstr( f->name, "exp" ) )
+            for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ )
+                p[j] = (double)genrand_real1(d);
+        else if( strstr( f->name, "log" ) )
+            for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ )
+                p[j] = fabs(DoubleFromUInt32( genrand_int32(d)));
+        else
+            for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ )
+                p[j] = DoubleFromUInt32( genrand_int32(d) );
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            // Go through the common cleanup instead of returning directly, so
+            // the kernels, programs, sub-buffers and queues are released.
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double current_time = SubtractTime( endTime, startTime );
+                sum += current_time;
+                if( current_time < bestTime )
+                    bestTime = current_time;
+            }
+
+            // Report either the average or the best of the PERF_LOOP_COUNT runs
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ %a", maxError, maxErrorVal );
+
+#if defined( __APPLE__ )
+    vlog( "\t(%2.2f seconds)", end_time - start_time );
+#endif
+    vlog( "\n" );
+
+exit:
+    // Common cleanup: release programs, kernels, per-thread buffers and queues
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+
+

diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
deleted file mode 100644
index f6fa326..0000000
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ /dev/null

@@ -1,549 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-}
-
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-{
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double maxErrorValue; // position of the max error value.  Init to 0.
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
-{
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-
-    int isRangeLimited; // 1 if the function is only to be evaluated over a
-                        // range
-    float half_sin_cos_tan_limit;
-    bool relaxedMode; // True if test is running in relaxed mode, false
-                      // otherwise.
-} TestInfo;
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
-{
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ulps = f->double_ulps;
-    test_info.ftz = f->ftz || gForceFTZ;
-    test_info.relaxedMode = relaxedMode;
-
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-    }
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-            }
-        }
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ %a", maxError, maxErrorVal);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_double);
-    cl_uint scale = job->scale;
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    float ulps = job->ulps;
-    dptr func = job->f->dfunc;
-    cl_int error;
-    int ftz = job->ftz;
-
-    Force64BitFPUPrecision();
-
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-
-    // Write the new values to the input array
-    cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        p[j] = DoubleFromUInt32(base + j * scale);
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        return error;
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            return error;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            return error;
-        }
-
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            return error;
-        }
-
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            return error;
-        }
-    }
-
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-
-    // Calculate the correctly rounded reference result
-    cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
-    cl_double *s = (cl_double *)p;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_f(s[j]);
-
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Verify data
-    cl_ulong *t = (cl_ulong *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_ulong *q = out[k];
-
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_f(s[j]);
-                float err = Bruteforce_Ulp_Error_Double(test, correct);
-                int fail = !(fabsf(err) <= ulps);
-
-                if (fail)
-                {
-                    if (ftz)
-                    {
-                        // retry per section 6.5.3.2
-                        if (IsDoubleResultSubnormal(correct, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-
-                        // retry per section 6.5.3.3
-                        if (IsDoubleSubnormal(s[j]))
-                        {
-                            long double correct2 = func.f_f(0.0L);
-                            long double correct3 = func.f_f(-0.0L);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct2);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= ulps))
-                                    && (!(fabsf(err3) <= ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-
-                            // retry per section 6.5.3.4
-                            if (IsDoubleResultSubnormal(correct2, ulps)
-                                || IsDoubleResultSubnormal(correct3, ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                }
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                }
-                if (fail)
-                {
-                    vlog_error("\nERROR: %s%s: %f ulp error at %.13la "
-                               "(0x%16.16llx): *%.13la vs. %.13la\n",
-                               job->f->name, sizeNames[k], err,
-                               ((cl_double *)gIn)[j], ((cl_ulong *)gIn)[j],
-                               ((cl_double *)gOut_Ref)[j], test);
-                    return -1;
-                }
-            }
-        }
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-
-
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10zd buf_elements:%10u ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, buffer_elements, job->scale, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-
-    return CL_SUCCESS;
-}

diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp
deleted file mode 100644
index 17edc58..0000000
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ /dev/null

@@ -1,727 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-}
-
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-{
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double maxErrorValue; // position of the max error value.  Init to 0.
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
-{
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-
-    int isRangeLimited; // 1 if the function is only to be evaluated over a
-                        // range
-    float half_sin_cos_tan_limit;
-    bool relaxedMode; // True if test is running in relaxed mode, false
-                      // otherwise.
-} TestInfo;
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
-{
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_float));
-
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
-    test_info.ftz =
-        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    test_info.relaxedMode = relaxedMode;
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_float),
-            test_info.subBufferSize * sizeof(cl_float)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-    }
-
-    // Check for special cases for unary float
-    test_info.isRangeLimited = 0;
-    test_info.half_sin_cos_tan_limit = 0;
-    if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
-    {
-        test_info.isRangeLimited = 1;
-        test_info.half_sin_cos_tan_limit = 1.0f
-            + test_info.ulps
-                * (FLT_EPSILON / 2.0f); // out of range results from finite
-                                        // inputs must be in [-1,1]
-    }
-    else if (0 == strcmp(f->name, "half_tan"))
-    {
-        test_info.isRangeLimited = 1;
-        test_info.half_sin_cos_tan_limit =
-            INFINITY; // out of range resut from finite inputs must be numeric
-    }
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting || skipTestingRelaxed)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-            }
-        }
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        if (skipTestingRelaxed)
-        {
-            vlog(" (rlx skip correctness testing)\n");
-            goto exit;
-        }
-
-        vlog("\t%8.2f @ %a", maxError, maxErrorVal);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_float);
-    cl_uint scale = job->scale;
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    fptr func = job->f->func;
-    const char *fname = job->f->name;
-    bool relaxedMode = job->relaxedMode;
-    float ulps = getAllowedUlpError(job->f, relaxedMode);
-    if (relaxedMode)
-    {
-        func = job->f->rfunc;
-    }
-
-    cl_int error;
-
-    int isRangeLimited = job->isRangeLimited;
-    float half_sin_cos_tan_limit = job->half_sin_cos_tan_limit;
-    int ftz = job->ftz;
-
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_uint *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-
-    // Write the new values to the input array
-    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        p[j] = base + j * scale;
-        if (relaxedMode)
-        {
-            float p_j = *(float *)&p[j];
-            if (strcmp(fname, "sin") == 0
-                || strcmp(fname, "cos")
-                    == 0) // the domain of the function is [-pi,pi]
-            {
-                if (fabs(p_j) > M_PI) ((float *)p)[j] = NAN;
-            }
-
-            if (strcmp(fname, "reciprocal") == 0)
-            {
-                const float l_limit = HEX_FLT(+, 1, 0, -, 126);
-                const float u_limit = HEX_FLT(+, 1, 0, +, 126);
-
-                if (fabs(p_j) < l_limit
-                    || fabs(p_j) > u_limit) // the domain of the function is
-                                            // [2^-126,2^126]
-                    ((float *)p)[j] = NAN;
-            }
-        }
-    }
-
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        return error;
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            return error;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            return error;
-        }
-
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            return error;
-        }
-
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            return error;
-        }
-    }
-
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-
-    // Calculate the correctly rounded reference result
-    float *r = (float *)gOut_Ref + thread_id * buffer_elements;
-    float *s = (float *)p;
-    for (size_t j = 0; j < buffer_elements; j++) r[j] = (float)func.f_f(s[j]);
-
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-
-    // Verify data
-    uint32_t *t = (uint32_t *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            uint32_t *q = out[k];
-
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                float test = ((float *)q)[j];
-                double correct = func.f_f(s[j]);
-                float err = Ulp_Error(test, correct);
-                float abs_error = Abs_Error(test, correct);
-                int fail = 0;
-                int use_abs_error = 0;
-
-                // it is possible for the output to not match the reference
-                // result but for Ulp_Error to be zero, for example -1.#QNAN
-                // vs. 1.#QNAN. In such cases there is no failure
-                if (err == 0.0f)
-                {
-                    fail = 0;
-                }
-                else if (relaxedMode)
-                {
-                    if (strcmp(fname, "sin") == 0 || strcmp(fname, "cos") == 0)
-                    {
-                        fail = !(fabsf(abs_error) <= ulps);
-                        use_abs_error = 1;
-                    }
-                    if (strcmp(fname, "sinpi") == 0
-                        || strcmp(fname, "cospi") == 0)
-                    {
-                        if (s[j] >= -1.0 && s[j] <= 1.0)
-                        {
-                            fail = !(fabsf(abs_error) <= ulps);
-                            use_abs_error = 1;
-                        }
-                    }
-
-                    if (strcmp(fname, "reciprocal") == 0)
-                    {
-                        fail = !(fabsf(err) <= ulps);
-                    }
-
-                    if (strcmp(fname, "exp") == 0 || strcmp(fname, "exp2") == 0)
-                    {
-                        float exp_error = ulps;
-
-                        if (!gIsEmbedded)
-                        {
-                            exp_error += floor(fabs(2 * s[j]));
-                        }
-
-                        fail = !(fabsf(err) <= exp_error);
-                        ulps = exp_error;
-                    }
-                    if (strcmp(fname, "tan") == 0)
-                    {
-
-                        if (!gFastRelaxedDerived)
-                        {
-                            fail = !(fabsf(err) <= ulps);
-                        }
-                        // Else fast math derived implementation does not
-                        // require ULP verification
-                    }
-                    if (strcmp(fname, "exp10") == 0)
-                    {
-                        if (!gFastRelaxedDerived)
-                        {
-                            fail = !(fabsf(err) <= ulps);
-                        }
-                        // Else fast math derived implementation does not
-                        // require ULP verification
-                    }
-                    if (strcmp(fname, "log") == 0 || strcmp(fname, "log2") == 0
-                        || strcmp(fname, "log10") == 0)
-                    {
-                        if (s[j] >= 0.5 && s[j] <= 2)
-                        {
-                            fail = !(fabsf(abs_error) <= ulps);
-                        }
-                        else
-                        {
-                            ulps = gIsEmbedded ? job->f->float_embedded_ulps
-                                               : job->f->float_ulps;
-                            fail = !(fabsf(err) <= ulps);
-                        }
-                    }
-
-
-                    // fast-relaxed implies finite-only
-                    if (IsFloatInfinity(correct) || IsFloatNaN(correct)
-                        || IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
-                    {
-                        fail = 0;
-                        err = 0;
-                    }
-                }
-                else
-                {
-                    fail = !(fabsf(err) <= ulps);
-                }
-
-                // half_sin/cos/tan are only valid between +-2**16, Inf, NaN
-                if (isRangeLimited
-                    && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16)
-                    && fabsf(s[j]) < INFINITY)
-                {
-                    if (fabsf(test) <= half_sin_cos_tan_limit)
-                    {
-                        err = 0;
-                        fail = 0;
-                    }
-                }
-
-                if (fail)
-                {
-                    if (ftz)
-                    {
-                        typedef int (*CheckForSubnormal)(
-                            double, float); // If we are in fast relaxed math,
-                                            // we have a different calculation
-                                            // for the subnormal threshold.
-                        CheckForSubnormal isFloatResultSubnormalPtr;
-
-                        if (relaxedMode)
-                        {
-                            isFloatResultSubnormalPtr =
-                                &IsFloatResultSubnormalAbsError;
-                        }
-                        else
-                        {
-                            isFloatResultSubnormalPtr = &IsFloatResultSubnormal;
-                        }
-                        // retry per section 6.5.3.2
-                        if ((*isFloatResultSubnormalPtr)(correct, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-
-                        // retry per section 6.5.3.3
-                        if (IsFloatSubnormal(s[j]))
-                        {
-                            double correct2 = func.f_f(0.0);
-                            double correct3 = func.f_f(-0.0);
-                            float err2;
-                            float err3;
-                            if (use_abs_error)
-                            {
-                                err2 = Abs_Error(test, correct2);
-                                err3 = Abs_Error(test, correct3);
-                            }
-                            else
-                            {
-                                err2 = Ulp_Error(test, correct2);
-                                err3 = Ulp_Error(test, correct3);
-                            }
-                            fail = fail
-                                && ((!(fabsf(err2) <= ulps))
-                                    && (!(fabsf(err3) <= ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-
-                            // retry per section 6.5.3.4
-                            if ((*isFloatResultSubnormalPtr)(correct2, ulps)
-                                || (*isFloatResultSubnormalPtr)(correct3, ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                }
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                }
-                if (fail)
-                {
-                    vlog_error("\nERROR: %s%s: %f ulp error at %a (0x%8.8x): "
-                               "*%a vs. %a\n",
-                               job->f->name, sizeNames[k], err, ((float *)s)[j],
-                               ((uint32_t *)s)[j], ((float *)t)[j], test);
-                    return -1;
-                }
-            }
-        }
-    }
-
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-
-
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-
-    return CL_SUCCESS;
-}

diff --git a/test_conformance/math_brute_force/unary_two_results.cpp b/test_conformance/math_brute_force/unary_two_results.cpp
new file mode 100644
index 0000000..a219741
--- /dev/null
+++ b/test_conformance/math_brute_force/unary_two_results.cpp

@@ -0,0 +1,992 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <string.h>
+#include "FunctionList.h"
+
+int TestFunc_Float2_Float(const Func *f, MTdata);
+int TestFunc_Double2_Double(const Func *f, MTdata);
+// Table entry for this test family; presumably { name, float test, double test } -- confirm against vtbl.
+extern const vtbl _unary_two_results = { "unary_two_results",
+                                         TestFunc_Float2_Float,
+                                         TestFunc_Double2_Double };
+
+// Builds the float flavour of the two-result test kernel: assembles OpenCL C source that calls
+// name( in, out2 ) and stores its return value in out, then hands the fragments to MakeKernel.
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+{
+    const char *genericSrc[] = {
+        "__kernel void math_kernel", sizeNames[vectorSize],
+        "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize],
+        "* out2, __global float", sizeNames[vectorSize], "* in)\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name, "( in[i], out2 + i );\n"
+        "}\n"
+    };
+    // Size-3 variant: vload3/vstore3 on scalar pointers; the last work-item copies its 1 or 2 leftover elements by hand.
+    const char *vec3Src[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* out2, __global float* in)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       float3 f0 = vload3( 0, in + 3 * i );\n"
+        "       float3 iout = NAN;\n"
+        "       f0 = ", name, "( f0, &iout );\n"
+        "       vstore3( f0, 0, out + 3*i );\n"
+        "       vstore3( iout, 0, out2 + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       float3 iout = NAN;\n"
+        "       float3 f0;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       f0 = ", name, "( f0, &iout );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = f0.y; \n"
+        "               out2[3*i+1] = iout.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = f0.x; \n"
+        "               out2[3*i] = iout.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+    // Only the 3-element case takes the special source; every other size uses the generic one.
+    const bool isVec3 = ( sizeValues[vectorSize] == 3 );
+    const char **source = isVec3 ? vec3Src : genericSrc;
+    const size_t pieceCount = isVec3 ? sizeof(vec3Src)/sizeof(vec3Src[0])
+                                     : sizeof(genericSrc)/sizeof(genericSrc[0]);
+
+    // Kernel name handed to MakeKernel, spelled the same way as the kernel declared in the source.
+    char kernelName[32];
+    snprintf( kernelName, sizeof( kernelName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernel(source, (cl_uint) pieceCount, kernelName, k, p);
+}
+
+// Double-precision twin of BuildKernel: same kernel shapes with the cl_khr_fp64 pragma prepended;
+// the source calls name( in, out2 ), stores its return value in out, and goes to MakeKernel.
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+{
+    const char *genericSrc[] = {
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", sizeNames[vectorSize],
+        "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize],
+        "* out2, __global double", sizeNames[vectorSize], "* in)\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name, "( in[i], out2 + i );\n"
+        "}\n"
+    };
+    // Size-3 variant (chosen below only when sizeValues[vectorSize] == 3): vload3/vstore3 plus a hand-written tail.
+    const char *vec3Src[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* out2, __global double* in)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       double3 f0 = vload3( 0, in + 3 * i );\n"
+        "       double3 iout = NAN;\n"
+        "       f0 = ", name, "( f0, &iout );\n"
+        "       vstore3( f0, 0, out + 3*i );\n"
+        "       vstore3( iout, 0, out2 + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       double3 iout = NAN;\n"
+        "       double3 f0;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       f0 = ", name, "( f0, &iout );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = f0.y; \n"
+        "               out2[3*i+1] = iout.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = f0.x; \n"
+        "               out2[3*i] = iout.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+    const bool isVec3 = ( sizeValues[vectorSize] == 3 );
+    const char **source = isVec3 ? vec3Src : genericSrc;
+    const size_t pieceCount = isVec3 ? sizeof(vec3Src)/sizeof(vec3Src[0])
+                                     : sizeof(genericSrc)/sizeof(genericSrc[0]);
+
+    char kernelName[32];
+    snprintf( kernelName, sizeof( kernelName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernel(source, (cl_uint) pieceCount, kernelName, k, p);
+}
+
+struct BuildKernelInfo
+{
+    cl_uint offset;         // the first vector size to build; the job id is added to it
+    cl_kernel *kernels;     // array indexed by vector-size index
+    cl_program *programs;   // array indexed by vector-size index
+    const char *nameInCode; // function name forwarded to the kernel builder
+};
+
+// Job callback: builds the float kernel for vector-size index offset + job_id; p is a BuildKernelInfo.
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *request = (BuildKernelInfo*) p;
+    const cl_uint sizeIndex = request->offset + job_id;
+    return BuildKernel( request->nameInCode, sizeIndex, request->kernels + sizeIndex, request->programs + sizeIndex );
+}
+
+// Job callback: builds the double kernel for vector-size index offset + job_id; p is a BuildKernelInfo.
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *request = (BuildKernelInfo*) p;
+    const cl_uint sizeIndex = request->offset + job_id;
+    return BuildKernelDouble( request->nameInCode, sizeIndex, request->kernels + sizeIndex, request->programs + sizeIndex );
+}
+
+int TestFunc_Float2_Float(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    uint32_t l;
+    int error;
+    char const * testing_mode;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError0 = 0.0f;
+    float maxError1 = 0.0f;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    float maxErrorVal0 = 0.0f;
+    float maxErrorVal1 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( float );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
+    cl_uchar overflow[BUFFER_SIZE / sizeof( float )];
+    int isFract = 0 == strcmp( "fract", f->nameInCode );
+    int skipNanInf = isFract  && ! gInfNanSupport;
+    float float_ulps;
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    if( gIsEmbedded )
+        float_ulps = f->float_embedded_ulps;
+    else
+        float_ulps = f->float_ulps;
+
+    if (gTestFastRelaxed)
+      float_ulps = f->relaxed_error;
+
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        return error;
+/*
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+*/
+
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        if( gWimpyMode )
+        {
+          for( j = 0; j < bufferSize / sizeof( float ); j++ )
+          {
+            p[j] = (uint32_t) i + j * scale;
+            if ( gTestFastRelaxed && strcmp(f->name,"sincos") == 0 )
+            {
+              float pj = *(float *)&p[j];
+              if(fabs(pj) > M_PI)
+                p[j] = NAN;
+            }
+          }
+        }
+        else
+        {
+          for( j = 0; j < bufferSize / sizeof( float ); j++ )
+          {
+            p[j] = (uint32_t) i + j;
+            if ( gTestFastRelaxed && strcmp(f->name,"sincos") == 0 )
+            {
+              float pj = *(float *)&p[j];
+              if(fabs(pj) > M_PI)
+                p[j] = NAN;
+            }
+          }
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+
+            memset_pattern4(gOut2[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL)))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg(kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        FPU_mode_type oldMode;
+        RoundingMode oldRoundMode = kRoundToNearestEven;
+        if( isFract )
+        {
+            //Calculate the correctly rounded reference result
+            memset( &oldMode, 0, sizeof( oldMode ) );
+            if( ftz )
+                ForceFTZ( &oldMode );
+
+            // Set the rounding mode to match the device
+            if (gIsInRTZMode)
+                oldRoundMode = set_round(kRoundTowardZero, kfloat);
+        }
+
+        //Calculate the correctly rounded reference result
+        float *r = (float *)gOut_Ref;
+        float *r2 = (float *)gOut_Ref2;
+        float *s = (float *)gIn;
+
+        if( skipNanInf )
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            {
+                double dd;
+                feclearexcept(FE_OVERFLOW);
+
+                if( gTestFastRelaxed )
+                    r[j] = (float) f->rfunc.f_fpf( s[j], &dd );
+                else
+                    r[j] = (float) f->func.f_fpf( s[j], &dd );
+
+                r2[j] = (float) dd;
+                overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+            }
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            {
+                double dd;
+                if( gTestFastRelaxed )
+                  r[j] = (float) f->rfunc.f_fpf( s[j], &dd );
+                else
+                  r[j] = (float) f->func.f_fpf( s[j], &dd );
+
+                r2[j] = (float) dd;
+            }
+        }
+
+        if( isFract && ftz )
+            RestoreFPState( &oldMode );
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray2 failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        if( gSkipCorrectnessTesting )
+        {
+            if (isFract && gIsInRTZMode)
+                (void)set_round(oldRoundMode, kfloat);
+            break;
+        }
+
+        //Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        uint32_t *t2 = (uint32_t *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t *)gOut[k];
+                uint32_t *q2 = (uint32_t *)gOut2[k];
+
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] || t2[j] != q2[j]  )
+                {
+                    double correct, correct2;
+                    float err, err2;
+                    float test = ((float*) q)[j];
+                    float test2 = ((float*) q2)[j];
+
+                    if( gTestFastRelaxed )
+                      correct = f->rfunc.f_fpf( s[j], &correct2 );
+                    else
+                      correct = f->func.f_fpf( s[j], &correct2 );
+
+                    // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                    if (gTestFastRelaxed || skipNanInf)
+                    {
+                        if (skipNanInf && overflow[j])
+                            continue;
+
+                        // Note: no double rounding here.  Reference functions calculate in single precision.
+                        if( IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                            IsFloatInfinity(correct2)|| IsFloatNaN(correct2)    ||
+                            IsFloatInfinity(s[j])    || IsFloatNaN(s[j])        )
+                            continue;
+                    }
+
+                    typedef int (*CheckForSubnormal) (double,float); // If we are in fast relaxed math, we have a different calculation for the subnormal threshold.
+                    CheckForSubnormal isFloatResultSubnormalPtr;
+                    if( gTestFastRelaxed )
+                    {
+                      err = Abs_Error( test, correct);
+                      err2 = Abs_Error( test2, correct2);
+                      isFloatResultSubnormalPtr = &IsFloatResultSubnormalAbsError;
+                    }
+                    else
+                    {
+                        err = Ulp_Error( test, correct );
+                        err2 = Ulp_Error( test2, correct2 );
+                        isFloatResultSubnormalPtr = &IsFloatResultSubnormal;
+                    }
+                    int fail = ! (fabsf(err) <= float_ulps && fabsf(err2) <= float_ulps);
+
+                    if( ftz )
+                    {
+                        // retry per section 6.5.3.2
+                        if( (*isFloatResultSubnormalPtr)(correct, float_ulps) )
+                        {
+                            if( (*isFloatResultSubnormalPtr) (correct2, float_ulps ))
+                            {
+                                fail = fail && ! ( test == 0.0f && test2 == 0.0f );
+                                if( ! fail )
+                                {
+                                    err = 0.0f;
+                                    err2 = 0.0f;
+                                }
+                            }
+                            else
+                            {
+                                fail = fail && ! ( test == 0.0f && fabsf(err2) <= float_ulps);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                        else if( (*isFloatResultSubnormalPtr)(correct2, float_ulps ) )
+                        {
+                            fail = fail && ! ( test2 == 0.0f && fabsf(err) <= float_ulps);
+                            if( ! fail )
+                                err2 = 0.0f;
+                        }
+
+
+                        // retry per section 6.5.3.3
+                        if( IsFloatSubnormal( s[j] ) )
+                        {
+                            double correctp, correctn;
+                            double correct2p, correct2n;
+                            float errp, err2p, errn, err2n;
+
+                            if( skipNanInf )
+                                feclearexcept(FE_OVERFLOW);
+                            if ( gTestFastRelaxed )
+                            {
+                              correctp = f->rfunc.f_fpf( 0.0, &correct2p );
+                              correctn = f->rfunc.f_fpf( -0.0, &correct2n );
+                            }
+                            else
+                            {
+                              correctp = f->func.f_fpf( 0.0, &correct2p );
+                              correctn = f->func.f_fpf( -0.0, &correct2n );
+                            }
+
+                            // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                            if( skipNanInf )
+                            {
+                                if( fetestexcept(FE_OVERFLOW) )
+                                    continue;
+
+                                // Note: no double rounding here.  Reference functions calculate in single precision.
+                                if( IsFloatInfinity(correctp) || IsFloatNaN(correctp)   ||
+                                    IsFloatInfinity(correctn) || IsFloatNaN(correctn)   ||
+                                    IsFloatInfinity(correct2p) || IsFloatNaN(correct2p) ||
+                                    IsFloatInfinity(correct2n) || IsFloatNaN(correct2n) )
+                                    continue;
+                            }
+
+                            if ( gTestFastRelaxed )
+                            {
+                              errp = Abs_Error( test, correctp  );
+                              err2p = Abs_Error( test, correct2p  );
+                              errn = Abs_Error( test, correctn  );
+                              err2n = Abs_Error( test, correct2n  );
+                            }
+                            else
+                            {
+                              errp = Ulp_Error( test, correctp  );
+                              err2p = Ulp_Error( test, correct2p  );
+                              errn = Ulp_Error( test, correctn  );
+                              err2n = Ulp_Error( test, correct2n  );
+                            }
+
+                            fail =  fail && ((!(fabsf(errp) <= float_ulps)) && (!(fabsf(err2p) <= float_ulps))    &&
+                                            ((!(fabsf(errn) <= float_ulps)) && (!(fabsf(err2n) <= float_ulps))) );
+                            if( fabsf( errp ) < fabsf(err ) )
+                                err = errp;
+                            if( fabsf( errn ) < fabsf(err ) )
+                                err = errn;
+                            if( fabsf( err2p ) < fabsf(err2 ) )
+                                err2 = err2p;
+                            if( fabsf( err2n ) < fabsf(err2 ) )
+                                err2 = err2n;
+
+                            // retry per section 6.5.3.4
+                            if(  (*isFloatResultSubnormalPtr)( correctp, float_ulps ) || (*isFloatResultSubnormalPtr)( correctn, float_ulps )  )
+                            {
+                              if( (*isFloatResultSubnormalPtr)( correct2p, float_ulps ) || (*isFloatResultSubnormalPtr)( correct2n, float_ulps ) )
+                              {
+                                fail = fail && !( test == 0.0f && test2 == 0.0f);
+                                if( ! fail )
+                                  err = err2 = 0.0f;
+                              }
+                              else
+                              {
+                                fail = fail && ! (test == 0.0f && fabsf(err2) <= float_ulps);
+                                if( ! fail )
+                                  err = 0.0f;
+                              }
+                            }
+                            else if( (*isFloatResultSubnormalPtr)( correct2p, float_ulps ) || (*isFloatResultSubnormalPtr)( correct2n, float_ulps ) )
+                            {
+                                fail = fail && ! (test2 == 0.0f && (fabsf(err) <= float_ulps));
+                                if( ! fail )
+                                    err2 = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError0 )
+                    {
+                        maxError0 = fabsf(err);
+                        maxErrorVal0 = s[j];
+                    }
+                    if( fabsf(err2 ) > maxError1 )
+                    {
+                        maxError1 = fabsf(err2);
+                        maxErrorVal1 = s[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %s%s: {%f, %f} ulp error at %a: *{%a, %a} vs. {%a, %a}\n", f->name, sizeNames[k], err, err2, ((float*) gIn)[j], ((float*) gOut_Ref)[j], ((float*) gOut_Ref2)[j], test, test2 );
+                      error = -1;
+                      goto exit;
+                    }
+                }
+            }
+        }
+
+        if (isFract && gIsInRTZMode)
+            (void)set_round(oldRoundMode, kfloat);
+
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+           } else
+           {
+              vlog(".");
+           }
+           fflush(stdout);
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j]) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0, maxErrorVal1 );
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+// Brute-force test driver for a double-precision function with two results:
+// one returned by value and one stored through a pointer argument.
+//
+// One kernel per vector size in [gMinVectorSizeIndex, gMaxVectorSizeIndex) is
+// built through ThreadPool_Do/BuildKernel_DoubleFn.  The driver then walks the
+// 32-bit patterns i in [0, 2^32) in strides of `step`; each pattern is turned
+// into a double input by DoubleFromUInt32, the kernels are run on the device,
+// and both device outputs are compared against the host reference
+// f->dfunc.f_fpf.  Results that are not bit-identical to the reference are
+// accepted when their error is within f->double_ulps, or when one of the
+// flush-to-zero retries below applies.  When gMeasureTimes is set, the kernels
+// are additionally timed on random input.
+//
+// Parameters:
+//   f - descriptor of the function under test (name, nameInCode, ftz,
+//       double_ulps and the dfunc.f_fpf reference are read here).
+//   d - random number generator state, used only for the timing input.
+//
+// Returns 0 when every step succeeded, -1 on the first out-of-tolerance
+// result, or the non-zero code of the first failing build/OpenCL call.
+// Once the kernels have been built, every exit goes through the `exit:` label
+// so that the kernels and programs are always released.
+int TestFunc_Double2_Double(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError0 = 0.0f;
+    float maxError1 = 0.0f;
+    int ftz = f->ftz || gForceFTZ;
+    double maxErrorVal0 = 0.0f;
+    double maxErrorVal1 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( cl_double );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
+
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        // Wimpy mode takes larger strides through the 32-bit pattern space.
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+
+    Force64BitFPUPrecision();
+
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                &build_info ) ))
+    {
+        // Returning directly here (no cleanup) is the original behaviour for
+        // a failed build and is kept unchanged.
+        return error;
+    }
+/*
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+*/
+
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        if( gWimpyMode )
+        {
+            for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+                p[j] = DoubleFromUInt32((uint32_t) i + j * scale);
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+                p[j] = DoubleFromUInt32((uint32_t) i + j);
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            // Leave through the cleanup label so the kernels and programs
+            // built above are released instead of leaked.
+            goto exit;
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+
+            memset_pattern4(gOut2[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL)))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            // Number of work-items: buffer size divided by the byte size of
+            // one vector, rounded up.
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg(kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        // A clFlush failure is only logged; the blocking reads below still run.
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        //Calculate the correctly rounded reference result
+        double *r = (double *)gOut_Ref;
+        double *r2 = (double *)gOut_Ref2;
+        double *s = (double *)gIn;
+        for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+        {
+            long double dd;
+            r[j] = (double) f->dfunc.f_fpf( s[j], &dd );
+            r2[j] = (double) dd;
+        }
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray2 failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        // With correctness testing disabled a single pass is enough.
+        if( gSkipCorrectnessTesting )
+            break;
+
+        //Verify data
+        uint64_t *t = (uint64_t *)gOut_Ref;
+        uint64_t *t2 = (uint64_t *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint64_t *q = (uint64_t *)(gOut[k]);
+                uint64_t *q2 = (uint64_t *)(gOut2[k]);
+
+                // If we aren't getting the correctly rounded result
+                // (bitwise comparison first; the ulp computation is only done
+                // for elements that differ from the reference)
+                if( t[j] != q[j] || t2[j] != q2[j]  )
+                {
+                    double test = ((double*) q)[j];
+                    double test2 = ((double*) q2)[j];
+                    long double correct2;
+                    long double correct = f->dfunc.f_fpf( s[j], &correct2 );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
+                    float err2 = Bruteforce_Ulp_Error_Double( test2, correct2 );
+                    int fail = ! (fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps);
+                    if( ftz )
+                    {
+                        // retry per section 6.5.3.2
+                        // A result whose reference is subnormal may be
+                        // returned as zero instead.
+                        if( IsDoubleResultSubnormal(correct, f->double_ulps ) )
+                        {
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) )
+                            {
+                                fail = fail && ! ( test == 0.0f && test2 == 0.0f );
+                                if( ! fail )
+                                {
+                                    err = 0.0f;
+                                    err2 = 0.0f;
+                                }
+                            }
+                            else
+                            {
+                                fail = fail && ! ( test == 0.0f && fabsf(err2) <= f->double_ulps);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                        else if( IsDoubleResultSubnormal( correct2, f->double_ulps ) )
+                        {
+                            fail = fail && ! ( test2 == 0.0f && fabsf(err) <= f->double_ulps);
+                            if( ! fail )
+                                err2 = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        // A subnormal input is re-evaluated as +0.0 and -0.0.
+                        if( IsDoubleSubnormal( s[j] ) )
+                        {
+                            long double correct2p, correct2n;
+                            long double correctp = f->dfunc.f_fpf( 0.0, &correct2p );
+                            long double correctn = f->dfunc.f_fpf( -0.0, &correct2n );
+                            // NOTE(review): err2p/err2n compare `test` (the
+                            // first result) against the second reference
+                            // result; `test2` may have been intended --
+                            // confirm before changing, as it affects what
+                            // the test accepts.
+                            float errp = Bruteforce_Ulp_Error_Double( test, correctp  );
+                            float err2p = Bruteforce_Ulp_Error_Double( test, correct2p  );
+                            float errn = Bruteforce_Ulp_Error_Double( test, correctn  );
+                            float err2n = Bruteforce_Ulp_Error_Double( test, correct2n  );
+                            fail =  fail && ((!(fabsf(errp) <= f->double_ulps)) && (!(fabsf(err2p) <= f->double_ulps))    &&
+                                            ((!(fabsf(errn) <= f->double_ulps)) && (!(fabsf(err2n) <= f->double_ulps))) );
+                            // Keep the smallest error found for the report.
+                            if( fabsf( errp ) < fabsf(err ) )
+                                err = errp;
+                            if( fabsf( errn ) < fabsf(err ) )
+                                err = errn;
+                            if( fabsf( err2p ) < fabsf(err2 ) )
+                                err2 = err2p;
+                            if( fabsf( err2n ) < fabsf(err2 ) )
+                                err2 = err2n;
+
+                            // retry per section 6.5.3.4
+                            if( IsDoubleResultSubnormal( correctp, f->double_ulps ) || IsDoubleResultSubnormal( correctn, f->double_ulps ) )
+                            {
+                                if( IsDoubleResultSubnormal( correct2p, f->double_ulps ) || IsDoubleResultSubnormal( correct2n, f->double_ulps ) )
+                                {
+                                    fail = fail && !( test == 0.0f && test2 == 0.0f);
+                                    if( ! fail )
+                                        err = err2 = 0.0f;
+                                }
+                                else
+                                {
+                                    fail = fail && ! (test == 0.0f && fabsf(err2) <= f->double_ulps);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                            else if( IsDoubleResultSubnormal( correct2p, f->double_ulps ) || IsDoubleResultSubnormal( correct2n, f->double_ulps ) )
+                            {
+                                fail = fail && ! (test2 == 0.0f && (fabsf(err) <= f->double_ulps));
+                                if( ! fail )
+                                    err2 = 0.0f;
+                            }
+                        }
+                    }
+                    // Track the largest error of each result and the input
+                    // that produced it, for the summary line.
+                    if( fabsf(err ) > maxError0 )
+                    {
+                        maxError0 = fabsf(err);
+                        maxErrorVal0 = s[j];
+                    }
+                    if( fabsf(err2 ) > maxError1 )
+                    {
+                        maxError1 = fabsf(err2);
+                        maxErrorVal1 = s[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %sD%s: {%f, %f} ulp error at %.13la: *{%.13la, %.13la} vs. {%.13la, %.13la}\n", f->name, sizeNames[k], err, err2, ((double*) gIn)[j], ((double*) gOut_Ref)[j], ((double*) gOut_Ref2)[j], test, test2 );
+                      error = -1;
+                      goto exit;
+                    }
+                }
+            }
+        }
+
+        // Progress output, emitted when the low 28 bits of i are all zero.
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               // i and step are uint64_t and bufferSize is size_t; cast and
+               // use matching conversions so the variadic call is well
+               // defined.
+               vlog("base:%14llu step:%10llu  bufferSize:%10zu \n", (unsigned long long) i, (unsigned long long) step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double*) gIn;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            p[j] = DoubleFromUInt32(genrand_int32(d) );
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            // Release the kernels and programs on this failure path as well.
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j]) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report the mean instead of the minimum when requested.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        // The loop above only ends normally once j has reached
+        // gMaxVectorSizeIndex, so this padding loop prints nothing in that
+        // case; it is kept as in the original.
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0, maxErrorVal1 );
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+
+

diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp
deleted file mode 100644
index 71dd4f4..0000000
--- a/test_conformance/math_brute_force/unary_two_results_double.cpp
+++ /dev/null

@@ -1,446 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-// Assembles the OpenCL C source of the two-output double-precision test
-// kernel for one vector size and hands it to MakeKernel.
-//
-//   name        - name of the function under test, spliced into the source.
-//   vectorSize  - index into sizeNames / sizeValues.
-//   k, p        - forwarded to MakeKernel to receive the kernel and program.
-//   relaxedMode - forwarded to MakeKernel unchanged.
-//
-// Returns whatever MakeKernel returns.
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    // Source fragments used for every vector width except 3: the kernel
-    // indexes the vector-typed buffers directly.
-    const char *genericSource[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double",
-        sizeNames[vectorSize],
-        "* out, __global double",
-        sizeNames[vectorSize],
-        "* out2, __global double",
-        sizeNames[vectorSize],
-        "* in )\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   out[i] = ",
-        name,
-        "( in[i], out2 + i );\n"
-        "}\n"
-    };
-
-    // Source fragments for width 3: buffers are scalar, full vectors go
-    // through vload3/vstore3 and the last work-item handles the leftover
-    // one or two elements individually.
-    const char *vec3Source[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* out2, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       double3 iout = NAN;\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( iout, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 iout = NAN;\n"
-        "       double3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = iout.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = iout.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    // Pick the fragment table that matches the requested width.
-    const bool isVec3 = (sizeValues[vectorSize] == 3);
-    const char **source = isVec3 ? vec3Source : genericSource;
-    const size_t fragmentCount = isVec3
-        ? sizeof(vec3Source) / sizeof(vec3Source[0])
-        : sizeof(genericSource) / sizeof(genericSource[0]);
-
-    // Kernel entry-point name: "math_kernel" followed by the size suffix.
-    char kernelName[32];
-    snprintf(kernelName, sizeof(kernelName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(source, (cl_uint)fragmentCount, kernelName, k, p,
-                      relaxedMode);
-}
-
-// Arguments handed to BuildKernelFn through ThreadPool_Do's opaque pointer.
-// Field order matters: instances are created with aggregate initialisation.
-struct BuildKernelInfo
-{
-    // Index of the first vector size to build; the job id is added to it.
-    cl_uint offset;
-    // Output array; slot i receives the kernel built for vector size i.
-    cl_kernel *kernels;
-    // Output array; slot i receives the program built for vector size i.
-    cl_program *programs;
-    // Name of the function under test as spelled in kernel source.
-    const char *nameInCode;
-    // Whether to build with -cl-fast-relaxed-math.
-    bool relaxedMode;
-};
-
-// ThreadPool_Do job callback: builds the kernel for the vector size at index
-// (info->offset + job_id) and stores the kernel and program in the matching
-// slots of the arrays referenced by the BuildKernelInfo passed in `p`.
-// Returns the result of BuildKernel.
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    const BuildKernelInfo *job = static_cast<const BuildKernelInfo *>(p);
-    const cl_uint sizeIndex = job->offset + job_id;
-    cl_kernel *kernelSlot = job->kernels + sizeIndex;
-    cl_program *programSlot = job->programs + sizeIndex;
-    return BuildKernel(job->nameInCode, sizeIndex, kernelSlot, programSlot,
-                       job->relaxedMode);
-}
-
-int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError0 = 0.0f;
-    float maxError1 = 0.0f;
-    int ftz = f->ftz || gForceFTZ;
-    double maxErrorVal0 = 0.0f;
-    double maxErrorVal1 = 0.0f;
-    uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
-    int scale =
-        (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(cl_double)) + 1);
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    Force64BitFPUPrecision();
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        double *p = (double *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-                p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-                p[j] = DoubleFromUInt32((uint32_t)i + j);
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        double *r = (double *)gOut_Ref;
-        double *r2 = (double *)gOut_Ref2;
-        double *s = (double *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-        {
-            long double dd;
-            r[j] = (double)f->dfunc.f_fpf(s[j], &dd);
-            r2[j] = (double)dd;
-        }
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data
-        uint64_t *t = (uint64_t *)gOut_Ref;
-        uint64_t *t2 = (uint64_t *)gOut_Ref2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint64_t *q = (uint64_t *)(gOut[k]);
-                uint64_t *q2 = (uint64_t *)(gOut2[k]);
-
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j] || t2[j] != q2[j])
-                {
-                    double test = ((double *)q)[j];
-                    double test2 = ((double *)q2)[j];
-                    long double correct2;
-                    long double correct = f->dfunc.f_fpf(s[j], &correct2);
-                    float err = Bruteforce_Ulp_Error_Double(test, correct);
-                    float err2 = Bruteforce_Ulp_Error_Double(test2, correct2);
-                    int fail = !(fabsf(err) <= f->double_ulps
-                                 && fabsf(err2) <= f->double_ulps);
-                    if (ftz)
-                    {
-                        // retry per section 6.5.3.2
-                        if (IsDoubleResultSubnormal(correct, f->double_ulps))
-                        {
-                            if (IsDoubleResultSubnormal(correct2,
-                                                        f->double_ulps))
-                            {
-                                fail = fail && !(test == 0.0f && test2 == 0.0f);
-                                if (!fail)
-                                {
-                                    err = 0.0f;
-                                    err2 = 0.0f;
-                                }
-                            }
-                            else
-                            {
-                                fail = fail
-                                    && !(test == 0.0f
-                                         && fabsf(err2) <= f->double_ulps);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                        else if (IsDoubleResultSubnormal(correct2,
-                                                         f->double_ulps))
-                        {
-                            fail = fail
-                                && !(test2 == 0.0f
-                                     && fabsf(err) <= f->double_ulps);
-                            if (!fail) err2 = 0.0f;
-                        }
-
-                        // retry per section 6.5.3.3
-                        if (IsDoubleSubnormal(s[j]))
-                        {
-                            long double correct2p, correct2n;
-                            long double correctp =
-                                f->dfunc.f_fpf(0.0, &correct2p);
-                            long double correctn =
-                                f->dfunc.f_fpf(-0.0, &correct2n);
-                            float errp =
-                                Bruteforce_Ulp_Error_Double(test, correctp);
-                            float err2p =
-                                Bruteforce_Ulp_Error_Double(test, correct2p);
-                            float errn =
-                                Bruteforce_Ulp_Error_Double(test, correctn);
-                            float err2n =
-                                Bruteforce_Ulp_Error_Double(test, correct2n);
-                            fail = fail
-                                && ((!(fabsf(errp) <= f->double_ulps))
-                                    && (!(fabsf(err2p) <= f->double_ulps))
-                                    && ((!(fabsf(errn) <= f->double_ulps))
-                                        && (!(fabsf(err2n)
-                                              <= f->double_ulps))));
-                            if (fabsf(errp) < fabsf(err)) err = errp;
-                            if (fabsf(errn) < fabsf(err)) err = errn;
-                            if (fabsf(err2p) < fabsf(err2)) err2 = err2p;
-                            if (fabsf(err2n) < fabsf(err2)) err2 = err2n;
-
-                            // retry per section 6.5.3.4
-                            if (IsDoubleResultSubnormal(correctp,
-                                                        f->double_ulps)
-                                || IsDoubleResultSubnormal(correctn,
-                                                           f->double_ulps))
-                            {
-                                if (IsDoubleResultSubnormal(correct2p,
-                                                            f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct2n,
-                                                               f->double_ulps))
-                                {
-                                    fail = fail
-                                        && !(test == 0.0f && test2 == 0.0f);
-                                    if (!fail) err = err2 = 0.0f;
-                                }
-                                else
-                                {
-                                    fail = fail
-                                        && !(test == 0.0f
-                                             && fabsf(err2) <= f->double_ulps);
-                                    if (!fail) err = 0.0f;
-                                }
-                            }
-                            else if (IsDoubleResultSubnormal(correct2p,
-                                                             f->double_ulps)
-                                     || IsDoubleResultSubnormal(correct2n,
-                                                                f->double_ulps))
-                            {
-                                fail = fail
-                                    && !(test2 == 0.0f
-                                         && (fabsf(err) <= f->double_ulps));
-                                if (!fail) err2 = 0.0f;
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError0)
-                    {
-                        maxError0 = fabsf(err);
-                        maxErrorVal0 = s[j];
-                    }
-                    if (fabsf(err2) > maxError1)
-                    {
-                        maxError1 = fabsf(err2);
-                        maxErrorVal1 = s[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error(
-                            "\nERROR: %sD%s: {%f, %f} ulp error at %.13la: "
-                            "*{%.13la, %.13la} vs. {%.13la, %.13la}\n",
-                            f->name, sizeNames[k], err, err2,
-                            ((double *)gIn)[j], ((double *)gOut_Ref)[j],
-                            ((double *)gOut_Ref2)[j], test, test2);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
-             maxErrorVal1);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp
deleted file mode 100644
index 4a375ce..0000000
--- a/test_conformance/math_brute_force/unary_two_results_float.cpp
+++ /dev/null

@@ -1,578 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* out2, __global float",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i], out2 + i );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* out2, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 iout = NAN;\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( iout, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 iout = NAN;\n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = iout.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = iout.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-}
-
-int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError0 = 0.0f;
-    float maxError1 = 0.0f;
-    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    float maxErrorVal0 = 0.0f;
-    float maxErrorVal1 = 0.0f;
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-    int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1);
-    cl_uchar overflow[BUFFER_SIZE / sizeof(float)];
-    int isFract = 0 == strcmp("fract", f->nameInCode);
-    int skipNanInf = isFract && !gInfNanSupport;
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    float float_ulps = getAllowedUlpError(f, relaxedMode);
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        uint32_t *p = (uint32_t *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            {
-                p[j] = (uint32_t)i + j * scale;
-                if (relaxedMode && strcmp(f->name, "sincos") == 0)
-                {
-                    float pj = *(float *)&p[j];
-                    if (fabs(pj) > M_PI) ((float *)p)[j] = NAN;
-                }
-            }
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            {
-                p[j] = (uint32_t)i + j;
-                if (relaxedMode && strcmp(f->name, "sincos") == 0)
-                {
-                    float pj = *(float *)&p[j];
-                    if (fabs(pj) > M_PI) ((float *)p)[j] = NAN;
-                }
-            }
-        }
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        FPU_mode_type oldMode;
-        RoundingMode oldRoundMode = kRoundToNearestEven;
-        if (isFract)
-        {
-            // Calculate the correctly rounded reference result
-            memset(&oldMode, 0, sizeof(oldMode));
-            if (ftz) ForceFTZ(&oldMode);
-
-            // Set the rounding mode to match the device
-            if (gIsInRTZMode)
-                oldRoundMode = set_round(kRoundTowardZero, kfloat);
-        }
-
-        // Calculate the correctly rounded reference result
-        float *r = (float *)gOut_Ref;
-        float *r2 = (float *)gOut_Ref2;
-        float *s = (float *)gIn;
-
-        if (skipNanInf)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            {
-                double dd;
-                feclearexcept(FE_OVERFLOW);
-
-                if (relaxedMode)
-                    r[j] = (float)f->rfunc.f_fpf(s[j], &dd);
-                else
-                    r[j] = (float)f->func.f_fpf(s[j], &dd);
-
-                r2[j] = (float)dd;
-                overflow[j] =
-                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
-            }
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            {
-                double dd;
-                if (relaxedMode)
-                    r[j] = (float)f->rfunc.f_fpf(s[j], &dd);
-                else
-                    r[j] = (float)f->func.f_fpf(s[j], &dd);
-
-                r2[j] = (float)dd;
-            }
-        }
-
-        if (isFract && ftz) RestoreFPState(&oldMode);
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting)
-        {
-            if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
-            break;
-        }
-
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        uint32_t *t2 = (uint32_t *)gOut_Ref2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)gOut[k];
-                uint32_t *q2 = (uint32_t *)gOut2[k];
-
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j] || t2[j] != q2[j])
-                {
-                    double correct, correct2;
-                    float err, err2;
-                    float test = ((float *)q)[j];
-                    float test2 = ((float *)q2)[j];
-
-                    if (relaxedMode)
-                        correct = f->rfunc.f_fpf(s[j], &correct2);
-                    else
-                        correct = f->func.f_fpf(s[j], &correct2);
-
-                    // Per section 10 paragraph 6, accept any result if an input
-                    // or output is a infinity or NaN or overflow
-                    if (relaxedMode || skipNanInf)
-                    {
-                        if (skipNanInf && overflow[j]) continue;
-                        // Note: no double rounding here.  Reference functions
-                        // calculate in single precision.
-                        if (IsFloatInfinity(correct) || IsFloatNaN(correct)
-                            || IsFloatInfinity(correct2) || IsFloatNaN(correct2)
-                            || IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
-                            continue;
-                    }
-
-                    typedef int (*CheckForSubnormal)(
-                        double, float); // If we are in fast relaxed math, we
-                                        // have a different calculation for the
-                                        // subnormal threshold.
-                    CheckForSubnormal isFloatResultSubnormalPtr;
-                    if (relaxedMode)
-                    {
-                        err = Abs_Error(test, correct);
-                        err2 = Abs_Error(test2, correct2);
-                        isFloatResultSubnormalPtr =
-                            &IsFloatResultSubnormalAbsError;
-                    }
-                    else
-                    {
-                        err = Ulp_Error(test, correct);
-                        err2 = Ulp_Error(test2, correct2);
-                        isFloatResultSubnormalPtr = &IsFloatResultSubnormal;
-                    }
-                    int fail = !(fabsf(err) <= float_ulps
-                                 && fabsf(err2) <= float_ulps);
-
-                    if (ftz)
-                    {
-                        // retry per section 6.5.3.2
-                        if ((*isFloatResultSubnormalPtr)(correct, float_ulps))
-                        {
-                            if ((*isFloatResultSubnormalPtr)(correct2,
-                                                             float_ulps))
-                            {
-                                fail = fail && !(test == 0.0f && test2 == 0.0f);
-                                if (!fail)
-                                {
-                                    err = 0.0f;
-                                    err2 = 0.0f;
-                                }
-                            }
-                            else
-                            {
-                                fail = fail
-                                    && !(test == 0.0f
-                                         && fabsf(err2) <= float_ulps);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                        else if ((*isFloatResultSubnormalPtr)(correct2,
-                                                              float_ulps))
-                        {
-                            fail = fail
-                                && !(test2 == 0.0f && fabsf(err) <= float_ulps);
-                            if (!fail) err2 = 0.0f;
-                        }
-
-
-                        // retry per section 6.5.3.3
-                        if (IsFloatSubnormal(s[j]))
-                        {
-                            double correctp, correctn;
-                            double correct2p, correct2n;
-                            float errp, err2p, errn, err2n;
-
-                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
-                            if (relaxedMode)
-                            {
-                                correctp = f->rfunc.f_fpf(0.0, &correct2p);
-                                correctn = f->rfunc.f_fpf(-0.0, &correct2n);
-                            }
-                            else
-                            {
-                                correctp = f->func.f_fpf(0.0, &correct2p);
-                                correctn = f->func.f_fpf(-0.0, &correct2n);
-                            }
-
-                            // Per section 10 paragraph 6, accept any result if
-                            // an input or output is a infinity or NaN or
-                            // overflow
-                            if (skipNanInf)
-                            {
-                                if (fetestexcept(FE_OVERFLOW)) continue;
-
-                                // Note: no double rounding here.  Reference
-                                // functions calculate in single precision.
-                                if (IsFloatInfinity(correctp)
-                                    || IsFloatNaN(correctp)
-                                    || IsFloatInfinity(correctn)
-                                    || IsFloatNaN(correctn)
-                                    || IsFloatInfinity(correct2p)
-                                    || IsFloatNaN(correct2p)
-                                    || IsFloatInfinity(correct2n)
-                                    || IsFloatNaN(correct2n))
-                                    continue;
-                            }
-
-                            if (relaxedMode)
-                            {
-                                errp = Abs_Error(test, correctp);
-                                err2p = Abs_Error(test, correct2p);
-                                errn = Abs_Error(test, correctn);
-                                err2n = Abs_Error(test, correct2n);
-                            }
-                            else
-                            {
-                                errp = Ulp_Error(test, correctp);
-                                err2p = Ulp_Error(test, correct2p);
-                                errn = Ulp_Error(test, correctn);
-                                err2n = Ulp_Error(test, correct2n);
-                            }
-
-                            fail = fail
-                                && ((!(fabsf(errp) <= float_ulps))
-                                    && (!(fabsf(err2p) <= float_ulps))
-                                    && ((!(fabsf(errn) <= float_ulps))
-                                        && (!(fabsf(err2n) <= float_ulps))));
-                            if (fabsf(errp) < fabsf(err)) err = errp;
-                            if (fabsf(errn) < fabsf(err)) err = errn;
-                            if (fabsf(err2p) < fabsf(err2)) err2 = err2p;
-                            if (fabsf(err2n) < fabsf(err2)) err2 = err2n;
-
-                            // retry per section 6.5.3.4
-                            if ((*isFloatResultSubnormalPtr)(correctp,
-                                                             float_ulps)
-                                || (*isFloatResultSubnormalPtr)(correctn,
-                                                                float_ulps))
-                            {
-                                if ((*isFloatResultSubnormalPtr)(correct2p,
-                                                                 float_ulps)
-                                    || (*isFloatResultSubnormalPtr)(correct2n,
-                                                                    float_ulps))
-                                {
-                                    fail = fail
-                                        && !(test == 0.0f && test2 == 0.0f);
-                                    if (!fail) err = err2 = 0.0f;
-                                }
-                                else
-                                {
-                                    fail = fail
-                                        && !(test == 0.0f
-                                             && fabsf(err2) <= float_ulps);
-                                    if (!fail) err = 0.0f;
-                                }
-                            }
-                            else if ((*isFloatResultSubnormalPtr)(correct2p,
-                                                                  float_ulps)
-                                     || (*isFloatResultSubnormalPtr)(
-                                         correct2n, float_ulps))
-                            {
-                                fail = fail
-                                    && !(test2 == 0.0f
-                                         && (fabsf(err) <= float_ulps));
-                                if (!fail) err2 = 0.0f;
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError0)
-                    {
-                        maxError0 = fabsf(err);
-                        maxErrorVal0 = s[j];
-                    }
-                    if (fabsf(err2) > maxError1)
-                    {
-                        maxError1 = fabsf(err2);
-                        maxErrorVal1 = s[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error("\nERROR: %s%s: {%f, %f} ulp error at %a: "
-                                   "*{%a, %a} vs. {%a, %a}\n",
-                                   f->name, sizeNames[k], err, err2,
-                                   ((float *)gIn)[j], ((float *)gOut_Ref)[j],
-                                   ((float *)gOut_Ref2)[j], test, test2);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-
-        if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
-
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
-             maxErrorVal1);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/unary_two_results_i.cpp b/test_conformance/math_brute_force/unary_two_results_i.cpp
new file mode 100644
index 0000000..f5cc1e3
--- /dev/null
+++ b/test_conformance/math_brute_force/unary_two_results_i.cpp

@@ -0,0 +1,801 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <limits.h>
+#include <string.h>
+#include "FunctionList.h"
+
+// Entry points for this test family; both are defined later in this file.
+int TestFunc_FloatI_Float(const Func *f, MTdata);
+int TestFunc_DoubleI_Double(const Func *f, MTdata);
+
+// vtbl instance for this family: initialised with the family name followed
+// by the single-precision and double-precision entry points.
+extern const vtbl _unary_two_results_i = { "unary_two_results_i",
+                                           TestFunc_FloatI_Float,
+                                           TestFunc_DoubleI_Double };
+
+// Kernel builders (float and double flavours); defined directly below.
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+
+// Assemble the single-precision OpenCL kernel source for the function `name`
+// at the vector size selected by `vectorSize` (an index into sizeNames /
+// sizeValues) and hand it to MakeKernel, which fills in *k and *p.
+// Returns whatever MakeKernel returns.
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+{
+    // Generic kernel: one work-item per vector element,
+    // out[i] = name( in[i], out2 + i ).
+    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i], out2 + i );\n"
+                            "}\n"
+                        };
+    // 3-element vector kernel: buffers are addressed as scalar arrays through
+    // vload3/vstore3. The last work-item takes the else branch and handles
+    // only one or two leftover scalars (chosen by i & 1), padding the unused
+    // lanes with NAN and storing only the lanes that were really loaded.
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global int* out2, __global float* in)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       int3 iout = INT_MIN;\n"
+                            "       f0 = ", name, "( f0, &iout );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "       vstore3( iout, 0, out2 + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       int3 iout = INT_MIN;\n"
+                            "       float3 f0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( f0, &iout );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               out2[3*i+1] = iout.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               out2[3*i] = iout.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    // Default to the generic source; switch to the vec3 source only when the
+    // selected vector width is exactly 3.
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+
+    // Kernel entry-point name must match the name emitted in the source above.
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);
+
+}
+
+// Double-precision counterpart of BuildKernel: assemble the OpenCL kernel
+// source for `name` at the vector size selected by `vectorSize` (an index
+// into sizeNames / sizeValues), with the cl_khr_fp64 pragma prepended, and
+// hand it to MakeKernel, which fills in *k and *p.
+// Returns whatever MakeKernel returns.
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+{
+    // Generic kernel: one work-item per vector element,
+    // out[i] = name( in[i], out2 + i ).
+    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i], out2 + i );\n"
+                            "}\n"
+                        };
+    // 3-element vector kernel: buffers are addressed as scalar arrays through
+    // vload3/vstore3. The last work-item takes the else branch and handles
+    // only one or two leftover scalars (chosen by i & 1), padding the unused
+    // lanes with NAN and storing only the lanes that were really loaded.
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global int* out2, __global double* in)\n"
+                        "{\n"
+                        "   size_t i = get_global_id(0);\n"
+                        "   if( i + 1 < get_global_size(0) )\n"
+                        "   {\n"
+                        "       double3 f0 = vload3( 0, in + 3 * i );\n"
+                        "       int3 iout = INT_MIN;\n"
+                        "       f0 = ", name, "( f0, &iout );\n"
+                        "       vstore3( f0, 0, out + 3*i );\n"
+                        "       vstore3( iout, 0, out2 + 3*i );\n"
+                        "   }\n"
+                        "   else\n"
+                        "   {\n"
+                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                        "       int3 iout = INT_MIN;\n"
+                        "       double3 f0;\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 1:\n"
+                        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
+                        "               break;\n"
+                        "           case 0:\n"
+                        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                        "               break;\n"
+                        "       }\n"
+                        "       f0 = ", name, "( f0, &iout );\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 0:\n"
+                        "               out[3*i+1] = f0.y; \n"
+                        "               out2[3*i+1] = iout.y; \n"
+                        "               // fall through\n"
+                        "           case 1:\n"
+                        "               out[3*i] = f0.x; \n"
+                        "               out2[3*i] = iout.x; \n"
+                        "               break;\n"
+                        "       }\n"
+                        "   }\n"
+                        "}\n"
+                    };
+    // Default to the generic source; switch to the vec3 source only when the
+    // selected vector width is exactly 3.
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+
+    // Kernel entry-point name must match the name emitted in the source above.
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);
+
+}
+
+// Job description passed (as void*) to the BuildKernel_FloatFn /
+// BuildKernel_DoubleFn callbacks; each job builds the kernel at index
+// offset + job_id.
+typedef struct BuildKernelInfo
+{
+    cl_uint     offset;            // the first vector size to build
+    cl_kernel   *kernels;          // output array; slot [offset + job_id] receives the kernel
+    cl_program  *programs;         // output array; slot [offset + job_id] receives the program
+    const char  *nameInCode;       // function name spliced into the generated kernel source
+}BuildKernelInfo;
+
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+// Job callback: builds the single-precision kernel for vector-size index
+// (info->offset + job_id) and stores the kernel/program in the matching
+// slots of the arrays referenced by the BuildKernelInfo passed through p.
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *const job = static_cast<BuildKernelInfo *>( p );
+    const cl_uint sizeIndex = job->offset + job_id;
+    cl_kernel *const kernelSlot = &job->kernels[sizeIndex];
+    cl_program *const programSlot = &job->programs[sizeIndex];
+
+    return BuildKernel( job->nameInCode, sizeIndex, kernelSlot, programSlot );
+}
+
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+// Job callback: builds the double-precision kernel for vector-size index
+// (info->offset + job_id) and stores the kernel/program in the matching
+// slots of the arrays referenced by the BuildKernelInfo passed through p.
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *const job = static_cast<BuildKernelInfo *>( p );
+    const cl_uint sizeIndex = job->offset + job_id;
+    cl_kernel *const kernelSlot = &job->kernels[sizeIndex];
+    cl_program *const programSlot = &job->programs[sizeIndex];
+
+    return BuildKernelDouble( job->nameInCode, sizeIndex, kernelSlot, programSlot );
+}
+
+cl_ulong  abs_cl_long( cl_long i );
+// Absolute value of a signed 64-bit integer, returned as unsigned.
+//
+// The magnitude is computed in unsigned arithmetic so that the most negative
+// cl_long (whose magnitude does not fit in cl_long) yields 2^63 instead of
+// triggering signed overflow, and so that no right shift of a negative value
+// is needed.
+cl_ulong  abs_cl_long( cl_long i )
+{
+    const cl_ulong bits = (cl_ulong) i;
+    // Unsigned negation wraps modulo 2^64, which is exactly |i| for i < 0.
+    return ( i < 0 ) ? ( (cl_ulong) 0 - bits ) : bits;
+}
+
+// Brute-force test of a single-precision function that returns a float and
+// writes a second, integer result through a pointer argument.
+//
+// For each batch of input bit patterns the function:
+//   1. fills gIn and uploads it, and pre-fills every output buffer with a
+//      garbage pattern;
+//   2. runs the kernel for every vector size in
+//      [gMinVectorSizeIndex, gMaxVectorSizeIndex);
+//   3. computes the reference results on the host with f->func.f_fpI;
+//   4. reads the device results back and compares them, allowing the
+//      flush-to-zero retries when `ftz` is set.
+// When gMeasureTimes is set, a timing pass over random inputs follows.
+//
+// Parameters:
+//   f - descriptor of the function under test (name, nameInCode, ulps, ftz,
+//       host reference function).
+//   d - random-number state, used only by the timing pass.
+// Returns 0 on success, -1 on a correctness failure, or the OpenCL /
+// ThreadPool error code of the call that failed.
+int TestFunc_FloatI_Float(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int64_t maxError2 = 0;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    float maxErrorVal = 0.0f;
+    float maxErrorVal2 = 0.0f;   // tracked alongside maxError2; not reported below
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    float float_ulps;
+    uint64_t step = bufferSize / sizeof( float );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
+    cl_ulong  maxiError;
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    if( gIsEmbedded )
+        float_ulps = f->float_embedded_ulps;
+    else
+        float_ulps = f->float_ulps;
+
+    // An infinite ulp tolerance also lifts the limit on the integer result;
+    // otherwise the integer result must match exactly.
+    maxiError = float_ulps == INFINITY ? CL_ULONG_MAX : 0;
+
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        return error;
+
+    // From here on the kernels/programs exist, so every failure path must go
+    // through `exit` to release them.
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        if( gWimpyMode )
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = (uint32_t) i + j * scale;
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = (uint32_t) i + j;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+
+            memset_pattern4(gOut2[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            // Round up so a partial trailing vector still gets a work-item.
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        //Calculate the correctly rounded reference result
+        float *r = (float *)gOut_Ref;
+        int *r2 = (int *)gOut_Ref2;
+        float *s = (float *)gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            r[j] = (float) f->func.f_fpI( s[j], r2+j );
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray2 failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        if( gSkipCorrectnessTesting )
+            break;
+
+        //Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        int32_t *t2 = (int32_t *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t *)(gOut[k]);
+                int32_t *q2 = (int32_t *)(gOut2[k]);
+
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] || t2[j] != q2[j] )
+                {
+                    float test = ((float*) q)[j];
+                    int correct2 = INT_MIN;
+                    double correct = f->func.f_fpI( s[j], &correct2 );
+                    float err = Ulp_Error( test, correct );
+                    cl_long iErr = (int64_t) q2[j] - (int64_t) correct2;
+                    int fail = ! (fabsf(err) <= float_ulps && abs_cl_long( iErr ) <= maxiError );
+                    if( ftz )
+                    {
+                        // retry per section 6.5.3.2
+                        if( IsFloatResultSubnormal(correct, float_ulps ) )
+                        {
+                            fail = fail && ! ( test == 0.0f && iErr == 0 );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if( IsFloatSubnormal( s[j] ) )
+                        {
+                            // Reference results for the input flushed to +0 / -0.
+                            int correct5, correct6;
+                            double correct3 = f->func.f_fpI( 0.0, &correct5 );
+                            double correct4 = f->func.f_fpI( -0.0, &correct6 );
+                            float err2 = Ulp_Error( test, correct3  );
+                            float err3 = Ulp_Error( test, correct4  );
+                            cl_long iErr2 = (long long) q2[j] - (long long) correct5;
+                            cl_long iErr3 = (long long) q2[j] - (long long) correct6;
+
+                            // Did +0 work?
+                            if( fabsf(err2) <= float_ulps && abs_cl_long( iErr2 ) <= maxiError )
+                            {
+                                err = err2;
+                                iErr = iErr2;
+                                fail = 0;
+                            }
+                            // Did -0 work?
+                            else if(fabsf(err3) <= float_ulps && abs_cl_long( iErr3 ) <= maxiError)
+                            {
+                                err = err3;
+                                iErr = iErr3;
+                                fail = 0;
+                            }
+
+                            // retry per section 6.5.3.4
+                            // The floating-point references for the zero
+                            // inputs are correct3 and correct4; correct2 is
+                            // the integer result and must not be tested here.
+                            if( fail && (IsFloatResultSubnormal(correct3, float_ulps ) || IsFloatResultSubnormal(correct4, float_ulps )) )
+                            {
+                                fail = fail && ! ( test == 0.0f && (abs_cl_long( iErr2 ) <= maxiError || abs_cl_long( iErr3 ) <= maxiError) );
+                                if( ! fail )
+                                {
+                                    err = 0.0f;
+                                    iErr = 0;
+                                }
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                    }
+                    if( llabs(iErr) > maxError2 )
+                    {
+                        maxError2 = llabs(iErr );
+                        maxErrorVal2 = s[j];
+                    }
+
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %s%s: {%f, %d} ulp error at %a: *{%a, %d} vs. {%a, %d}\n", f->name, sizeNames[k], err, (int) iErr, ((float*) gIn)[j], ((float*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], test, q2[j] );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               // i is < 2^32 here (loop bound), so the narrowing cast is
+               // lossless; the casts make the arguments match the specifiers.
+               vlog("base:%14u step:%10llu  bufferSize:%10zu \n", (unsigned int) i, (unsigned long long) step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report either the mean or the best of the PERF_LOOP_COUNT runs.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t{%8.2f, %lld} @ %a", maxError, (long long) maxError2, maxErrorVal );
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+// Brute-force test of a double-precision function that returns a double and
+// writes a second, integer result through a pointer argument.
+//
+// For each batch of inputs (generated from 32-bit patterns through
+// DoubleFromUInt32) the function:
+//   1. fills gIn and uploads it, and pre-fills every output buffer with a
+//      garbage pattern;
+//   2. runs the kernel for every vector size in
+//      [gMinVectorSizeIndex, gMaxVectorSizeIndex);
+//   3. computes the reference results on the host with f->dfunc.f_fpI;
+//   4. reads the device results back and compares them, allowing the
+//      flush-to-zero retries when `ftz` is set.
+// When gMeasureTimes is set, a timing pass over random inputs follows.
+//
+// Parameters:
+//   f - descriptor of the function under test (name, nameInCode, double_ulps,
+//       ftz, host reference function).
+//   d - random-number state, used only by the timing pass.
+// Returns 0 on success, -1 on a correctness failure, or the OpenCL /
+// ThreadPool error code of the call that failed.
+int TestFunc_DoubleI_Double(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int64_t maxError2 = 0;
+    int ftz = f->ftz || gForceFTZ;
+    double maxErrorVal = 0.0f;
+    double maxErrorVal2 = 0.0f;   // tracked alongside maxError2; not reported below
+    // An infinite ulp tolerance also lifts the limit on the integer result;
+    // otherwise the integer result must match exactly.
+    cl_ulong  maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+
+    uint64_t step = bufferSize / sizeof( double );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1);
+
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+
+    Force64BitFPUPrecision();
+
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                &build_info ) ))
+    {
+        return error;
+    }
+
+    // From here on the kernels/programs exist, so every failure path must go
+    // through `exit` to release them.
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        if( gWimpyMode )
+        {
+            for( j = 0; j < bufferSize / sizeof( double ); j++ )
+                p[j] = DoubleFromUInt32((uint32_t) i + j * scale);
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( double ); j++ )
+                p[j] = DoubleFromUInt32((uint32_t) i + j);
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+
+            memset_pattern4(gOut2[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            // Round up so a partial trailing vector still gets a work-item.
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        //Calculate the correctly rounded reference result
+        double *r = (double *)gOut_Ref;
+        int *r2 = (int *)gOut_Ref2;
+        double *s = (double *)gIn;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            r[j] = (double) f->dfunc.f_fpI( s[j], r2+j );
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray2 failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        if( gSkipCorrectnessTesting )
+            break;
+
+        //Verify data
+        uint64_t *t = (uint64_t *)gOut_Ref;
+        int32_t *t2 = (int32_t *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint64_t *q = (uint64_t *)(gOut[k]);
+                int32_t *q2 = (int32_t *)(gOut2[k]);
+
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] || t2[j] != q2[j] )
+                {
+                    double test = ((double*) q)[j];
+                    int correct2 = INT_MIN;
+                    long double correct = f->dfunc.f_fpI( s[j], &correct2 );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
+                    cl_long iErr = (long long) q2[j] - (long long) correct2;
+                    int fail = ! (fabsf(err) <= f->double_ulps && abs_cl_long( iErr ) <= maxiError );
+                    if( ftz )
+                    {
+                        // retry per section 6.5.3.2
+                        if( IsDoubleResultSubnormal(correct, f->double_ulps ) )
+                        {
+                            fail = fail && ! ( test == 0.0f && iErr == 0 );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if( IsDoubleSubnormal( s[j] ) )
+                        {
+                            // Reference results for the input flushed to +0 / -0.
+                            int correct5, correct6;
+                            long double correct3 = f->dfunc.f_fpI( 0.0, &correct5 );
+                            long double correct4 = f->dfunc.f_fpI( -0.0, &correct6 );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                            cl_long iErr2 = (long long) q2[j] - (long long) correct5;
+                            cl_long iErr3 = (long long) q2[j] - (long long) correct6;
+
+                            // Did +0 work?
+                            if( fabsf(err2) <= f->double_ulps && abs_cl_long( iErr2 ) <= maxiError )
+                            {
+                                err = err2;
+                                iErr = iErr2;
+                                fail = 0;
+                            }
+                            // Did -0 work?
+                            else if(fabsf(err3) <= f->double_ulps && abs_cl_long( iErr3 ) <= maxiError)
+                            {
+                                err = err3;
+                                iErr = iErr3;
+                                fail = 0;
+                            }
+
+                            // retry per section 6.5.3.4
+                            // The floating-point references for the zero
+                            // inputs are correct3 and correct4; correct2 is
+                            // the integer result and must not be tested here.
+                            if( fail && (IsDoubleResultSubnormal( correct3, f->double_ulps ) || IsDoubleResultSubnormal( correct4, f->double_ulps )) )
+                            {
+                                fail = fail && ! ( test == 0.0f && (abs_cl_long( iErr2 ) <= maxiError || abs_cl_long( iErr3 ) <= maxiError) );
+                                if( ! fail )
+                                {
+                                    err = 0.0f;
+                                    iErr = 0;
+                                }
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                    }
+                    if( llabs(iErr) > maxError2 )
+                    {
+                        maxError2 = llabs(iErr );
+                        maxErrorVal2 = s[j];
+                    }
+
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %sD%s: {%f, %d} ulp error at %.13la: *{%.13la, %d} vs. {%.13la, %d}\n", f->name, sizeNames[k], err, (int) iErr, ((double*) gIn)[j], ((double*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], test, q2[j] );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               // i is < 2^32 here (loop bound), so the narrowing cast is
+               // lossless; the casts make the arguments match the specifiers.
+               vlog("base:%14u step:%10llu  bufferSize:%10zu \n", (unsigned int) i, (unsigned long long) step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report either the mean or the best of the PERF_LOOP_COUNT runs.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sd%s", f->name, sizeNames[j] );
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t{%8.2f, %lld} @ %a", maxError, (long long) maxError2, maxErrorVal );
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+
+

diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
deleted file mode 100644
index 14d1fb9..0000000
--- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp
+++ /dev/null

@@ -1,418 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <climits>
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global int",
-                        sizeNames[vectorSize],
-                        "* out2, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i], out2 + i );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global int* out2, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 iout = INT_MIN;\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( iout, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       int3 iout = INT_MIN;\n"
-        "       double3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = iout.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = iout.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-}
-
-static cl_ulong abs_cl_long(cl_long i)
-{
-    cl_long mask = i >> 63;
-    return (i ^ mask) - mask;
-}
-
-int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int64_t maxError2 = 0;
-    int ftz = f->ftz || gForceFTZ;
-    double maxErrorVal = 0.0f;
-    double maxErrorVal2 = 0.0f;
-    cl_ulong maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0;
-    uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
-    int scale =
-        (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(cl_double)) + 1);
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    Force64BitFPUPrecision();
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        double *p = (double *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-                p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-                p[j] = DoubleFromUInt32((uint32_t)i + j);
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        double *r = (double *)gOut_Ref;
-        int *r2 = (int *)gOut_Ref2;
-        double *s = (double *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-            r[j] = (double)f->dfunc.f_fpI(s[j], r2 + j);
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data
-        uint64_t *t = (uint64_t *)gOut_Ref;
-        int32_t *t2 = (int32_t *)gOut_Ref2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint64_t *q = (uint64_t *)(gOut[k]);
-                int32_t *q2 = (int32_t *)(gOut2[k]);
-
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j] || t2[j] != q2[j])
-                {
-                    double test = ((double *)q)[j];
-                    int correct2 = INT_MIN;
-                    long double correct = f->dfunc.f_fpI(s[j], &correct2);
-                    float err = Bruteforce_Ulp_Error_Double(test, correct);
-                    cl_long iErr = (long long)q2[j] - (long long)correct2;
-                    int fail = !(fabsf(err) <= f->double_ulps
-                                 && abs_cl_long(iErr) <= maxiError);
-                    if (ftz)
-                    {
-                        // retry per section 6.5.3.2
-                        if (IsDoubleResultSubnormal(correct, f->double_ulps))
-                        {
-                            fail = fail && !(test == 0.0f && iErr == 0);
-                            if (!fail) err = 0.0f;
-                        }
-
-                        // retry per section 6.5.3.3
-                        if (IsDoubleSubnormal(s[j]))
-                        {
-                            int correct5, correct6;
-                            long double correct3 =
-                                f->dfunc.f_fpI(0.0, &correct5);
-                            long double correct4 =
-                                f->dfunc.f_fpI(-0.0, &correct6);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct4);
-                            cl_long iErr2 =
-                                (long long)q2[j] - (long long)correct5;
-                            cl_long iErr3 =
-                                (long long)q2[j] - (long long)correct6;
-
-                            // Did +0 work?
-                            if (fabsf(err2) <= f->double_ulps
-                                && abs_cl_long(iErr2) <= maxiError)
-                            {
-                                err = err2;
-                                iErr = iErr2;
-                                fail = 0;
-                            }
-                            // Did -0 work?
-                            else if (fabsf(err3) <= f->double_ulps
-                                     && abs_cl_long(iErr3) <= maxiError)
-                            {
-                                err = err3;
-                                iErr = iErr3;
-                                fail = 0;
-                            }
-
-                            // retry per section 6.5.3.4
-                            if (fail
-                                && (IsDoubleResultSubnormal(correct2,
-                                                            f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct3,
-                                                               f->double_ulps)))
-                            {
-                                fail = fail
-                                    && !(test == 0.0f
-                                         && (abs_cl_long(iErr2) <= maxiError
-                                             || abs_cl_long(iErr3)
-                                                 <= maxiError));
-                                if (!fail)
-                                {
-                                    err = 0.0f;
-                                    iErr = 0;
-                                }
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError)
-                    {
-                        maxError = fabsf(err);
-                        maxErrorVal = s[j];
-                    }
-                    if (llabs(iErr) > maxError2)
-                    {
-                        maxError2 = llabs(iErr);
-                        maxErrorVal2 = s[j];
-                    }
-
-                    if (fail)
-                    {
-                        vlog_error("\nERROR: %sD%s: {%f, %d} ulp error at "
-                                   "%.13la: *{%.13la, %d} vs. {%.13la, %d}\n",
-                                   f->name, sizeNames[k], err, (int)iErr,
-                                   ((double *)gIn)[j], ((double *)gOut_Ref)[j],
-                                   ((int *)gOut_Ref2)[j], test, q2[j]);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
-             maxErrorVal2);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
deleted file mode 100644
index 23b0d70..0000000
--- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp
+++ /dev/null

@@ -1,416 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <climits>
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global int",
-                        sizeNames[vectorSize],
-                        "* out2, __global float",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i], out2 + i );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global int* out2, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 iout = INT_MIN;\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( iout, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       int3 iout = INT_MIN;\n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = iout.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = iout.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-}
-
-static cl_ulong abs_cl_long(cl_long i)
-{
-    cl_long mask = i >> 63;
-    return (i ^ mask) - mask;
-}
-
-int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int64_t maxError2 = 0;
-    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    float maxErrorVal = 0.0f;
-    float maxErrorVal2 = 0.0f;
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-    int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1);
-    cl_ulong maxiError;
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    float float_ulps;
-    if (gIsEmbedded)
-        float_ulps = f->float_embedded_ulps;
-    else
-        float_ulps = f->float_ulps;
-
-    maxiError = float_ulps == INFINITY ? CL_ULONG_MAX : 0;
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        uint32_t *p = (uint32_t *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                p[j] = (uint32_t)i + j * scale;
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                p[j] = (uint32_t)i + j;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        float *r = (float *)gOut_Ref;
-        int *r2 = (int *)gOut_Ref2;
-        float *s = (float *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            r[j] = (float)f->func.f_fpI(s[j], r2 + j);
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        int32_t *t2 = (int32_t *)gOut_Ref2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)(gOut[k]);
-                int32_t *q2 = (int32_t *)(gOut2[k]);
-
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j] || t2[j] != q2[j])
-                {
-                    float test = ((float *)q)[j];
-                    int correct2 = INT_MIN;
-                    double correct = f->func.f_fpI(s[j], &correct2);
-                    float err = Ulp_Error(test, correct);
-                    cl_long iErr = (int64_t)q2[j] - (int64_t)correct2;
-                    int fail = !(fabsf(err) <= float_ulps
-                                 && abs_cl_long(iErr) <= maxiError);
-                    if (ftz)
-                    {
-                        // retry per section 6.5.3.2
-                        if (IsFloatResultSubnormal(correct, float_ulps))
-                        {
-                            fail = fail && !(test == 0.0f && iErr == 0);
-                            if (!fail) err = 0.0f;
-                        }
-
-                        // retry per section 6.5.3.3
-                        if (IsFloatSubnormal(s[j]))
-                        {
-                            int correct5, correct6;
-                            double correct3 = f->func.f_fpI(0.0, &correct5);
-                            double correct4 = f->func.f_fpI(-0.0, &correct6);
-                            float err2 = Ulp_Error(test, correct3);
-                            float err3 = Ulp_Error(test, correct4);
-                            cl_long iErr2 =
-                                (long long)q2[j] - (long long)correct5;
-                            cl_long iErr3 =
-                                (long long)q2[j] - (long long)correct6;
-
-                            // Did +0 work?
-                            if (fabsf(err2) <= float_ulps
-                                && abs_cl_long(iErr2) <= maxiError)
-                            {
-                                err = err2;
-                                iErr = iErr2;
-                                fail = 0;
-                            }
-                            // Did -0 work?
-                            else if (fabsf(err3) <= float_ulps
-                                     && abs_cl_long(iErr3) <= maxiError)
-                            {
-                                err = err3;
-                                iErr = iErr3;
-                                fail = 0;
-                            }
-
-                            // retry per section 6.5.3.4
-                            if (fail
-                                && (IsFloatResultSubnormal(correct2, float_ulps)
-                                    || IsFloatResultSubnormal(correct3,
-                                                              float_ulps)))
-                            {
-                                fail = fail
-                                    && !(test == 0.0f
-                                         && (abs_cl_long(iErr2) <= maxiError
-                                             || abs_cl_long(iErr3)
-                                                 <= maxiError));
-                                if (!fail)
-                                {
-                                    err = 0.0f;
-                                    iErr = 0;
-                                }
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError)
-                    {
-                        maxError = fabsf(err);
-                        maxErrorVal = s[j];
-                    }
-                    if (llabs(iErr) > maxError2)
-                    {
-                        maxError2 = llabs(iErr);
-                        maxErrorVal2 = s[j];
-                    }
-
-                    if (fail)
-                    {
-                        vlog_error("\nERROR: %s%s: {%f, %d} ulp error at %a: "
-                                   "*{%a, %d} vs. {%a, %d}\n",
-                                   f->name, sizeNames[k], err, (int)iErr,
-                                   ((float *)gIn)[j], ((float *)gOut_Ref)[j],
-                                   ((int *)gOut_Ref2)[j], test, q2[j]);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
-             maxErrorVal2);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/unary_u.cpp b/test_conformance/math_brute_force/unary_u.cpp
new file mode 100644
index 0000000..690b6e7
--- /dev/null
+++ b/test_conformance/math_brute_force/unary_u.cpp

@@ -0,0 +1,692 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "Utility.h"
+
+#include <string.h>
+#include "FunctionList.h"
+
+int TestFunc_Float_UInt(const Func *f, MTdata);
+int TestFunc_Double_ULong(const Func *f, MTdata);
+
+extern const vtbl _unary_u = { "unary_u", TestFunc_Float_UInt,
+                               TestFunc_Double_ULong };
+
+
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) // name: function spelled into the kernel source; vectorSize: index into sizeNames/sizeValues; k, p: forwarded to MakeKernel
+{   // Assembles the source of a kernel computing out = name( in ) for uint input and float output, then hands it to MakeKernel.
+    const char *c[] = { // generic fragments: each work-item converts one (vector) element, in[i] -> out[i]
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global uint", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global uint* in)\n" // 3-element variant: scalar pointers with vload3/vstore3; the last work-item handles the leftover elements one by one
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       uint3 u0 = vload3( 0, in + 3 * i );\n"
+                            "       float3 f0 = ", name, "( u0 );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       uint3 u0;\n"
+                            "       float3 f0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               u0 = (uint3)( in[3*i], 0xdead, 0xdead ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               u0 = (uint3)( in[3*i], in[3*i+1], 0xdead ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( u0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+
+    const char **kern = c; // default to the generic fragments
+    size_t kernSize = sizeof(c)/sizeof(c[0]); // number of string fragments, not characters
+
+    if( sizeValues[vectorSize] == 3 ) // 3-wide vectors use the vload3/vstore3 source instead
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+
+    char testName[32]; // kernel entry-point name: "math_kernel" followed by the size suffix
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); // MakeKernel's status is returned unchanged
+}
+
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) // name: function spelled into the kernel source; vectorSize: index into sizeNames/sizeValues; k, p: forwarded to MakeKernel
+{   // Double-precision counterpart of BuildKernel: ulong input, double output, with cl_khr_fp64 enabled in the source.
+    const char *c[] = { // generic fragments: each work-item converts one (vector) element, in[i] -> out[i]
+                            "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global ulong", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", // 3-element variant: scalar pointers with vload3/vstore3; the last work-item handles the leftover elements one by one
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global ulong* in)\n"
+                        "{\n"
+                        "   size_t i = get_global_id(0);\n"
+                        "   if( i + 1 < get_global_size(0) )\n"
+                        "   {\n"
+                        "       ulong3 u0 = vload3( 0, in + 3 * i );\n"
+                        "       double3 f0 = ", name, "( u0 );\n"
+                        "       vstore3( f0, 0, out + 3*i );\n"
+                        "   }\n"
+                        "   else\n"
+                        "   {\n"
+                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                        "       ulong3 u0;\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 1:\n"
+                        "               u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, 0xdeaddeaddeaddeadUL ); \n"
+                        "               break;\n"
+                        "           case 0:\n"
+                        "               u0 = (ulong3)( in[3*i], in[3*i+1], 0xdeaddeaddeaddeadUL ); \n"
+                        "               break;\n"
+                        "       }\n"
+                        "       double3 f0 = ", name, "( u0 );\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 0:\n"
+                        "               out[3*i+1] = f0.y; \n"
+                        "               // fall through\n"
+                        "           case 1:\n"
+                        "               out[3*i] = f0.x; \n"
+                        "               break;\n"
+                        "       }\n"
+                        "   }\n"
+                        "}\n"
+                    };
+
+    const char **kern = c; // default to the generic fragments
+    size_t kernSize = sizeof(c)/sizeof(c[0]); // number of string fragments, not characters
+
+    if( sizeValues[vectorSize] == 3 ) // 3-wide vectors use the vload3/vstore3 source instead
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+
+
+    char testName[32]; // kernel entry-point name: "math_kernel" followed by the size suffix
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); // MakeKernel's status is returned unchanged
+}
+
+struct BuildKernelInfo // shared job description handed to the BuildKernel_*Fn thread-pool callbacks
+{
+    cl_uint offset;         // vector-size index of job 0; each job adds its job_id to this
+    cl_kernel *kernels;     // array receiving the built kernel for each vector-size index
+    cl_program *programs;   // array receiving the built program for each vector-size index
+    const char *nameInCode; // function name passed on to BuildKernel / BuildKernelDouble
+};
+
+// Thread-pool job: builds the float kernel for vector-size index (offset + job_id) described by *p.
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    const BuildKernelInfo *job = static_cast<const BuildKernelInfo *>( p );
+    const cl_uint sizeIndex = job->offset + job_id;
+    return BuildKernel( job->nameInCode, sizeIndex, &job->kernels[sizeIndex], &job->programs[sizeIndex] );
+}
+
+// Thread-pool job: builds the double kernel for vector-size index (offset + job_id) described by *p.
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    const BuildKernelInfo *job = static_cast<const BuildKernelInfo *>( p );
+    const cl_uint sizeIndex = job->offset + job_id;
+    return BuildKernelDouble( job->nameInCode, sizeIndex, &job->kernels[sizeIndex], &job->programs[sizeIndex] );
+}
+
+// Brute-force accuracy test for a float-returning function of one uint argument (f->nameInCode).
+// Builds one kernel per vector size, walks the 32-bit input range buffer by buffer (strided in wimpy
+// mode) and compares each device result with f->func.f_u, allowing float_ulps of error.
+// When gMeasureTimes is set the kernels are also timed. Returns the last status (0 on success) or -1
+// after an accuracy failure; once built, kernels and programs are always released at the exit label.
+int TestFunc_Float_UInt(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    float maxErrorVal = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+
+    uint64_t step = bufferSize / sizeof( float );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1); // wimpy-mode stride between consecutive inputs of one buffer
+    int isRangeLimited = 0;
+    float float_ulps;
+    float half_sin_cos_tan_limit = 0;
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    if( gIsEmbedded)
+        float_ulps = f->float_embedded_ulps;
+    else
+        float_ulps = f->float_ulps;
+
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        return error;
+
+    if( 0 == strcmp( f->name, "half_sin") || 0 == strcmp( f->name, "half_cos") )
+    {
+        isRangeLimited = 1;
+        half_sin_cos_tan_limit = 1.0f + float_ulps * (FLT_EPSILON/2.0f);             // out of range results from finite inputs must be in [-1,1]
+    }
+    else if( 0 == strcmp( f->name, "half_tan"))
+    {
+        isRangeLimited = 1;
+        half_sin_cos_tan_limit = INFINITY;             // out of range result from finite inputs must be numeric
+    }
+
+
+    for( i = 0; i < (1ULL<<32); i += step  )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        if( gWimpyMode )
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = (uint32_t) i + j * scale;
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = (uint32_t) i + j;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL)))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit; // kernels/programs are already built: release them instead of returning directly
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%u) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // work-items needed to cover the buffer, rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ))){ LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)))
+            {
+                vlog_error( "FAILURE -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        //Calculate the correctly rounded reference result
+        float *r = (float*) gOut_Ref;
+        cl_uint *s = (cl_uint*) gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            r[j] = (float) f->func.f_u( s[j] );
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        if( gSkipCorrectnessTesting )
+            break;
+
+
+        //Verify data
+        uint32_t *t = (uint32_t*) gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t*)(gOut[k]);
+
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    float test = ((float*) q)[j];
+                    double correct = f->func.f_u( s[j] );
+                    float err = Ulp_Error( test, correct );
+                    int fail = ! (fabsf(err) <= float_ulps);
+
+                    // half_sin/cos/tan are only valid between +-2**16, Inf, NaN
+                    if( isRangeLimited && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) && fabsf(s[j]) < INFINITY )
+                    {
+                        if( fabsf( test ) <= half_sin_cos_tan_limit )
+                        {
+                            err = 0;
+                            fail = 0;
+                        }
+                    }
+
+                    if( fail )
+                    {
+                        if( ftz )
+                        {
+                            // retry per section 6.5.3.2
+                            if( IsFloatResultSubnormal(correct, float_ulps) )
+                            {
+                                fail = fail && ( test != 0.0f );
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\n%s%s: %f ulp error at 0x%8.8x: *%a vs. %a\n", f->name, sizeNames[k], err, ((uint32_t*) gIn)[j], ((float*) gOut_Ref)[j], test );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", (uint32_t) i, (size_t) step, bufferSize); // casts match the conversion specifiers; i < 2^32 inside this loop
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t*)gIn;
+        if( strstr( f->name, "exp" ) || strstr( f->name, "sin" ) || strstr( f->name, "cos" ) || strstr( f->name, "tan" ) )
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                ((float*)p)[j] = (float) genrand_real1(d);
+        else if( strstr( f->name, "log" ) )
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = genrand_int32(d) & 0x7fffffff;
+        else
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit; // release the kernels/programs instead of leaking them
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILURE -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ %a", maxError, maxErrorVal );
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+// Returns 64 random bits assembled from two 32-bit draws; the first draw becomes the low word.
+static cl_ulong random64( MTdata d )
+{
+    const cl_ulong lo = genrand_int32(d); // own statement: the operands of '|' have no defined evaluation order
+    return lo | ((cl_ulong) genrand_int32(d) << 32);
+}
+// Brute-force accuracy test for a double-returning function of one ulong argument (f->nameInCode).
+// Builds one kernel per vector size, then for every step feeds a buffer of random 64-bit inputs to
+// each kernel and compares the results with f->dfunc.f_u, allowing f->double_ulps of error.
+// When gMeasureTimes is set the kernels are also timed. Returns the last status (0 on success) or -1
+// after an accuracy failure; once built, kernels and programs are always released at the exit label.
+int TestFunc_Double_ULong(const Func *f, MTdata d)
+{
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int ftz = f->ftz || gForceFTZ;
+    double maxErrorVal = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( cl_double );
+
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    Force64BitFPUPrecision();
+
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                &build_info ) ))
+    {
+        return error;
+    }
+
+    for( i = 0; i < (1ULL<<32); i += step  )
+    {
+        //Init input array
+        cl_ulong *p = (cl_ulong *)gIn;
+        for( j = 0; j < bufferSize / sizeof( cl_ulong ); j++ )
+            p[j] = random64(d);
+
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL)))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit; // kernels/programs are already built: release them instead of returning directly
+        }
+
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%u) ***\n", error, j );
+                goto exit;
+            }
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // work-items needed to cover the buffer, rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ))){ LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)))
+            {
+                vlog_error( "FAILURE -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+
+        //Calculate the correctly rounded reference result
+        double *r = (double*) gOut_Ref;
+        cl_ulong *s = (cl_ulong*) gIn;
+        for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+            r[j] = (double) f->dfunc.f_u( s[j] );
+
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+
+        if( gSkipCorrectnessTesting )
+            break;
+
+
+        //Verify data
+        uint64_t *t = (uint64_t*) gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint64_t *q = (uint64_t*)(gOut[k]);
+
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    double test = ((double*) q)[j];
+                    long double correct = f->dfunc.f_u( s[j] );
+                    float err = Bruteforce_Ulp_Error_Double(test, correct);
+                    int fail = ! (fabsf(err) <= f->double_ulps);
+
+                    // in flush-to-zero mode a zero result is accepted when the reference is subnormal
+                    if( fail )
+                    {
+                        if( ftz )
+                        {
+                            // retry per section 6.5.3.2
+                            if( IsDoubleResultSubnormal(correct, f->double_ulps) )
+                            {
+                                fail = fail && ( test != 0.0 );
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\n%s%sD: %f ulp error at 0x%16.16llx: *%.13la vs. %.13la\n", f->name, sizeNames[k], err, (unsigned long long) ((uint64_t*) gIn)[j], ((double*) gOut_Ref)[j], test );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", (uint32_t) i, (size_t) step, bufferSize); // casts match the conversion specifiers; i < 2^32 inside this loop
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input array
+        cl_ulong *p = (cl_ulong*) gIn; // the kernels read ulong inputs, so store raw 64-bit integers (as the correctness loop does)
+
+        for( j = 0; j < bufferSize / sizeof( cl_ulong ); j++ )
+            p[j] = random64(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit; // release the kernels/programs instead of leaking them
+        }
+
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILURE -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ %a", maxError, maxErrorVal );
+    vlog( "\n" );
+
+exit:
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+
+    return error;
+}
+
+

diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp
deleted file mode 100644
index 3c5f99d..0000000
--- a/test_conformance/math_brute_force/unary_u_double.cpp
+++ /dev/null

@@ -1,314 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global ulong",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global ulong* in                 )\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       ulong3 u0 = vload3( 0, in + 3 * i );\n"
-        "       double3 f0 = ",
-        name,
-        "( u0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       ulong3 u0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, "
-        "0xdeaddeaddeaddeadUL ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               u0 = (ulong3)( in[3*i], in[3*i+1], "
-        "0xdeaddeaddeaddeadUL ); \n"
-        "               break;\n"
-        "       }\n"
-        "       double3 f0 = ",
-        name,
-        "( u0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-}
-
-static cl_ulong random64(MTdata d)
-{
-    return (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32);
-}
-
-int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int ftz = f->ftz || gForceFTZ;
-    double maxErrorVal = 0.0f;
-    uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    Force64BitFPUPrecision();
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        cl_ulong *p = (cl_ulong *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_ulong); j++)
-            p[j] = random64(d);
-
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        double *r = (double *)gOut_Ref;
-        cl_ulong *s = (cl_ulong *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-            r[j] = (double)f->dfunc.f_u(s[j]);
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data
-        uint64_t *t = (uint64_t *)gOut_Ref;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint64_t *q = (uint64_t *)(gOut[k]);
-
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    double test = ((double *)q)[j];
-                    long double correct = f->dfunc.f_u(s[j]);
-                    float err = Bruteforce_Ulp_Error_Double(test, correct);
-                    int fail = !(fabsf(err) <= f->double_ulps);
-
-                    if (fail)
-                    {
-                        if (ftz)
-                        {
-                            // retry per section 6.5.3.2
-                            if (IsDoubleResultSubnormal(correct,
-                                                        f->double_ulps))
-                            {
-                                fail = fail && (test != 0.0);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError)
-                    {
-                        maxError = fabsf(err);
-                        maxErrorVal = s[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error("\n%s%sD: %f ulp error at 0x%16.16llx: "
-                                   "*%.13la vs. %.13la\n",
-                                   f->name, sizeNames[k], err,
-                                   ((uint64_t *)gIn)[j],
-                                   ((double *)gOut_Ref)[j], test);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ %a", maxError, maxErrorVal);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp
deleted file mode 100644
index 44c5af4..0000000
--- a/test_conformance/math_brute_force/unary_u_float.cpp
+++ /dev/null

@@ -1,316 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-
-#include <cstring>
-
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global uint",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global uint* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       uint3 u0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f0 = ",
-        name,
-        "( u0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       uint3 u0;\n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               u0 = (uint3)( in[3*i], 0xdead, 0xdead ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               u0 = (uint3)( in[3*i], in[3*i+1], 0xdead ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( u0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-}
-
-int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
-{
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    float maxErrorVal = 0.0f;
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-    int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(double)) + 1);
-
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
-    float float_ulps;
-    if (gIsEmbedded)
-        float_ulps = f->float_embedded_ulps;
-    else
-        float_ulps = f->float_ulps;
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        uint32_t *p = (uint32_t *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                p[j] = (uint32_t)i + j * scale;
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                p[j] = (uint32_t)i + j;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-
-        // Calculate the correctly rounded reference result
-        float *r = (float *)gOut_Ref;
-        cl_uint *s = (cl_uint *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            r[j] = (float)f->func.f_u(s[j]);
-
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-
-        if (gSkipCorrectnessTesting) break;
-
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)(gOut[k]);
-
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    float test = ((float *)q)[j];
-                    double correct = f->func.f_u(s[j]);
-                    float err = Ulp_Error(test, correct);
-                    int fail = !(fabsf(err) <= float_ulps);
-
-                    if (fail)
-                    {
-                        if (ftz)
-                        {
-                            // retry per section 6.5.3.2
-                            if (IsFloatResultSubnormal(correct, float_ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError)
-                    {
-                        maxError = fabsf(err);
-                        maxErrorVal = s[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error(
-                            "\n%s%s: %f ulp error at 0x%8.8x: *%a vs. %a\n",
-                            f->name, sizeNames[k], err, ((uint32_t *)gIn)[j],
-                            ((float *)gOut_Ref)[j], test);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-
-        vlog("\t%8.2f @ %a", maxError, maxErrorVal);
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-
-    return error;
-}

diff --git a/test_conformance/math_brute_force/utility.cpp b/test_conformance/math_brute_force/utility.cpp
deleted file mode 100644
index 9b0191a..0000000
--- a/test_conformance/math_brute_force/utility.cpp
+++ /dev/null

@@ -1,192 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "utility.h"
-#include "function_list.h"
-
-#if defined(__PPC__)
-// Global varaiable used to hold the FPU control register state. The FPSCR
-// register can not be used because not all Power implementations retain or
-// observed the NI (non-IEEE mode) bit.
-__thread fpu_control_t fpu_control = 0;
-#endif
-
-void MulD(double *rhi, double *rlo, double u, double v)
-{
-    const double c = 134217729.0; // 1+2^27
-    double up, u1, u2, vp, v1, v2;
-
-    up = u * c;
-    u1 = (u - up) + up;
-    u2 = u - u1;
-
-    vp = v * c;
-    v1 = (v - vp) + vp;
-    v2 = v - v1;
-
-    double rh = u * v;
-    double rl = (((u1 * v1 - rh) + (u1 * v2)) + (u2 * v1)) + (u2 * v2);
-
-    *rhi = rh;
-    *rlo = rl;
-}
-
-void AddD(double *rhi, double *rlo, double a, double b)
-{
-    double zhi, zlo;
-    zhi = a + b;
-    if (fabs(a) > fabs(b))
-    {
-        zlo = zhi - a;
-        zlo = b - zlo;
-    }
-    else
-    {
-        zlo = zhi - b;
-        zlo = a - zlo;
-    }
-
-    *rhi = zhi;
-    *rlo = zlo;
-}
-
-void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
-{
-    double mh, ml;
-    double c = 134217729.0;
-    double up, u1, u2, vp, v1, v2;
-
-    up = xh * c;
-    u1 = (xh - up) + up;
-    u2 = xh - u1;
-
-    vp = yh * c;
-    v1 = (yh - vp) + vp;
-    v2 = yh - v1;
-
-    mh = xh * yh;
-    ml = (((u1 * v1 - mh) + (u1 * v2)) + (u2 * v1)) + (u2 * v2);
-    ml += xh * yl + xl * yh;
-
-    *rhi = mh + ml;
-    *rlo = (mh - (*rhi)) + ml;
-}
-
-void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
-{
-    double r, s;
-    r = xh + yh;
-    s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl)
-                              : (yh - r + xh + xl + yl);
-    *rhi = r + s;
-    *rlo = (r - (*rhi)) + s;
-}
-
-void DivideDD(double *chi, double *clo, double a, double b)
-{
-    *chi = a / b;
-    double rhi, rlo;
-    MulD(&rhi, &rlo, *chi, b);
-    AddDD(&rhi, &rlo, -rhi, -rlo, a, 0.0);
-    *clo = rhi / b;
-}
-
-// These functions comapre two floats/doubles. Since some platforms may choose
-// to flush denormals to zeros before comparison, comparison like a < b may give
-// wrong result in "certain cases" where we do need correct compasion result
-// when operands are denormals .... these functions comapre floats/doubles using
-// signed integer/long int rep. In other cases, when flushing to zeros is fine,
-// these should not be used. Also these doesn't check for nans and assume nans
-// are handled separately as special edge case by the caller which calls these
-// functions return 0 if both are equal, 1 if x > y and -1 if x < y.
-
-inline int compareFloats(float x, float y)
-{
-    int32f_t a, b;
-
-    a.f = x;
-    b.f = y;
-
-    if (a.i & 0x80000000) a.i = 0x80000000 - a.i;
-    if (b.i & 0x80000000) b.i = 0x80000000 - b.i;
-
-    if (a.i == b.i) return 0;
-
-    return a.i < b.i ? -1 : 1;
-}
-
-inline int compareDoubles(double x, double y)
-{
-    int64d_t a, b;
-
-    a.d = x;
-    b.d = y;
-
-    if (a.l & 0x8000000000000000LL) a.l = 0x8000000000000000LL - a.l;
-    if (b.l & 0x8000000000000000LL) b.l = 0x8000000000000000LL - b.l;
-
-    if (a.l == b.l) return 0;
-
-    return a.l < b.l ? -1 : 1;
-}
-
-void logFunctionInfo(const char *fname, unsigned int float_size,
-                     unsigned int isFastRelaxed)
-{
-    char const *fpSizeStr = NULL;
-    char const *fpFastRelaxedStr = "";
-    switch (float_size)
-    {
-        case sizeof(cl_double): fpSizeStr = "fp64"; break;
-        case sizeof(cl_float): fpSizeStr = "fp32"; break;
-        case sizeof(cl_half): fpSizeStr = "fp16"; break;
-    }
-    if (isFastRelaxed)
-    {
-        fpFastRelaxedStr = "rlx";
-    }
-    vlog("%15s %4s %4s", fname, fpSizeStr, fpFastRelaxedStr);
-}
-
-float getAllowedUlpError(const Func *f, const bool relaxed)
-{
-    float ulp;
-
-    if (relaxed)
-    {
-        if (gIsEmbedded)
-        {
-            ulp = f->relaxed_embedded_error;
-        }
-        else
-        {
-            ulp = f->relaxed_error;
-        }
-    }
-    else
-    {
-        if (gIsEmbedded)
-        {
-            ulp = f->float_embedded_ulps;
-        }
-        else
-        {
-            ulp = f->float_ulps;
-        }
-    }
-
-    return ulp;
-}

diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h
deleted file mode 100644
index ac4db9c..0000000
--- a/test_conformance/math_brute_force/utility.h
+++ /dev/null

@@ -1,264 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef UTILITY_H
-#define UTILITY_H
-
-#include "harness/compat.h"
-#include "harness/rounding_mode.h"
-#include "harness/fpcontrol.h"
-#include "harness/testHarness.h"
-#include "harness/ThreadPool.h"
-#include "harness/conversions.h"
-
-#define BUFFER_SIZE (1024 * 1024 * 2)
-#define EMBEDDED_REDUCTION_FACTOR (64)
-
-#if defined(__GNUC__)
-#define UNUSED __attribute__((unused))
-#else
-#define UNUSED
-#endif
-
-struct Func;
-
-extern int gWimpyReductionFactor;
-
-#define VECTOR_SIZE_COUNT 6
-extern const char *sizeNames[VECTOR_SIZE_COUNT];
-extern const int sizeValues[VECTOR_SIZE_COUNT];
-
-extern cl_device_id gDevice;
-extern cl_context gContext;
-extern cl_command_queue gQueue;
-extern void *gIn;
-extern void *gIn2;
-extern void *gIn3;
-extern void *gOut_Ref;
-extern void *gOut_Ref2;
-extern void *gOut[VECTOR_SIZE_COUNT];
-extern void *gOut2[VECTOR_SIZE_COUNT];
-extern cl_mem gInBuffer;
-extern cl_mem gInBuffer2;
-extern cl_mem gInBuffer3;
-extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT];
-extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT];
-extern int gSkipCorrectnessTesting;
-extern int gForceFTZ;
-extern int gFastRelaxedDerived;
-extern int gWimpyMode;
-extern int gIsInRTZMode;
-extern int gInfNanSupport;
-extern int gIsEmbedded;
-extern int gVerboseBruteForce;
-extern uint32_t gMaxVectorSizeIndex;
-extern uint32_t gMinVectorSizeIndex;
-extern cl_device_fp_config gFloatCapabilities;
-
-#define LOWER_IS_BETTER 0
-#define HIGHER_IS_BETTER 1
-
-#include "harness/errorHelpers.h"
-
-#if defined(_MSC_VER)
-// Deal with missing scalbn on windows
-#define scalbnf(_a, _i) ldexpf(_a, _i)
-#define scalbn(_a, _i) ldexp(_a, _i)
-#define scalbnl(_a, _i) ldexpl(_a, _i)
-#endif
-
-float Abs_Error(float test, double reference);
-float Ulp_Error(float test, double reference);
-float Bruteforce_Ulp_Error_Double(double test, long double reference);
-
-int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
-               cl_program *p, bool relaxedMode);
-int MakeKernels(const char **c, cl_uint count, const char *name,
-                cl_uint kernel_count, cl_kernel *k, cl_program *p,
-                bool relaxedMode);
-
-// used to convert a bucket of bits into a search pattern through double
-static inline double DoubleFromUInt32(uint32_t bits);
-static inline double DoubleFromUInt32(uint32_t bits)
-{
-    union {
-        uint64_t u;
-        double d;
-    } u;
-
-    // split 0x89abcdef to 0x89abc00000000def
-    u.u = bits & 0xfffU;
-    u.u |= (uint64_t)(bits & ~0xfffU) << 32;
-
-    // sign extend the leading bit of def segment as sign bit so that the middle
-    // region consists of either all 1s or 0s
-    u.u -= (bits & 0x800U) << 1;
-
-    // return result
-    return u.d;
-}
-
-void _LogBuildError(cl_program p, int line, const char *file);
-#define LogBuildError(program) _LogBuildError(program, __LINE__, __FILE__)
-
-// The spec is fairly clear that we may enforce a hard cutoff to prevent
-// premature flushing to zero.
-// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN +
-// ulp_limit to be flushed to zero.
-static inline int IsFloatResultSubnormal(double x, float ulps)
-{
-    x = fabs(x) - MAKE_HEX_DOUBLE(0x1.0p-149, 0x1, -149) * (double)ulps;
-    return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
-}
-
-static inline int IsFloatResultSubnormalAbsError(double x, float abs_err)
-{
-    x = x - abs_err;
-    return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
-}
-
-static inline int IsDoubleResultSubnormal(long double x, float ulps)
-{
-    x = fabsl(x) - MAKE_HEX_LONG(0x1.0p-1074, 0x1, -1074) * (long double)ulps;
-    return x < MAKE_HEX_LONG(0x1.0p-1022, 0x1, -1022);
-}
-
-static inline int IsFloatInfinity(double x)
-{
-    union {
-        cl_float d;
-        cl_uint u;
-    } u;
-    u.d = (cl_float)x;
-    return ((u.u & 0x7fffffffU) == 0x7F800000U);
-}
-
-static inline int IsFloatMaxFloat(double x)
-{
-    union {
-        cl_float d;
-        cl_uint u;
-    } u;
-    u.d = (cl_float)x;
-    return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
-}
-
-static inline int IsFloatNaN(double x)
-{
-    union {
-        cl_float d;
-        cl_uint u;
-    } u;
-    u.d = (cl_float)x;
-    return ((u.u & 0x7fffffffU) > 0x7F800000U);
-}
-
-extern cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
-
-// Windows (since long double got deprecated) sets the x87 to 53-bit precision
-// (that's x87 default state).  This causes problems with the tests that
-// convert long and ulong to float and double or otherwise deal with values
-// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
-static inline void Force64BitFPUPrecision(void)
-{
-#if __MINGW32__
-    // The usual method is to use _controlfp as follows:
-    //     #include <float.h>
-    //     _controlfp(_PC_64, _MCW_PC);
-    //
-    // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
-    // divergent code just use inline assembly which works for both.
-    unsigned short int orig_cw = 0;
-    unsigned short int new_cw = 0;
-    __asm__ __volatile__("fstcw %0" : "=m"(orig_cw));
-    new_cw = orig_cw | 0x0300; // set precision to 64-bit
-    __asm__ __volatile__("fldcw  %0" ::"m"(new_cw));
-#elif defined(_WIN32) && defined(__INTEL_COMPILER)
-    // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not*
-    // work on win.x64: > On the x64 architecture, changing the floating point
-    // precision is not supported. (Taken from
-    // http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx)
-    int cw;
-    __asm { fnstcw cw }
-    ; // Get current value of FPU control word.
-    cw = cw & 0xfffffcff
-        | (3 << 8); // Set Precision Control to Double Extended Precision.
-    __asm { fldcw cw }
-    ; // Set new value of FPU control word.
-#else
-    /* Implement for other platforms if needed */
-#endif
-}
-
-extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
-
-typedef union {
-    int32_t i;
-    float f;
-} int32f_t;
-
-typedef union {
-    int64_t l;
-    double d;
-} int64d_t;
-
-void MulD(double *rhi, double *rlo, double u, double v);
-void AddD(double *rhi, double *rlo, double a, double b);
-void MulDD(double *rhi, double *rlo, double xh, double xl, double yh,
-           double yl);
-void AddDD(double *rhi, double *rlo, double xh, double xl, double yh,
-           double yl);
-void DivideDD(double *chi, double *clo, double a, double b);
-int compareFloats(float x, float y);
-int compareDoubles(double x, double y);
-
-void logFunctionInfo(const char *fname, unsigned int float_size,
-                     unsigned int isFastRelaxed);
-
-float getAllowedUlpError(const Func *f, const bool relaxed);
-
-static inline cl_uint getTestScale(size_t typeSize)
-{
-    if (gWimpyMode)
-    {
-        return (cl_uint)typeSize * 2 * gWimpyReductionFactor;
-    }
-    else if (gIsEmbedded)
-    {
-        return EMBEDDED_REDUCTION_FACTOR;
-    }
-    else
-    {
-        return 1;
-    }
-}
-
-static inline uint64_t getTestStep(size_t typeSize, size_t bufferSize)
-{
-    if (gWimpyMode)
-    {
-        return (1ULL << 32) * gWimpyReductionFactor / (512);
-    }
-    else if (gIsEmbedded)
-    {
-        return (BUFFER_SIZE / typeSize) * EMBEDDED_REDUCTION_FACTOR;
-    }
-    else
-    {
-        return bufferSize / typeSize;
-    }
-}
-
-#endif /* UTILITY_H */

diff --git a/test_conformance/mem_host_flags/C_host_memory_block.h b/test_conformance/mem_host_flags/C_host_memory_block.h
index 1d3b475..91b47ab 100644
--- a/test_conformance/mem_host_flags/C_host_memory_block.h
+++ b/test_conformance/mem_host_flags/C_host_memory_block.h

@@ -69,28 +69,31 @@
 template < class T>
 C_host_memory_block<T>::~C_host_memory_block()
 {
-    if (pData != NULL) delete[] pData;
-    num_elements = 0;
+  if (pData!=NULL) delete pData;
+  num_elements = 0;
 }
 
 template < class T >
 void C_host_memory_block<T>::Init(int num_elem, T & value)
 {
-    if (pData != NULL) delete[] pData;
-    pData = new T[num_elem];
-    for (int i = 0; i < num_elem; i++) pData[i] = value;
+  if (pData!=NULL) delete pData;
+  pData= new T [num_elem];
+  for (int i=0; i<num_elem; i++)
+    pData[i] = value;
 
-    num_elements = num_elem;
+  num_elements= num_elem;
 }
 
 template < class T >
 void C_host_memory_block<T>::Init(int num_elem)
 {
-    if (pData != NULL) delete[] pData;
-    pData = new T[num_elem];
-    for (int i = 0; i < num_elem; i++) pData[i] = (T)i;
+  if (pData!=NULL) delete pData;
+  pData = new T [num_elem];
+  for (int i=0; i<num_elem; i++)
+    pData[i]= (T) i;
 
-    num_elements = num_elem;
+  num_elements = num_elem;
+
 }
 template < class T >
 void  C_host_memory_block<T>::Set_to_zero()

diff --git a/test_conformance/mem_host_flags/main.cpp b/test_conformance/mem_host_flags/main.cpp
index f064980..01bad67 100644
--- a/test_conformance/mem_host_flags/main.cpp
+++ b/test_conformance/mem_host_flags/main.cpp

@@ -47,5 +47,5 @@
 {
     log_info("1st part, non gl-sharing objects...\n");
     gTestRounding = true;
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
 }

diff --git a/test_conformance/multiple_device_context/main.cpp b/test_conformance/multiple_device_context/main.cpp
index 6e16c24..1027666 100644
--- a/test_conformance/multiple_device_context/main.cpp
+++ b/test_conformance/multiple_device_context/main.cpp

@@ -41,6 +41,6 @@
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_num, test_list, true, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, true, 0 );
 }
 

diff --git a/test_conformance/multiple_device_context/test_multiple_devices.cpp b/test_conformance/multiple_device_context/test_multiple_devices.cpp
index 59543ad..b6f15f6 100644
--- a/test_conformance/multiple_device_context/test_multiple_devices.cpp
+++ b/test_conformance/multiple_device_context/test_multiple_devices.cpp

@@ -91,7 +91,7 @@
   for( i = 0; i < TEST_SIZE; i++ )
     data[i] = genrand_int32(seed);
 
-  stream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+  stream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                           sizeof(cl_uint) * TEST_SIZE, data, &error);
   test_error(error, "Unable to create test array");
 

diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
index a4a6a74..de041c2 100644
--- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
+++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp

@@ -243,134 +243,75 @@
   NL "}"
   NL ;
 
-TestNonUniformWorkGroup::TestNonUniformWorkGroup(
-    const cl_device_id &device, const cl_context &context,
-    const cl_command_queue &queue, const cl_uint dims, size_t *globalSize,
-    const size_t *localSize, const size_t *buffersSize,
-    const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize)
-    : _device(device), _context(context), _queue(queue), _dims(dims)
-{
+TestNonUniformWorkGroup::TestNonUniformWorkGroup (const cl_device_id &device, const cl_context &context,
+  const cl_command_queue &queue, const cl_uint dims, const size_t *globalSize, const size_t *localSize, const size_t *buffersSize,
+  const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize)
+  : _device(device), _context(context), _queue(queue), _dims (dims) {
 
-    if (globalSize == NULL || dims < 1 || dims > 3)
-    {
-        // throw std::invalid_argument("globalSize is NULL value.");
-        // This is method of informing that parameters are wrong.
-        // It would be checked by prepareDevice() function.
-        // This is used because of lack of exception support.
-        _globalSize[0] = 0;
-        return;
+  if (globalSize == NULL || dims < 1 || dims > 3) {
+    //throw std::invalid_argument("globalSize is NULL value.");
+    // This is method of informing that parameters are wrong.
+    // It would be checked by prepareDevice() function.
+    // This is used because of lack of exception support.
+    _globalSize[0] = 0;
+    return;
+  }
+
+  cl_uint i;
+  _globalWorkOffset_IsNull = true;
+  _localSize_IsNull = true;
+
+  setGlobalWorkgroupSize(globalSize);
+  setLocalWorkgroupSize(globalSize,localSize);
+  for (i = _dims; i < MAX_DIMS; i++) {
+    _globalSize[i] = 1;
+  }
+
+  for (i = 0; i < MAX_DIMS; i++) {
+    _globalWorkOffset[i] = 0;
+  }
+
+  if (globalWorkOffset) {
+    _globalWorkOffset_IsNull = false;
+    for (i = 0; i < _dims; i++) {
+      _globalWorkOffset[i] = globalWorkOffset[i];
     }
+  }
 
-    // For OpenCL-3.0 support for non-uniform workgroups is optional, it's still
-    // useful to run these tests since we can verify the behavior of the
-    // get_enqueued_local_size() builtin for uniform workgroups, so we round up
-    // the global size to insure uniform workgroups on those 3.0 devices.
-    // We only need to do this when localSize is non-null, otherwise the driver
-    // will select a value for localSize which will be uniform on devices that
-    // don't support non-uniform work-groups.
-    if (nullptr != localSize && get_device_cl_version(device) >= Version(3, 0))
-    {
-        // Query for the non-uniform work-group support.
-        cl_bool are_non_uniform_sub_groups_supported{ CL_FALSE };
-        auto error =
-            clGetDeviceInfo(device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
-                            sizeof(are_non_uniform_sub_groups_supported),
-                            &are_non_uniform_sub_groups_supported, nullptr);
-        if (error)
-        {
-            print_error(error,
-                        "clGetDeviceInfo failed for "
-                        "CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT");
-            // This signals an error to the caller (see above).
-            _globalSize[0] = 0;
-            return;
-        }
+  for (i = 0; i < MAX_DIMS; i++) {
+    _enqueuedLocalSize[i] = 1;
+  }
 
-        // If non-uniform work-groups are not supported round up the global
-        // sizes so workgroups are uniform and we have at least one.
-        if (CL_FALSE == are_non_uniform_sub_groups_supported)
-        {
-            log_info(
-                "WARNING: Non-uniform work-groups are not supported on this "
-                "device.\n Running test with uniform work-groups.\n");
-            for (unsigned dim = 0; dim < dims; ++dim)
-            {
-                auto global_size_before = globalSize[dim];
-                auto global_size_rounded = global_size_before
-                    + (localSize[dim] - global_size_before % localSize[dim]);
-                globalSize[dim] = global_size_rounded;
-                log_info("Rounding globalSize[%d] = %d -> %d\n", dim,
-                         global_size_before, global_size_rounded);
-            }
-        }
+  if (localSize) {
+    _localSize_IsNull = false;
+    for (i = 0; i < _dims; i++) {
+      _enqueuedLocalSize[i] = _localSize[i];
     }
+  }
 
-    cl_uint i;
-    _globalWorkOffset_IsNull = true;
-    _localSize_IsNull = true;
-
-    setGlobalWorkgroupSize(globalSize);
-    setLocalWorkgroupSize(globalSize, localSize);
-    for (i = _dims; i < MAX_DIMS; i++)
-    {
-        _globalSize[i] = 1;
+  if (reqdWorkGroupSize) {
+    for (i = 0; i < _dims; i++) {
+      _reqdWorkGroupSize[i] = reqdWorkGroupSize[i];
     }
-
-    for (i = 0; i < MAX_DIMS; i++)
-    {
-        _globalWorkOffset[i] = 0;
+    for (i = _dims; i < MAX_DIMS; i++) {
+      _reqdWorkGroupSize[i] = 1;
     }
+  } else {
+    _reqdWorkGroupSize[0] = 0;
+    _reqdWorkGroupSize[1] = 0;
+    _reqdWorkGroupSize[2] = 0;
+  }
 
-    if (globalWorkOffset)
-    {
-        _globalWorkOffset_IsNull = false;
-        for (i = 0; i < _dims; i++)
-        {
-            _globalWorkOffset[i] = globalWorkOffset[i];
-        }
-    }
+  _testRange = Range::ALL;
 
-    for (i = 0; i < MAX_DIMS; i++)
-    {
-        _enqueuedLocalSize[i] = 1;
-    }
+  _numOfGlobalWorkItems = _globalSize[0]*_globalSize[1]*_globalSize[2];
 
-    if (localSize)
-    {
-        _localSize_IsNull = false;
-        for (i = 0; i < _dims; i++)
-        {
-            _enqueuedLocalSize[i] = _localSize[i];
-        }
-    }
+  DataContainerAttrib temp = {{0, 0, 0}};
 
-    if (reqdWorkGroupSize)
-    {
-        for (i = 0; i < _dims; i++)
-        {
-            _reqdWorkGroupSize[i] = reqdWorkGroupSize[i];
-        }
-        for (i = _dims; i < MAX_DIMS; i++)
-        {
-            _reqdWorkGroupSize[i] = 1;
-        }
-    }
-    else
-    {
-        _reqdWorkGroupSize[0] = 0;
-        _reqdWorkGroupSize[1] = 0;
-        _reqdWorkGroupSize[2] = 0;
-    }
+  // array with results from each region
+  _resultsRegionArray.resize(NUMBER_OF_REGIONS, temp);
+  _referenceRegionArray.resize(NUMBER_OF_REGIONS, temp);
 
-    _testRange = Range::ALL;
-
-    _numOfGlobalWorkItems = _globalSize[0] * _globalSize[1] * _globalSize[2];
-
-    DataContainerAttrib temp = { { 0, 0, 0 } };
-
-    // array with results from each region
-    _resultsRegionArray.resize(NUMBER_OF_REGIONS, temp);
-    _referenceRegionArray.resize(NUMBER_OF_REGIONS, temp);
 }
 
 TestNonUniformWorkGroup::~TestNonUniformWorkGroup () {
@@ -541,7 +482,7 @@
   if(_localSize_IsNull == false)
     calculateExpectedValues();
 
-  std::string buildOptions{};
+  std::string buildOptions = BUILD_CL_STD_2_0;
   if(_reqdWorkGroupSize[0] != 0 && _reqdWorkGroupSize[1] != 0 && _reqdWorkGroupSize[2] != 0) {
     std::ostringstream tmp(" ");
     tmp << " -D RWGSX=" << _reqdWorkGroupSize[0]
@@ -780,50 +721,42 @@
   return 0;
 }
 
-void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims,
-                                                 size_t *globalSize,
-                                                 const size_t *localSize,
-                                                 int range)
-{
-    runTestNonUniformWorkGroup(dims, globalSize, localSize, NULL, NULL, range);
+void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims, const size_t *globalSize,
+  const size_t *localSize, int range) {
+  runTestNonUniformWorkGroup (dims, globalSize, localSize, NULL, NULL, range);
 }
 
-void SubTestExecutor::runTestNonUniformWorkGroup(
-    const cl_uint dims, size_t *globalSize, const size_t *localSize,
-    const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize, int range)
-{
+void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims, const size_t *globalSize,
+  const size_t *localSize, const size_t *globalWorkOffset,
+  const size_t *reqdWorkGroupSize, int range) {
 
 
-    int err;
-    ++_overallCounter;
-    TestNonUniformWorkGroup test(_device, _context, _queue, dims, globalSize,
-                                 localSize, NULL, globalWorkOffset,
-                                 reqdWorkGroupSize);
+  int err;
+  ++_overallCounter;
+  TestNonUniformWorkGroup test (_device, _context, _queue, dims, globalSize, localSize,
+    NULL, globalWorkOffset, reqdWorkGroupSize);
 
-    test.setTestRange(range);
-    err = test.prepareDevice();
-    if (err)
-    {
-        log_error("Error: prepare device\n");
-        ++_failCounter;
-        return;
-    }
+  test.setTestRange(range);
+  err = test.prepareDevice();
+  if (err) {
+    log_error ("Error: prepare device\n");
+    ++_failCounter;
+    return;
+  }
 
-    err = test.runKernel();
-    if (err)
-    {
-        log_error("Error: run kernel\n");
-        ++_failCounter;
-        return;
-    }
+  err = test.runKernel();
+  if (err) {
+    log_error ("Error: run kernel\n");
+    ++_failCounter;
+    return;
+  }
 
-    err = test.verifyResults();
-    if (err)
-    {
-        log_error("Error: verify results\n");
-        ++_failCounter;
-        return;
-    }
+  err = test.verifyResults();
+  if (err) {
+    log_error ("Error: verify results\n");
+    ++_failCounter;
+    return;
+  }
 }
 
 int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) {
@@ -831,7 +764,7 @@
 
   clProgramWrapper program;
   clKernelWrapper testKernel;
-  std::string buildOptions{};
+  std::string buildOptions = BUILD_CL_STD_2_0;
 
   if (testRange & Range::BASIC)
     buildOptions += " -D TESTBASIC";

diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
index 414d100..e0c635a 100644
--- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
+++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h

@@ -25,6 +25,8 @@
 
 #define NUMBER_OF_REGIONS 8
 
+#define BUILD_CL_STD_2_0 "-cl-std=CL2.0"
+
 #define MAX_DIMS 3
 
 // This structure reflects data received from kernel.
@@ -60,21 +62,18 @@
 // Main class responsible for testing
 class TestNonUniformWorkGroup {
 public:
-    TestNonUniformWorkGroup(const cl_device_id &device,
-                            const cl_context &context,
-                            const cl_command_queue &queue, const cl_uint dims,
-                            size_t *globalSize, const size_t *localSize,
-                            const size_t *buffersSize,
-                            const size_t *globalWorkOffset,
-                            const size_t *reqdWorkGroupSize = NULL);
 
-    ~TestNonUniformWorkGroup();
+  TestNonUniformWorkGroup (const cl_device_id &device, const cl_context &context,
+    const cl_command_queue &queue, const cl_uint dims, const size_t *globalSize,
+    const size_t *localSize, const size_t *buffersSize, const size_t *globalWorkOffset,
+    const size_t *reqdWorkGroupSize=NULL);
 
-    static size_t getMaxLocalWorkgroupSize(const cl_device_id &device);
-    static void setMaxLocalWorkgroupSize(size_t workGroupSize)
-    {
-        TestNonUniformWorkGroup::_maxLocalWorkgroupSize = workGroupSize;
-    }
+  ~TestNonUniformWorkGroup ();
+
+  static size_t getMaxLocalWorkgroupSize (const cl_device_id &device);
+  static void setMaxLocalWorkgroupSize (size_t workGroupSize) {
+    TestNonUniformWorkGroup::_maxLocalWorkgroupSize = workGroupSize;
+  }
   static void enableStrictMode (bool state);
 
   void setTestRange (int range) {_testRange = range;}
@@ -127,13 +126,12 @@
   SubTestExecutor(const cl_device_id &device, const cl_context &context, const cl_command_queue &queue)
     : _device (device), _context (context), _queue (queue), _failCounter (0), _overallCounter (0) {}
 
-  void runTestNonUniformWorkGroup(const cl_uint dims, size_t *globalSize,
-                                  const size_t *localSize, int range);
+  void runTestNonUniformWorkGroup (const cl_uint dims, const size_t *globalSize,
+    const size_t *localSize, int range);
 
-  void runTestNonUniformWorkGroup(const cl_uint dims, size_t *globalSize,
-                                  const size_t *localSize,
-                                  const size_t *globalWorkOffset,
-                                  const size_t *reqdWorkGroupSize, int range);
+  void runTestNonUniformWorkGroup (const cl_uint dims, const size_t *globalSize,
+    const size_t *localSize, const size_t *globalWorkOffset,
+    const size_t *reqdWorkGroupSize, int range);
 
   int calculateWorkGroupSize(size_t &maxWgSize, int testRange);
   int status();

diff --git a/test_conformance/non_uniform_work_group/main.cpp b/test_conformance/non_uniform_work_group/main.cpp
index 64eff96..e448afe 100644
--- a/test_conformance/non_uniform_work_group/main.cpp
+++ b/test_conformance/non_uniform_work_group/main.cpp

@@ -43,9 +43,7 @@
     auto expected_min_version = Version(2, 0);
     if (version < expected_min_version)
     {
-        version_expected_info("Test", "OpenCL",
-                              expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
+        version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
         return TEST_SKIP;
     }
     return TEST_PASS;

diff --git a/test_conformance/opencl_conformance_tests_12_conversions.csv b/test_conformance/opencl_conformance_tests_12_conversions.csv
new file mode 100644
index 0000000..c8e283a
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_12_conversions.csv

@@ -0,0 +1,4 @@
+#
+# OpenCL Conformance Test Suite (conversions only)
+#
+Conversions,conversions/test_conversions

diff --git a/test_conformance/opencl_conformance_tests_12_d3d.csv b/test_conformance/opencl_conformance_tests_12_d3d.csv
new file mode 100644
index 0000000..53466eb
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_12_d3d.csv

@@ -0,0 +1,5 @@
+#
+# OpenCL Conformance Test for DirectX interop
+#
+
+D3D10,D3D10/test_d3d10 

diff --git a/test_conformance/opencl_conformance_tests_12_full.csv b/test_conformance/opencl_conformance_tests_12_full.csv
new file mode 100644
index 0000000..083fcfa
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_12_full.csv

@@ -0,0 +1,82 @@
+#
+# OpenCL Conformance Test Suite (full version)
+#
+
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+
+# #########################################
+# Basic operation tests
+# #########################################
+Basic,basic/test_basic
+API,api/test_api
+Compiler,compiler/test_compiler
+
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+Relationals,relationals/test_relationals
+
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions full*
+Multiple Device/Context,multiple_device_context/test_multiples
+Atomics,atomics/test_atomics
+Profiling,profiling/test_profiling
+Events,events/test_events
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
+Printf,printf/test_printf
+Device Partitioning,device_partition/test_device_partition
+
+# #########################################
+# Buffers and images
+# #########################################
+Buffers,buffers/test_buffers
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Select,select/test_select
+Conversions,conversions/test_conversions
+Contractions,contractions/test_contractions
+Math,math_brute_force/test_bruteforce
+Integer Ops,integer_ops/test_integer_ops
+Half Ops,half/test_half

diff --git a/test_conformance/opencl_conformance_tests_12_full_no_math_or_conversions.csv b/test_conformance/opencl_conformance_tests_12_full_no_math_or_conversions.csv
new file mode 100644
index 0000000..2f8d653
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_12_full_no_math_or_conversions.csv

@@ -0,0 +1,79 @@
+#
+# OpenCL Conformance Test Suite (full version)
+#
+
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+
+# #########################################
+# Basic operation tests
+# #########################################
+Basic,basic/test_basic
+API,api/test_api
+Compiler,compiler/test_compiler
+
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+Relationals,relationals/test_relationals
+
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions full*
+Multiple Device/Context,multiple_device_context/test_multiples
+Atomics,atomics/test_atomics
+Profiling,profiling/test_profiling
+Events,events/test_events
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
+
+# #########################################
+# Buffers and images
+# #########################################
+Buffers,buffers/test_buffers
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Select,select/test_select
+Contractions,contractions/test_contractions
+Integer Ops,integer_ops/test_integer_ops
+Half Ops,half/test_half
+

diff --git a/test_conformance/opencl_conformance_tests_12_math.csv b/test_conformance/opencl_conformance_tests_12_math.csv
new file mode 100644
index 0000000..e033190
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_12_math.csv

@@ -0,0 +1,4 @@
+#
+# OpenCL Conformance Test Suite (math only)
+#
+Math,math_brute_force/test_bruteforce 

diff --git a/test_conformance/opencl_conformance_tests_12_quick.csv b/test_conformance/opencl_conformance_tests_12_quick.csv
new file mode 100644
index 0000000..af59165
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_12_quick.csv

@@ -0,0 +1,81 @@
+#
+# OpenCL Conformance Test Suite (quick version)
+# The quick version skips some long-running image tests, runs a shorter math test,
+# and does not run the conversion test.
+#
+
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+
+# #########################################
+# Basic operation tests
+# #########################################
+Basic,basic/test_basic
+API,api/test_api
+Compiler,compiler/test_compiler
+
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+Relationals,relationals/test_relationals
+
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions quick*
+#Multiple Device/Context,multiple_device_context/test_multiples
+Atomics,atomics/test_atomics
+Profiling,profiling/test_profiling
+Events,events/test_events
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+Printf,printf/test_printf
+Device Partitioning,device_partition/test_device_partition
+
+# #########################################
+# Buffers and images
+# #########################################
+Buffers,buffers/test_buffers
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Select,select/test_select
+#Conversions,conversions/test_conversions
+Contractions,contractions/test_contractions
+Math,math_brute_force/test_bruteforce -w
+Integer Ops,integer_ops/test_integer_ops integer_* quick_*
+Half Ops,half/test_half -w

diff --git a/test_conformance/opencl_conformance_tests_20_full.csv b/test_conformance/opencl_conformance_tests_20_full.csv
new file mode 100644
index 0000000..e1c0ecd
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_20_full.csv

@@ -0,0 +1,102 @@
+#
+# OpenCL Conformance Test Suite (full version)
+#
+
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+
+# #########################################
+# Basic operation tests
+# #########################################
+Basic,basic/test_basic
+API,api/test_api
+Compiler,compiler/test_compiler
+
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+Relationals,relationals/test_relationals
+
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions full*
+Multiple Device/Context,multiple_device_context/test_multiples
+Atomics,atomics/test_atomics
+Profiling,profiling/test_profiling
+Events,events/test_events
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
+Printf,printf/test_printf
+Device Partitioning,device_partition/test_device_partition
+
+# #########################################
+# Buffers and images
+# #########################################
+Buffers,buffers/test_buffers
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Select,select/test_select
+Conversions,conversions/test_conversions
+Contractions,contractions/test_contractions
+Math,math_brute_force/test_bruteforce
+Integer Ops,integer_ops/test_integer_ops
+Half Ops,half/test_half
+
+#####################################
+# OpenCL 2.0 tests
+#####################################
+C11 Atomics,c11_atomics/test_c11_atomics
+Execution Model,device_execution/test_device_execution
+Generic Address Space,generic_address_space/test_generic_address_space
+Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group
+Pipes,pipes/test_pipes
+SVM,SVM/test_svm
+Workgroups,workgroups/test_workgroups
+
+#########################################
+# Extensions
+#########################################
+SPIR,spir/test_spir
+Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST
+Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps
+Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps
+Subgroups,subgroups/test_subgroups

diff --git a/test_conformance/opencl_conformance_tests_20_full_no_math_or_conversions.csv b/test_conformance/opencl_conformance_tests_20_full_no_math_or_conversions.csv
new file mode 100644
index 0000000..2f8d653
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_20_full_no_math_or_conversions.csv

@@ -0,0 +1,79 @@
+#
+# OpenCL Conformance Test Suite (full version, without the math and conversions tests)
+#
+
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+
+# #########################################
+# Basic operation tests
+# #########################################
+Basic,basic/test_basic
+API,api/test_api
+Compiler,compiler/test_compiler
+
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+Relationals,relationals/test_relationals
+
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions full*
+Multiple Device/Context,multiple_device_context/test_multiples
+Atomics,atomics/test_atomics
+Profiling,profiling/test_profiling
+Events,events/test_events
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
+
+# #########################################
+# Buffers and images
+# #########################################
+Buffers,buffers/test_buffers
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Select,select/test_select
+Contractions,contractions/test_contractions
+Integer Ops,integer_ops/test_integer_ops
+Half Ops,half/test_half
+

diff --git a/test_conformance/opencl_conformance_tests_20_quick.csv b/test_conformance/opencl_conformance_tests_20_quick.csv
new file mode 100644
index 0000000..af59165
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_20_quick.csv

@@ -0,0 +1,81 @@
+#
+# OpenCL Conformance Test Suite (quick version)
+# The quick version skips some long-running image tests, runs a shorter math test,
+# and does not run the conversion test.
+#
+
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+
+# #########################################
+# Basic operation tests
+# #########################################
+Basic,basic/test_basic
+API,api/test_api
+Compiler,compiler/test_compiler
+
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+Relationals,relationals/test_relationals
+
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions quick*
+#Multiple Device/Context,multiple_device_context/test_multiples
+Atomics,atomics/test_atomics
+Profiling,profiling/test_profiling
+Events,events/test_events
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+Printf,printf/test_printf
+Device Partitioning,device_partition/test_device_partition
+
+# #########################################
+# Buffers and images
+# #########################################
+Buffers,buffers/test_buffers
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Select,select/test_select
+#Conversions,conversions/test_conversions
+Contractions,contractions/test_contractions
+Math,math_brute_force/test_bruteforce -w
+Integer Ops,integer_ops/test_integer_ops integer_* quick_*
+Half Ops,half/test_half -w

diff --git a/test_conformance/opencl_conformance_tests_21_full_spirv.csv b/test_conformance/opencl_conformance_tests_21_full_spirv.csv
new file mode 100644
index 0000000..4b84596
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_21_full_spirv.csv

@@ -0,0 +1,108 @@
+#
+# OpenCL Conformance Test Suite (full version, SPIR-V compilation mode)
+#
+
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+
+# #########################################
+# Basic operation tests
+# #########################################
+Basic,basic/test_basic --compilation-mode spir-v --compilation-cache-path .
+API,api/test_api --compilation-mode spir-v --compilation-cache-path .
+Compiler,compiler/test_compiler --compilation-mode spir-v --compilation-cache-path .
+
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns --compilation-mode spir-v --compilation-cache-path .
+Geometric Functions,geometrics/test_geometrics --compilation-mode spir-v --compilation-cache-path .
+Relationals,relationals/test_relationals --compilation-mode spir-v --compilation-cache-path .
+
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions full* --compilation-mode spir-v --compilation-cache-path .
+Multiple Device/Context,multiple_device_context/test_multiples --compilation-mode spir-v --compilation-cache-path .
+Atomics,atomics/test_atomics --compilation-mode spir-v --compilation-cache-path .
+Profiling,profiling/test_profiling --compilation-mode spir-v --compilation-cache-path .
+Events,events/test_events --compilation-mode spir-v --compilation-cache-path .
+Allocations (single maximum),allocations/test_allocations single 5 all --compilation-mode spir-v --compilation-cache-path .
+Allocations (total maximum),allocations/test_allocations multiple 5 all --compilation-mode spir-v --compilation-cache-path .
+VecAlign, vec_align/test_vecalign --compilation-mode spir-v --compilation-cache-path .
+VecStep, vec_step/test_vecstep --compilation-mode spir-v --compilation-cache-path .
+Printf,printf/test_printf --compilation-mode spir-v --compilation-cache-path .
+Device Partitioning,device_partition/test_device_partition --compilation-mode spir-v --compilation-cache-path .
+
+# #########################################
+# Buffers and images
+# #########################################
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Buffers,buffers/test_buffers --compilation-mode spir-v --compilation-cache-path .
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods --compilation-mode spir-v --compilation-cache-path .
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads --compilation-mode spir-v --compilation-cache-path .
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches --compilation-mode spir-v --compilation-cache-path .
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images --compilation-mode spir-v --compilation-cache-path .
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR --compilation-mode spir-v --compilation-cache-path .
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR --compilation-mode spir-v --compilation-cache-path .
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR --compilation-mode spir-v --compilation-cache-path .
+
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl --compilation-mode spir-v --compilation-cache-path .
+
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Select,select/test_select --compilation-mode spir-v --compilation-cache-path .
+Conversions,conversions/test_conversions --compilation-mode spir-v --compilation-cache-path .
+Contractions,contractions/test_contractions --compilation-mode spir-v --compilation-cache-path .
+Math,math_brute_force/test_bruteforce --compilation-mode spir-v --compilation-cache-path .
+Integer Ops,integer_ops/test_integer_ops --compilation-mode spir-v --compilation-cache-path .
+Half Ops,half/test_half --compilation-mode spir-v --compilation-cache-path .
+
+#####################################
+# OpenCL 2.0 tests
+#####################################
+C11 Atomics,c11_atomics/test_c11_atomics --compilation-mode spir-v --compilation-cache-path .
+Execution Model,device_execution/test_device_execution --compilation-mode spir-v --compilation-cache-path .
+Generic Address Space,generic_address_space/test_generic_address_space --compilation-mode spir-v --compilation-cache-path .
+Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group --compilation-mode spir-v --compilation-cache-path .
+Pipes,pipes/test_pipes --compilation-mode spir-v --compilation-cache-path .
+SVM,SVM/test_svm --compilation-mode spir-v --compilation-cache-path .
+Workgroups,workgroups/test_workgroups --compilation-mode spir-v --compilation-cache-path .
+
+#####################################
+# OpenCL 2.1 tests
+#####################################
+Device timer,device_timer/test_device_timer
+SPIRV new,spirv_new/test_spirv_new -ILPath spirv_bin
+
+#########################################
+# Extensions
+#########################################
+SPIR,spir/test_spir
+Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
+Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps --compilation-mode spir-v --compilation-cache-path .
+Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps --compilation-mode spir-v --compilation-cache-path .
+Subgroups,subgroups/test_subgroups --compilation-mode spir-v --compilation-cache-path .

diff --git a/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv b/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv
new file mode 100644
index 0000000..bbde183
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv

@@ -0,0 +1,100 @@
+#
+# OpenCL Conformance Test Suite (quick version)
+# The quick version skips some long-running image tests and runs the math,
+# conversion and half tests with the -w option.
+#
+
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+
+# #########################################
+# Basic operation tests
+# #########################################
+Basic,basic/test_basic
+API,api/test_api
+Compiler,compiler/test_compiler
+
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+Relationals,relationals/test_relationals
+
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions quick*
+Multiple Device/Context,multiple_device_context/test_multiples
+Atomics,atomics/test_atomics
+Profiling,profiling/test_profiling
+Events,events/test_events
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
+Printf,printf/test_printf
+Device Partitioning,device_partition/test_device_partition
+
+# #########################################
+# Buffers and images
+# #########################################
+Buffers,buffers/test_buffers
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Select,select/test_select
+Conversions,conversions/test_conversions -w
+Contractions,contractions/test_contractions
+Math,math_brute_force/test_bruteforce -w
+Integer Ops,integer_ops/test_integer_ops integer_* quick_*
+Half Ops,half/test_half -w
+
+#####################################
+# OpenCL 2.0 tests
+#####################################
+C11 Atomics,c11_atomics/test_c11_atomics
+Execution Model,device_execution/test_device_execution
+Generic Address Space,generic_address_space/test_generic_address_space
+Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group
+Pipes,pipes/test_pipes
+SVM,SVM/test_svm
+Workgroups,workgroups/test_workgroups
+
+#####################################
+# OpenCL 2.1 tests
+#####################################
+Device timer,device_timer/test_device_timer
+SPIRV new,spirv_new/test_spirv_new -ILPath spirv_bin
+
+#########################################
+# Extensions
+#########################################
+SPIR,spir/test_spir
+Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST
+Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps
+Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps
+Subgroups,subgroups/test_subgroups

diff --git a/test_conformance/opencl_conformance_tests_22.csv b/test_conformance/opencl_conformance_tests_22.csv
new file mode 100644
index 0000000..2ef864a
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_22.csv

@@ -0,0 +1,45 @@
+#
+# OpenCL Conformance Test Suite (2.2 version)
+#
+
+# #########################################
+# New API features
+# #########################################
+API (ctors and dtors of global scope vars) , clcpp/api/test_cpp_api "test_global_scope*"
+API (specialization constants)             , clcpp/api/test_cpp_api "test_spec_consts*"
+
+# #########################################
+# New representation of types
+# #########################################
+Images and samplers                        , clcpp/images/test_cpp_images
+Pipes and reservations                     , clcpp/pipes/test_cpp_pipes "test_pipes_pipe"
+Device enqueue and events                  , clcpp/device_queue/test_cpp_device_queue
+Address spaces                             , clcpp/address_spaces/test_cpp_address_spaces
+
+# #########################################
+# New representation of functions
+# #########################################
+Conversions (convert_cast)                 , clcpp/convert/test_cpp_convert
+Reinterpreting (as_type)                   , clcpp/reinterpret/test_cpp_reinterpret
+Atomics                                    , clcpp/atomics/test_cpp_atomics
+Work-item functions                        , clcpp/workitems/test_cpp_workitems
+Work-group functions                       , clcpp/workgroups/test_cpp_workgroups
+Sub-group functions                        , clcpp/subgroups/test_cpp_subgroups
+Synchronization functions                  , clcpp/synchronization/test_cpp_synchronization "test_work_group_barrier*" "test_sub_group_barrier*"
+Math functions                             , clcpp/math_funcs/test_cpp_math_funcs
+Integer functions                          , clcpp/integer_funcs/test_cpp_integer_funcs
+Common functions                           , clcpp/common_funcs/test_cpp_common_funcs
+Geometric functions                        , clcpp/geometric_funcs/test_cpp_geometric_funcs
+Relational functions                       , clcpp/relational_funcs/test_cpp_relational_funcs
+vload and vstore functions                 , clcpp/vload_vstore/test_cpp_vload_vstore_funcs
+
+# #########################################
+# New in OpenCL C++
+# #########################################
+Specialization constants                   , clcpp/spec_constants/test_cpp_spec_constants
+Named barriers (KHR extension)             , clcpp/synchronization/test_cpp_synchronization "test_work_group_named_barrier*"
+required_num_sub_groups attribute          , clcpp/attributes/test_cpp_attributes "test_required_num_sub_groups*"
+ivdep attribute                            , clcpp/attributes/test_cpp_attributes "test_ivdep*"
+max_size attribute                         , clcpp/attributes/test_cpp_attributes "test_max_size*"
+Ctors and dtors of global scope objects    , clcpp/program_scope_ctors_dtors/test_cpp_program_scope_ctors_dtors
+Pipe storages                              , clcpp/pipes/test_cpp_pipes "test_pipes_pipe_storage"

diff --git a/test_conformance/opencl_conformance_tests_full.csv b/test_conformance/opencl_conformance_tests_full.csv
index 2b0dc8a..0555527 100644
--- a/test_conformance/opencl_conformance_tests_full.csv
+++ b/test_conformance/opencl_conformance_tests_full.csv

@@ -30,8 +30,9 @@
 Profiling,profiling/test_profiling
 Events,events/test_events
 Allocations (single maximum),allocations/test_allocations single 5 all
-Allocations (total maximum),allocations/test_allocations multiple 5 all
-Vectors, vectors/test_vectors
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
 Printf,printf/test_printf
 Device Partitioning,device_partition/test_device_partition
 
@@ -39,23 +40,23 @@
 # Buffers and images
 # #########################################
 Buffers,buffers/test_buffers
-Images (API Info),images/clGetInfo/test_cl_get_info
-Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
 Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
 Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
 Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
 Images (clCopyImage),images/clCopyImage/test_cl_copy_images
 Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
-Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images
-Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images
-Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches
-Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images
-Images (clFillImage),images/clFillImage/test_cl_fill_images
-Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches
-Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images
-Images (Samplerless),images/samplerlessReads/test_samplerless_reads
-Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches
-Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
 Mem (Host Flags),mem_host_flags/test_mem_host_flags
 
 # #########################################
@@ -95,7 +96,7 @@
 # OpenCL 2.1 tests
 #####################################
 Device timer,device_timer/test_device_timer
-SPIRV new,spirv_new/test_spirv_new --spirv-binaries-path spirv_bin
+SPIRV new,spirv_new/test_spirv_new -ILPath spirv_bin
 
 #########################################
 # Extensions

diff --git a/test_conformance/opencl_conformance_tests_full_binary.csv b/test_conformance/opencl_conformance_tests_full_binary.csv
deleted file mode 100644
index 348f32e..0000000
--- a/test_conformance/opencl_conformance_tests_full_binary.csv
+++ /dev/null

@@ -1,107 +0,0 @@
-#
-# OpenCL Conformance Test Suite (full version)
-#
-
-# #########################################
-# Basic Information on the compute device
-# #########################################
-Compute Info,computeinfo/test_computeinfo
-
-# #########################################
-# Basic operation tests
-# #########################################
-Basic,basic/test_basic --compilation-mode binary --compilation-cache-path .
-API,api/test_api --compilation-mode binary --compilation-cache-path .
-Compiler,compiler/test_compiler --compilation-mode binary --compilation-cache-path .
-
-# #########################################
-# Common mathematical functions
-# #########################################
-Common Functions,commonfns/test_commonfns --compilation-mode binary --compilation-cache-path .
-Geometric Functions,geometrics/test_geometrics --compilation-mode binary --compilation-cache-path .
-Relationals,relationals/test_relationals --compilation-mode binary --compilation-cache-path .
-
-# #########################################
-# General operation
-# #########################################
-Thread Dimensions,thread_dimensions/test_thread_dimensions full* --compilation-mode binary --compilation-cache-path .
-Multiple Device/Context,multiple_device_context/test_multiples --compilation-mode binary --compilation-cache-path .
-Atomics,atomics/test_atomics --compilation-mode binary --compilation-cache-path .
-Profiling,profiling/test_profiling --compilation-mode binary --compilation-cache-path .
-Events,events/test_events --compilation-mode binary --compilation-cache-path .
-Allocations (single maximum),allocations/test_allocations single 5 all --compilation-mode binary --compilation-cache-path .
-Allocations (total maximum),allocations/test_allocations multiple 5 all --compilation-mode binary --compilation-cache-path .
-Vectors, vectors/test_vectors --compilation-mode binary --compilation-cache-path .
-Printf,printf/test_printf --compilation-mode binary --compilation-cache-path .
-Device Partitioning,device_partition/test_device_partition --compilation-mode binary --compilation-cache-path .
-
-# #########################################
-# Buffers and images
-# #########################################
-Images (API Info),images/clGetInfo/test_cl_get_info
-Buffers,buffers/test_buffers --compilation-mode binary --compilation-cache-path .
-Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods --compilation-mode binary --compilation-cache-path .
-Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST --compilation-mode binary --compilation-cache-path .
-Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST --compilation-mode binary --compilation-cache-path .
-Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST --compilation-mode binary --compilation-cache-path .
-Images (clCopyImage),images/clCopyImage/test_cl_copy_images
-Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
-Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images
-Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images
-Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches
-Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images
-Images (clFillImage),images/clFillImage/test_cl_fill_images
-Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches
-Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images
-Images (Samplerless),images/samplerlessReads/test_samplerless_reads --compilation-mode binary --compilation-cache-path .
-Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches --compilation-mode binary --compilation-cache-path .
-Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images --compilation-mode binary --compilation-cache-path .
-Mem (Host Flags),mem_host_flags/test_mem_host_flags
-
-# #########################################
-# CPU is required to pass linear and normalized image filtering
-# #########################################
-CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR --compilation-mode binary --compilation-cache-path .
-CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR --compilation-mode binary --compilation-cache-path .
-CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR --compilation-mode binary --compilation-cache-path .
-
-# #########################################
-# OpenGL/CL interaction
-# #########################################
-OpenCL-GL Sharing,gl/test_gl --compilation-mode binary --compilation-cache-path .
-
-# #########################################
-# Thorough math and conversions tests
-# #########################################
-Select,select/test_select --compilation-mode binary --compilation-cache-path .
-Conversions,conversions/test_conversions --compilation-mode binary --compilation-cache-path .
-Contractions,contractions/test_contractions --compilation-mode binary --compilation-cache-path .
-Math,math_brute_force/test_bruteforce --compilation-mode binary --compilation-cache-path .
-Integer Ops,integer_ops/test_integer_ops --compilation-mode binary --compilation-cache-path .
-Half Ops,half/test_half --compilation-mode binary --compilation-cache-path .
-
-#####################################
-# OpenCL 2.0 tests
-#####################################
-C11 Atomics,c11_atomics/test_c11_atomics --compilation-mode binary --compilation-cache-path .
-Execution Model,device_execution/test_device_execution --compilation-mode binary --compilation-cache-path .
-Generic Address Space,generic_address_space/test_generic_address_space --compilation-mode binary --compilation-cache-path .
-Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group --compilation-mode binary --compilation-cache-path .
-Pipes,pipes/test_pipes --compilation-mode binary --compilation-cache-path .
-SVM,SVM/test_svm --compilation-mode binary --compilation-cache-path .
-Workgroups,workgroups/test_workgroups --compilation-mode binary --compilation-cache-path .
-
-#####################################
-# OpenCL 2.1 tests
-#####################################
-Device timer,device_timer/test_device_timer
-SPIRV new,spirv_new/test_spirv_new --spirv-binaries-path spirv_bin
-
-#########################################
-# Extensions
-#########################################
-SPIR,spir/test_spir
-Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST --compilation-mode binary --compilation-cache-path .
-Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps --compilation-mode binary --compilation-cache-path .
-Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps --compilation-mode binary --compilation-cache-path .
-Subgroups,subgroups/test_subgroups --compilation-mode binary --compilation-cache-path .

diff --git a/test_conformance/opencl_conformance_tests_full_no_math_or_conversions.csv b/test_conformance/opencl_conformance_tests_full_no_math_or_conversions.csv
index 89d4490..ecc1314 100644
--- a/test_conformance/opencl_conformance_tests_full_no_math_or_conversions.csv
+++ b/test_conformance/opencl_conformance_tests_full_no_math_or_conversions.csv

@@ -30,8 +30,9 @@
 Profiling,profiling/test_profiling
 Events,events/test_events
 Allocations (single maximum),allocations/test_allocations single 5 all
-Allocations (total maximum),allocations/test_allocations multiple 5 all
-Vectors, vectors/test_vectors
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
 Printf,printf/test_printf
 Device Partitioning,device_partition/test_device_partition
 
@@ -39,23 +40,23 @@
 # Buffers and images
 # #########################################
 Buffers,buffers/test_buffers
-Images (API Info),images/clGetInfo/test_cl_get_info
-Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
 Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
 Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
 Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
 Images (clCopyImage),images/clCopyImage/test_cl_copy_images
 Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
-Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images
-Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images
-Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches
-Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images
-Images (clFillImage),images/clFillImage/test_cl_fill_images
-Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches
-Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images
-Images (Samplerless),images/samplerlessReads/test_samplerless_reads
-Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches
-Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
 Mem (Host Flags),mem_host_flags/test_mem_host_flags
 
 # #########################################
@@ -93,7 +94,7 @@
 # OpenCL 2.1 tests
 #####################################
 Device timer,device_timer/test_device_timer
-SPIRV new,spirv_new/test_spirv_new --spirv-binaries-path spirv_bin
+SPIRV new,spirv_new/test_spirv_new -ILPath spirv_bin
 
 #########################################
 # Extensions

diff --git a/test_conformance/opencl_conformance_tests_full_spirv.csv b/test_conformance/opencl_conformance_tests_full_spirv.csv
deleted file mode 100644
index 24b4186..0000000
--- a/test_conformance/opencl_conformance_tests_full_spirv.csv
+++ /dev/null

@@ -1,107 +0,0 @@
-#
-# OpenCL Conformance Test Suite (full version)
-#
-
-# #########################################
-# Basic Information on the compute device
-# #########################################
-Compute Info,computeinfo/test_computeinfo
-
-# #########################################
-# Basic operation tests
-# #########################################
-Basic,basic/test_basic --compilation-mode spir-v --compilation-cache-path .
-API,api/test_api --compilation-mode spir-v --compilation-cache-path .
-Compiler,compiler/test_compiler --compilation-mode spir-v --compilation-cache-path .
-
-# #########################################
-# Common mathematical functions
-# #########################################
-Common Functions,commonfns/test_commonfns --compilation-mode spir-v --compilation-cache-path .
-Geometric Functions,geometrics/test_geometrics --compilation-mode spir-v --compilation-cache-path .
-Relationals,relationals/test_relationals --compilation-mode spir-v --compilation-cache-path .
-
-# #########################################
-# General operation
-# #########################################
-Thread Dimensions,thread_dimensions/test_thread_dimensions full* --compilation-mode spir-v --compilation-cache-path .
-Multiple Device/Context,multiple_device_context/test_multiples --compilation-mode spir-v --compilation-cache-path .
-Atomics,atomics/test_atomics --compilation-mode spir-v --compilation-cache-path .
-Profiling,profiling/test_profiling --compilation-mode spir-v --compilation-cache-path .
-Events,events/test_events --compilation-mode spir-v --compilation-cache-path .
-Allocations (single maximum),allocations/test_allocations single 5 all --compilation-mode spir-v --compilation-cache-path .
-Allocations (total maximum),allocations/test_allocations multiple 5 all --compilation-mode spir-v --compilation-cache-path .
-Vectors, vectors/test_vectors --compilation-mode spir-v --compilation-cache-path .
-Printf,printf/test_printf --compilation-mode spir-v --compilation-cache-path .
-Device Partitioning,device_partition/test_device_partition --compilation-mode spir-v --compilation-cache-path .
-
-# #########################################
-# Buffers and images
-# #########################################
-Images (API Info),images/clGetInfo/test_cl_get_info 
-Buffers,buffers/test_buffers --compilation-mode spir-v --compilation-cache-path .
-Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods --compilation-mode spir-v --compilation-cache-path .
-Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
-Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
-Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
-Images (clCopyImage),images/clCopyImage/test_cl_copy_images
-Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
-Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
-Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
-Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
-Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
-Images (clFillImage),images/clFillImage/test_cl_fill_images 
-Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
-Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
-Images (Samplerless),images/samplerlessReads/test_samplerless_reads --compilation-mode spir-v --compilation-cache-path .
-Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches --compilation-mode spir-v --compilation-cache-path .
-Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images --compilation-mode spir-v --compilation-cache-path .
-Mem (Host Flags),mem_host_flags/test_mem_host_flags
-
-# #########################################
-# CPU is required to pass linear and normalized image filtering
-# #########################################
-CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR --compilation-mode spir-v --compilation-cache-path .
-CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR --compilation-mode spir-v --compilation-cache-path .
-CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR --compilation-mode spir-v --compilation-cache-path .
-
-# #########################################
-# OpenGL/CL interaction
-# #########################################
-OpenCL-GL Sharing,gl/test_gl --compilation-mode spir-v --compilation-cache-path .
-
-# #########################################
-# Thorough math and conversions tests
-# #########################################
-Select,select/test_select --compilation-mode spir-v --compilation-cache-path .
-Conversions,conversions/test_conversions --compilation-mode spir-v --compilation-cache-path .
-Contractions,contractions/test_contractions --compilation-mode spir-v --compilation-cache-path .
-Math,math_brute_force/test_bruteforce --compilation-mode spir-v --compilation-cache-path .
-Integer Ops,integer_ops/test_integer_ops --compilation-mode spir-v --compilation-cache-path .
-Half Ops,half/test_half --compilation-mode spir-v --compilation-cache-path .
-
-#####################################
-# OpenCL 2.0 tests
-#####################################
-C11 Atomics,c11_atomics/test_c11_atomics --compilation-mode spir-v --compilation-cache-path .
-Execution Model,device_execution/test_device_execution --compilation-mode spir-v --compilation-cache-path .
-Generic Address Space,generic_address_space/test_generic_address_space --compilation-mode spir-v --compilation-cache-path .
-Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group --compilation-mode spir-v --compilation-cache-path .
-Pipes,pipes/test_pipes --compilation-mode spir-v --compilation-cache-path .
-SVM,SVM/test_svm --compilation-mode spir-v --compilation-cache-path .
-Workgroups,workgroups/test_workgroups --compilation-mode spir-v --compilation-cache-path .
-
-#####################################
-# OpenCL 2.1 tests
-#####################################
-Device timer,device_timer/test_device_timer
-SPIRV new,spirv_new/test_spirv_new --spirv-binaries-path spirv_bin
-
-#########################################
-# Extensions
-#########################################
-SPIR,spir/test_spir
-Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
-Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps --compilation-mode spir-v --compilation-cache-path .
-Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps --compilation-mode spir-v --compilation-cache-path .
-Subgroups,subgroups/test_subgroups --compilation-mode spir-v --compilation-cache-path .

diff --git a/test_conformance/opencl_conformance_tests_math.csv b/test_conformance/opencl_conformance_tests_math.csv
index 03fddea..e033190 100644
--- a/test_conformance/opencl_conformance_tests_math.csv
+++ b/test_conformance/opencl_conformance_tests_math.csv

@@ -1,4 +1,4 @@
 #
 # OpenCL Conformance Test Suite (math only)
 #
-Math,math_brute_force/test_bruteforce
+Math,math_brute_force/test_bruteforce 

diff --git a/test_conformance/opencl_conformance_tests_quick.csv b/test_conformance/opencl_conformance_tests_quick.csv
index 043df82..af59165 100644
--- a/test_conformance/opencl_conformance_tests_quick.csv
+++ b/test_conformance/opencl_conformance_tests_quick.csv

@@ -32,7 +32,7 @@
 Profiling,profiling/test_profiling
 Events,events/test_events
 Allocations (single maximum),allocations/test_allocations single 5 all
-Allocations (total maximum),allocations/test_allocations multiple 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
 Printf,printf/test_printf
 Device Partitioning,device_partition/test_device_partition
 
@@ -40,22 +40,22 @@
 # Buffers and images
 # #########################################
 Buffers,buffers/test_buffers
-Images (API Info),images/clGetInfo/test_cl_get_info
-Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
 Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
 Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
 Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
 Images (clCopyImage),images/clCopyImage/test_cl_copy_images
-Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images
-Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images
-Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches
-Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images
-Images (clFillImage),images/clFillImage/test_cl_fill_images
-Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches
-Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images
-Images (Samplerless),images/samplerlessReads/test_samplerless_reads
-Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches
-Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
 Mem (Host Flags),mem_host_flags/test_mem_host_flags
 
 # #########################################
@@ -79,29 +79,3 @@
 Math,math_brute_force/test_bruteforce -w
 Integer Ops,integer_ops/test_integer_ops integer_* quick_*
 Half Ops,half/test_half -w
-
-#####################################
-# OpenCL 2.0 tests
-#####################################
-C11 Atomics,c11_atomics/test_c11_atomics
-Execution Model,device_execution/test_device_execution
-Generic Address Space,generic_address_space/test_generic_address_space
-Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group
-Pipes,pipes/test_pipes
-SVM,SVM/test_svm
-Workgroups,workgroups/test_workgroups
-
-#####################################
-# OpenCL 2.1 tests
-#####################################
-Device timer,device_timer/test_device_timer
-SPIRV new,spirv_new/test_spirv_new --spirv-binaries-path spirv_bin
-
-#########################################
-# Extensions
-#########################################
-SPIR,spir/test_spir
-Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST
-Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps
-Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps
-Subgroups,subgroups/test_subgroups

diff --git a/test_conformance/pipes/main.cpp b/test_conformance/pipes/main.cpp
index 4241c4d..45b4c28 100644
--- a/test_conformance/pipes/main.cpp
+++ b/test_conformance/pipes/main.cpp

@@ -23,12 +23,9 @@
 test_status InitCL(cl_device_id device) {
   auto version = get_device_cl_version(device);
   auto expected_min_version = Version(2, 0);
-  if (version < expected_min_version)
-  {
-      version_expected_info("Test", "OpenCL",
-                            expected_min_version.to_string().c_str(),
-                            version.to_string().c_str());
-      return TEST_SKIP;
+  if (version < expected_min_version) {
+    version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
+    return TEST_SKIP;
   }
 
   int error;

diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp
index 169ab80..85247f8 100644
--- a/test_conformance/pipes/test_pipe_limits.cpp
+++ b/test_conformance/pipes/test_pipe_limits.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -218,8 +218,9 @@
     const char *sources[] = { kernel_source.c_str() };
 
     // Create producer kernel
-    err = create_single_kernel_helper(context, &program, &kernel[0], 1, sources,
-                                      kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(
+        context, &program, &kernel[0], 1, sources, kernelName[0],
+        "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
 
     //Create consumer kernel
@@ -367,8 +368,9 @@
     const char *sources[] = { kernel_source.c_str() };
 
     // Create producer kernel
-    err = create_single_kernel_helper(context, &program, &kernel[0], 1, sources,
-                                      kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(
+        context, &program, &kernel[0], 1, sources, kernelName[0],
+        "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
 
     //Create consumer kernel
@@ -531,8 +533,9 @@
     const char *sources[] = { kernel_source.c_str() };
 
     // Create producer kernel
-    err = create_single_kernel_helper(context, &program, &kernel[0], 1, sources,
-                                      kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(
+        context, &program, &kernel[0], 1, sources, kernelName[0],
+        "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
 
     // Create consumer kernel

diff --git a/test_conformance/pipes/test_pipe_query_functions.cpp b/test_conformance/pipes/test_pipe_query_functions.cpp
index 21d1950..f9c93aa 100644
--- a/test_conformance/pipes/test_pipe_query_functions.cpp
+++ b/test_conformance/pipes/test_pipe_query_functions.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -140,9 +140,7 @@
     test_error_ret(err, " clCreatePipe failed", -1);
 
     // Create producer kernel
-    err = create_single_kernel_helper(
-        context, &program, &kernel[0], 1,
-        (const char **)&pipe_query_functions_kernel_code, kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_query_functions_kernel_code, kernelName[0], "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
 
     //Create consumer kernel

diff --git a/test_conformance/pipes/test_pipe_read_write.cpp b/test_conformance/pipes/test_pipe_read_write.cpp
index dd0d121..4bb4468 100644
--- a/test_conformance/pipes/test_pipe_read_write.cpp
+++ b/test_conformance/pipes/test_pipe_read_write.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -361,8 +361,8 @@
 {
     int            i;
     int            sum_input = 0, sum_output = 0;
-    cl_half *inptr = (cl_half *)ptr1;
-    cl_half *outptr = (cl_half *)ptr2;
+    cl_ushort    *inptr = (cl_ushort *)ptr1;
+    cl_ushort    *outptr = (cl_ushort *)ptr2;
 
     for(i = 0; i < n; i++)
     {
@@ -531,8 +531,9 @@
         std::string kernel_source = sourceCode[i].str();
         const char *sources[] = { kernel_source.c_str() };
         // Create producer kernel
-        err = create_single_kernel_helper(context, &program[i], &kernel[ii], 1,
-                                          sources, kernelName[ii]);
+        err = create_single_kernel_helper_with_build_options(
+            context, &program[i], &kernel[ii], 1, sources, kernelName[ii],
+            "-cl-std=CL2.0");
 
         test_error_ret(err, " Error creating program", -1);
 
@@ -658,8 +659,7 @@
     test_error_ret(err, " clCreatePipe failed", -1);
 
     // Create producer kernel
-    err = create_single_kernel_helper(context, &program, &kernel[0], 1,
-                                      &kernelCode, kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, &kernelCode, kernelName[0], "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
 
     //Create consumer kernel

diff --git a/test_conformance/pipes/test_pipe_readwrite_errors.cpp b/test_conformance/pipes/test_pipe_readwrite_errors.cpp
index d4b4524..1b9fc31 100644
--- a/test_conformance/pipes/test_pipe_readwrite_errors.cpp
+++ b/test_conformance/pipes/test_pipe_readwrite_errors.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -115,9 +115,7 @@
     test_error_ret(err, " clCreatePipe failed", -1);
 
     // Create producer kernel
-    err = create_single_kernel_helper(
-        context, &program, &kernel[0], 1,
-        (const char **)&pipe_readwrite_errors_kernel_code, kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_readwrite_errors_kernel_code, kernelName[0], "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
 
     //Create consumer kernel

diff --git a/test_conformance/pipes/test_pipe_subgroups.cpp b/test_conformance/pipes/test_pipe_subgroups.cpp
index b3e1718..b41170c 100644
--- a/test_conformance/pipes/test_pipe_subgroups.cpp
+++ b/test_conformance/pipes/test_pipe_subgroups.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -146,9 +146,7 @@
     test_error_ret(err, " clCreatePipe failed", -1);
 
     // Create producer kernel
-    err = create_single_kernel_helper(
-        context, &program, &kernel[0], 1,
-        (const char **)&pipe_subgroups_kernel_code, kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_subgroups_kernel_code, kernelName[0], "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
 
     //Create consumer kernel

diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp
index 2b804e4..a65a3dc 100644
--- a/test_conformance/printf/test_printf.cpp
+++ b/test_conformance/printf/test_printf.cpp

@@ -20,7 +20,7 @@
 #include <memory>
 
 #if ! defined( _WIN32)
-#if defined(__APPLE__)
+#if ! defined( __ANDROID__ )
 #include <sys/sysctl.h>
 #endif
 #include <unistd.h>
@@ -59,7 +59,7 @@
 //Stream helper functions
 
 //Associate stdout stream with the file(gFileName):i.e redirect stdout stream to the specific files (gFileName)
-static int acquireOutputStream(int* error);
+static int acquireOutputStream();
 
 //Close the file(gFileName) associated with the stdout stream and disassociates it.
 static void releaseOutputStream(int fd);
@@ -141,15 +141,10 @@
 //-----------------------------------------
 // acquireOutputStream
 //-----------------------------------------
-static int acquireOutputStream(int* error)
+static int acquireOutputStream()
 {
     int fd = streamDup(fileno(stdout));
-    *error = 0;
-    if (!freopen(gFileName, "w", stdout))
-    {
-        releaseOutputStream(fd);
-        *error = -1;
-    }
+    freopen(gFileName,"w",stdout);
     return fd;
 }
 
@@ -306,22 +301,15 @@
 
     if(allTestCase[testId]->_type == TYPE_VECTOR)
     {
-        err = create_single_kernel_helper(
-            context, &program, kernel_ptr,
-            sizeof(sourceVec) / sizeof(sourceVec[0]), sourceVec, testname);
+        err = create_single_kernel_helper(context, &program, NULL, sizeof(sourceVec) / sizeof(sourceVec[0]), sourceVec, NULL);
     }
     else if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE)
     {
-        err = create_single_kernel_helper(context, &program, kernel_ptr,
-                                          sizeof(sourceAddrSpace)
-                                              / sizeof(sourceAddrSpace[0]),
-                                          sourceAddrSpace, testname);
+        err = create_single_kernel_helper(context, &program, NULL, sizeof(sourceAddrSpace) / sizeof(sourceAddrSpace[0]), sourceAddrSpace, NULL);
     }
     else
     {
-        err = create_single_kernel_helper(
-            context, &program, kernel_ptr,
-            sizeof(sourceGen) / sizeof(sourceGen[0]), sourceGen, testname);
+        err = create_single_kernel_helper(context, &program, NULL, sizeof(sourceGen) / sizeof(sourceGen[0]), sourceGen, NULL);
     }
 
     if (!program || err) {
@@ -329,6 +317,12 @@
         return NULL;
     }
 
+    *kernel_ptr = clCreateKernel(program, testname, &err);
+    if ( err ) {
+        log_error("clCreateKernel failed (%d)\n", err);
+        return NULL;
+    }
+
     return program;
 }
 
@@ -498,12 +492,7 @@
         }
     }
 
-    fd = acquireOutputStream(&err);
-    if (err != 0)
-    {
-        log_error("Error while redirection stdout to file");
-        goto exit;
-    }
+    fd = acquireOutputStream();
     globalWorkSize[0] = 1;
     cl_event ndrEvt;
     err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL,&ndrEvt);
@@ -993,12 +982,7 @@
     uint32_t compute_devices = 0;
 
     int err;
-    gFd = acquireOutputStream(&err);
-    if (err != 0)
-    {
-        log_error("Error while redirection stdout to file");
-        return TEST_FAIL;
-    }
+    gFd = acquireOutputStream();
 
     size_t config_size = sizeof( device_frequency );
 #if MULTITHREAD
@@ -1024,24 +1008,20 @@
     auto expected_min_version = Version(1, 2);
     if (version < expected_min_version)
     {
-        version_expected_info("Test", "OpenCL",
-                              expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
+        version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
         return TEST_SKIP;
     }
 
     log_info( "Test binary built %s %s\n", __DATE__, __TIME__ );
 
-    gFd = acquireOutputStream(&err);
-    if (err != 0)
-    {
-        log_error("Error while redirection stdout to file");
-        return TEST_FAIL;
-    }
-    cl_context_properties printf_properties[] = {
-        CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printfCallBack,
-        CL_PRINTF_BUFFERSIZE_ARM, ANALYSIS_BUFFER_SIZE, 0
-    };
+    gFd = acquireOutputStream();
+
+    cl_context_properties printf_properties[] =
+        {
+            CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printfCallBack,
+            CL_PRINTF_BUFFERSIZE_ARM, ANALYSIS_BUFFER_SIZE,
+            0
+        };
 
     cl_context_properties* props = NULL;
 

diff --git a/test_conformance/profiling/copy.cpp b/test_conformance/profiling/copy.cpp
index 46d1560..97e729e 100644
--- a/test_conformance/profiling/copy.cpp
+++ b/test_conformance/profiling/copy.cpp

@@ -107,14 +107,12 @@
     int_input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     int_output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
 
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, &err);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err );
     if( !streams[0] ){
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, &err);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err );
     if( !streams[1] ){
         log_error("clCreateBuffer failed\n");
         return -1;
@@ -252,15 +250,13 @@
     inptr = (cl_int *)malloc(sizeof(cl_int) * num_elements);
     outptr = (cl_int *)malloc(sizeof(cl_int) * num_elements);
 
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, &err);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, &err);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -440,6 +436,9 @@
     void                            *dst = NULL;
     cl_kernel                    kernel[1];
     size_t                        threads[2];
+#ifdef USE_LOCAL_THREADS
+    size_t                        localThreads[2];
+#endif
     int                                err = 0;
     cl_mem_flags            flags;
     unsigned int            num_channels = 4;
@@ -457,6 +456,16 @@
     threads[0] = (size_t)w;
     threads[1] = (size_t)h;
 
+#ifdef USE_LOCAL_THREADS
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
+    test_error( err, "Unable to get thread group max size" );
+    localThreads[1] = localThreads[0];
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
+    if( localThreads[1] > threads[1] )
+        localThreads[1] = threads[1];
+#endif
+
     inptr = (void *)generate_image( (int)num_bytes, d );
     if( ! inptr ){
         log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h );
@@ -471,7 +480,7 @@
     }
 
     // allocate the input image
-    flags = CL_MEM_READ_WRITE;
+    flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
     memobjs[0] = create_image_2d(context, flags, &image_format_desc, w, h, 0, NULL, &err);
     if( memobjs[0] == (cl_mem)0 ) {
         free( dst );
@@ -480,8 +489,7 @@
         return -1;
     }
 
-    memobjs[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE, num_bytes, NULL, &err);
+    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), num_bytes, NULL, &err );
     if( memobjs[1] == (cl_mem)0 ) {
         clReleaseMemObject(memobjs[0]);
         free( dst );
@@ -531,8 +539,11 @@
         return -1;
     }
 
+#ifdef USE_LOCAL_THREADS
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
+#else
     err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
-
+#endif
     if (err != CL_SUCCESS){
         print_error( err, "clEnqueueNDRangeKernel failed" );
         clReleaseKernel( kernel[0] );
@@ -789,7 +800,7 @@
     }
 
     // allocate the input image
-    flags = CL_MEM_READ_WRITE;
+    flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
     memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err );
     if( memobjs[0] == (cl_mem)0 ){
         free( dst );
@@ -798,9 +809,7 @@
         return -1;
     }
 
-    memobjs[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       channel_nbytes * num_channels * w * h, NULL, &err);
+    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), channel_nbytes * num_channels*w*h, NULL, &err );
     if( memobjs[1] == (cl_mem)0 ) {
         clReleaseMemObject( memobjs[0] );
         free( dst );

diff --git a/test_conformance/profiling/execute.cpp b/test_conformance/profiling/execute.cpp
index edfc043..31a5db8 100644
--- a/test_conformance/profiling/execute.cpp
+++ b/test_conformance/profiling/execute.cpp

@@ -175,6 +175,9 @@
     cl_event            executeEvent;
     cl_ulong    queueStart, submitStart, writeStart, writeEnd;
     size_t                threads[2];
+#ifdef USE_LOCAL_THREADS
+    size_t                localThreads[2];
+#endif
     float                filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
     int                    filter_w = 3, filter_h = 3;
     int                    err = 0;
@@ -183,10 +186,19 @@
     threads[0] = w;
     threads[1] = h;
 
+#ifdef USE_LOCAL_THREADS
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
+    test_error( err, "Unable to get thread group max size" );
+    localThreads[1] = localThreads[0];
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
+    if( localThreads[1] > threads[1] )
+        localThreads[1] = threads[1];
+#endif
+
     // allocate the input and output image memory objects
-    memobjs[0] =
-        create_image_2d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                        &image_format_desc, w, h, 0, inptr, &err);
+    memobjs[0] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR),
+                                 &image_format_desc, w, h, 0, inptr, &err );
     if( memobjs[0] == (cl_mem)0 ){
         log_error( " unable to create 2D image using create_image_2d\n" );
         return -1;
@@ -200,9 +212,8 @@
     }
 
     // allocate an array memory object to load the filter weights
-    memobjs[2] = clCreateBuffer(
-        context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-        sizeof(cl_float) * filter_w * filter_h, &filter_weights, &err);
+    memobjs[2] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR ),
+                               sizeof( cl_float ) * filter_w * filter_h, &filter_weights, &err );
     if( memobjs[2] == (cl_mem)0 ){
         log_error( " unable to create array using clCreateBuffer\n" );
         clReleaseMemObject( memobjs[1] );
@@ -238,7 +249,11 @@
         return -1;
     }
 
+#ifdef USE_LOCAL_THREADS
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, &executeEvent );
+#else
     err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, &executeEvent );
+#endif
 
     if( err != CL_SUCCESS ){
         print_error( err, "clEnqueueNDRangeKernel failed\n" );

diff --git a/test_conformance/profiling/execute_multipass.cpp b/test_conformance/profiling/execute_multipass.cpp
index a264232..7051244 100644
--- a/test_conformance/profiling/execute_multipass.cpp
+++ b/test_conformance/profiling/execute_multipass.cpp

@@ -122,18 +122,14 @@
     }
 
     // allocate the input and output image memory objects
-    memobjs[0] =
-        create_image_3d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                        &image_format_desc, w, h, d, 0, 0, inptr, &err);
+    memobjs[0] = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), &image_format_desc, w, h, d, 0, 0, inptr, &err );
     if( memobjs[0] == (cl_mem)0 ){
         log_error( " unable to create 2D image using create_image_2d\n" );
         return -1;
     }
 
     // allocate an array memory object to load the filter weights
-    memobjs[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_float) * w * h * d * nChannels, NULL, &err);
+    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_WRITE ), sizeof( cl_float ) * w*h*d*nChannels, NULL, &err );
     if( memobjs[1] == (cl_mem)0 ){
         log_error( " unable to create array using clCreateBuffer\n" );
         clReleaseMemObject( memobjs[0] );

diff --git a/test_conformance/profiling/main.cpp b/test_conformance/profiling/main.cpp
index 6e59f61..bd36797 100644
--- a/test_conformance/profiling/main.cpp
+++ b/test_conformance/profiling/main.cpp

@@ -126,7 +126,6 @@
 
 int main( int argc, const char *argv[] )
 {
-    return runTestHarness(argc, argv, test_num, test_list, false,
-                          CL_QUEUE_PROFILING_ENABLE);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, CL_QUEUE_PROFILING_ENABLE );
 }
 

diff --git a/test_conformance/profiling/readArray.cpp b/test_conformance/profiling/readArray.cpp
index 85ab9a2..04bccbf 100644
--- a/test_conformance/profiling/readArray.cpp
+++ b/test_conformance/profiling/readArray.cpp

@@ -622,12 +622,25 @@
     cl_event        readEvent;
     cl_ulong    queueStart, submitStart, readStart, readEnd;
     size_t            threads[1];
+#ifdef USE_LOCAL_THREADS
+    size_t            localThreads[1];
+#endif
     int                err, err_count = 0;
     int                i;
     size_t            ptrSizes[5];
 
     threads[0] = (size_t)num_elements;
 
+#ifdef USE_LOCAL_THREADS
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
+    if( err != CL_SUCCESS ){
+        log_error( "Unable to get thread group max size: %d", err );
+        return -1;
+    }
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
+#endif
+
     ptrSizes[0] = size;
     ptrSizes[1] = ptrSizes[0] << 1;
     ptrSizes[2] = ptrSizes[1] << 1;
@@ -639,8 +652,7 @@
             log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
             return -1;
         }
-        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    ptrSizes[i] * num_elements, NULL, &err);
+        streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL, &err );
         if( !streams[i] ){
             log_error( " clCreateBuffer failed\n" );
             free( outptr[i] );
@@ -664,8 +676,11 @@
             return -1;
         }
 
+#ifdef USE_LOCAL_THREADS
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
+#else
         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-
+#endif
         if( err != CL_SUCCESS ){
             print_error( err, "clEnqueueNDRangeKernel failed" );
             clReleaseKernel( kernel[i] );
@@ -878,3 +893,105 @@
     return test_stream_read( device, context, queue, num_elements, sizeof( TestStruct ), "struct", 1,
                              stream_read_struct_kernel_code, struct_kernel_name, foo );
 }
+
+/*
+int read_struct_array(cl_device_group device, cl_device id, cl_context context, int num_elements)
+{
+    cl_mem            streams[1];
+    TestStruct        *output_ptr;
+    cl_program        program[1];
+    cl_kernel        kernel[1];
+    void            *values[1];
+    size_t            sizes[1] = { sizeof(cl_stream) };
+    size_t            threads[1];
+#ifdef USE_LOCAL_THREADS
+    size_t            localThreads[1];
+#endif
+    int                err;
+    size_t            objSize = sizeof(TestStruct);
+
+    threads[0] = (size_t)num_elements;
+
+#ifdef USE_LOCAL_THREADS
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
+    if( err != CL_SUCCESS ){
+        log_error( "Unable to get thread group max size: %d", err );
+        return -1;
+    }
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
+#endif
+
+    output_ptr = malloc(objSize * num_elements);
+    if( ! output_ptr ){
+        log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
+        return -1;
+    }
+    streams[0] = clCreateBuffer( device, (cl_mem_flags)(CL_MEM_READ_WRITE),  objSize * num_elements, NULL );
+    if( !streams[0] ){
+        log_error( " clCreateBuffer failed\n" );
+        free( output_ptr );
+        return -1;
+    }
+
+    err = create_program_and_kernel( device, stream_read_struct_kernel_code, "test_stream_read_struct", &program[0], &kernel[0]);
+    if( err ){
+        clReleaseProgram( program[0] );
+        free( output_ptr );
+        return -1;
+    }
+
+    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&streams[0] );
+    if( err != CL_SUCCESS){
+        print_error( err, "clSetKernelArg failed" );
+        clReleaseProgram( program[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseMemObject( streams[0] );
+        free( output_ptr );
+        return -1;
+    }
+
+#ifdef USE_LOCAL_THREADS
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, localThreads, 0, NULL, NULL );
+#else
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
+#endif
+    if( err != CL_SUCCESS ){
+        print_error( err, "clEnqueueNDRangeKernel failed" );
+        clReleaseProgram( program[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseMemObject( streams[0] );
+        free( output_ptr );
+        return -1;
+    }
+
+    err = clEnqueueReadBuffer( queue, streams[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
+    if( err != CL_SUCCESS){
+        print_error( err, "clEnqueueReadBuffer failed" );
+        clReleaseProgram( program[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseMemObject( streams[0] );
+        free( output_ptr );
+        return -1;
+    }
+
+    if (verify_read_struct(output_ptr, num_elements)){
+        log_error(" struct test failed\n");
+        err = -1;
+    }
+    else{
+        log_info(" struct test passed\n");
+        err = 0;
+    }
+
+    // cleanup
+    clReleaseProgram( program[0] );
+    clReleaseKernel( kernel[0] );
+    clReleaseMemObject( streams[0] );
+    free( output_ptr );
+
+    return err;
+}
+*/
+
+

diff --git a/test_conformance/profiling/readImage.cpp b/test_conformance/profiling/readImage.cpp
index 9ba6b47..c1a0894 100644
--- a/test_conformance/profiling/readImage.cpp
+++ b/test_conformance/profiling/readImage.cpp

@@ -130,6 +130,9 @@
     cl_event        readEvent;
     cl_ulong        queueStart, submitStart, readStart, readEnd;
     size_t          threads[2];
+#ifdef USE_LOCAL_THREADS
+    size_t          localThreads[2];
+#endif
     int                err;
     int                w = 64, h = 64;
     cl_mem_flags    flags;
@@ -147,6 +150,16 @@
     threads[0] = (size_t)w;
     threads[1] = (size_t)h;
 
+#ifdef USE_LOCAL_THREADS
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( unsigned int ), NULL );
+    test_error( err, "Unable to get thread group max size" );
+    localThreads[1] = localThreads[0];
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
+    if( localThreads[1] > threads[1] )
+        localThreads[1] = threads[1];
+#endif
+
     d = init_genrand( gRandomSeed );
     if( image_format_desc.image_channel_data_type == CL_SIGNED_INT8 )
         inptr = (void *)generateSignedImage( w * h * 4, d );
@@ -167,7 +180,7 @@
     }
 
     // allocate the input and output image memory objects
-    flags = CL_MEM_READ_WRITE;
+    flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
     memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err );
     if( memobjs[0] == (cl_mem)0 ){
         free( dst );
@@ -176,8 +189,7 @@
         return -1;
     }
 
-    memobjs[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                channel_nbytes * 4 * w * h, NULL, &err);
+    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  channel_nbytes * 4 * w * h, NULL, &err );
     if( memobjs[1] == (cl_mem)0 ){
         free( dst );
         free( (void *)inptr );
@@ -219,8 +231,11 @@
         return -1;
     }
 
+#ifdef USE_LOCAL_THREADS
+    err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
+#else
     err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
-
+#endif
     if( err != CL_SUCCESS ){
         print_error( err, "clEnqueueNDRangeKernel failed" );
         clReleaseKernel( kernel[0] );

diff --git a/test_conformance/profiling/writeArray.cpp b/test_conformance/profiling/writeArray.cpp
index acfe8f2..550294a 100644
--- a/test_conformance/profiling/writeArray.cpp
+++ b/test_conformance/profiling/writeArray.cpp

@@ -25,6 +25,8 @@
 #include "harness/errorHelpers.h"
 #include "harness/conversions.h"
 
+//#define USE_LOCAL_THREADS    1
+
 #ifndef uchar
 typedef unsigned char uchar;
 #endif
@@ -551,8 +553,8 @@
 static int verify_write_half( void *ptr1, void *ptr2, int n )
 {
     int        i;
-    cl_half *inptr = (cl_half *)ptr1;
-    cl_half *outptr = (cl_half *)ptr2;
+    cl_ushort    *inptr = (cl_ushort *)ptr1;
+    cl_ushort    *outptr = (cl_ushort *)ptr2;
 
     for( i = 0; i < n; i++ ){
         if( outptr[i] != inptr[i] )
@@ -619,11 +621,24 @@
     cl_ulong    queueStart, submitStart, writeStart, writeEnd;
     size_t            ptrSizes[5], outPtrSizes[5];
     size_t            threads[1];
+#ifdef USE_LOCAL_THREADS
+    size_t            localThreads[1];
+#endif
     int                err, err_count = 0;
     int                i, ii;
 
     threads[0] = (size_t)num_elements;
 
+#ifdef USE_LOCAL_THREADS
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
+    if( err != CL_SUCCESS ){
+        print_error( err, " Unable to get thread group max size" );
+        return -1;
+    }
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
+#endif
+
     ptrSizes[0] = size;
     ptrSizes[1] = ptrSizes[0] << 1;
     ptrSizes[2] = ptrSizes[1] << 1;
@@ -639,8 +654,7 @@
 
     for( i = 0; i < loops; i++ ){
         ii = i << 1;
-        streams[ii] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                     ptrSizes[i] * num_elements, NULL, &err);
+        streams[ii] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL, &err );
         if( ! streams[ii] ){
             free( outptr[i] );
             log_error( " clCreateBuffer failed\n" );
@@ -648,15 +662,11 @@
         }
         if( ! strcmp( type, "half" ) ){
             outptr[i] = malloc( outPtrSizes[i] * num_elements * 2 );
-            streams[ii + 1] =
-                clCreateBuffer(context, CL_MEM_READ_WRITE,
-                               outPtrSizes[i] * 2 * num_elements, NULL, &err);
+            streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  outPtrSizes[i] * 2 * num_elements, NULL, &err );
         }
         else{
             outptr[i] = malloc( outPtrSizes[i] * num_elements );
-            streams[ii + 1] =
-                clCreateBuffer(context, CL_MEM_READ_WRITE,
-                               outPtrSizes[i] * num_elements, NULL, &err);
+            streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  outPtrSizes[i] * num_elements, NULL, &err );
         }
         if( ! streams[ii+1] ){
             clReleaseMemObject(streams[ii]);
@@ -754,8 +764,11 @@
             return -1;
         }
 
+#ifdef USE_LOCAL_THREADS
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
+#else
         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-
+#endif
         if( err != CL_SUCCESS ){
             print_error( err, " clEnqueueNDRangeKernel failed" );
             clReleaseEvent(writeEvent);
@@ -800,7 +813,7 @@
         }
         if( !err2 )
         {
-            log_info(" %s%d data verified\n", type, 1 << i);
+            log_info( " %s%d data verified\n", type, 1<<i );
         }
         err = err2;
 
@@ -821,6 +834,150 @@
 }    // end test_stream_write()
 
 
+
+/*
+ int test_stream_struct_write( cl_device_group device, cl_device id, cl_context context, int num_elements )
+ {
+ cl_mem            streams[10];
+ void            *outptr[5];
+ TestStruct        *inptr[5];
+ cl_program        program[5];
+ cl_kernel        kernel[5];
+ void            *values[2];
+ size_t            sizes[2] = { sizeof(cl_stream), sizeof(cl_stream) };
+ size_t            ptrSizes[5];
+ size_t            size = sizeof( TestStruct );
+ size_t            threads[1];
+ #ifdef USE_LOCAL_THREADS
+ size_t            localThreads[1];
+ #endif
+ int                err;
+ int                i, ii, j;
+ int                loops = 1;        // no vector for structs
+
+ threads[0] = (size_t)num_elements;
+
+ #ifdef USE_LOCAL_THREADS
+ err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
+ if( err != CL_SUCCESS ){
+ log_error( "Unable to get thread group max size: %d", err );
+ return -1;
+ }
+ if( localThreads[0] > threads[0] )
+ localThreads[0] = threads[0];
+ #endif
+
+ ptrSizes[0] = size;
+ ptrSizes[1] = ptrSizes[0] << 1;
+ ptrSizes[2] = ptrSizes[1] << 1;
+ ptrSizes[3] = ptrSizes[2] << 1;
+ ptrSizes[4] = ptrSizes[3] << 1;
+
+
+ loops = ( loops < 5 ? loops : 5 );
+ for( i = 0; i < loops; i++ ){
+
+ inptr[i] = (TestStruct *)malloc(ptrSizes[i] * num_elements);
+
+ for( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ){
+ inptr[i][j].a = (int)random_float( -2147483648.f, 2147483647.0f );
+ inptr[i][j].b = random_float( -FLT_MAX, FLT_MAX );
+ }
+
+ ii = i << 1;
+ streams[ii] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL);
+ if( ! streams[ii] ){
+ free( outptr[i] );
+ log_error( " clCreateBuffer failed\n" );
+ return -1;
+ }
+ outptr[i] = malloc( ptrSizes[i] * num_elements );
+ streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL);
+ if( ! streams[ii+1] ){
+ clReleaseMemObject(streams[ii]);
+ free( outptr[i] );
+ log_error( " clCreateBuffer failed\n" );
+ return -1;
+ }
+
+ err = clWriteArray(context, streams[ii], false, 0, ptrSizes[i]*num_elements, inptr[i], NULL);
+ if( err != CL_SUCCESS ){
+ clReleaseMemObject(streams[ii]);
+ clReleaseMemObject(streams[ii+1]);
+ free( outptr[i] );
+ print_error( err, " clWriteArray failed" );
+ return -1;
+ }
+
+ err = create_program_and_kernel( device, struct_kernel_code, "read_write_struct", &program[i], &kernel[i] );
+ if( err ){
+ clReleaseMemObject(streams[ii]);
+ clReleaseMemObject(streams[ii+1]);
+ free( outptr[i] );
+ log_error( " Error creating program for struct\n" );
+ return -1;
+ }
+
+ err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[ii] );
+ err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&streams[ii+1] );
+ if (err != CL_SUCCESS){
+ clReleaseProgram( program[i] );
+ clReleaseKernel( kernel[i] );
+ clReleaseMemObject( streams[ii] );
+ clReleaseMemObject( streams[ii+1] );
+ free( outptr[i] );
+ print_error( err, " clSetKernelArg failed" );
+ return -1;
+ }
+
+ #ifdef USE_LOCAL_THREADS
+ err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
+ #else
+ err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+ #endif
+ if( err != CL_SUCCESS ){
+ print_error( err, " clEnqueueNDRangeKernel failed" );
+ clReleaseMemObject( streams[ii] );
+ clReleaseMemObject( streams[ii+1] );
+ clReleaseKernel( kernel[i] );
+ clReleaseProgram( program[i] );
+ free( outptr[i] );
+ return -1;
+ }
+
+ err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
+ if( err != CL_SUCCESS ){
+ clReleaseMemObject( streams[ii] );
+ clReleaseMemObject( streams[ii+1] );
+ clReleaseKernel( kernel[i] );
+ clReleaseProgram( program[i] );
+ free( outptr[i] );
+ print_error( err, " clEnqueueReadBuffer failed" );
+ return -1;
+ }
+
+ if( verify_write_struct( inptr[i], outptr[i], ptrSizes[i] * num_elements / ptrSizes[0] ) ){
+ log_error( " STREAM_WRITE struct%d test failed\n", 1<<i );
+ err = -1;
+ }
+ else{
+ log_info( " STREAM_WRITE struct%d test passed\n", 1<<i );
+ err = 0;
+ }
+ // cleanup
+ clReleaseMemObject( streams[ii] );
+ clReleaseMemObject( streams[ii+1] );
+ clReleaseKernel( kernel[i] );
+ clReleaseProgram( program[i] );
+ free( outptr[i] );
+ free( (void *)inptr[i] );
+ }
+
+ return err;
+
+ }    // end test_stream_struct_write()
+ */
+
 int test_write_array_int( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
 {
     int    *inptr[5];

diff --git a/test_conformance/profiling/writeImage.cpp b/test_conformance/profiling/writeImage.cpp
index fbc8fbc..5085896 100644
--- a/test_conformance/profiling/writeImage.cpp
+++ b/test_conformance/profiling/writeImage.cpp

@@ -413,6 +413,9 @@
     cl_event        writeEvent;
     cl_ulong    queueStart, submitStart, writeStart, writeEnd;
     size_t    threads[2];
+#ifdef USE_LOCAL_THREADS
+    size_t    localThreads[2];
+#endif
     int                err;
     int                w = 64, h = 64;
     cl_mem_flags    flags;
@@ -433,6 +436,16 @@
     threads[0] = (size_t)w;
     threads[1] = (size_t)h;
 
+#ifdef USE_LOCAL_THREADS
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( unsigned int ), NULL );
+    test_error( err, "Unable to get thread group max size" );
+    localThreads[1] = localThreads[0];
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
+    if( localThreads[1] > threads[1] )
+        localThreads[1] = threads[1];
+#endif
+
     d = init_genrand( gRandomSeed );
     if( image_format_desc.image_channel_data_type == CL_SIGNED_INT8 )
         inptr = (void *)generateSignedImage( w * h * 4, d );
@@ -452,7 +465,7 @@
     }
 
     // allocate the input and output image memory objects
-    flags = CL_MEM_READ_WRITE;
+    flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
     memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err );
     if( memobjs[0] == (cl_mem)0 ){
         free( dst );
@@ -461,8 +474,7 @@
         return -1;
     }
 
-    memobjs[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                channel_nbytes * 4 * w * h, NULL, &err);
+    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  channel_nbytes * 4 * w * h, NULL, &err );
     if( memobjs[1] == (cl_mem)0 ){
         free( dst );
         free( (void *)inptr );
@@ -569,8 +581,11 @@
         return -1;
     }
 
+#ifdef USE_LOCAL_THREADS
+    err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
+#else
     err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
-
+#endif
     if( err != CL_SUCCESS ){
         print_error( err, "clEnqueueNDRangeKernel failed" );
     clReleaseEvent(writeEvent);

diff --git a/test_conformance/relationals/main.cpp b/test_conformance/relationals/main.cpp
index 61bde2d..ec495c8 100644
--- a/test_conformance/relationals/main.cpp
+++ b/test_conformance/relationals/main.cpp

@@ -70,6 +70,6 @@
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
 }
 

diff --git a/test_conformance/relationals/test_comparisons_double.cpp b/test_conformance/relationals/test_comparisons_double.cpp
index 3fe1124..9dc737f 100644
--- a/test_conformance/relationals/test_comparisons_double.cpp
+++ b/test_conformance/relationals/test_comparisons_double.cpp

@@ -151,17 +151,13 @@
     generate_equiv_test_data_double( inDataA, vecSize, true, d );
     generate_equiv_test_data_double( inDataB, vecSize, false, d );
 
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_double) * vecSize * TEST_SIZE,
-                                &inDataA, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_double ) * vecSize * TEST_SIZE, &inDataA, &error);
     if( streams[0] == NULL )
     {
         print_error( error, "Creating input array A failed!\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_double) * vecSize * TEST_SIZE,
-                                &inDataB, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_double ) * vecSize * TEST_SIZE, &inDataB, &error);
     if( streams[1] == NULL )
     {
         print_error( error, "Creating input array A failed!\n");

diff --git a/test_conformance/relationals/test_comparisons_float.cpp b/test_conformance/relationals/test_comparisons_float.cpp
index 989c70c..e8178d8 100644
--- a/test_conformance/relationals/test_comparisons_float.cpp
+++ b/test_conformance/relationals/test_comparisons_float.cpp

@@ -158,17 +158,13 @@
     generate_equiv_test_data_float( inDataA, vecSize, true, d );
     generate_equiv_test_data_float( inDataB, vecSize, false, d );
 
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_float) * vecSize * TEST_SIZE,
-                                &inDataA, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_float ) * vecSize * TEST_SIZE, &inDataA, &error);
     if( streams[0] == NULL )
     {
         print_error( error, "Creating input array A failed!\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_float) * vecSize * TEST_SIZE,
-                                &inDataB, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_float ) * vecSize * TEST_SIZE, &inDataB, &error);
     if( streams[1] == NULL )
     {
         print_error( error, "Creating input array A failed!\n");

diff --git a/test_conformance/relationals/test_relationals.cpp b/test_conformance/relationals/test_relationals.cpp
index 5a874af..7095240 100644
--- a/test_conformance/relationals/test_relationals.cpp
+++ b/test_conformance/relationals/test_relationals.cpp

@@ -89,19 +89,13 @@
     generate_random_data( vecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataA );
     memset( clearData, 0, sizeof( clearData ) );
 
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                get_explicit_type_size(vecType)
-                                    * g_vector_aligns[vecSize] * TEST_SIZE,
-                                &inDataA, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataA, &error);
     if( streams[0] == NULL )
     {
         print_error( error, "Creating input array A failed!\n");
         return -1;
     }
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_int) * g_vector_aligns[vecSize] * TEST_SIZE,
-                       clearData, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * g_vector_aligns[vecSize] * TEST_SIZE, clearData, &error );
     if( streams[1] == NULL )
     {
         print_error( error, "Creating output array failed!\n");
@@ -369,28 +363,19 @@
     generate_random_data( vecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataB );
     generate_random_data( testVecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataC );
 
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                get_explicit_type_size(vecType)
-                                    * g_vector_aligns[vecSize] * TEST_SIZE,
-                                &inDataA, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataA, &error);
     if( streams[0] == NULL )
     {
         print_error( error, "Creating input array A failed!\n");
         return -1;
     }
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                get_explicit_type_size(vecType)
-                                    * g_vector_aligns[vecSize] * TEST_SIZE,
-                                &inDataB, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataB, &error);
     if( streams[1] == NULL )
     {
         print_error( error, "Creating input array A failed!\n");
         return -1;
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                get_explicit_type_size(testVecType)
-                                    * g_vector_aligns[vecSize] * TEST_SIZE,
-                                &inDataC, &error);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( testVecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataC, &error);
     if( streams[2] == NULL )
     {
         print_error( error, "Creating input array A failed!\n");

diff --git a/test_conformance/relationals/test_shuffles.cpp b/test_conformance/relationals/test_shuffles.cpp
index 5fd3b6c..c784b65 100644
--- a/test_conformance/relationals/test_shuffles.cpp
+++ b/test_conformance/relationals/test_shuffles.cpp

@@ -621,22 +621,16 @@
     if( shuffleMode == kBuiltInDualInputFnMode )
         generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inSecondData );
 
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       typeSize * inVecSize * numOrders, inData, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * inVecSize * numOrders, inData, &error);
     test_error( error, "Unable to create input stream" );
 
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       typeSize * outRealVecSize * numOrders, outData, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * outRealVecSize * numOrders, outData, &error);
     test_error( error, "Unable to create output stream" );
 
     int argIndex = 0;
     if( shuffleMode == kBuiltInDualInputFnMode )
     {
-        streams[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                    typeSize * inVecSize * numOrders,
-                                    inSecondData, &error);
+        streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * inVecSize * numOrders, inSecondData, &error);
         test_error( error, "Unable to create second input stream" );
 
         error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 2 ] ), &streams[ 2 ] );

diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp
index 35f154a..518d030 100644
--- a/test_conformance/select/test_select.cpp
+++ b/test_conformance/select/test_select.cpp

@@ -20,7 +20,7 @@
 #include <time.h>
 #include <string.h>
 #if ! defined( _WIN32)
-#if defined(__APPLE__)
+#if ! defined( __ANDROID__ )
 #include <sys/sysctl.h>
 #endif
 #endif
@@ -278,11 +278,8 @@
 
     // create program
     cl_program program;
-    const char **psrc = vec_len == 3 ? sourceV3 : source;
-    size_t src_size = vec_len == 3 ? ARRAY_SIZE(sourceV3) : ARRAY_SIZE(source);
 
-    if (create_single_kernel_helper(context, &program, kernel_ptr, src_size,
-                                    psrc, testname))
+    if (create_single_kernel_helper(context, &program, kernel_ptr, (cl_uint)(vec_len == 3 ? sizeof(sourceV3) / sizeof(sourceV3[0]) : sizeof(source) / sizeof(source[0])), vec_len == 3 ? sourceV3 : source, testname))
     {
         log_error("Failed to build program (%d)\n", err);
         return NULL;
@@ -343,7 +340,7 @@
         programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, cmptype, element_count[vecsize] );
         if (!programs[vecsize] || !kernels[vecsize]) {
             ++s_test_fail;
-            ++s_test_cnt;
+            ++s_test_cnt;

             return -1;
         }
     }
@@ -645,7 +642,7 @@
         log_info("*** Wimpy Reduction Factor: %-27u ***\n\n", s_wimpy_reduction_factor);
     }
 
-    int err = runTestHarness(argCount, argList, test_num, test_list, false, 0);
+    int err = runTestHarness( argCount, argList, test_num, test_list, false, false, 0 );
 
     free( argList );
 

diff --git a/test_conformance/select/util_select.cpp b/test_conformance/select/util_select.cpp
index f9641e9..71c58bc 100644
--- a/test_conformance/select/util_select.cpp
+++ b/test_conformance/select/util_select.cpp

@@ -561,18 +561,13 @@
     const cl_uchar *c = (const cl_uchar *) correct;
     size_t i;
 
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_uchar) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%2.2x vs 0x%2.2x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+    for(i = 0; i < count; i++)
+        if (t[i] != c[i]) {
+            log_error("\n(check_uchar) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%2.2x vs 0x%2.2x\n", vector_size, i, count, c[i], t[i]);
+            return i + 1;
+        }
+
     return 0;
 }
 
@@ -581,18 +576,13 @@
     const cl_char *c = (const cl_char *) correct;
     size_t i;
 
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_char) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%2.2x vs 0x%2.2x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+
+    for( i = 0; i < count; i++ )
+        if( t[i] != c[i] ) {
+            log_error("\n(check_char) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%2.2x vs 0x%2.2x\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
 
     return 0;
 }
@@ -602,18 +592,13 @@
     const cl_ushort *c = (const cl_ushort *) correct;
     size_t i;
 
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_ushort) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%4.4x vs 0x%4.4x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+
+    for( i = 0; i < count; i++ )
+        if(t[i] != c[i]) {
+            log_error("\n(check_ushort) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%4.4x vs 0x%4.4x\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
 
     return 0;
 }
@@ -623,18 +608,13 @@
     const cl_short *c = (const cl_short *) correct;
     size_t i;
 
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_short) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%8.8x vs 0x%8.8x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+
+    for (i = 0; i < count; i++)
+        if(t[i] != c[i]) {
+            log_error("\n(check_short) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
 
     return 0;
 }
@@ -644,18 +624,14 @@
     const cl_uint *c = (const cl_uint *) correct;
     size_t i;
 
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_uint) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%8.8x vs 0x%8.8x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+
+
+    for (i = 0; i < count; i++)
+        if(t[i] != c[i]) {
+            log_error("\n(check_uint) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
 
     return 0;
 }
@@ -665,19 +641,24 @@
     const cl_int *c = (const cl_int *) correct;
     size_t i;
 
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
 
-                log_error("\n(check_int) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%8.8x vs 0x%8.8x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
+    for(i = 0; i < count; i++)
+        if( t[i] != c[i] ) {
+
+            log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i]);
+            log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i+1, count,c[i+1], t[i+1]);
+            log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i+2, count,c[i+2], t[i+2]);
+            log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i+3, count,c[i+3], t[i+3]);
+            if(i) {
+                log_error("\n(check_int) Error for vector size %ld found just after 0x%8.8lx:  "
+                          "*0x%8.8x vs 0x%8.8x\n", vector_size, i-1, c[i-1], t[i-1]);
             }
-    }
+            return i + 1;
+        }
 
     return 0;
 }
@@ -687,18 +668,13 @@
     const cl_ulong *c = (const cl_ulong *) correct;
     size_t i;
 
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_ulong) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%16.16llx vs 0x%16.16llx\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+
+    for( i = 0; i < count; i++ )
+        if( t[i] != c[i] ) {
+            log_error("\n(check_ulong) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%16.16llx vs 0x%16.16llx\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
 
     return 0;
 }
@@ -708,18 +684,13 @@
     const cl_long *c = (const cl_long *) correct;
     size_t i;
 
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_long) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%16.16llx vs 0x%16.16llx\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+
+    for(i = 0; i < count; i++ )
+        if(t[i] != c[i]) {
+            log_error("\n(check_long) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%16.16llx vs 0x%16.16llx\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
 
     return 0;
 }
@@ -729,19 +700,14 @@
     const cl_uint *c = (const cl_uint *) correct;
     size_t i;
 
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++) /* Allow nans to be binary different */
-            if ((t[i] != c[i])
-                && !(isnan(((float *)correct)[i]) && isnan(((float *)test)[i])))
-            {
-                log_error("\n(check_float) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%8.8x vs 0x%8.8x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+
+    for( i = 0; i < count; i++ )
+        /* Allow nans to be binary different */
+        if ((t[i] != c[i]) && !(isnan(((float *)correct)[i]) && isnan(((float *)test)[i]))) {
+            log_error("\n(check_float) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
 
     return 0;
 }
@@ -751,20 +717,15 @@
     const cl_ulong *c = (const cl_ulong *) correct;
     size_t i;
 
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++) /* Allow nans to be binary different */
-            if ((t[i] != c[i])
-                && !(isnan(((double *)correct)[i])
-                     && isnan(((double *)test)[i])))
-            {
-                log_error("\n(check_double) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%16.16llx vs 0x%16.16llx\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+
+
+    for( i = 0; i < count; i++ )
+        /* Allow nans to be binary different */
+        if ((t[i] != c[i]) && !(isnan(((double *)correct)[i]) && isnan(((double *)test)[i]))) {
+            log_error("\n(check_double) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%16.16llx vs 0x%16.16llx\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
 
     return 0;
 }

diff --git a/test_conformance/spir/CMakeLists.txt b/test_conformance/spir/CMakeLists.txt
index f65c031..70effa1 100644
--- a/test_conformance/spir/CMakeLists.txt
+++ b/test_conformance/spir/CMakeLists.txt

@@ -9,7 +9,7 @@
     run_build_test.cpp
     run_services.cpp
     kernelargs.cpp
-    ../math_brute_force/function_list.cpp
+    ../math_brute_force/FunctionList.cpp
 )
 
 add_executable(${SPIR_OUT}

diff --git a/test_conformance/spir/kernelargs.h b/test_conformance/spir/kernelargs.h
index 7c5673e..2aa86d7 100644
--- a/test_conformance/spir/kernelargs.h
+++ b/test_conformance/spir/kernelargs.h

@@ -348,9 +348,9 @@
         if( NULL != buffer )
         {
             int error = CL_SUCCESS;
-            m_memObj = clCreateBuffer(context,
-                                      CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                                      size, buffer, &error);
+            m_memObj = clCreateBuffer( context,
+                                       (cl_mem_flags)( CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR ),
+                                       size, buffer, &error );
             if( error != CL_SUCCESS )
             {
                 throw Exceptions::TestError("clCreateBuffer failed\n", error);

diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp
index 3a18988..5634d5b 100644
--- a/test_conformance/spir/main.cpp
+++ b/test_conformance/spir/main.cpp

@@ -141,8 +141,7 @@
     return false;
 }
 
-static void get_spir_version(cl_device_id device,
-                             std::vector<Version> &versions)
+static void get_spir_version(cl_device_id device, std::vector<float>& versions)
 {
     char version[64] = {0};
     cl_int err;
@@ -163,11 +162,11 @@
     std::copy(std::istream_iterator<std::string>(versionStream),
               std::istream_iterator<std::string>(),
               std::back_inserter(versionVector));
-    for (auto &v : versionVector)
+    for(std::list<std::string>::const_iterator it = versionVector.begin(),
+                                               e  = versionVector.end(); it != e;
+                                               it++)
     {
-        auto major = v[v.find('.') - 1];
-        auto minor = v[v.find('.') + 1];
-        versions.push_back(Version{ major - '0', minor - '0' });
+        versions.push_back(atof(it->c_str()));
     }
 }
 
@@ -6930,12 +6929,10 @@
         cl_device_id device = get_platform_device(device_type, choosen_device_index, choosen_platform_index);
         printDeviceHeader(device);
 
-        std::vector<Version> versions;
+        std::vector<float> versions;
         get_spir_version(device, versions);
-
-        if (!is_extension_available(device, "cl_khr_spir")
-            || (std::find(versions.begin(), versions.end(), Version{ 1, 2 })
-                == versions.end()))
+        if (!is_extension_available( device, "cl_khr_spir") ||
+            std::find(versions.begin(), versions.end(), 1.2f) == versions.end())
         {
             log_info("Spir extension version 1.2 is not supported by the device\n");
             return 0;

diff --git a/test_conformance/spir/run_build_test.cpp b/test_conformance/spir/run_build_test.cpp
index 9264d3a..cec2d27 100644
--- a/test_conformance/spir/run_build_test.cpp
+++ b/test_conformance/spir/run_build_test.cpp

@@ -33,12 +33,12 @@
 #include "harness/clImageHelper.h"
 #include "harness/os_helpers.h"
 
-#include "../math_brute_force/function_list.h"
-#include "datagen.h"
 #include "exceptions.h"
 #include "kernelargs.h"
-#include "run_build_test.h"
+#include "datagen.h"
 #include "run_services.h"
+#include "run_build_test.h"
+#include "../math_brute_force/FunctionList.h"
 #include <CL/cl.h>
 //
 // Task

diff --git a/test_conformance/spir/run_services.cpp b/test_conformance/spir/run_services.cpp
index 06fc418..257dbff 100644
--- a/test_conformance/spir/run_services.cpp
+++ b/test_conformance/spir/run_services.cpp

@@ -302,11 +302,18 @@
 
 std::string& DataRow::operator[](int column)
 {
-    assert((column > -1 && (size_t)column <= m_row.size())
-           && "Index out of bound");
-    if ((size_t)column == m_row.size()) m_row.push_back("");
+    assert((column > -1) && "Index out of bound");
 
-    return m_row[column];
+    if ((size_t)column < m_row.size())
+        return m_row[column];
+
+    if (column == m_row.size())
+    {
+        m_row.push_back("");
+        return m_row[column];
+    }
+
+    assert(0 && "Index out of bound.");
 }
 
 /*

diff --git a/test_conformance/spirv_new/CMakeLists.txt b/test_conformance/spirv_new/CMakeLists.txt
index 7500571..614d5a7 100644
--- a/test_conformance/spirv_new/CMakeLists.txt
+++ b/test_conformance/spirv_new/CMakeLists.txt

@@ -18,8 +18,8 @@
 
 set(TEST_HARNESS_SOURCES
   ../../test_conformance/math_brute_force/reference_math.cpp
-  ../../test_conformance/math_brute_force/utility.cpp
-)
+  ../../test_conformance/math_brute_force/Utility.cpp
+  )
 
 set(${MODULE_NAME}_SOURCES ${SPIRV_NEW_SOURCES} ${TEST_HARNESS_SOURCES})
 

diff --git a/test_conformance/spirv_new/assemble_spirv.py b/test_conformance/spirv_new/assemble_spirv.py
deleted file mode 100755
index 99b16ad..0000000
--- a/test_conformance/spirv_new/assemble_spirv.py
+++ /dev/null

@@ -1,164 +0,0 @@
-#!/usr/bin/env python3
-
-#####################################################################
-# Copyright (c) 2020 The Khronos Group Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#####################################################################
-
-"""Assembles the SPIR-V assembly files used by spirv_new into binaries,
-   and validates them using spirv-val.  Either run this from the parent
-   of the spirv_asm directory, or pass the --source-dir and --output-dir
-   options to specify the locations of the assembly files and the
-   binaries to be generated.
-"""
-
-import argparse
-import glob
-import os
-import subprocess
-import sys
-from textwrap import wrap
-
-
-def fatal(message):
-    """Print an error message and exit with a non-zero status, to
-       indicate failure.
-    """
-    print(message)
-    sys.exit(1)
-
-
-def assemble_spirv(asm_dir, bin_dir, spirv_as, verbose):
-    """Assemble SPIR-V source into binaries."""
-
-    if not os.path.exists(bin_dir):
-        os.makedirs(bin_dir)
-
-    assembly_failures = False
-
-    for asm_file_path in glob.glob(os.path.join(asm_dir, '*.spvasm*')):
-        asm_file = os.path.basename(asm_file_path)
-        if os.path.isfile(asm_file_path):
-            if verbose:
-                print(' Assembling {}'.format(asm_file))
-
-            asm_file_root, asm_file_ext = os.path.splitext(asm_file)
-            bin_file = asm_file_root + asm_file_ext.replace('asm', '')
-            bin_file_path = os.path.join(bin_dir, bin_file)
-
-            command = '"{}" --target-env spv1.0 "{}" -o "{}"'.format(
-                spirv_as, asm_file_path, bin_file_path)
-            if subprocess.call(command, shell=True) != 0:
-                assembly_failures = True
-                print('ERROR: Failure assembling {}: '
-                      'see above output.'.format(
-                          asm_file))
-                print()
-
-    if assembly_failures:
-        fatal('\n'.join(wrap(
-            'ERROR: Assembly failure(s) occurred.  See above for error '
-            'messages from the assembler, if any.')))
-
-
-def validate_spirv(bin_dir, spirv_val, verbose):
-    """Validates SPIR-V binaries.  Ignores known failures."""
-
-    validation_failures = False
-
-    for bin_file_path in glob.glob(os.path.join(bin_dir, '*.spv*')):
-        bin_file = os.path.basename(bin_file_path)
-        if os.path.isfile(bin_file_path):
-            if verbose:
-                print(' Validating {}'.format(bin_file))
-
-            command = '"{}" "{}"'.format(
-                spirv_val, bin_file_path)
-            if subprocess.call(command, shell=True) != 0:
-                print('ERROR: Failure validating {}: '
-                      'see above output.'.format(
-                          bin_file))
-                validation_failures = True
-                print()
-
-    if validation_failures:
-        fatal('ERROR: Validation failure(s) found.  '
-              'See above for validation output.')
-    else:
-        print('All SPIR-V binaries validated successfully.')
-
-
-def parse_args():
-    """Parse the command-line arguments."""
-
-    argparse_kwargs = (
-        {'allow_abbrev': False} if sys.version_info >= (3, 5) else {})
-    argparse_kwargs['description'] = (
-        '''Assembles the SPIR-V assembly files used by spirv_new into
-           binaries, and validates them using spirv-val.  Either run this
-           from the parent of the spirv_asm directory, or pass the
-           --source-dir and --output-dir options to specify the locations
-           the assembly files and the binaries to be generated.
-        ''')
-    parser = argparse.ArgumentParser(**argparse_kwargs)
-    parser.add_argument('-s', '--source-dir', metavar='DIR',
-                        default='spirv_asm',
-                        help='''specifies the directory containing SPIR-V
-                                assembly files''')
-    parser.add_argument('-o', '--output-dir', metavar='DIR',
-                        default='spirv_bin',
-                        help='''specifies the directory in which to
-                                output SPIR-V binary files''')
-    parser.add_argument('-a', '--assembler', metavar='PROGRAM',
-                        default='spirv-as',
-                        help='''specifies the program to use for assembly
-                                of SPIR-V, defaults to spirv-as''')
-    parser.add_argument('-l', '--validator', metavar='PROGRAM',
-                        default='spirv-val',
-                        help='''specifies the program to use for validation
-                                of SPIR-V, defaults to spirv-val''')
-    parser.add_argument('-k', '--skip-validation', action='store_true',
-                        default=False,
-                        help='skips validation of the genareted SPIR-V')
-    parser.add_argument('-v', '--verbose', action='store_true', default=False,
-                        help='''enable verbose output (i.e. prints the
-                                name of each SPIR-V assembly file or
-                                binary as it is assembled or validated)
-                             ''')
-    return parser.parse_args()
-
-
-def main():
-    """Main function.  Assembles and validates SPIR-V."""
-
-    args = parse_args()
-
-    print('Assembling SPIR-V source into binaries...')
-    assemble_spirv(args.source_dir, args.output_dir, args.assembler,
-                   args.verbose)
-    print('Finished assembling SPIR-V binaries.')
-    print()
-
-    if args.skip_validation:
-        print('Skipping validation of SPIR-V binaries as requested.')
-    else:
-        print('Validating SPIR-V binaries...')
-        validate_spirv(args.output_dir, args.validator, args.verbose)
-    print()
-
-    print('Done.')
-
-
-if __name__ == '__main__':
-    main()

diff --git a/test_conformance/spirv_new/main.cpp b/test_conformance/spirv_new/main.cpp
index 5a8664b..42a1251 100644
--- a/test_conformance/spirv_new/main.cpp
+++ b/test_conformance/spirv_new/main.cpp

@@ -74,15 +74,14 @@
     return testDefinitions.size();
 }
 
-void spirvTestsRegistry::addTestClass(baseTestClass *test, const char *testName,
-                                      Version version)
+void spirvTestsRegistry::addTestClass(baseTestClass *test, const char *testName)
 {
 
     testClasses.push_back(test);
     test_definition testDef;
     testDef.func = test->getFunction();
     testDef.name = testName;
-    testDef.min_version = version;
+    testDef.min_version = Version(2, 1);
     testDefinitions.push_back(testDef);
 }
 
@@ -136,63 +135,27 @@
     return err;
 }
 
-int get_program_with_il(clProgramWrapper &prog, const cl_device_id deviceID,
-                        const cl_context context, const char *prog_name,
-                        spec_const spec_const_def)
+int get_program_with_il(clProgramWrapper &prog,
+                        const cl_device_id deviceID,
+                        const cl_context context,
+                        const char *prog_name)
 {
     cl_int err = 0;
-    if (gCompilationMode == kBinary)
-    {
+    if (gCompilationMode == kBinary) {
         return offline_get_program_with_il(prog, deviceID, context, prog_name);
     }
 
     std::vector<unsigned char> buffer_vec = readSPIRV(prog_name);
 
     int file_bytes = buffer_vec.size();
-    if (file_bytes == 0)
-    {
+    if (file_bytes == 0) {
         log_error("File %s not found\n", prog_name);
         return -1;
     }
 
     unsigned char *buffer = &buffer_vec[0];
-    if (gCoreILProgram)
-    {
-        prog = clCreateProgramWithIL(context, buffer, file_bytes, &err);
-        SPIRV_CHECK_ERROR(
-            err, "Failed to create program with clCreateProgramWithIL");
-
-        if (spec_const_def.spec_value != NULL)
-        {
-            err = clSetProgramSpecializationConstant(
-                prog, spec_const_def.spec_id, spec_const_def.spec_size,
-                spec_const_def.spec_value);
-            SPIRV_CHECK_ERROR(
-                err, "Failed to run clSetProgramSpecializationConstant");
-        }
-    }
-    else
-    {
-        cl_platform_id platform;
-        err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM,
-                              sizeof(cl_platform_id), &platform, NULL);
-        SPIRV_CHECK_ERROR(err,
-                          "Failed to get platform info with clGetDeviceInfo");
-        clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
-
-        clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                platform, "clCreateProgramWithILKHR");
-        if (clCreateProgramWithILKHR == NULL)
-        {
-            log_error(
-                "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
-            return -1;
-        }
-        prog = clCreateProgramWithILKHR(context, buffer, file_bytes, &err);
-        SPIRV_CHECK_ERROR(
-            err, "Failed to create program with clCreateProgramWithILKHR");
-    }
+    prog = clCreateProgramWithIL(context, buffer, file_bytes, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create program with clCreateProgramWithIL");
 
     err = clBuildProgram(prog, 1, &deviceID, NULL, NULL, NULL);
     SPIRV_CHECK_ERROR(err, "Failed to build program");
@@ -200,27 +163,17 @@
     return err;
 }
 
-test_status InitCL(cl_device_id id)
+test_status checkAddressWidth(cl_device_id id)
 {
-    test_status spirv_status;
-    bool force = true;
-    spirv_status = check_spirv_compilation_readiness(id);
-    if (spirv_status != TEST_PASS)
-    {
-        return spirv_status;
-    }
+  cl_uint address_bits;
+  cl_uint err = clGetDeviceInfo(id, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), &address_bits, NULL);
+  if(err != CL_SUCCESS){
+    log_error("clGetDeviceInfo failed to get address bits!");
+    return TEST_FAIL;
+  }
 
-    cl_uint address_bits;
-    cl_uint err = clGetDeviceInfo(id, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
-                                  &address_bits, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clGetDeviceInfo failed to get address bits!");
-        return TEST_FAIL;
-    }
-
-    gAddrWidth = address_bits == 32 ? "32" : "64";
-    return TEST_PASS;
+  gAddrWidth = address_bits == 32 ? "32" : "64";
+  return TEST_PASS;
 }
 
 void printUsage() {
@@ -257,8 +210,8 @@
        printUsage();
     }
 
-    return runTestHarnessWithCheck(
-        argc, argv, spirvTestsRegistry::getInstance().getNumTests(),
-        spirvTestsRegistry::getInstance().getTestDefinitions(), false, 0,
-        InitCL);
+    return runTestHarnessWithCheck(argc, argv,
+                          spirvTestsRegistry::getInstance().getNumTests(),
+                          spirvTestsRegistry::getInstance().getTestDefinitions(),
+                          false, 0, checkAddressWidth);
 }

diff --git a/test_conformance/spirv_new/procs.h b/test_conformance/spirv_new/procs.h
index 31c65a3..aa44cdd 100644
--- a/test_conformance/spirv_new/procs.h
+++ b/test_conformance/spirv_new/procs.h

@@ -1,16 +1,14 @@
 /******************************************************************
 Copyright (c) 2016 The Khronos Group Inc. All Rights Reserved.
 
-This code is protected by copyright laws and contains material proprietary to
-the Khronos Group, Inc. This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not
-be disclosed in whole or in part to third parties, and may not be reproduced,
-republished, distributed, transmitted, displayed, broadcast or otherwise
-exploited in any manner without the express prior written permission of Khronos
-Group. The receipt or possession of this code does not convey any rights to
-reproduce, disclose, or distribute its contents, or to manufacture, use, or sell
-anything that it may describe, in whole or in part other than under the terms of
-the Khronos Adopters Agreement or Khronos Conformance Test Source License
-Agreement as executed between Khronos and the recipient.
+This code is protected by copyright laws and contains material proprietary to the Khronos Group, Inc.
+This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not be disclosed in whole or in part to
+third parties, and may not be reproduced, republished, distributed, transmitted, displayed,
+broadcast or otherwise exploited in any manner without the express prior written permission
+of Khronos Group. The receipt or possession of this code does not convey any rights to reproduce,
+disclose, or distribute its contents, or to manufacture, use, or sell anything that it may describe,
+in whole or in part other than under the terms of the Khronos Adopters Agreement
+or Khronos Conformance Test Source License Agreement as executed between Khronos and the recipient.
 ******************************************************************/
 
 #pragma once
@@ -26,17 +24,16 @@
 
 #include <vector>
 
-#define SPIRV_CHECK_ERROR(err, fmt, ...)                                       \
-    do                                                                         \
-    {                                                                          \
-        if (err == CL_SUCCESS) break;                                          \
-        log_error("%s(%d): Error %d\n" fmt "\n", __FILE__, __LINE__, err,      \
-                  ##__VA_ARGS__);                                              \
-        return -1;                                                             \
-    } while (0)
+#define SPIRV_CHECK_ERROR(err, fmt, ...) do {               \
+        if (err == CL_SUCCESS) break;                       \
+        log_error("%s(%d): Error %d\n" fmt "\n",            \
+                  __FILE__, __LINE__, err, ##__VA_ARGS__);  \
+        return -1;                                          \
+    } while(0)
 
 
-class baseTestClass {
+class baseTestClass
+{
 public:
     baseTestClass() {}
     virtual basefn getFunction() = 0;
@@ -48,53 +45,54 @@
     std::vector<test_definition> testDefinitions;
 
 public:
-    static spirvTestsRegistry &getInstance();
+
+    static spirvTestsRegistry& getInstance();
 
     test_definition *getTestDefinitions();
 
     size_t getNumTests();
 
-    void addTestClass(baseTestClass *test, const char *testName,
-                      Version version);
+    void addTestClass(baseTestClass *test, const char *testName);
     spirvTestsRegistry() {}
 };
 
-template <typename T> T *createAndRegister(const char *name, Version version)
+template<typename T>
+T* createAndRegister(const char *name)
 {
     T *testClass = new T();
-    spirvTestsRegistry::getInstance().addTestClass((baseTestClass *)testClass,
-                                                   name, version);
+    spirvTestsRegistry::getInstance().addTestClass((baseTestClass *)testClass, name);
     return testClass;
 }
 
-#define TEST_SPIRV_FUNC_VERSION(name, version)                                 \
-    extern int test_##name(cl_device_id deviceID, cl_context context,          \
-                           cl_command_queue queue, int num_elements);          \
-    class test_##name##_class : public baseTestClass {                         \
-    private:                                                                   \
-        basefn fn;                                                             \
-                                                                               \
-    public:                                                                    \
-        test_##name##_class(): fn(test_##name) {}                              \
-        basefn getFunction() { return fn; }                                    \
-    };                                                                         \
-    test_##name##_class *var_##name =                                          \
-        createAndRegister<test_##name##_class>(#name, version);                \
-    int test_##name(cl_device_id deviceID, cl_context context,                 \
-                    cl_command_queue queue, int num_elements)
+#define TEST_SPIRV_FUNC(name)                           \
+    extern int test_##name(cl_device_id deviceID,       \
+                           cl_context context,          \
+                           cl_command_queue queue,      \
+                           int num_elements);           \
+    class test_##name##_class  : public baseTestClass   \
+    {                                                   \
+    private:                                            \
+        basefn fn;                                      \
+                                                        \
+    public:                                             \
+    test_##name##_class() : fn(test_##name)             \
+        {                                               \
+        }                                               \
+        basefn getFunction()                            \
+        {                                               \
+            return fn;                                  \
+        }                                               \
+    };                                                  \
+    test_##name##_class *var_##name =                   \
+        createAndRegister<test_##name##_class>(#name);  \
+    int test_##name(cl_device_id deviceID,              \
+                    cl_context context,                 \
+                    cl_command_queue queue,             \
+                    int num_elements)
 
-#define TEST_SPIRV_FUNC(name) TEST_SPIRV_FUNC_VERSION(name, Version(1, 2))
+std::vector<unsigned char> readSPIRV(const char *file_name);
 
-struct spec_const
-{
-    spec_const(cl_int id = 0, size_t sizet = 0, const void *value = NULL)
-        : spec_id(id), spec_size(sizet), spec_value(value){};
-    cl_int spec_id;
-    size_t spec_size;
-    const void *spec_value;
-};
-
-int get_program_with_il(clProgramWrapper &prog, const cl_device_id deviceID,
-                        const cl_context context, const char *prog_name,
-                        spec_const spec_const_def = spec_const());
-std::vector<unsigned char> readSPIRV(const char *file_name);
\ No newline at end of file
+int get_program_with_il(clProgramWrapper &prog,
+                        const cl_device_id deviceID,
+                        const cl_context context,
+                        const char *prog_name);

diff --git a/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm32 b/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm32
index 4442255..2336201 100644
--- a/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "atomic_dec_global" %__spirv_GlobalInvocationId
+               OpEntryPoint Kernel %1 "atomic_dec_global"
                OpName %__spirv_GlobalInvocationId "__spirv_GlobalInvocationId"
                OpName %val "val"
                OpName %counter "counter"
@@ -17,13 +17,13 @@
                OpDecorate %__spirv_GlobalInvocationId LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
      %uint_1 = OpConstant %uint 1
    %uint_512 = OpConstant %uint 512
-%__spirv_GlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_GlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
         %val = OpFunctionParameter %_ptr_CrossWorkgroup_uint
     %counter = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm64 b/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm64
index 836a7af..683a0a5 100644
--- a/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "atomic_dec_global" %__spirv_GlobalInvocationId
+               OpEntryPoint Kernel %1 "atomic_dec_global"
                OpName %__spirv_GlobalInvocationId "__spirv_GlobalInvocationId"
                OpName %val "val"
                OpName %counter "counter"
@@ -18,7 +18,7 @@
                OpDecorate %__spirv_GlobalInvocationId LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -26,7 +26,7 @@
      %uint_1 = OpConstant %uint 1
    %uint_512 = OpConstant %uint 512
    %ulong_32 = OpConstant %ulong 32
-%__spirv_GlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_GlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
         %val = OpFunctionParameter %_ptr_CrossWorkgroup_uint
     %counter = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm32 b/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm32
index 2aa163e..22ee479 100644
--- a/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "atomic_inc_global" %__spirv_GlobalInvocationId
+               OpEntryPoint Kernel %1 "atomic_inc_global"
                OpName %__spirv_GlobalInvocationId "__spirv_GlobalInvocationId"
                OpName %val "val"
                OpName %counter "counter"
@@ -17,13 +17,13 @@
                OpDecorate %__spirv_GlobalInvocationId LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
      %uint_1 = OpConstant %uint 1
    %uint_512 = OpConstant %uint 512
-%__spirv_GlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_GlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
         %val = OpFunctionParameter %_ptr_CrossWorkgroup_uint
     %counter = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm64 b/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm64
index 2a96efd..cc59bb9 100644
--- a/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "atomic_inc_global" %__spirv_GlobalInvocationId
+               OpEntryPoint Kernel %1 "atomic_inc_global"
                OpName %__spirv_GlobalInvocationId "__spirv_GlobalInvocationId"
                OpName %val "val"
                OpName %counter "counter"
@@ -18,7 +18,7 @@
                OpDecorate %__spirv_GlobalInvocationId LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -26,7 +26,7 @@
      %uint_1 = OpConstant %uint 1
    %uint_512 = OpConstant %uint 512
    %ulong_32 = OpConstant %ulong 32
-%__spirv_GlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_GlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
         %val = OpFunctionParameter %_ptr_CrossWorkgroup_uint
     %counter = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm32 b/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm32
index 5b5991a..57419c1 100644
--- a/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "branch_conditional" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "branch_conditional"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm64 b/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm64
index 41983a1..44e50fc 100644
--- a/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "branch_conditional" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "branch_conditional"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -29,7 +29,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm32 b/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm32
index daeba30..ce37a4d 100644
--- a/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "branch_conditional_weighted" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "branch_conditional_weighted"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm64 b/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm64
index 5238726..d30c281 100644
--- a/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "branch_conditional_weighted" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "branch_conditional_weighted"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -29,7 +29,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm32
index 7978899..07b6cd2 100644
--- a/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "branch_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "branch_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm64
index bf44d36..1b24c5d 100644
--- a/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "branch_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "branch_simple"
                OpName %in "in"
                OpName %out "out"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm32 b/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm32
index 709d07c..2b6d7ad 100644
--- a/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "composite_construct_int4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "composite_construct_int4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
@@ -24,7 +24,7 @@
    %uint_122 = OpConstant %uint 122
    %uint_121 = OpConstant %uint 121
    %uint_119 = OpConstant %uint 119
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm64 b/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm64
index ce89547..09f6d9b 100644
--- a/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "composite_construct_int4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "composite_construct_int4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v4uint = OpTypeVector %uint 4
@@ -27,7 +27,7 @@
    %uint_122 = OpConstant %uint 122
    %uint_121 = OpConstant %uint 121
    %uint_119 = OpConstant %uint 119
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %17 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm32 b/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm32
index 8ff67a2..e277912 100644
--- a/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "composite_construct_struct" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "composite_construct_struct"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v2uint = OpTypeVector %uint 2
       %uchar = OpTypeInt 8 0
@@ -29,7 +29,7 @@
 %uint_2100480000 = OpConstant %uint 2100480000
 %uint_2100480000_0 = OpConstant %uint 2100480000
          %18 = OpConstantComposite %v2uint %uint_2100480000 %uint_2100480000_0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_11
          %19 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm64 b/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm64
index 6d98de6..ed5422d 100644
--- a/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "composite_construct_struct" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "composite_construct_struct"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v2uint = OpTypeVector %uint 2
@@ -32,7 +32,7 @@
 %uint_2100480000 = OpConstant %uint 2100480000
 %uint_2100480000_0 = OpConstant %uint 2100480000
          %20 = OpConstantComposite %v2uint %uint_2100480000 %uint_2100480000_0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_12
          %21 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm32
index 40a6b56..7931958 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %uchar_20 = OpConstant %uchar 20
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm64
index 3deb9c4..a356232 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %ulong_32 = OpConstant %ulong 32
    %uchar_20 = OpConstant %uchar 20
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm32
index 6d00bf5..3d2651f 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_double_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_double_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
 %double_3_1415926535897931 = OpConstant %double 3.1415926535897931
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm64
index ed87e28..807916d 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_double_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_double_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
 %double_3_1415926535897931 = OpConstant %double 3.1415926535897931
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm32
index 0f7d44c..19c2b51 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_false_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_false_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %bool = OpTypeBool
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -23,7 +23,7 @@
       %false = OpConstantFalse %bool
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm64
index 9a4923c..1c1980e 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_false_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_false_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
        %bool = OpTypeBool
@@ -26,7 +26,7 @@
       %false = OpConstantFalse %bool
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %16 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm32
index 85cbca7..3286c75 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
 %float_3_14159274 = OpConstant %float 3.14159274
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm64
index 5b7672f..f91b2bf 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
 %float_3_14159274 = OpConstant %float 3.14159274
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm32
index 47a0ac9..59e4e2c 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_half_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_half_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
 %half_0x1_ap_1 = OpConstant %half 0x1.ap+1
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm64
index 7a9f1e8..4004708 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_half_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_half_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
        %half = OpTypeFloat 16
@@ -25,7 +25,7 @@
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
 %half_0x1_ap_1 = OpConstant %half 0x1.ap+1
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm32
index 46934c0..b80809f 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_int3_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_int3_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_v3uint = OpTypePointer CrossWorkgroup %v3uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_v3uint
@@ -23,7 +23,7 @@
    %uint_122 = OpConstant %uint 122
    %uint_121 = OpConstant %uint 121
          %13 = OpConstantComposite %v3uint %uint_123 %uint_122 %uint_121
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v3uint
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm64
index 25516d2..2c30216 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_int3_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_int3_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
@@ -27,7 +27,7 @@
    %uint_122 = OpConstant %uint 122
    %uint_121 = OpConstant %uint 121
          %16 = OpConstantComposite %v3uint %uint_123 %uint_122 %uint_121
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v3uint
          %17 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm32
index 62ba1d8..7e6c2bb 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_int4_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_int4_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
@@ -25,7 +25,7 @@
    %uint_121 = OpConstant %uint 121
    %uint_119 = OpConstant %uint 119
          %15 = OpConstantComposite %v4uint %uint_123 %uint_122 %uint_121 %uint_119
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %16 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm64
index 3824090..7c5b5fb 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_int4_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_int4_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v4uint = OpTypeVector %uint 4
@@ -28,7 +28,7 @@
    %uint_121 = OpConstant %uint 121
    %uint_119 = OpConstant %uint 119
          %17 = OpConstantComposite %v4uint %uint_123 %uint_122 %uint_121 %uint_119
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %18 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm32
index a5091de..c5bd8bf 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_int_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_int_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %uint_123 = OpConstant %uint 123
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm64
index 75362b8..1ddb08a 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_int_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_int_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
    %uint_123 = OpConstant %uint 123
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm32
index adf712a..e0a8b6f 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_long_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_long_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
 %ulong_34359738368 = OpConstant %ulong 34359738368
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm64
index 28e856e..7091209 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_long_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_long_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
 %ulong_34359738368 = OpConstant %ulong 34359738368
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm32
index 2f28026..3b53e16 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_short_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_short_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
 %ushort_32000 = OpConstant %ushort 32000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm64
index d3fc51e..d95032b 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_short_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_short_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
 %ushort_32000 = OpConstant %ushort 32000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm32
index ebe84ad..cf617c6 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_struct_int_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_struct_int_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
   %_struct_9 = OpTypeStruct %uint %uchar
@@ -25,7 +25,7 @@
 %uint_2100483600 = OpConstant %uint 2100483600
   %uchar_128 = OpConstant %uchar 128
          %14 = OpConstantComposite %_struct_9 %uint_2100483600 %uchar_128
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_9
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm64
index bc37627..2554667 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_struct_int_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_struct_int_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %uchar = OpTypeInt 8 0
@@ -28,7 +28,7 @@
 %uint_2100483600 = OpConstant %uint 2100483600
   %uchar_128 = OpConstant %uchar 128
          %16 = OpConstantComposite %_struct_10 %uint_2100483600 %uchar_128
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_10
          %17 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm32
index 0c0e0e0..becd9e6 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Kernel
                OpCapability Linkage
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_struct_int_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_struct_int_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
   %_struct_9 = OpTypeStruct %uint %float
@@ -24,7 +24,7 @@
   %uint_1024 = OpConstant %uint 1024
 %float_3_1415 = OpConstant %float 3.1415
          %14 = OpConstantComposite %_struct_9 %uint_1024 %float_3_1415
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_9
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm64
index f06f277..96d85db 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_struct_int_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_struct_int_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %float = OpTypeFloat 32
@@ -27,7 +27,7 @@
   %uint_1024 = OpConstant %uint 1024
 %float_3_1415 = OpConstant %float 3.1415
          %16 = OpConstantComposite %_struct_10 %uint_1024 %float_3_1415
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_10
          %17 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm32
index 62763d8..23ae86e 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Linkage
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_struct_struct_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_struct_struct_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v2uint = OpTypeVector %uint 2
       %uchar = OpTypeInt 8 0
@@ -31,7 +31,7 @@
          %18 = OpConstantComposite %_struct_10 %uint_2100483600 %uchar_128
          %19 = OpConstantComposite %v2uint %uint_2100480000 %uint_2100480000_0
          %20 = OpConstantComposite %_struct_11 %19 %18
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_11
          %21 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm64
index 277b370..3c3cf2e 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_struct_struct_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_struct_struct_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v2uint = OpTypeVector %uint 2
@@ -34,7 +34,7 @@
          %20 = OpConstantComposite %_struct_11 %uint_2100483600 %uchar_128
          %21 = OpConstantComposite %v2uint %uint_2100480000 %uint_2100480000_0
          %22 = OpConstantComposite %_struct_12 %21 %20
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_12
          %23 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm32
index b135e13..8b3733d 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_true_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_true_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %bool = OpTypeBool
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -23,7 +23,7 @@
        %true = OpConstantTrue %bool
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm64
index 3fdd7a2..da72dc6 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_true_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_true_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
        %bool = OpTypeBool
@@ -26,7 +26,7 @@
        %true = OpConstantTrue %bool
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %16 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm32
index c40a600..a62d571 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_uchar_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_uchar_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %uchar_19 = OpConstant %uchar 19
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm64
index c55fe28..bba2551 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_uchar_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_uchar_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %ulong_32 = OpConstant %ulong 32
    %uchar_19 = OpConstant %uchar 19
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm32
index 3f430dc..5c8de26 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_uint_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_uint_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
  %uint_54321 = OpConstant %uint 54321
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm64
index d64fed7..1271b0f 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_uint_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_uint_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
  %uint_54321 = OpConstant %uint 54321
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm32
index 04b5054..b69d82a 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_ulong_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_ulong_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
 %ulong_9223372036854775810 = OpConstant %ulong 9223372036854775810
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm64
index f631976..a53f902 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_ulong_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_ulong_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
 %ulong_9223372036854775810 = OpConstant %ulong 9223372036854775810
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm32
index 0cfda35..ed309bf 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_ushort_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_ushort_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
 %ushort_65000 = OpConstant %ushort 65000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm64
index 6324051..be52174 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_ushort_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_ushort_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
 %ushort_65000 = OpConstant %ushort 65000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm32
index 5545194..5ad09c7 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %uchar_20 = OpConstant %uchar 20
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm64
index e933f20..6faa144 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %ulong_32 = OpConstant %ulong 32
    %uchar_20 = OpConstant %uchar 20
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm32
index 4b8eb83..3023c94 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_double_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_double_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
 %double_3_1415926535897931 = OpConstant %double 3.1415926535897931
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm64
index 49837d8..3fa7f96 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_double_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_double_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
 %double_3_1415926535897931 = OpConstant %double 3.1415926535897931
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm32
index 268d70e..d7ef29c 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
 %float_3_14159274 = OpConstant %float 3.14159274
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm64
index 0fd6111..4594c1c 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
 %float_3_14159274 = OpConstant %float 3.14159274
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm32
index f924afc..b0af90a 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_half_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_half_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
 %half_0x1_ap_1 = OpConstant %half 0x1.ap+1
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm64
index b4babdf..b9eeca9 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_half_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_half_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
        %half = OpTypeFloat 16
@@ -25,7 +25,7 @@
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
 %half_0x1_ap_1 = OpConstant %half 0x1.ap+1
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm32
index d8424d2..2f23b9d 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_int3_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_int3_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_v3uint = OpTypePointer CrossWorkgroup %v3uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_v3uint
@@ -23,7 +23,7 @@
    %uint_122 = OpConstant %uint 122
    %uint_121 = OpConstant %uint 121
          %13 = OpConstantComposite %v3uint %uint_123 %uint_122 %uint_121
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v3uint
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm64
index d96630b..0ab8b1d 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_int3_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_int3_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
@@ -27,7 +27,7 @@
    %uint_122 = OpConstant %uint 122
    %uint_121 = OpConstant %uint 121
          %16 = OpConstantComposite %v3uint %uint_123 %uint_122 %uint_121
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v3uint
          %17 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm32
index 1710d73..f7633a9 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_int4_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_int4_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
@@ -25,7 +25,7 @@
    %uint_121 = OpConstant %uint 121
    %uint_119 = OpConstant %uint 119
          %15 = OpConstantComposite %v4uint %uint_123 %uint_122 %uint_121 %uint_119
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %16 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm64
index 1f82ba6..6241114 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_int4_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_int4_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v4uint = OpTypeVector %uint 4
@@ -28,7 +28,7 @@
    %uint_121 = OpConstant %uint 121
    %uint_119 = OpConstant %uint 119
          %17 = OpConstantComposite %v4uint %uint_123 %uint_122 %uint_121 %uint_119
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %18 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm32
index 39011d5..c6fa218 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_int_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_int_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %uint_123 = OpConstant %uint 123
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm64
index 11afdb7..e068f8c 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_int_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_int_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
    %uint_123 = OpConstant %uint 123
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm32
index f4d521e..52411f2 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_long_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_long_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
 %ulong_34359738368 = OpConstant %ulong 34359738368
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm64
index 36a50ed..4349ed3 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_long_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_long_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
 %ulong_34359738368 = OpConstant %ulong 34359738368
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm32
index b735f0a..924e675 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_short_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_short_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
 %ushort_32000 = OpConstant %ushort 32000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm64
index 760f0c6..0be76e2 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_short_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_short_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
 %ushort_32000 = OpConstant %ushort 32000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm32
index f12b288..1d6c526 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Linkage
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_struct_int_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_struct_int_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
   %_struct_9 = OpTypeStruct %uint %uchar
@@ -25,7 +25,7 @@
 %uint_2100483600 = OpConstant %uint 2100483600
   %uchar_128 = OpConstant %uchar 128
          %14 = OpConstantComposite %_struct_9 %uint_2100483600 %uchar_128
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_9
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm64
index 717244a..e0387e4 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_struct_int_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_struct_int_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %uchar = OpTypeInt 8 0
@@ -28,7 +28,7 @@
 %uint_2100483600 = OpConstant %uint 2100483600
   %uchar_128 = OpConstant %uchar 128
          %16 = OpConstantComposite %_struct_10 %uint_2100483600 %uchar_128
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_10
          %17 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm32
index b781cd9..6c0fb9d 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Kernel
                OpCapability Linkage
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_struct_int_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_struct_int_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
   %_struct_9 = OpTypeStruct %uint %float
@@ -24,7 +24,7 @@
   %uint_1024 = OpConstant %uint 1024
 %float_3_1415 = OpConstant %float 3.1415
          %14 = OpConstantComposite %_struct_9 %uint_1024 %float_3_1415
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_9
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm64
index 6a9a898..13b549a 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_struct_int_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_struct_int_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %float = OpTypeFloat 32
@@ -27,7 +27,7 @@
   %uint_1024 = OpConstant %uint 1024
 %float_3_1415 = OpConstant %float 3.1415
          %16 = OpConstantComposite %_struct_10 %uint_1024 %float_3_1415
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_10
          %17 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm32
index 7f8277f..978e921 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Int8
                OpCapability Linkage
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_struct_struct_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_struct_struct_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v2uint = OpTypeVector %uint 2
       %uchar = OpTypeInt 8 0
@@ -31,7 +31,7 @@
          %18 = OpConstantComposite %_struct_10 %uint_2100483600 %uchar_128
          %19 = OpConstantComposite %v2uint %uint_2100480000 %uint_2100480000_0
          %20 = OpConstantComposite %_struct_11 %19 %18
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_11
          %21 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm64
index 9573ada..29f4b46 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_struct_struct_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_struct_struct_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v2uint = OpTypeVector %uint 2
@@ -34,7 +34,7 @@
          %20 = OpConstantComposite %_struct_11 %uint_2100483600 %uchar_128
          %21 = OpConstantComposite %v2uint %uint_2100480000 %uint_2100480000_0
          %22 = OpConstantComposite %_struct_12 %21 %20
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_12
          %23 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm32
index ca50b9f..0450ba0 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_uchar_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_uchar_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %uchar_19 = OpConstant %uchar 19
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm64
index 70726a9..87072e3 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_uchar_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_uchar_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %ulong_32 = OpConstant %ulong 32
    %uchar_19 = OpConstant %uchar 19
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm32
index b10ca7d..e36766f 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_uint_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_uint_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
  %uint_54321 = OpConstant %uint 54321
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm64
index ace794d..c0af753 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_uint_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_uint_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
  %uint_54321 = OpConstant %uint 54321
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm32
index 91b60fc..678a5f3 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_ulong_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_ulong_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
 %ulong_9223372036854775810 = OpConstant %ulong 9223372036854775810
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm64
index 5731a0a..caf6e55 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_ulong_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_ulong_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
 %ulong_9223372036854775810 = OpConstant %ulong 9223372036854775810
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm32
index 96d4f29..f6fbcdb 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_ushort_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_ushort_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
 %ushort_65000 = OpConstant %ushort 65000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm64
index cfce560..fd6b1c2 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_ushort_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_ushort_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
 %ushort_65000 = OpConstant %ushort 65000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm32
index ea571a1..94bf154 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_aliased" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_aliased"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,11 +20,11 @@
                OpGroupDecorate %6 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm64
index 1e68600..e57a5ff 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_aliased" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_aliased"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %6 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm32
index 51ed0e3..b2a2ae2 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_alignment" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_alignment"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,11 +20,11 @@
                OpGroupDecorate %6 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm64
index e7b4bde..f6bb852 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_alignment" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_alignment"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %6 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_coherent.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_coherent.spvasm32
new file mode 100644
index 0000000..eddcdb3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_coherent.spvasm32

@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 40
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability ImageBasic
+               OpCapability LiteralSampler
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %2 "decorate_coherent"
+               OpName %sampler "sampler"
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %call1 "call1"
+               OpName %call2 "call2"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit6 "vecinit6"
+               OpName %TempSampledImage "TempSampledImage"
+               OpName %call7_old "call7.old"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %src Coherent
+               OpDecorate %19 Constant
+         %19 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %sampler LinkageAttributes "sampler" Export
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpDecorate %sampler Alignment 4
+               OpGroupDecorate %19 %sampler %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+    %uint_16 = OpConstant %uint 16
+%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
+     %v3uint = OpTypeVector %uint 3
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
+       %void = OpTypeVoid
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %28 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadOnly
+         %29 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %28
+     %v2uint = OpTypeVector %uint 2
+         %31 = OpTypeSampler
+         %32 = OpTypeSampledImage %28
+      %float = OpTypeFloat 32
+         %34 = OpConstantSampler %31 None 0 Nearest
+    %float_0 = OpConstant %float 0
+    %sampler = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_16
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+          %2 = OpFunction %void None %29
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+        %src = OpFunctionParameter %28
+      %entry = OpLabel
+         %36 = OpUndef %v2uint
+         %37 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %uint %37 0
+         %38 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %uint %38 1
+         %39 = OpLoad %v3uint %__spirv_BuiltInGlobalSize Aligned 0
+      %call2 = OpCompositeExtract %uint %39 0
+        %mul = OpIMul %uint %call2 %call1
+        %add = OpIAdd %uint %mul %call
+    %vecinit = OpCompositeInsert %v2uint %call1 %36 0
+   %vecinit6 = OpCompositeInsert %v2uint %call %vecinit 1
+%TempSampledImage = OpSampledImage %32 %src %34
+  %call7_old = OpImageSampleExplicitLod %v4uint %TempSampledImage %vecinit6 Lod %float_0
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %dst %add
+               OpStore %arrayidx %call7_old Aligned 16
+               OpReturn
+               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_coherent.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_coherent.spvasm64
new file mode 100644
index 0000000..d86a984
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_coherent.spvasm64

@@ -0,0 +1,93 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability ImageBasic
+               OpCapability LiteralSampler
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %2 "decorate_coherent"
+               OpName %sampler "sampler"
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %conv "conv"
+               OpName %call1 "call1"
+               OpName %conv2 "conv2"
+               OpName %conv3 "conv3"
+               OpName %call4 "call4"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit10 "vecinit10"
+               OpName %TempSampledImage "TempSampledImage"
+               OpName %call11_old "call11.old"
+               OpName %sext "sext"
+               OpName %idxprom "idxprom"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %src Coherent
+               OpDecorate %24 Constant
+         %24 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %sampler LinkageAttributes "sampler" Export
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpDecorate %sampler Alignment 4
+               OpGroupDecorate %24 %sampler %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+      %ulong = OpTypeInt 64 0
+    %uint_16 = OpConstant %uint 16
+   %ulong_32 = OpConstant %ulong 32
+%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
+    %v3ulong = OpTypeVector %ulong 3
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
+       %void = OpTypeVoid
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %35 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadOnly
+         %36 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %35
+     %v2uint = OpTypeVector %uint 2
+         %38 = OpTypeSampler
+         %39 = OpTypeSampledImage %35
+      %float = OpTypeFloat 32
+         %41 = OpConstantSampler %38 None 0 Nearest
+    %float_0 = OpConstant %float 0
+    %sampler = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_16
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+          %2 = OpFunction %void None %36
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+        %src = OpFunctionParameter %35
+      %entry = OpLabel
+         %43 = OpUndef %v2uint
+         %44 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %ulong %44 0
+       %conv = OpUConvert %uint %call
+         %45 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %ulong %45 1
+      %conv2 = OpUConvert %uint %call1
+      %conv3 = OpSConvert %ulong %conv2
+         %46 = OpLoad %v3ulong %__spirv_BuiltInGlobalSize Aligned 0
+      %call4 = OpCompositeExtract %ulong %46 0
+        %mul = OpIMul %ulong %conv3 %call4
+        %add = OpIAdd %ulong %mul %call
+    %vecinit = OpCompositeInsert %v2uint %conv2 %43 0
+  %vecinit10 = OpCompositeInsert %v2uint %conv %vecinit 1
+%TempSampledImage = OpSampledImage %39 %src %41
+ %call11_old = OpImageSampleExplicitLod %v4uint %TempSampledImage %vecinit10 Lod %float_0
+       %sext = OpShiftLeftLogical %ulong %add %ulong_32
+    %idxprom = OpShiftRightArithmetic %ulong %sext %ulong_32
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %dst %idxprom
+               OpStore %arrayidx %call11_old Aligned 16
+               OpReturn
+               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm32
index 2bfd5a2..482526d 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_constant" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_constant"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,11 +20,11 @@
                OpGroupDecorate %6 %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm64
index a1408ae..e2c5547 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_constant" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_constant"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %6 %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm32
index a5a71f6..bf5c981 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_constant" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_constant"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,11 +20,11 @@
                OpGroupDecorate %6 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm64
index df02172..9d8bc9d 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_constant" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_constant"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %6 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm32
index 5aa89a4..4150342 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_cpacked" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_cpacked"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %_struct_4 CPacked
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
   %_struct_4 = OpTypeStruct %uint %uchar
@@ -26,7 +26,7 @@
 %uint_2100483600 = OpConstant %uint 2100483600
   %uchar_127 = OpConstant %uchar 127
          %14 = OpConstantComposite %_struct_4 %uint_2100483600 %uchar_127
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_4
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm64
index 41dee76..aa42b25 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_cpacked" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_cpacked"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -18,7 +18,7 @@
                OpDecorate %_struct_4 CPacked
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %uchar = OpTypeInt 8 0
@@ -29,7 +29,7 @@
 %uint_2100483600 = OpConstant %uint 2100483600
   %uchar_127 = OpConstant %uchar 127
          %16 = OpConstantComposite %_struct_4 %uint_2100483600 %uchar_127
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_4
          %17 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_nonreadable.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_nonreadable.spvasm32
new file mode 100644
index 0000000..92fdb89
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_nonreadable.spvasm32

@@ -0,0 +1,66 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 31
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability ImageBasic
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %2 "decorate_nonreadable"
+               OpSource OpenCL_C 100000
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %call1 "call1"
+               OpName %call2 "call2"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit3 "vecinit3"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %dst NonReadable
+               OpDecorate %16 Constant
+         %16 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %src FuncParamAttr NoCapture
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpGroupDecorate %16 %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+     %v3uint = OpTypeVector %uint 3
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
+       %void = OpTypeVoid
+         %21 = OpTypeImage %void 2D 0 0 0 0 Unknown WriteOnly
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %24 = OpTypeFunction %void %21 %_ptr_CrossWorkgroup_v4uint
+     %v2uint = OpTypeVector %uint 2
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+          %2 = OpFunction %void None %24
+        %dst = OpFunctionParameter %21
+        %src = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+      %entry = OpLabel
+         %26 = OpUndef %v2uint
+         %27 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %uint %27 0
+         %28 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %uint %28 1
+         %29 = OpLoad %v3uint %__spirv_BuiltInGlobalSize Aligned 0
+      %call2 = OpCompositeExtract %uint %29 0
+        %mul = OpIMul %uint %call2 %call1
+        %add = OpIAdd %uint %mul %call
+    %vecinit = OpCompositeInsert %v2uint %call1 %26 0
+   %vecinit3 = OpCompositeInsert %v2uint %call %vecinit 1
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %src %add
+         %30 = OpLoad %v4uint %arrayidx Aligned 16
+               OpImageWrite %dst %vecinit3 %30
+               OpReturn
+               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_nonreadable.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_nonreadable.spvasm64
new file mode 100644
index 0000000..596cc49
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_nonreadable.spvasm64

@@ -0,0 +1,79 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 38
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability ImageBasic
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %2 "decorate_nonreadable"
+               OpSource OpenCL_C 100000
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %conv "conv"
+               OpName %call1 "call1"
+               OpName %conv2 "conv2"
+               OpName %conv3 "conv3"
+               OpName %call4 "call4"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit7 "vecinit7"
+               OpName %sext "sext"
+               OpName %idxprom "idxprom"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %dst NonReadable
+               OpDecorate %21 Constant
+         %21 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %src FuncParamAttr NoCapture
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpGroupDecorate %21 %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+      %ulong = OpTypeInt 64 0
+       %uint = OpTypeInt 32 0
+   %ulong_32 = OpConstant %ulong 32
+    %v3ulong = OpTypeVector %ulong 3
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
+       %void = OpTypeVoid
+         %28 = OpTypeImage %void 2D 0 0 0 0 Unknown WriteOnly
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %31 = OpTypeFunction %void %28 %_ptr_CrossWorkgroup_v4uint
+     %v2uint = OpTypeVector %uint 2
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+          %2 = OpFunction %void None %31
+        %dst = OpFunctionParameter %28
+        %src = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+      %entry = OpLabel
+         %33 = OpUndef %v2uint
+         %34 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %ulong %34 0
+       %conv = OpUConvert %uint %call
+         %35 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %ulong %35 1
+      %conv2 = OpUConvert %uint %call1
+      %conv3 = OpSConvert %ulong %conv2
+         %36 = OpLoad %v3ulong %__spirv_BuiltInGlobalSize Aligned 0
+      %call4 = OpCompositeExtract %ulong %36 0
+        %mul = OpIMul %ulong %conv3 %call4
+        %add = OpIAdd %ulong %mul %call
+    %vecinit = OpCompositeInsert %v2uint %conv2 %33 0
+   %vecinit7 = OpCompositeInsert %v2uint %conv %vecinit 1
+       %sext = OpShiftLeftLogical %ulong %add %ulong_32
+    %idxprom = OpShiftRightArithmetic %ulong %sext %ulong_32
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %src %idxprom
+         %37 = OpLoad %v4uint %arrayidx Aligned 16
+               OpImageWrite %dst %vecinit7 %37
+               OpReturn
+               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_nonwritable.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_nonwritable.spvasm32
new file mode 100644
index 0000000..e4b25e3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_nonwritable.spvasm32

@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 40
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability ImageBasic
+               OpCapability LiteralSampler
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %2 "decorate_nonwritable"
+               OpName %sampler "sampler"
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %call1 "call1"
+               OpName %call2 "call2"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit6 "vecinit6"
+               OpName %TempSampledImage "TempSampledImage"
+               OpName %call7_old "call7.old"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %src NonWritable
+               OpDecorate %19 Constant
+         %19 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %sampler LinkageAttributes "sampler" Export
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpDecorate %sampler Alignment 4
+               OpGroupDecorate %19 %sampler %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+    %uint_16 = OpConstant %uint 16
+%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
+     %v3uint = OpTypeVector %uint 3
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
+       %void = OpTypeVoid
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %28 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadOnly
+         %29 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %28
+     %v2uint = OpTypeVector %uint 2
+         %31 = OpTypeSampler
+         %32 = OpTypeSampledImage %28
+      %float = OpTypeFloat 32
+         %34 = OpConstantSampler %31 None 0 Nearest
+    %float_0 = OpConstant %float 0
+    %sampler = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_16
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+          %2 = OpFunction %void None %29
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+        %src = OpFunctionParameter %28
+      %entry = OpLabel
+         %36 = OpUndef %v2uint
+         %37 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %uint %37 0
+         %38 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %uint %38 1
+         %39 = OpLoad %v3uint %__spirv_BuiltInGlobalSize Aligned 0
+      %call2 = OpCompositeExtract %uint %39 0
+        %mul = OpIMul %uint %call2 %call1
+        %add = OpIAdd %uint %mul %call
+    %vecinit = OpCompositeInsert %v2uint %call1 %36 0
+   %vecinit6 = OpCompositeInsert %v2uint %call %vecinit 1
+%TempSampledImage = OpSampledImage %32 %src %34
+  %call7_old = OpImageSampleExplicitLod %v4uint %TempSampledImage %vecinit6 Lod %float_0
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %dst %add
+               OpStore %arrayidx %call7_old Aligned 16
+               OpReturn
+               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_nonwritable.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_nonwritable.spvasm64
new file mode 100644
index 0000000..0f52968
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_nonwritable.spvasm64

@@ -0,0 +1,93 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability ImageBasic
+               OpCapability LiteralSampler
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %2 "decorate_nonwritable"
+               OpName %sampler "sampler"
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %conv "conv"
+               OpName %call1 "call1"
+               OpName %conv2 "conv2"
+               OpName %conv3 "conv3"
+               OpName %call4 "call4"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit10 "vecinit10"
+               OpName %TempSampledImage "TempSampledImage"
+               OpName %call11_old "call11.old"
+               OpName %sext "sext"
+               OpName %idxprom "idxprom"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %src NonWritable
+               OpDecorate %24 Constant
+         %24 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %sampler LinkageAttributes "sampler" Export
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpDecorate %sampler Alignment 4
+               OpGroupDecorate %24 %sampler %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+      %ulong = OpTypeInt 64 0
+    %uint_16 = OpConstant %uint 16
+   %ulong_32 = OpConstant %ulong 32
+%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
+    %v3ulong = OpTypeVector %ulong 3
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
+       %void = OpTypeVoid
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %35 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadOnly
+         %36 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %35
+     %v2uint = OpTypeVector %uint 2
+         %38 = OpTypeSampler
+         %39 = OpTypeSampledImage %35
+      %float = OpTypeFloat 32
+         %41 = OpConstantSampler %38 None 0 Nearest
+    %float_0 = OpConstant %float 0
+    %sampler = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_16
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+          %2 = OpFunction %void None %36
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+        %src = OpFunctionParameter %35
+      %entry = OpLabel
+         %43 = OpUndef %v2uint
+         %44 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %ulong %44 0
+       %conv = OpUConvert %uint %call
+         %45 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %ulong %45 1
+      %conv2 = OpUConvert %uint %call1
+      %conv3 = OpSConvert %ulong %conv2
+         %46 = OpLoad %v3ulong %__spirv_BuiltInGlobalSize Aligned 0
+      %call4 = OpCompositeExtract %ulong %46 0
+        %mul = OpIMul %ulong %conv3 %call4
+        %add = OpIAdd %ulong %mul %call
+    %vecinit = OpCompositeInsert %v2uint %conv2 %43 0
+  %vecinit10 = OpCompositeInsert %v2uint %conv %vecinit 1
+%TempSampledImage = OpSampledImage %39 %src %41
+ %call11_old = OpImageSampleExplicitLod %v4uint %TempSampledImage %vecinit10 Lod %float_0
+       %sext = OpShiftLeftLogical %ulong %add %ulong_32
+    %idxprom = OpShiftRightArithmetic %ulong %sext %ulong_32
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %dst %idxprom
+               OpStore %arrayidx %call11_old Aligned 16
+               OpReturn
+               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm32
index 7c06103..f05e916 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_restrict" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_restrict"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,11 +20,11 @@
                OpGroupDecorate %6 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm64
index 5bdaf73..b9181c9 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_restrict" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_restrict"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %6 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm32
index 32b6fe8..e57fe7b 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Float64
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rte_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rte_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTE
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm64
index b7b3754..7b56ca2 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rte_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rte_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTE
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm32
index 45e840a..9a93f23 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rte_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rte_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -17,13 +17,13 @@
                OpDecorate %6 FPRoundingMode RTE
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm64
index 771975e..45809d6 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rte_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rte_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -18,7 +18,7 @@
                OpDecorate %6 FPRoundingMode RTE
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -26,7 +26,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm32
index 6ca2023..30a398e 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Float64
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtn_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtn_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTN
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm64
index 13b3e2b..a164bd5 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtn_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtn_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTN
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm32
index 60e16d0..1db15d3 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtn_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtn_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -17,13 +17,13 @@
                OpDecorate %6 FPRoundingMode RTN
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm64
index 41fb1c7..8b8ee77 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtn_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtn_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -18,7 +18,7 @@
                OpDecorate %6 FPRoundingMode RTN
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -26,7 +26,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm32
index a2b097a..e6d80eb 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Float64
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtp_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtp_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTP
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm64
index 4eaa81d..b7f5915 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtp_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtp_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTP
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm32
index cce1cbc..09b4b98 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtp_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtp_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -17,13 +17,13 @@
                OpDecorate %6 FPRoundingMode RTP
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm64
index 6f315db..5a74457 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtp_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtp_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -18,7 +18,7 @@
                OpDecorate %6 FPRoundingMode RTP
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -26,7 +26,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm32
index d9dfe72..8eb632d 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Float64
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtz_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtz_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTZ
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm64
index 7738138..7c5c346 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtz_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtz_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTZ
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm32
index 288df6d..21b4d4f 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtz_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtz_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -17,13 +17,13 @@
                OpDecorate %6 FPRoundingMode RTZ
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm64
index 728d8bb..6f0b123 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtz_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtz_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -18,7 +18,7 @@
                OpDecorate %6 FPRoundingMode RTZ
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -26,7 +26,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm32
index 5437067..4aa86c1 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_char" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_char"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpDecorate %7 SaturatedConversion
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm64
index ba4d649..e102b84 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_char" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_char"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpDecorate %7 SaturatedConversion
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm32
index 3fa47c9..1e2e7bd 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_int"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,13 +19,13 @@
                OpDecorate %7 SaturatedConversion
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm64
index 8609e20..7ea8af9 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_int"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpDecorate %7 SaturatedConversion
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm32
index dbb3b44..c37a35b 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_short" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_short"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpDecorate %7 SaturatedConversion
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm64
index 2915c12..02bb819 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_short" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_short"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpDecorate %7 SaturatedConversion
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm32
index 9bffb68..b0b52ed 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uchar" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uchar"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpDecorate %7 SaturatedConversion
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm64
index 354639f..19c1415 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uchar" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uchar"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpDecorate %7 SaturatedConversion
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm32
index 0672489..1426f11 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uint" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uint"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,13 +19,13 @@
                OpDecorate %7 SaturatedConversion
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm64
index 7d9efb0..cd597d7 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uint" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uint"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpDecorate %7 SaturatedConversion
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm32
index ffbb417..56555aa 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_ushort" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_ushort"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpDecorate %7 SaturatedConversion
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm64
index 317f992..72bf0e0 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_ushort" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_ushort"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpDecorate %7 SaturatedConversion
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_volatile.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_volatile.spvasm32
new file mode 100644
index 0000000..94b1716
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_volatile.spvasm32

@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 40
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability ImageBasic
+               OpCapability LiteralSampler
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %2 "decorate_volatile"
+               OpName %sampler "sampler"
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %call1 "call1"
+               OpName %call2 "call2"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit6 "vecinit6"
+               OpName %TempSampledImage "TempSampledImage"
+               OpName %call7_old "call7.old"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %src Volatile
+               OpDecorate %19 Constant
+         %19 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %sampler LinkageAttributes "sampler" Export
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpDecorate %sampler Alignment 4
+               OpGroupDecorate %19 %sampler %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+    %uint_16 = OpConstant %uint 16
+%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
+     %v3uint = OpTypeVector %uint 3
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
+       %void = OpTypeVoid
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %28 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadOnly
+         %29 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %28
+     %v2uint = OpTypeVector %uint 2
+         %31 = OpTypeSampler
+         %32 = OpTypeSampledImage %28
+      %float = OpTypeFloat 32
+         %34 = OpConstantSampler %31 None 0 Nearest
+    %float_0 = OpConstant %float 0
+    %sampler = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_16
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+          %2 = OpFunction %void None %29
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+        %src = OpFunctionParameter %28
+      %entry = OpLabel
+         %36 = OpUndef %v2uint
+         %37 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %uint %37 0
+         %38 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %uint %38 1
+         %39 = OpLoad %v3uint %__spirv_BuiltInGlobalSize Aligned 0
+      %call2 = OpCompositeExtract %uint %39 0
+        %mul = OpIMul %uint %call2 %call1
+        %add = OpIAdd %uint %mul %call
+    %vecinit = OpCompositeInsert %v2uint %call1 %36 0
+   %vecinit6 = OpCompositeInsert %v2uint %call %vecinit 1
+%TempSampledImage = OpSampledImage %32 %src %34
+  %call7_old = OpImageSampleExplicitLod %v4uint %TempSampledImage %vecinit6 Lod %float_0
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %dst %add
+               OpStore %arrayidx %call7_old Aligned 16
+               OpReturn
+               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/decorate_volatile.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_volatile.spvasm64
new file mode 100644
index 0000000..9a151da
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_volatile.spvasm64

@@ -0,0 +1,93 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability ImageBasic
+               OpCapability LiteralSampler
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %2 "decorate_volatile"
+               OpName %sampler "sampler"
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %conv "conv"
+               OpName %call1 "call1"
+               OpName %conv2 "conv2"
+               OpName %conv3 "conv3"
+               OpName %call4 "call4"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit10 "vecinit10"
+               OpName %TempSampledImage "TempSampledImage"
+               OpName %call11_old "call11.old"
+               OpName %sext "sext"
+               OpName %idxprom "idxprom"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %src Volatile
+               OpDecorate %24 Constant
+         %24 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %sampler LinkageAttributes "sampler" Export
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpDecorate %sampler Alignment 4
+               OpGroupDecorate %24 %sampler %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+      %ulong = OpTypeInt 64 0
+    %uint_16 = OpConstant %uint 16
+   %ulong_32 = OpConstant %ulong 32
+%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
+    %v3ulong = OpTypeVector %ulong 3
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
+       %void = OpTypeVoid
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %35 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadOnly
+         %36 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %35
+     %v2uint = OpTypeVector %uint 2
+         %38 = OpTypeSampler
+         %39 = OpTypeSampledImage %35
+      %float = OpTypeFloat 32
+         %41 = OpConstantSampler %38 None 0 Nearest
+    %float_0 = OpConstant %float 0
+    %sampler = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_16
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+          %2 = OpFunction %void None %36
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+        %src = OpFunctionParameter %35
+      %entry = OpLabel
+         %43 = OpUndef %v2uint
+         %44 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %ulong %44 0
+       %conv = OpUConvert %uint %call
+         %45 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %ulong %45 1
+      %conv2 = OpUConvert %uint %call1
+      %conv3 = OpSConvert %ulong %conv2
+         %46 = OpLoad %v3ulong %__spirv_BuiltInGlobalSize Aligned 0
+      %call4 = OpCompositeExtract %ulong %46 0
+        %mul = OpIMul %ulong %conv3 %call4
+        %add = OpIAdd %ulong %mul %call
+    %vecinit = OpCompositeInsert %v2uint %conv2 %43 0
+  %vecinit10 = OpCompositeInsert %v2uint %conv %vecinit 1
+%TempSampledImage = OpSampledImage %39 %src %41
+ %call11_old = OpImageSampleExplicitLod %v4uint %TempSampledImage %vecinit10 Lod %float_0
+       %sext = OpShiftLeftLogical %ulong %add %ulong_32
+    %idxprom = OpShiftRightArithmetic %ulong %sext %ulong_32
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %dst %idxprom
+               OpStore %arrayidx %call11_old Aligned 16
+               OpReturn
+               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm32
index b76c3a3..e131176 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm32

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 23
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -24,11 +24,11 @@
                OpGroupDecorate %7 %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %14
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm64
index 1fa09e9..26b81ed 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm64

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 28
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -26,11 +26,11 @@
       %ulong = OpTypeInt 64 0
        %uint = OpTypeInt 32 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm32
index f1320bb..be55242 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm32

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 23
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -24,11 +24,11 @@
                OpGroupDecorate %7 %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %14
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm64
index ad04b78..3cddeea 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm64

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 28
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -26,11 +26,11 @@
       %ulong = OpTypeInt 64 0
        %uint = OpTypeInt 32 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm32
index 8b41542..0844d73 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm32

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 23
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -24,11 +24,11 @@
                OpGroupDecorate %7 %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %14
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm64
index 0661442..1a7cd92 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm64

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 28
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -26,11 +26,11 @@
       %ulong = OpTypeInt 64 0
        %uint = OpTypeInt 32 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm32
index 3c6aebd..a9ffa99 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm32

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 23
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -24,11 +24,11 @@
                OpGroupDecorate %7 %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %14
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm64
index d85f61a..73ca88b 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm64

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 28
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -26,11 +26,11 @@
       %ulong = OpTypeInt 64 0
        %uint = OpTypeInt 32 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm32
index baf2707..0642373 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm32

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 22
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -25,11 +25,11 @@
        %uint = OpTypeInt 32 0
      %uint_0 = OpConstant %uint 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm64
index 7be5e17..654b497 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm64

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 26
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -27,11 +27,11 @@
        %uint = OpTypeInt 32 0
      %uint_0 = OpConstant %uint 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm32
index 49e49da..4e80aef 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm32

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 25
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -25,11 +25,11 @@
        %uint = OpTypeInt 32 0
     %uint_31 = OpConstant %uint 31
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm64
index 572b724..82c83af 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm64

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 30
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -27,11 +27,11 @@
        %uint = OpTypeInt 32 0
     %uint_31 = OpConstant %uint 31
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm32
index 764f1c4..bd8f3aa 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm32

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 25
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -25,11 +25,11 @@
        %uint = OpTypeInt 32 0
     %uint_31 = OpConstant %uint 31
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm64
index 325f413..a0e8a75 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm64

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 30
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -27,11 +27,11 @@
        %uint = OpTypeInt 32 0
     %uint_31 = OpConstant %uint 31
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm32
index f2f28a1..9dc688e 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm32

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 23
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -24,11 +24,11 @@
                OpDecorate %8 NoSignedWrap
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %14
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm64
index 159629a..a010d1c 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm64

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 28
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -26,11 +26,11 @@
       %ulong = OpTypeInt 64 0
        %uint = OpTypeInt 32 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm32
index e2dc884..43ce257 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm32

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 23
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -24,11 +24,11 @@
                OpDecorate %8 NoUnsignedWrap
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %14
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm64
index 4dfdc80..e68e559 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm64

@@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 28
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -26,11 +26,11 @@
       %ulong = OpTypeInt 64 0
        %uint = OpTypeInt 32 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm32
index ee4be5a..9a4a933 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm64
index 7a7f3e1..9e51b19 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm32 b/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm32
index b8cab7e..111d6c8 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm64 b/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm64
index a4571db..b77917c 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,14 +23,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm32
index 482f9a7..ef7d045 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,12 +21,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm64
index 21d5977..15b5652 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm32
index d3c0e6d..6f318ad 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm64
index 76519c3..7c83efa 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,14 +22,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm32
index f818cb4..7f9ff11 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half

diff --git a/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm64
index 1af4127..7f069cc 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half

diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm32
index 6e25c8f..eae4280 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm64
index efbd1e2..613178f 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm32 b/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm32
index 5169102..dff1bd9 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm64 b/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm64
index 9b73c4f..0906974 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,14 +23,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm32
index 69efe6f..f12a75f 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,12 +21,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm64
index 06676f9..d9f7e37 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm32
index 3f1324a..2e494d9 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm64
index aeab8e8..bcfbee7 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,14 +22,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm32
index 35c8c87..03c56e5 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half

diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm64
index 9630ec1..1001b32 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half

diff --git a/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm32
index 06d65d2..477f7b3 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm64
index 92121ec..a5c4cd4 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm32
index f50bb99..c87a039 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm64
index bdaa98e..8fd6538 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,14 +23,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm32
index d1e7961..89e6708 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,12 +21,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm64
index ce085d5..e9262b1 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm32
index 60235ea..e23bce8 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm64
index 58663d6..aaf4206 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,14 +22,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm32
index 1884299..68f0557 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half

diff --git a/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm64
index 62798c2..9778bc0 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half

diff --git a/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm32
index a9c4779..38db8d8 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm64
index 308f2d9..40b13b0 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm32
index 2101c7f..0d63134 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm64
index 05d0113..5cb1dae 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,14 +23,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm32
index 284739f..0b32b87 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,12 +21,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm64
index f082b9f..e10a425 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm32
index ca9a1f3..1884e58 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm64
index f1e9b99..63c5dd7 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,14 +22,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm32
index 1cdf4a4..efeee4e 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half

diff --git a/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm64
index 0061e32..a944ced 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half

diff --git a/test_conformance/spirv_new/spirv_asm/frem_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/frem_double.spvasm32
index 091fb08..059bcb7 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/frem_double.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/frem_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/frem_double.spvasm64
index 4675c1d..e9372cd 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/frem_double.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm32 b/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm32
index ef8050b..6b6df42 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm64 b/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm64
index a92e95b..afcd086 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,14 +23,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/frem_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/frem_float.spvasm32
index 5cfd0cd..8a67ea3 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/frem_float.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,12 +21,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/frem_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/frem_float.spvasm64
index 36b631d..4b08706 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/frem_float.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm32
index 0b0b6e8..d2f06b4 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm64
index 55cc605..efecb17 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,14 +22,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/frem_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/frem_half.spvasm32
index e4a8b8a..f6fc04e 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_half.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/frem_half.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half

diff --git a/test_conformance/spirv_new/spirv_asm/frem_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/frem_half.spvasm64
index 8cfe36a..0608963 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_half.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/frem_half.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half

diff --git a/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm32
index b9d3d9f..a8f161f 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm64
index 8cdbfe3..36beb94 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm32 b/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm32
index f892275..6a3aa18 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm64 b/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm64
index 3ac187d..1c73bc9 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,14 +23,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm32
index 70f93da..95d58eb 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,12 +21,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm64
index 603974d..ff1089f 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm32
index a992d6e..039864f 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm64
index 2f3ad1b..4b7ace2 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,14 +22,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm32
index 09112d7..c8698a4 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half

diff --git a/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm64
index 4fd41e2..7889c84 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half

diff --git a/test_conformance/spirv_new/spirv_asm/label_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/label_simple.spvasm32
index e5b826b..5fd4630 100644
--- a/test_conformance/spirv_new/spirv_asm/label_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/label_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "label_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "label_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/label_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/label_simple.spvasm64
index e496c25..2c403fa 100644
--- a/test_conformance/spirv_new/spirv_asm/label_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/label_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "label_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "label_simple"
                OpName %in "in"
                OpName %out "out"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm32
index df2cb59..644607a 100644
--- a/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "lifetime_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "lifetime_simple"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm64
index d24242f..06e4b81 100644
--- a/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "lifetime_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "lifetime_simple"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -29,7 +29,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm32 b/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm32
index 79217e7..0171ce3 100644
--- a/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "test_linkage" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "test_linkage"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %4 LinkageAttributes "simple_fnegate_linkage" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
          %12 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %4 = OpFunction %float Const %12
          %13 = OpFunctionParameter %float
                OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm64 b/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm64
index 707d853..c3212a2 100644
--- a/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "test_linkage" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "test_linkage"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %4 LinkageAttributes "simple_fnegate_linkage" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
          %13 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %4 = OpFunction %float Const %13
          %14 = OpFunctionParameter %float
                OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm32 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm32
index 790dfa8..d9a7a9b 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_dont_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_dont_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -20,7 +20,7 @@
                OpGroupDecorate %6 %res %in
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm64 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm64
index d7e21e4..286cbd8 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_dont_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_dont_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -21,7 +21,7 @@
                OpGroupDecorate %6 %res %in
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -31,7 +31,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm32 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm32
index 50ccc63..2e556de 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_none"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -20,7 +20,7 @@
                OpGroupDecorate %6 %res %in
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm64 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm64
index c6e88ed..cef58ed 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_none"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -21,7 +21,7 @@
                OpGroupDecorate %6 %res %in
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -31,7 +31,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm32 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm32
index ad94ac0..34e0d4a 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -20,7 +20,7 @@
                OpGroupDecorate %6 %res %in
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm64 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm64
index b6b6f7a..7abaa6f 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -21,7 +21,7 @@
                OpGroupDecorate %6 %res %in
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -31,7 +31,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm32 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm32
index e6ef6b6..efa6872 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_dont_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_dont_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -20,7 +20,7 @@
                OpGroupDecorate %6 %res %in
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm64 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm64
index 932ca39..6f58f2c 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_dont_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_dont_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -21,7 +21,7 @@
                OpGroupDecorate %6 %res %in
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -31,7 +31,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm32 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm32
index e545284..b925d5d 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_none"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -20,7 +20,7 @@
                OpGroupDecorate %6 %res %in
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm64 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm64
index e123666..3164d2b 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_none"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -21,7 +21,7 @@
                OpGroupDecorate %6 %res %in
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -31,7 +31,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm32 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm32
index 4358262..f5387ab 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -20,7 +20,7 @@
                OpGroupDecorate %6 %res %in
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm64 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm64
index 7a9520a..fb6fcb6 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -21,7 +21,7 @@
                OpGroupDecorate %6 %res %in
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -31,7 +31,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm32
index 9dc1561..b6640b8 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_function_const" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_const"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
          %11 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %12 = OpFunction %float Const %11
          %13 = OpFunctionParameter %float
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm64
index 58a497c..9704356 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_function_const" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_const"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
          %12 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
          %13 = OpFunction %float Const %12
          %14 = OpFunctionParameter %float
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm32
index f344df3..6067d66 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_function_inline" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_inline"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
          %11 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %12 = OpFunction %float Inline %11
          %13 = OpFunctionParameter %float
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm64
index ee4e9c9..f5b4a7a 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_function_inline" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_inline"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
          %12 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
          %13 = OpFunction %float Inline %12
          %14 = OpFunctionParameter %float
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm32
index 8c93a49..2c6b8e0 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_function_noinline" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_noinline"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
          %11 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %12 = OpFunction %float DontInline %11
          %13 = OpFunctionParameter %float
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm64
index cdf1990..67db809 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_function_noinline" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_noinline"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
          %12 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
          %13 = OpFunction %float DontInline %12
          %14 = OpFunctionParameter %float
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm32
index cd4345b..634df87 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_function_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_none"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
          %11 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %12 = OpFunction %float None %11
          %13 = OpFunctionParameter %float
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm64
index f242ae7..451f9b7 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_function_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_none"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
          %12 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
          %13 = OpFunction %float None %12
          %14 = OpFunctionParameter %float
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm32
index 4922f67..f4e3d67 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_function_pure" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_pure"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
          %11 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %12 = OpFunction %float Pure %11
          %13 = OpFunctionParameter %float
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm64
index 8c792ce..7ee623d 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_function_pure" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_pure"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
          %12 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
          %13 = OpFunction %float Pure %12
          %14 = OpFunctionParameter %float
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm32
index b8f958d..deab3d7 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_function_pure_ptr" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_pure_ptr"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
@@ -23,7 +23,7 @@
     %uint_32 = OpConstant %uint 32
          %12 = OpTypeFunction %float %_ptr_CrossWorkgroup_float %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %uint %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %14 = OpFunction %float Pure %12
          %15 = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %16 = OpFunctionParameter %uint

diff --git a/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm64
index 4a5896d..3ebe9e4 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_function_pure_ptr" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_pure_ptr"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
@@ -24,7 +24,7 @@
    %ulong_32 = OpConstant %ulong 32
          %12 = OpTypeFunction %float %_ptr_CrossWorkgroup_float %ulong
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %ulong %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
          %14 = OpFunction %float Pure %12
          %15 = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %16 = OpFunctionParameter %ulong

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm32
index 868565c..8864d09 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_double" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_double"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm64
index 38e71c9..0e012e8 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_double" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_double"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm32
index e3cd3c9..90e5d6f 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_float" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_float"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm64
index 085d114..d3b6ce2 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_float" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_float"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm32
index c65b7e9..8752e74 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_float4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_float4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm64
index 16c647b..6909517 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_float4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_float4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm32
index 98bf6f7..ed59bd7 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_int"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm64
index 6dd86da..241871b 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_int"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm32
index 61042e4..0e111b0 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_int4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_int4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm64
index e741ef7..e1782d6 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_int4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_int4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm32
index ab9d29d..c513fd3 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_long"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm64
index 2eeb565..edee512 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_long"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm32
index 18e7040..8e59eef 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_short" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_short"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm64
index 546b104..ca75370 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_short" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_short"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm32
index 8608e3a..ef8f7b0 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_not_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_int"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm64
index bffdfb7..f796379 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_not_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_int"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm32
index caf5bee..07b900b 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_not_int4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_int4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm64
index f39c703..7eba008 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_not_int4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_int4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm32
index a94c8c5..f88819d 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_not_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_long"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm64
index 0cb309f..41dcc9f 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_not_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_long"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm32
index 15d5284..3718b91 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_not_short" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_short"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm64
index e250711..0d371c3 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_not_short" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_short"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_double_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_double_simple.spvasm32
deleted file mode 100644
index 00cd989..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_double_simple.spvasm32
+++ /dev/null

@@ -1,36 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Float64
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.double*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 8
-               OpDecorate %double_0 SpecId 101
-     %double = OpTypeFloat 64
-%_ptr_UniformConstant_double = OpTypePointer UniformConstant %double
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
-   %double_0 = OpSpecConstant %double 0
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_double UniformConstant %double_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_double
-      %entry = OpLabel
-         %12 = OpLoad %double %output_value Aligned 8
-         %13 = OpLoad %double %spec_const_kernel_spec_constant_value Aligned 8
-        %add = OpFAdd %double %12 %13
-               OpStore %output_value %add Aligned 8
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_double_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_double_simple.spvasm64
deleted file mode 100644
index b50501b..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_double_simple.spvasm64
+++ /dev/null

@@ -1,36 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Float64
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.double*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 8
-               OpDecorate %double_0 SpecId 101
-     %double = OpTypeFloat 64
-%_ptr_UniformConstant_double = OpTypePointer UniformConstant %double
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
-   %double_0 = OpSpecConstant %double 0
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_double UniformConstant %double_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_double
-      %entry = OpLabel
-         %12 = OpLoad %double %output_value Aligned 8
-         %13 = OpLoad %double %spec_const_kernel_spec_constant_value Aligned 8
-        %add = OpFAdd %double %12 %13
-               OpStore %output_value %add Aligned 8
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_false_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_false_simple.spvasm32
deleted file mode 100644
index 8492474..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_false_simple.spvasm32
+++ /dev/null

@@ -1,55 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 28
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Linkage
-               OpCapability Kernel
-               OpCapability Int8
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %11 "spec_const_kernel"
-         %27 = OpString "kernel_arg_type.spec_const_kernel.uchar*,"
-               OpSource OpenCL_C 102000
-               OpName %test_value "test_value"
-               OpName %entry "entry"
-               OpName %output_value "output_value"
-               OpName %entry_0 "entry"
-               OpName %if_then "if.then"
-               OpName %if_end "if.end"
-               OpDecorate %test_value FuncParamAttr Zext
-               OpDecorate %test_value LinkageAttributes "test_value" Export
-               OpDecorate %false SpecId 101
-      %uchar = OpTypeInt 8 0
-       %uint = OpTypeInt 32 0
-    %uchar_0 = OpConstant %uchar 0
-    %uchar_1 = OpConstant %uchar 1
-     %uint_1 = OpConstant %uint 1
-       %bool = OpTypeBool
-          %3 = OpTypeFunction %bool
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
-         %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-      %false = OpSpecConstantFalse %bool
- %test_value = OpFunction %bool None %3
-      %entry = OpLabel
-               OpReturnValue %false
-               OpFunctionEnd
-         %11 = OpFunction %void None %10
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
-    %entry_0 = OpLabel
-       %call = OpFunctionCall %bool %test_value
-   %frombool = OpSelect %uchar %call %uchar_1 %uchar_0
-     %tobool = OpINotEqual %bool %frombool %uchar_0
-               OpBranchConditional %tobool %if_then %if_end
-    %if_then = OpLabel
-         %21 = OpLoad %uchar %output_value Aligned 1
-       %conv = OpUConvert %uint %21
-        %add = OpIAdd %uint %conv %uint_1
-      %conv1 = OpUConvert %uchar %add
-               OpStore %output_value %conv1 Aligned 1
-               OpBranch %if_end
-     %if_end = OpLabel
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_false_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_false_simple.spvasm64
deleted file mode 100644
index c76bccc..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_false_simple.spvasm64
+++ /dev/null

@@ -1,55 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 28
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Linkage
-               OpCapability Kernel
-               OpCapability Int8
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %11 "spec_const_kernel"
-         %27 = OpString "kernel_arg_type.spec_const_kernel.uchar*,"
-               OpSource OpenCL_C 102000
-               OpName %test_value "test_value"
-               OpName %entry "entry"
-               OpName %output_value "output_value"
-               OpName %entry_0 "entry"
-               OpName %if_then "if.then"
-               OpName %if_end "if.end"
-               OpDecorate %test_value FuncParamAttr Zext
-               OpDecorate %test_value LinkageAttributes "test_value" Export
-               OpDecorate %false SpecId 101
-      %uchar = OpTypeInt 8 0
-       %uint = OpTypeInt 32 0
-    %uchar_0 = OpConstant %uchar 0
-    %uchar_1 = OpConstant %uchar 1
-     %uint_1 = OpConstant %uint 1
-       %bool = OpTypeBool
-          %3 = OpTypeFunction %bool
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
-         %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-      %false = OpSpecConstantFalse %bool
- %test_value = OpFunction %bool None %3
-      %entry = OpLabel
-               OpReturnValue %false
-               OpFunctionEnd
-         %11 = OpFunction %void None %10
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
-    %entry_0 = OpLabel
-       %call = OpFunctionCall %bool %test_value
-   %frombool = OpSelect %uchar %call %uchar_1 %uchar_0
-     %tobool = OpINotEqual %bool %frombool %uchar_0
-               OpBranchConditional %tobool %if_then %if_end
-    %if_then = OpLabel
-         %21 = OpLoad %uchar %output_value Aligned 1
-       %conv = OpUConvert %uint %21
-        %add = OpIAdd %uint %conv %uint_1
-      %conv1 = OpUConvert %uchar %add
-               OpStore %output_value %conv1 Aligned 1
-               OpBranch %if_end
-     %if_end = OpLabel
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_float_simple.spvasm32
deleted file mode 100644
index b3163af..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_float_simple.spvasm32
+++ /dev/null

@@ -1,35 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.float*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 4
-               OpDecorate %float_0 SpecId 101
-      %float = OpTypeFloat 32
-%_ptr_UniformConstant_float = OpTypePointer UniformConstant %float
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
-    %float_0 = OpSpecConstant %float 0
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_float UniformConstant %float_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_float
-      %entry = OpLabel
-         %12 = OpLoad %float %output_value Aligned 4
-         %13 = OpLoad %float %spec_const_kernel_spec_constant_value Aligned 4
-        %add = OpFAdd %float %12 %13
-               OpStore %output_value %add Aligned 4
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_float_simple.spvasm64
deleted file mode 100644
index 006cdc7..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_float_simple.spvasm64
+++ /dev/null

@@ -1,35 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.float*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 4
-               OpDecorate %float_0 SpecId 101
-      %float = OpTypeFloat 32
-%_ptr_UniformConstant_float = OpTypePointer UniformConstant %float
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
-    %float_0 = OpSpecConstant %float 0
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_float UniformConstant %float_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_float
-      %entry = OpLabel
-         %12 = OpLoad %float %output_value Aligned 4
-         %13 = OpLoad %float %spec_const_kernel_spec_constant_value Aligned 4
-        %add = OpFAdd %float %12 %13
-               OpStore %output_value %add Aligned 4
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_half_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_half_simple.spvasm32
deleted file mode 100644
index 2195ebe..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_half_simple.spvasm32
+++ /dev/null

@@ -1,36 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Float16Buffer
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.half*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 2
-               OpDecorate %half_0x0p_0 SpecId 101
-       %half = OpTypeFloat 16
-%_ptr_UniformConstant_half = OpTypePointer UniformConstant %half
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_half
-%half_0x0p_0 = OpSpecConstant %half 0x0p+0
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_half UniformConstant %half_0x0p_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_half
-      %entry = OpLabel
-         %12 = OpLoad %half %output_value Aligned 2
-         %13 = OpLoad %half %spec_const_kernel_spec_constant_value Aligned 2
-        %add = OpFAdd %half %12 %13
-               OpStore %output_value %add Aligned 2
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_half_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_half_simple.spvasm64
deleted file mode 100644
index 47dc418..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_half_simple.spvasm64
+++ /dev/null

@@ -1,36 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Float16Buffer
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.half*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 2
-               OpDecorate %half_0x0p_0 SpecId 101
-       %half = OpTypeFloat 16
-%_ptr_UniformConstant_half = OpTypePointer UniformConstant %half
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_half
-%half_0x0p_0 = OpSpecConstant %half 0x0p+0
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_half UniformConstant %half_0x0p_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_half
-      %entry = OpLabel
-         %12 = OpLoad %half %output_value Aligned 2
-         %13 = OpLoad %half %spec_const_kernel_spec_constant_value Aligned 2
-        %add = OpFAdd %half %12 %13
-               OpStore %output_value %add Aligned 2
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_true_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_true_simple.spvasm32
deleted file mode 100644
index 834b85d..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_true_simple.spvasm32
+++ /dev/null

@@ -1,55 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 28
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Linkage
-               OpCapability Kernel
-               OpCapability Int8
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %11 "spec_const_kernel"
-         %27 = OpString "kernel_arg_type.spec_const_kernel.uchar*,"
-               OpSource OpenCL_C 102000
-               OpName %test_value "test_value"
-               OpName %entry "entry"
-               OpName %output_value "output_value"
-               OpName %entry_0 "entry"
-               OpName %if_then "if.then"
-               OpName %if_end "if.end"
-               OpDecorate %test_value FuncParamAttr Zext
-               OpDecorate %test_value LinkageAttributes "test_value" Export
-               OpDecorate %true SpecId 101
-      %uchar = OpTypeInt 8 0
-       %uint = OpTypeInt 32 0
-    %uchar_0 = OpConstant %uchar 0
-    %uchar_1 = OpConstant %uchar 1
-     %uint_1 = OpConstant %uint 1
-       %bool = OpTypeBool
-          %3 = OpTypeFunction %bool
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
-         %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-       %true = OpSpecConstantTrue %bool
- %test_value = OpFunction %bool None %3
-      %entry = OpLabel
-               OpReturnValue %true
-               OpFunctionEnd
-         %11 = OpFunction %void None %10
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
-    %entry_0 = OpLabel
-       %call = OpFunctionCall %bool %test_value
-   %frombool = OpSelect %uchar %call %uchar_1 %uchar_0
-     %tobool = OpINotEqual %bool %frombool %uchar_0
-               OpBranchConditional %tobool %if_end %if_then
-    %if_then = OpLabel
-         %21 = OpLoad %uchar %output_value Aligned 1
-       %conv = OpUConvert %uint %21
-        %add = OpIAdd %uint %conv %uint_1
-      %conv1 = OpUConvert %uchar %add
-               OpStore %output_value %conv1 Aligned 1
-               OpBranch %if_end
-     %if_end = OpLabel
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_true_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_true_simple.spvasm64
deleted file mode 100644
index 83ce4d6..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_true_simple.spvasm64
+++ /dev/null

@@ -1,55 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 28
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Linkage
-               OpCapability Kernel
-               OpCapability Int8
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %11 "spec_const_kernel"
-         %27 = OpString "kernel_arg_type.spec_const_kernel.uchar*,"
-               OpSource OpenCL_C 102000
-               OpName %test_value "test_value"
-               OpName %entry "entry"
-               OpName %output_value "output_value"
-               OpName %entry_0 "entry"
-               OpName %if_then "if.then"
-               OpName %if_end "if.end"
-               OpDecorate %test_value FuncParamAttr Zext
-               OpDecorate %test_value LinkageAttributes "test_value" Export
-               OpDecorate %true SpecId 101
-      %uchar = OpTypeInt 8 0
-       %uint = OpTypeInt 32 0
-    %uchar_0 = OpConstant %uchar 0
-    %uchar_1 = OpConstant %uchar 1
-     %uint_1 = OpConstant %uint 1
-       %bool = OpTypeBool
-          %3 = OpTypeFunction %bool
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
-         %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-       %true = OpSpecConstantTrue %bool
- %test_value = OpFunction %bool None %3
-      %entry = OpLabel
-               OpReturnValue %true
-               OpFunctionEnd
-         %11 = OpFunction %void None %10
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
-    %entry_0 = OpLabel
-       %call = OpFunctionCall %bool %test_value
-   %frombool = OpSelect %uchar %call %uchar_1 %uchar_0
-     %tobool = OpINotEqual %bool %frombool %uchar_0
-               OpBranchConditional %tobool %if_end %if_then
-    %if_then = OpLabel
-         %21 = OpLoad %uchar %output_value Aligned 1
-       %conv = OpUConvert %uint %21
-        %add = OpIAdd %uint %conv %uint_1
-      %conv1 = OpUConvert %uchar %add
-               OpStore %output_value %conv1 Aligned 1
-               OpBranch %if_end
-     %if_end = OpLabel
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uchar_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_uchar_simple.spvasm32
deleted file mode 100644
index 0e832b4..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uchar_simple.spvasm32
+++ /dev/null

@@ -1,37 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 20
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Int8
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %19 = OpString "kernel_arg_type.spec_const_kernel.uchar*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 1
-               OpDecorate %uchar_0 SpecId 101
-      %uchar = OpTypeInt 8 0
-       %uint = OpTypeInt 32 0
-    %uchar_0 = OpSpecConstant %uchar 0
-%_ptr_UniformConstant_uchar = OpTypePointer UniformConstant %uchar
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_uchar UniformConstant %uchar_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
-      %entry = OpLabel
-         %12 = OpLoad %uchar %output_value Aligned 1
-         %15 = OpLoad %uchar %spec_const_kernel_spec_constant_value Aligned 1
-        %add = OpIAdd %uchar %12 %15
-               OpStore %output_value %add Aligned 1
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uchar_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_uchar_simple.spvasm64
deleted file mode 100644
index 89150fc..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uchar_simple.spvasm64
+++ /dev/null

@@ -1,37 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 20
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Int8
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %19 = OpString "kernel_arg_type.spec_const_kernel.uchar*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 1
-               OpDecorate %uchar_0 SpecId 101
-      %uchar = OpTypeInt 8 0
-       %uint = OpTypeInt 32 0
-    %uchar_0 = OpSpecConstant %uchar 0
-%_ptr_UniformConstant_uchar = OpTypePointer UniformConstant %uchar
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_uchar UniformConstant %uchar_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
-      %entry = OpLabel
-         %12 = OpLoad %uchar %output_value Aligned 1
-         %15 = OpLoad %uchar %spec_const_kernel_spec_constant_value Aligned 1
-        %add = OpIAdd %uchar %12 %15
-               OpStore %output_value %add Aligned 1
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uint_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_uint_simple.spvasm32
deleted file mode 100644
index 7bf0f12..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uint_simple.spvasm32
+++ /dev/null

@@ -1,35 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.uint*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 4
-               OpDecorate %uint_0 SpecId 101
-       %uint = OpTypeInt 32 0
-     %uint_0 = OpSpecConstant %uint 0
-%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uint
-      %entry = OpLabel
-         %12 = OpLoad %uint %output_value Aligned 4
-         %13 = OpLoad %uint %spec_const_kernel_spec_constant_value Aligned 4
-        %add = OpIAdd %uint %12 %13
-               OpStore %output_value %add Aligned 4
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uint_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_uint_simple.spvasm64
deleted file mode 100644
index a73bf24..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uint_simple.spvasm64
+++ /dev/null

@@ -1,35 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.uint*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 4
-               OpDecorate %uint_0 SpecId 101
-       %uint = OpTypeInt 32 0
-     %uint_0 = OpSpecConstant %uint 0
-%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uint
-      %entry = OpLabel
-         %12 = OpLoad %uint %output_value Aligned 4
-         %13 = OpLoad %uint %spec_const_kernel_spec_constant_value Aligned 4
-        %add = OpIAdd %uint %12 %13
-               OpStore %output_value %add Aligned 4
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ulong_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_ulong_simple.spvasm32
deleted file mode 100644
index c4e2ef7..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ulong_simple.spvasm32
+++ /dev/null

@@ -1,36 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Int64
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.ulong*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 8
-               OpDecorate %ulong_0 SpecId 101
-      %ulong = OpTypeInt 64 0
-    %ulong_0 = OpSpecConstant %ulong 0
-%_ptr_UniformConstant_ulong = OpTypePointer UniformConstant %ulong
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_ulong UniformConstant %ulong_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
-      %entry = OpLabel
-         %12 = OpLoad %ulong %output_value Aligned 8
-         %13 = OpLoad %ulong %spec_const_kernel_spec_constant_value Aligned 8
-        %add = OpIAdd %ulong %12 %13
-               OpStore %output_value %add Aligned 8
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ulong_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_ulong_simple.spvasm64
deleted file mode 100644
index 3ec3a8f..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ulong_simple.spvasm64
+++ /dev/null

@@ -1,36 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Int64
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.ulong*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 8
-               OpDecorate %ulong_0 SpecId 101
-      %ulong = OpTypeInt 64 0
-    %ulong_0 = OpSpecConstant %ulong 0
-%_ptr_UniformConstant_ulong = OpTypePointer UniformConstant %ulong
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_ulong UniformConstant %ulong_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
-      %entry = OpLabel
-         %12 = OpLoad %ulong %output_value Aligned 8
-         %13 = OpLoad %ulong %spec_const_kernel_spec_constant_value Aligned 8
-        %add = OpIAdd %ulong %12 %13
-               OpStore %output_value %add Aligned 8
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ushort_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_ushort_simple.spvasm32
deleted file mode 100644
index d501207..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ushort_simple.spvasm32
+++ /dev/null

@@ -1,36 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 20
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Int16
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %19 = OpString "kernel_arg_type.spec_const_kernel.ushort*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 2
-               OpDecorate %ushort_0 SpecId 101
-     %ushort = OpTypeInt 16 0
-       %uint = OpTypeInt 32 0
-   %ushort_0 = OpSpecConstant %ushort 0
-%_ptr_UniformConstant_ushort = OpTypePointer UniformConstant %ushort
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_ushort UniformConstant %ushort_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
-      %entry = OpLabel
-         %12 = OpLoad %ushort %output_value Aligned 2
-         %15 = OpLoad %ushort %spec_const_kernel_spec_constant_value Aligned 2
-        %add = OpIAdd %ushort %12 %15
-               OpStore %output_value %add Aligned 2
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ushort_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_ushort_simple.spvasm64
deleted file mode 100644
index 27f5fb8..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ushort_simple.spvasm64
+++ /dev/null

@@ -1,36 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 20
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Int16
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %19 = OpString "kernel_arg_type.spec_const_kernel.ushort*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 2
-               OpDecorate %ushort_0 SpecId 101
-     %ushort = OpTypeInt 16 0
-       %uint = OpTypeInt 32 0
-   %ushort_0 = OpSpecConstant %ushort 0
-%_ptr_UniformConstant_ushort = OpTypePointer UniformConstant %ushort
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_ushort UniformConstant %ushort_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
-      %entry = OpLabel
-         %12 = OpLoad %ushort %output_value Aligned 2
-         %15 = OpLoad %ushort %spec_const_kernel_spec_constant_value Aligned 2
-        %add = OpIAdd %ushort %12 %15
-               OpStore %output_value %add Aligned 2
-               OpReturn
-               OpFunctionEnd

diff --git a/test_conformance/spirv_new/spirv_asm/opaque.spvasm32 b/test_conformance/spirv_new/spirv_asm/opaque.spvasm32
index e9a0a5a..ef133c1 100644
--- a/test_conformance/spirv_new/spirv_asm/opaque.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/opaque.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "opaque" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "opaque"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %4 LinkageAttributes "opaque_store" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %Opaque_opaque_t = OpTypeOpaque "opaque_t"
 %_ptr_CrossWorkgroup_Opaque_opaque_t = OpTypePointer CrossWorkgroup %Opaque_opaque_t
 %float_3_14159274 = OpConstant %float 3.14159274
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_Opaque_opaque_t
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_Opaque_opaque_t %uint %float
           %4 = OpFunction %void None %14
          %15 = OpFunctionParameter %_ptr_CrossWorkgroup_Opaque_opaque_t

diff --git a/test_conformance/spirv_new/spirv_asm/opaque.spvasm64 b/test_conformance/spirv_new/spirv_asm/opaque.spvasm64
index 0759c28..50ef5a6 100644
--- a/test_conformance/spirv_new/spirv_asm/opaque.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/opaque.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "opaque" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "opaque"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,12 +17,12 @@
                OpDecorate %4 LinkageAttributes "opaque_store" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
    %ulong_32 = OpConstant %ulong 32
 %float_3_14159274 = OpConstant %float 3.14159274
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
 %Opaque_opaque_t = OpTypeOpaque "opaque_t"
 %_ptr_CrossWorkgroup_Opaque_opaque_t = OpTypePointer CrossWorkgroup %Opaque_opaque_t
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_Opaque_opaque_t

diff --git a/test_conformance/spirv_new/spirv_asm/phi_2.spvasm32 b/test_conformance/spirv_new/spirv_asm/phi_2.spvasm32
index 7a6311a..adcfac7 100644
--- a/test_conformance/spirv_new/spirv_asm/phi_2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/phi_2.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "phi_2" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "phi_2"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,12 +19,12 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/phi_2.spvasm64 b/test_conformance/spirv_new/spirv_asm/phi_2.spvasm64
index 538c5f7..86ef7c3 100644
--- a/test_conformance/spirv_new/spirv_asm/phi_2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/phi_2.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "phi_2" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "phi_2"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,14 +20,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/phi_3.spvasm32 b/test_conformance/spirv_new/spirv_asm/phi_3.spvasm32
index fef5c24..61ce608 100644
--- a/test_conformance/spirv_new/spirv_asm/phi_3.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/phi_3.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "phi_3" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "phi_3"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,13 +19,13 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/phi_3.spvasm64 b/test_conformance/spirv_new/spirv_asm/phi_3.spvasm64
index 9ea0fc8..4029dd6 100644
--- a/test_conformance/spirv_new/spirv_asm/phi_3.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/phi_3.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "phi_3" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "phi_3"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,14 +21,14 @@
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
    %ulong_32 = OpConstant %ulong 32
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/phi_4.spvasm32 b/test_conformance/spirv_new/spirv_asm/phi_4.spvasm32
index 575ff3d..3ed6808 100644
--- a/test_conformance/spirv_new/spirv_asm/phi_4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/phi_4.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "phi_4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "phi_4"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,13 +19,13 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/phi_4.spvasm64 b/test_conformance/spirv_new/spirv_asm/phi_4.spvasm64
index ffa1fc1..ab07cad 100644
--- a/test_conformance/spirv_new/spirv_asm/phi_4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/phi_4.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "phi_4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "phi_4"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -28,7 +28,7 @@
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm32 b/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm32
index 53883e3..d3baef2 100644
--- a/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "select_if_dont_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_if_dont_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm64 b/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm64
index 5939b71..5dd2d99 100644
--- a/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "select_if_dont_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_if_dont_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -29,7 +29,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm32 b/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm32
index 0b05732..3e9fa20 100644
--- a/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "select_if_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_if_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm64 b/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm64
index e3e4cf9..f246ece 100644
--- a/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "select_if_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_if_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -29,7 +29,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm32 b/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm32
index b821788..1aca937 100644
--- a/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "select_if_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_if_none"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm64 b/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm64
index 6a45bce..47526e9 100644
--- a/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "select_if_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_if_none"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -29,7 +29,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm32 b/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm32
index fa459f7..b1b6e04 100644
--- a/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "select_switch_dont_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_switch_dont_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,7 +19,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -30,7 +30,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm64 b/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm64
index cae44be..47b97b9 100644
--- a/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "select_switch_dont_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_switch_dont_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -33,7 +33,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %21 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %21
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm32 b/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm32
index 441fafb..3684162 100644
--- a/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "select_switch_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_switch_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,7 +19,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -30,7 +30,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm64 b/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm64
index 9e56514..1cbfc28 100644
--- a/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "select_switch_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_switch_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -33,7 +33,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %21 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %21
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm32 b/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm32
index ddaf22a..899d64e 100644
--- a/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "select_switch_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_switch_none"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,7 +19,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -30,7 +30,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm64 b/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm64
index 27a3983..c905595 100644
--- a/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "select_switch_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_switch_none"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -33,7 +33,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %21 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %21
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm32
index ad77b2a..fb4c78b 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm64
index 4e4768d..65d657e 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm32
index 631c179..ed4a47d 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_double_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_double_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm64
index 6e30116..6ae714d 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_double_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_double_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm32
index 8886283..d80afe1 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_false_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_false_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,14 +15,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %bool = OpTypeBool
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm64
index 4b781b1..99fc3f6 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_false_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_false_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
        %bool = OpTypeBool
@@ -25,7 +25,7 @@
    %ulong_32 = OpConstant %ulong 32
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm32
index 93a69a8..5b20017 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm64
index f2e1d24..ec56354 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm32
index 6f05643..acebc88 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_half_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_half_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm64
index eff63f8..5f7ffd3 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_half_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_half_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm32
index 7839d45..6247618 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_int3_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_int3_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_v3uint = OpTypePointer CrossWorkgroup %v3uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_v3uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v3uint
          %10 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm64
index 7369f2f..73b543d 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_int3_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_int3_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
 %_ptr_CrossWorkgroup_v3uint = OpTypePointer CrossWorkgroup %v3uint
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_v3uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v3uint
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm32
index 6f30a9b..75f83b5 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_int4_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_int4_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm64
index d42a157..6d0f940 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_int4_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_int4_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm32
index dfa8ab4..f9382c3 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_int_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_int_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm64
index 0aecf10..7d6f8ba 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_int_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_int_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm32
index 759017b..b05e322 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_long_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_long_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm64
index 94b6d21..e67626d 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_long_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_long_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm32
index ab7904c..c4fb091 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_short_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_short_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm64
index 0d31d92..9078c85 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_short_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_short_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm32
index ddb9bf4..a9527fa 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Linkage
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_struct_int_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_struct_int_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
   %_struct_9 = OpTypeStruct %uint %uchar
 %_ptr_CrossWorkgroup__struct_9 = OpTypePointer CrossWorkgroup %_struct_9
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup__struct_9
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_9
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm64
index 41f7856..19185c3 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_struct_int_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_struct_int_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %uchar = OpTypeInt 8 0
@@ -25,7 +25,7 @@
 %_ptr_CrossWorkgroup__struct_10 = OpTypePointer CrossWorkgroup %_struct_10
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup__struct_10
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_10
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm32
index cad9452..69750ca 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Kernel
                OpCapability Linkage
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_struct_int_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_struct_int_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
   %_struct_9 = OpTypeStruct %uint %float
 %_ptr_CrossWorkgroup__struct_9 = OpTypePointer CrossWorkgroup %_struct_9
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup__struct_9
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_9
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm64
index cdeadaa..b748de8 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_struct_int_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_struct_int_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %float = OpTypeFloat 32
@@ -24,7 +24,7 @@
 %_ptr_CrossWorkgroup__struct_10 = OpTypePointer CrossWorkgroup %_struct_10
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup__struct_10
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_10
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm32
index d3605a5..e2812a6 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Linkage
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_struct_struct_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_struct_struct_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v2uint = OpTypeVector %uint 2
       %uchar = OpTypeInt 8 0
@@ -24,7 +24,7 @@
  %_struct_11 = OpTypeStruct %v2uint %_struct_10
 %_ptr_CrossWorkgroup__struct_11 = OpTypePointer CrossWorkgroup %_struct_11
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup__struct_11
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_11
          %14 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm64
index c46f254..35d05d0 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_struct_struct_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_struct_struct_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v2uint = OpTypeVector %uint 2
@@ -27,7 +27,7 @@
 %_ptr_CrossWorkgroup__struct_12 = OpTypePointer CrossWorkgroup %_struct_12
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup__struct_12
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_12
          %16 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm32
index fa4854b..5b356cd 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_true_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_true_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,14 +15,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %bool = OpTypeBool
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %13 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm64
index 352e69c..9bf35d7 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_true_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_true_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
        %bool = OpTypeBool
@@ -25,7 +25,7 @@
    %ulong_32 = OpConstant %ulong 32
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %15 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm32
index d16cb50..868ccfc 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_uchar_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_uchar_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm64
index 4903b9c..4060237 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_uchar_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_uchar_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm32
index 65f38e0..e281b55 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_uint_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_uint_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm64
index bc27d85..1193428 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_uint_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_uint_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm32
index fb343c3..8e79a65 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_ulong_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_ulong_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm64
index 953a0c9..bc595e4 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_ulong_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_ulong_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm32
index ce8380b..5425638 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_ushort_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_ushort_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %11 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm64
index 8fd17af..0dff1dd 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_ushort_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_ushort_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel

diff --git a/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm32
index 20e3a70..1c40b60 100644
--- a/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "unreachable_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "unreachable_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm64
index 0c71180..eefd816 100644
--- a/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "unreachable_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "unreachable_simple"
                OpName %in "in"
                OpName %out "out"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm32
index 48fb4ea..9ea1871 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Vector16
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_char16_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_char16_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -18,14 +18,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
    %v16uchar = OpTypeVector %uchar 16
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
 %_ptr_CrossWorkgroup_v16uchar = OpTypePointer CrossWorkgroup %v16uchar
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_v16uchar %_ptr_CrossWorkgroup_uchar %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v16uchar
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar

diff --git a/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm64
index 16f38a9..ce4d72c 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int8
                OpCapability Vector16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_char16_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_char16_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -19,7 +19,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
 %_ptr_CrossWorkgroup_v16uchar = OpTypePointer CrossWorkgroup %v16uchar
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v16uchar %_ptr_CrossWorkgroup_uchar %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v16uchar
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar

diff --git a/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm32
index 709161f..0ccbbf9 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm32

@@ -9,7 +9,7 @@
                OpCapability Vector16
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_char16_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_char16_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -18,14 +18,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
    %v16uchar = OpTypeVector %uchar 16
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
 %_ptr_CrossWorkgroup_v16uchar = OpTypePointer CrossWorkgroup %v16uchar
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_v16uchar %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v16uchar

diff --git a/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm64
index b5ef30b..dbb7cf5 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm64

@@ -10,7 +10,7 @@
                OpCapability Int8
                OpCapability Vector16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_char16_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_char16_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -19,7 +19,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
 %_ptr_CrossWorkgroup_v16uchar = OpTypePointer CrossWorkgroup %v16uchar
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_v16uchar %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v16uchar

diff --git a/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm32
index ec19d0c..559337c 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_double2_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_double2_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_double %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm64
index 0a6f245..ccf2fb4 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_double2_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_double2_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -18,7 +18,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -27,7 +27,7 @@
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_double %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_double

diff --git a/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm32
index b434804..1d4125a 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_double2_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_double2_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_v2double %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm64
index 126f591..48abb07 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_double2_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_double2_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -18,7 +18,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -27,7 +27,7 @@
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_v2double %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v2double

diff --git a/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm32
index 62fd998..c3a1d9d 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_float4_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_float4_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_float %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm64
index fbec1e3..e7df3b0 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_float4_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_float4_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -26,7 +26,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_float %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_float

diff --git a/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm32
index 16993f5..7cc0d21 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_float4_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_float4_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_v4float %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm64
index e113bb5..24305d2 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_float4_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_float4_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -26,7 +26,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_v4float %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm32
index f64b0bb..441623f 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_int4_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_int4_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %_ptr_CrossWorkgroup_uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm64
index f9903e1..5298a51 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_int4_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_int4_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -25,7 +25,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %_ptr_CrossWorkgroup_uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_uint

diff --git a/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm32
index 265b9cc..e0ae874 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm32

@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_int4_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_int4_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_v4uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint

diff --git a/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm64
index be2f94e..426ace1 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_int4_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_int4_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -25,7 +25,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_v4uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint

diff --git a/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm32
index e925ec2..22925a0 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_long2_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_long2_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
     %v2ulong = OpTypeVector %ulong 2
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
 %_ptr_CrossWorkgroup_v2ulong = OpTypePointer CrossWorkgroup %v2ulong
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2ulong %_ptr_CrossWorkgroup_ulong %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v2ulong
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_ulong

diff --git a/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm64
index b1c9562..5ebac41 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_long2_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_long2_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -25,7 +25,7 @@
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
 %_ptr_CrossWorkgroup_v2ulong = OpTypePointer CrossWorkgroup %v2ulong
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2ulong %_ptr_CrossWorkgroup_ulong %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v2ulong
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_ulong

diff --git a/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm32
index 49bc9f6..d9cafab 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_long2_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_long2_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
     %v2ulong = OpTypeVector %ulong 2
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
 %_ptr_CrossWorkgroup_v2ulong = OpTypePointer CrossWorkgroup %v2ulong
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_v2ulong %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v2ulong

diff --git a/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm64
index 74ae94c..f37e03d 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm64

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_long2_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_long2_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -25,7 +25,7 @@
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
 %_ptr_CrossWorkgroup_v2ulong = OpTypePointer CrossWorkgroup %v2ulong
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_v2ulong %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v2ulong

diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm32
index ec2c415..24d5b2a 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_times_scalar"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,14 +20,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
    %v4double = OpTypeVector %double 4
 %_ptr_CrossWorkgroup_v4double = OpTypePointer CrossWorkgroup %v4double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4double %_ptr_CrossWorkgroup_v4double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4double

diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm64
index cd3755e..43e6487 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_times_scalar"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,7 +21,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %void = OpTypeVoid
      %double = OpTypeFloat 64
@@ -29,7 +29,7 @@
    %v4double = OpTypeVector %double 4
 %_ptr_CrossWorkgroup_v4double = OpTypePointer CrossWorkgroup %v4double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4double %_ptr_CrossWorkgroup_v4double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4double

diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm32
index 5c8b337..7e44936 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm32

@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_times_scalar"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,14 +20,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm64
index ecc078b..0fbd038 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm64

@@ -9,7 +9,7 @@
                OpCapability Int16
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_times_scalar"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,7 +21,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %void = OpTypeVoid
       %float = OpTypeFloat 32
@@ -29,7 +29,7 @@
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float

diff --git a/test_conformance/spirv_new/spirv_bin/atomic_dec_global.spv32 b/test_conformance/spirv_new/spirv_bin/atomic_dec_global.spv32
new file mode 100644
index 0000000..9f07a61
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/atomic_dec_global.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/atomic_dec_global.spv64 b/test_conformance/spirv_new/spirv_bin/atomic_dec_global.spv64
new file mode 100644
index 0000000..d590b8b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/atomic_dec_global.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/atomic_inc_global.spv32 b/test_conformance/spirv_new/spirv_bin/atomic_inc_global.spv32
new file mode 100644
index 0000000..056b457
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/atomic_inc_global.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/atomic_inc_global.spv64 b/test_conformance/spirv_new/spirv_bin/atomic_inc_global.spv64
new file mode 100644
index 0000000..a049df8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/atomic_inc_global.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/branch_conditional.spv32 b/test_conformance/spirv_new/spirv_bin/branch_conditional.spv32
new file mode 100644
index 0000000..2f731ac
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/branch_conditional.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/branch_conditional.spv64 b/test_conformance/spirv_new/spirv_bin/branch_conditional.spv64
new file mode 100644
index 0000000..cf1b875
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/branch_conditional.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/branch_conditional_weighted.spv32 b/test_conformance/spirv_new/spirv_bin/branch_conditional_weighted.spv32
new file mode 100644
index 0000000..a0eb511
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/branch_conditional_weighted.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/branch_conditional_weighted.spv64 b/test_conformance/spirv_new/spirv_bin/branch_conditional_weighted.spv64
new file mode 100644
index 0000000..bb1520f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/branch_conditional_weighted.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/branch_simple.spv32 b/test_conformance/spirv_new/spirv_bin/branch_simple.spv32
new file mode 100644
index 0000000..c3c7103
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/branch_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/branch_simple.spv64 b/test_conformance/spirv_new/spirv_bin/branch_simple.spv64
new file mode 100644
index 0000000..a5795b5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/branch_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/composite_construct_int4.spv32 b/test_conformance/spirv_new/spirv_bin/composite_construct_int4.spv32
new file mode 100644
index 0000000..a17daa5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/composite_construct_int4.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/composite_construct_int4.spv64 b/test_conformance/spirv_new/spirv_bin/composite_construct_int4.spv64
new file mode 100644
index 0000000..279a217
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/composite_construct_int4.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/composite_construct_struct.spv32 b/test_conformance/spirv_new/spirv_bin/composite_construct_struct.spv32
new file mode 100644
index 0000000..4f881c5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/composite_construct_struct.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/composite_construct_struct.spv64 b/test_conformance/spirv_new/spirv_bin/composite_construct_struct.spv64
new file mode 100644
index 0000000..22a21cd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/composite_construct_struct.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_char_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_char_simple.spv32
new file mode 100644
index 0000000..7f0a8fe
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_char_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_char_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_char_simple.spv64
new file mode 100644
index 0000000..0a9b667
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_char_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_double_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_double_simple.spv32
new file mode 100644
index 0000000..bb6b2bc
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_double_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_double_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_double_simple.spv64
new file mode 100644
index 0000000..4cb895c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_double_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_false_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_false_simple.spv32
new file mode 100644
index 0000000..e4602a2
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_false_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_false_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_false_simple.spv64
new file mode 100644
index 0000000..302ac27
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_false_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_float_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_float_simple.spv32
new file mode 100644
index 0000000..a97471b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_float_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_float_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_float_simple.spv64
new file mode 100644
index 0000000..2891abf
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_float_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_half_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_half_simple.spv32
new file mode 100644
index 0000000..84c8019
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_half_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_half_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_half_simple.spv64
new file mode 100644
index 0000000..a05102a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_half_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_int3_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_int3_simple.spv32
new file mode 100644
index 0000000..879ed90
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_int3_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_int3_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_int3_simple.spv64
new file mode 100644
index 0000000..ea5580e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_int3_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_int4_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_int4_simple.spv32
new file mode 100644
index 0000000..4eb2ff5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_int4_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_int4_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_int4_simple.spv64
new file mode 100644
index 0000000..e4aeb68
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_int4_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_int_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_int_simple.spv32
new file mode 100644
index 0000000..34ed429
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_int_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_int_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_int_simple.spv64
new file mode 100644
index 0000000..ce03e97
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_int_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_long_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_long_simple.spv32
new file mode 100644
index 0000000..b99b03f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_long_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_long_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_long_simple.spv64
new file mode 100644
index 0000000..04a00d9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_long_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_short_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_short_simple.spv32
new file mode 100644
index 0000000..b121db4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_short_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_short_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_short_simple.spv64
new file mode 100644
index 0000000..3f23030
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_short_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_struct_int_char_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_struct_int_char_simple.spv32
new file mode 100644
index 0000000..4b57e43
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_struct_int_char_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_struct_int_char_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_struct_int_char_simple.spv64
new file mode 100644
index 0000000..39a461c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_struct_int_char_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_struct_int_float_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_struct_int_float_simple.spv32
new file mode 100644
index 0000000..7a9b688
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_struct_int_float_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_struct_int_float_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_struct_int_float_simple.spv64
new file mode 100644
index 0000000..b77f547
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_struct_int_float_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_struct_struct_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_struct_struct_simple.spv32
new file mode 100644
index 0000000..853af31
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_struct_struct_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_struct_struct_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_struct_struct_simple.spv64
new file mode 100644
index 0000000..98ee089
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_struct_struct_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_true_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_true_simple.spv32
new file mode 100644
index 0000000..582afa6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_true_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_true_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_true_simple.spv64
new file mode 100644
index 0000000..1f2306d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_true_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_uchar_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_uchar_simple.spv32
new file mode 100644
index 0000000..1b63faf
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_uchar_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_uchar_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_uchar_simple.spv64
new file mode 100644
index 0000000..8bf38d0
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_uchar_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_uint_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_uint_simple.spv32
new file mode 100644
index 0000000..2fa31dd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_uint_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_uint_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_uint_simple.spv64
new file mode 100644
index 0000000..eb8ea01
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_uint_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_ulong_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_ulong_simple.spv32
new file mode 100644
index 0000000..29a76a1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_ulong_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_ulong_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_ulong_simple.spv64
new file mode 100644
index 0000000..116ebc8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_ulong_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_ushort_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_ushort_simple.spv32
new file mode 100644
index 0000000..0fbde36
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_ushort_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/constant_ushort_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_ushort_simple.spv64
new file mode 100644
index 0000000..1f35a8d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_ushort_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_char_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_char_simple.spv32
new file mode 100644
index 0000000..23334e0
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_char_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_char_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_char_simple.spv64
new file mode 100644
index 0000000..34c16d9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_char_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_double_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_double_simple.spv32
new file mode 100644
index 0000000..a04ee05
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_double_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_double_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_double_simple.spv64
new file mode 100644
index 0000000..7fa5ca4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_double_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_float_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_float_simple.spv32
new file mode 100644
index 0000000..8f5f606
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_float_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_float_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_float_simple.spv64
new file mode 100644
index 0000000..3775784
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_float_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_half_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_half_simple.spv32
new file mode 100644
index 0000000..84eba79
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_half_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_half_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_half_simple.spv64
new file mode 100644
index 0000000..3943a52
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_half_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_int3_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_int3_simple.spv32
new file mode 100644
index 0000000..609c8b2
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_int3_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_int3_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_int3_simple.spv64
new file mode 100644
index 0000000..485fb3c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_int3_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_int4_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_int4_simple.spv32
new file mode 100644
index 0000000..9865162
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_int4_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_int4_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_int4_simple.spv64
new file mode 100644
index 0000000..1a4e94e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_int4_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_int_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_int_simple.spv32
new file mode 100644
index 0000000..701a9e9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_int_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_int_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_int_simple.spv64
new file mode 100644
index 0000000..61af430
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_int_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_long_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_long_simple.spv32
new file mode 100644
index 0000000..daf3209
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_long_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_long_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_long_simple.spv64
new file mode 100644
index 0000000..4848064
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_long_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_short_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_short_simple.spv32
new file mode 100644
index 0000000..cd6b9fe
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_short_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_short_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_short_simple.spv64
new file mode 100644
index 0000000..a706048
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_short_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_struct_int_char_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_struct_int_char_simple.spv32
new file mode 100644
index 0000000..2ce64ab
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_struct_int_char_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_struct_int_char_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_struct_int_char_simple.spv64
new file mode 100644
index 0000000..0e9de18
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_struct_int_char_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_struct_int_float_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_struct_int_float_simple.spv32
new file mode 100644
index 0000000..811b282
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_struct_int_float_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_struct_int_float_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_struct_int_float_simple.spv64
new file mode 100644
index 0000000..832efe7
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_struct_int_float_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_struct_struct_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_struct_struct_simple.spv32
new file mode 100644
index 0000000..7a1d6ed
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_struct_struct_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_struct_struct_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_struct_struct_simple.spv64
new file mode 100644
index 0000000..c027f9c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_struct_struct_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_uchar_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_uchar_simple.spv32
new file mode 100644
index 0000000..5416c5d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_uchar_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_uchar_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_uchar_simple.spv64
new file mode 100644
index 0000000..ab6c827
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_uchar_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_uint_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_uint_simple.spv32
new file mode 100644
index 0000000..053bf6c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_uint_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_uint_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_uint_simple.spv64
new file mode 100644
index 0000000..ca83456
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_uint_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_ulong_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_ulong_simple.spv32
new file mode 100644
index 0000000..8dc0519
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_ulong_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_ulong_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_ulong_simple.spv64
new file mode 100644
index 0000000..b3434bb
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_ulong_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_ushort_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_ushort_simple.spv32
new file mode 100644
index 0000000..3c1045b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_ushort_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/copy_ushort_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_ushort_simple.spv64
new file mode 100644
index 0000000..08db854
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_ushort_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_aliased.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_aliased.spv32
new file mode 100644
index 0000000..5c0444a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_aliased.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_aliased.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_aliased.spv64
new file mode 100644
index 0000000..6a13de3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_aliased.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_alignment.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_alignment.spv32
new file mode 100644
index 0000000..c5ef2f2
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_alignment.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_alignment.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_alignment.spv64
new file mode 100644
index 0000000..d2b5508
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_alignment.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_coherent.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_coherent.spv32
new file mode 100644
index 0000000..6fb0279
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_coherent.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_coherent.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_coherent.spv64
new file mode 100644
index 0000000..267c0fd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_coherent.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_constant.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_constant.spv32
new file mode 100644
index 0000000..cda1bc9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_constant.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_constant.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_constant.spv64
new file mode 100644
index 0000000..b9fa920
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_constant.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_constant_fail.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_constant_fail.spv32
new file mode 100644
index 0000000..9ef85b5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_constant_fail.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_constant_fail.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_constant_fail.spv64
new file mode 100644
index 0000000..67700c1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_constant_fail.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_cpacked.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_cpacked.spv32
new file mode 100644
index 0000000..6872389
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_cpacked.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_cpacked.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_cpacked.spv64
new file mode 100644
index 0000000..9fa9b9a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_cpacked.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_nonreadable.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_nonreadable.spv32
new file mode 100644
index 0000000..fae2ef6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_nonreadable.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_nonreadable.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_nonreadable.spv64
new file mode 100644
index 0000000..adbca68
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_nonreadable.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_nonwritable.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_nonwritable.spv32
new file mode 100644
index 0000000..1b6ea15
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_nonwritable.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_nonwritable.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_nonwritable.spv64
new file mode 100644
index 0000000..a9ff218
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_nonwritable.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_restrict.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_restrict.spv32
new file mode 100644
index 0000000..6d57eba
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_restrict.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_restrict.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_restrict.spv64
new file mode 100644
index 0000000..a5389e4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_restrict.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_double_long.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_double_long.spv32
new file mode 100644
index 0000000..6574790
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_double_long.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_double_long.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_double_long.spv64
new file mode 100644
index 0000000..b6d6f06
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_double_long.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_float_int.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_float_int.spv32
new file mode 100644
index 0000000..35bd4cf
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_float_int.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_float_int.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_float_int.spv64
new file mode 100644
index 0000000..86094bf
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_float_int.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_double_long.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_double_long.spv32
new file mode 100644
index 0000000..b4d9a42
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_double_long.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_double_long.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_double_long.spv64
new file mode 100644
index 0000000..4ee9b03
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_double_long.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_float_int.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_float_int.spv32
new file mode 100644
index 0000000..2c64302
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_float_int.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_float_int.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_float_int.spv64
new file mode 100644
index 0000000..ff5e2b5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_float_int.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_double_long.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_double_long.spv32
new file mode 100644
index 0000000..3c8a9f6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_double_long.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_double_long.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_double_long.spv64
new file mode 100644
index 0000000..3e8ad6e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_double_long.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_float_int.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_float_int.spv32
new file mode 100644
index 0000000..a78de1c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_float_int.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_float_int.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_float_int.spv64
new file mode 100644
index 0000000..f836c09
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_float_int.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_double_long.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_double_long.spv32
new file mode 100644
index 0000000..cf6a0f7
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_double_long.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_double_long.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_double_long.spv64
new file mode 100644
index 0000000..407595c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_double_long.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_float_int.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_float_int.spv32
new file mode 100644
index 0000000..315cbb5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_float_int.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_float_int.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_float_int.spv64
new file mode 100644
index 0000000..5c44027
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_float_int.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_char.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_char.spv32
new file mode 100644
index 0000000..66ea845
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_char.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_char.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_char.spv64
new file mode 100644
index 0000000..ec87755
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_char.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_int.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_int.spv32
new file mode 100644
index 0000000..949b331
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_int.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_int.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_int.spv64
new file mode 100644
index 0000000..6cb8ffc
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_int.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_short.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_short.spv32
new file mode 100644
index 0000000..4786da6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_short.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_short.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_short.spv64
new file mode 100644
index 0000000..6b76d27
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_short.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uchar.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uchar.spv32
new file mode 100644
index 0000000..589334c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uchar.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uchar.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uchar.spv64
new file mode 100644
index 0000000..31b1082
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uchar.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uint.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uint.spv32
new file mode 100644
index 0000000..c83a913
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uint.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uint.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uint.spv64
new file mode 100644
index 0000000..0b66792
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uint.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_ushort.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_ushort.spv32
new file mode 100644
index 0000000..405f98d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_ushort.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_ushort.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_ushort.spv64
new file mode 100644
index 0000000..57b1878
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_ushort.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_volatile.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_volatile.spv32
new file mode 100644
index 0000000..5ca6abc
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_volatile.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/decorate_volatile.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_volatile.spv64
new file mode 100644
index 0000000..75584f4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_volatile.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spv32
new file mode 100644
index 0000000..403d239
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spv64
new file mode 100644
index 0000000..261d7da
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spv32
new file mode 100644
index 0000000..a0ad64d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spv64
new file mode 100644
index 0000000..8c88c5b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spv32
new file mode 100644
index 0000000..3450f6d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spv64
new file mode 100644
index 0000000..a201855
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spv32
new file mode 100644
index 0000000..a3bff0a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spv64
new file mode 100644
index 0000000..8321f37
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spv32
new file mode 100644
index 0000000..58ab454
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spv64
new file mode 100644
index 0000000..25cba7f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spv32
new file mode 100644
index 0000000..be274a7
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spv64
new file mode 100644
index 0000000..7e00ac8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spv32
new file mode 100644
index 0000000..5342bb8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spv64
new file mode 100644
index 0000000..ac03353
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spv32
new file mode 100644
index 0000000..8427279
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spv64
new file mode 100644
index 0000000..9706e0c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spv32
new file mode 100644
index 0000000..9ca74db
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spv64
new file mode 100644
index 0000000..5a9716a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fadd_double.spv32 b/test_conformance/spirv_new/spirv_bin/fadd_double.spv32
new file mode 100644
index 0000000..84fad15
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_double.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fadd_double.spv64 b/test_conformance/spirv_new/spirv_bin/fadd_double.spv64
new file mode 100644
index 0000000..f453a28
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_double.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fadd_double2.spv32 b/test_conformance/spirv_new/spirv_bin/fadd_double2.spv32
new file mode 100644
index 0000000..724c71d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_double2.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fadd_double2.spv64 b/test_conformance/spirv_new/spirv_bin/fadd_double2.spv64
new file mode 100644
index 0000000..8bb4fe1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_double2.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fadd_float.spv32 b/test_conformance/spirv_new/spirv_bin/fadd_float.spv32
new file mode 100644
index 0000000..238aeee
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_float.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fadd_float.spv64 b/test_conformance/spirv_new/spirv_bin/fadd_float.spv64
new file mode 100644
index 0000000..de28855
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_float.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fadd_float4.spv32 b/test_conformance/spirv_new/spirv_bin/fadd_float4.spv32
new file mode 100644
index 0000000..880f2d9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_float4.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fadd_float4.spv64 b/test_conformance/spirv_new/spirv_bin/fadd_float4.spv64
new file mode 100644
index 0000000..652b8f4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_float4.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fadd_half.spv32 b/test_conformance/spirv_new/spirv_bin/fadd_half.spv32
new file mode 100644
index 0000000..e029432
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_half.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fadd_half.spv64 b/test_conformance/spirv_new/spirv_bin/fadd_half.spv64
new file mode 100644
index 0000000..ce54ba5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_half.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_double.spv32 b/test_conformance/spirv_new/spirv_bin/fdiv_double.spv32
new file mode 100644
index 0000000..4249cea
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_double.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_double.spv64 b/test_conformance/spirv_new/spirv_bin/fdiv_double.spv64
new file mode 100644
index 0000000..47f39e8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_double.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_double2.spv32 b/test_conformance/spirv_new/spirv_bin/fdiv_double2.spv32
new file mode 100644
index 0000000..88ef457
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_double2.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_double2.spv64 b/test_conformance/spirv_new/spirv_bin/fdiv_double2.spv64
new file mode 100644
index 0000000..6d75316
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_double2.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_float.spv32 b/test_conformance/spirv_new/spirv_bin/fdiv_float.spv32
new file mode 100644
index 0000000..3278dea
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_float.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_float.spv64 b/test_conformance/spirv_new/spirv_bin/fdiv_float.spv64
new file mode 100644
index 0000000..687048c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_float.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_float4.spv32 b/test_conformance/spirv_new/spirv_bin/fdiv_float4.spv32
new file mode 100644
index 0000000..24b267e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_float4.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_float4.spv64 b/test_conformance/spirv_new/spirv_bin/fdiv_float4.spv64
new file mode 100644
index 0000000..5cd84ed
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_float4.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_half.spv32 b/test_conformance/spirv_new/spirv_bin/fdiv_half.spv32
new file mode 100644
index 0000000..638c44a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_half.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_half.spv64 b/test_conformance/spirv_new/spirv_bin/fdiv_half.spv64
new file mode 100644
index 0000000..c7a1bff
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_half.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmod_double.spv32 b/test_conformance/spirv_new/spirv_bin/fmod_double.spv32
new file mode 100644
index 0000000..80ebe51
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_double.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmod_double.spv64 b/test_conformance/spirv_new/spirv_bin/fmod_double.spv64
new file mode 100644
index 0000000..a9dc49f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_double.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmod_double2.spv32 b/test_conformance/spirv_new/spirv_bin/fmod_double2.spv32
new file mode 100644
index 0000000..f598faf
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_double2.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmod_double2.spv64 b/test_conformance/spirv_new/spirv_bin/fmod_double2.spv64
new file mode 100644
index 0000000..eadd41e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_double2.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmod_float.spv32 b/test_conformance/spirv_new/spirv_bin/fmod_float.spv32
new file mode 100644
index 0000000..354087f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_float.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmod_float.spv64 b/test_conformance/spirv_new/spirv_bin/fmod_float.spv64
new file mode 100644
index 0000000..20a0c70
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_float.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmod_float4.spv32 b/test_conformance/spirv_new/spirv_bin/fmod_float4.spv32
new file mode 100644
index 0000000..d6a853c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_float4.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmod_float4.spv64 b/test_conformance/spirv_new/spirv_bin/fmod_float4.spv64
new file mode 100644
index 0000000..1616a10
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_float4.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmod_half.spv32 b/test_conformance/spirv_new/spirv_bin/fmod_half.spv32
new file mode 100644
index 0000000..5fac82e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_half.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmod_half.spv64 b/test_conformance/spirv_new/spirv_bin/fmod_half.spv64
new file mode 100644
index 0000000..3a411a3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_half.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmul_double.spv32 b/test_conformance/spirv_new/spirv_bin/fmul_double.spv32
new file mode 100644
index 0000000..8018033
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_double.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmul_double.spv64 b/test_conformance/spirv_new/spirv_bin/fmul_double.spv64
new file mode 100644
index 0000000..e7e9807
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_double.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmul_double2.spv32 b/test_conformance/spirv_new/spirv_bin/fmul_double2.spv32
new file mode 100644
index 0000000..4057b94
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_double2.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmul_double2.spv64 b/test_conformance/spirv_new/spirv_bin/fmul_double2.spv64
new file mode 100644
index 0000000..b308270
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_double2.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmul_float.spv32 b/test_conformance/spirv_new/spirv_bin/fmul_float.spv32
new file mode 100644
index 0000000..96d2a18
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_float.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmul_float.spv64 b/test_conformance/spirv_new/spirv_bin/fmul_float.spv64
new file mode 100644
index 0000000..a160a8e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_float.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmul_float4.spv32 b/test_conformance/spirv_new/spirv_bin/fmul_float4.spv32
new file mode 100644
index 0000000..5856ef1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_float4.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmul_float4.spv64 b/test_conformance/spirv_new/spirv_bin/fmul_float4.spv64
new file mode 100644
index 0000000..b3c22bd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_float4.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmul_half.spv32 b/test_conformance/spirv_new/spirv_bin/fmul_half.spv32
new file mode 100644
index 0000000..f9eff00
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_half.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fmul_half.spv64 b/test_conformance/spirv_new/spirv_bin/fmul_half.spv64
new file mode 100644
index 0000000..467674e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_half.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/frem_double.spv32 b/test_conformance/spirv_new/spirv_bin/frem_double.spv32
new file mode 100644
index 0000000..887e7ec
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_double.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/frem_double.spv64 b/test_conformance/spirv_new/spirv_bin/frem_double.spv64
new file mode 100644
index 0000000..b11c4e5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_double.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/frem_double2.spv32 b/test_conformance/spirv_new/spirv_bin/frem_double2.spv32
new file mode 100644
index 0000000..15a7192
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_double2.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/frem_double2.spv64 b/test_conformance/spirv_new/spirv_bin/frem_double2.spv64
new file mode 100644
index 0000000..13ffb49
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_double2.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/frem_float.spv32 b/test_conformance/spirv_new/spirv_bin/frem_float.spv32
new file mode 100644
index 0000000..7f8e5d5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_float.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/frem_float.spv64 b/test_conformance/spirv_new/spirv_bin/frem_float.spv64
new file mode 100644
index 0000000..214541c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_float.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/frem_float4.spv32 b/test_conformance/spirv_new/spirv_bin/frem_float4.spv32
new file mode 100644
index 0000000..e2f6610
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_float4.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/frem_float4.spv64 b/test_conformance/spirv_new/spirv_bin/frem_float4.spv64
new file mode 100644
index 0000000..4ae62aa
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_float4.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/frem_half.spv32 b/test_conformance/spirv_new/spirv_bin/frem_half.spv32
new file mode 100644
index 0000000..58da29f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_half.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/frem_half.spv64 b/test_conformance/spirv_new/spirv_bin/frem_half.spv64
new file mode 100644
index 0000000..3632752
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_half.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fsub_double.spv32 b/test_conformance/spirv_new/spirv_bin/fsub_double.spv32
new file mode 100644
index 0000000..8643553
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_double.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fsub_double.spv64 b/test_conformance/spirv_new/spirv_bin/fsub_double.spv64
new file mode 100644
index 0000000..63bef82
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_double.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fsub_double2.spv32 b/test_conformance/spirv_new/spirv_bin/fsub_double2.spv32
new file mode 100644
index 0000000..11dbd58
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_double2.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fsub_double2.spv64 b/test_conformance/spirv_new/spirv_bin/fsub_double2.spv64
new file mode 100644
index 0000000..22c89d8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_double2.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fsub_float.spv32 b/test_conformance/spirv_new/spirv_bin/fsub_float.spv32
new file mode 100644
index 0000000..aadc29d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_float.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fsub_float.spv64 b/test_conformance/spirv_new/spirv_bin/fsub_float.spv64
new file mode 100644
index 0000000..e8bbec1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_float.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fsub_float4.spv32 b/test_conformance/spirv_new/spirv_bin/fsub_float4.spv32
new file mode 100644
index 0000000..cb97b5c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_float4.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fsub_float4.spv64 b/test_conformance/spirv_new/spirv_bin/fsub_float4.spv64
new file mode 100644
index 0000000..4b2b8d2
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_float4.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fsub_half.spv32 b/test_conformance/spirv_new/spirv_bin/fsub_half.spv32
new file mode 100644
index 0000000..4f7e4e9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_half.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/fsub_half.spv64 b/test_conformance/spirv_new/spirv_bin/fsub_half.spv64
new file mode 100644
index 0000000..f5ccc29
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_half.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/label_simple.spv32 b/test_conformance/spirv_new/spirv_bin/label_simple.spv32
new file mode 100644
index 0000000..338a821
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/label_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/label_simple.spv64 b/test_conformance/spirv_new/spirv_bin/label_simple.spv64
new file mode 100644
index 0000000..66b4bfc
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/label_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/lifetime_simple.spv32 b/test_conformance/spirv_new/spirv_bin/lifetime_simple.spv32
new file mode 100644
index 0000000..75a8ef6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/lifetime_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/lifetime_simple.spv64 b/test_conformance/spirv_new/spirv_bin/lifetime_simple.spv64
new file mode 100644
index 0000000..34c873d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/lifetime_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/linkage_export.spv32 b/test_conformance/spirv_new/spirv_bin/linkage_export.spv32
new file mode 100644
index 0000000..280021b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/linkage_export.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/linkage_export.spv64 b/test_conformance/spirv_new/spirv_bin/linkage_export.spv64
new file mode 100644
index 0000000..fefdc26
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/linkage_export.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/linkage_import.spv32 b/test_conformance/spirv_new/spirv_bin/linkage_import.spv32
new file mode 100644
index 0000000..e8ae0f3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/linkage_import.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/linkage_import.spv64 b/test_conformance/spirv_new/spirv_bin/linkage_import.spv64
new file mode 100644
index 0000000..9aa4876
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/linkage_import.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_dont_unroll.spv32 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_dont_unroll.spv32
new file mode 100644
index 0000000..96aa742
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_dont_unroll.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_dont_unroll.spv64 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_dont_unroll.spv64
new file mode 100644
index 0000000..0b4ebd3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_dont_unroll.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_none.spv32 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_none.spv32
new file mode 100644
index 0000000..f1c00ac
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_none.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_none.spv64 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_none.spv64
new file mode 100644
index 0000000..7490dd6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_none.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_unroll.spv32 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_unroll.spv32
new file mode 100644
index 0000000..8571f8e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_unroll.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_unroll.spv64 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_unroll.spv64
new file mode 100644
index 0000000..21a6a99
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_unroll.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_dont_unroll.spv32 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_dont_unroll.spv32
new file mode 100644
index 0000000..fbc355f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_dont_unroll.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_dont_unroll.spv64 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_dont_unroll.spv64
new file mode 100644
index 0000000..c345403
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_dont_unroll.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_none.spv32 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_none.spv32
new file mode 100644
index 0000000..a00b862
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_none.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_none.spv64 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_none.spv64
new file mode 100644
index 0000000..3a4787f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_none.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_unroll.spv32 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_unroll.spv32
new file mode 100644
index 0000000..305a0c7
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_unroll.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_unroll.spv64 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_unroll.spv64
new file mode 100644
index 0000000..af93d9b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_unroll.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_function_const.spv32 b/test_conformance/spirv_new/spirv_bin/op_function_const.spv32
new file mode 100644
index 0000000..c371d5d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_const.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_function_const.spv64 b/test_conformance/spirv_new/spirv_bin/op_function_const.spv64
new file mode 100644
index 0000000..07df8ab
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_const.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_function_inline.spv32 b/test_conformance/spirv_new/spirv_bin/op_function_inline.spv32
new file mode 100644
index 0000000..3def2ee
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_inline.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_function_inline.spv64 b/test_conformance/spirv_new/spirv_bin/op_function_inline.spv64
new file mode 100644
index 0000000..872e4ef
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_inline.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_function_noinline.spv32 b/test_conformance/spirv_new/spirv_bin/op_function_noinline.spv32
new file mode 100644
index 0000000..3ac4c24
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_noinline.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_function_noinline.spv64 b/test_conformance/spirv_new/spirv_bin/op_function_noinline.spv64
new file mode 100644
index 0000000..c9fbf13
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_noinline.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_function_none.spv32 b/test_conformance/spirv_new/spirv_bin/op_function_none.spv32
new file mode 100644
index 0000000..027522f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_none.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_function_none.spv64 b/test_conformance/spirv_new/spirv_bin/op_function_none.spv64
new file mode 100644
index 0000000..5411a36
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_none.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_function_pure.spv32 b/test_conformance/spirv_new/spirv_bin/op_function_pure.spv32
new file mode 100644
index 0000000..64a261e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_pure.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_function_pure.spv64 b/test_conformance/spirv_new/spirv_bin/op_function_pure.spv64
new file mode 100644
index 0000000..d03a1b7
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_pure.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_function_pure_ptr.spv32 b/test_conformance/spirv_new/spirv_bin/op_function_pure_ptr.spv32
new file mode 100644
index 0000000..acdec9d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_pure_ptr.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_function_pure_ptr.spv64 b/test_conformance/spirv_new/spirv_bin/op_function_pure_ptr.spv64
new file mode 100644
index 0000000..34e495b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_pure_ptr.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_double.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_double.spv32
new file mode 100644
index 0000000..005d38c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_double.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_double.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_double.spv64
new file mode 100644
index 0000000..71e7241
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_double.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_float.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_float.spv32
new file mode 100644
index 0000000..7a89c8d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_float.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_float.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_float.spv64
new file mode 100644
index 0000000..c76a39e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_float.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_float4.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_float4.spv32
new file mode 100644
index 0000000..f1a88fa
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_float4.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_float4.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_float4.spv64
new file mode 100644
index 0000000..3d306ea
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_float4.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_int.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_int.spv32
new file mode 100644
index 0000000..7fcd069
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_int.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_int.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_int.spv64
new file mode 100644
index 0000000..7ad8a10
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_int.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_int4.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_int4.spv32
new file mode 100644
index 0000000..4841414
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_int4.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_int4.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_int4.spv64
new file mode 100644
index 0000000..1fc4696
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_int4.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_long.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_long.spv32
new file mode 100644
index 0000000..b4f7fe6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_long.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_long.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_long.spv64
new file mode 100644
index 0000000..c0db13e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_long.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_short.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_short.spv32
new file mode 100644
index 0000000..ddc874b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_short.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_short.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_short.spv64
new file mode 100644
index 0000000..3e29164
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_short.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_not_int.spv32 b/test_conformance/spirv_new/spirv_bin/op_not_int.spv32
new file mode 100644
index 0000000..2e6f997
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_int.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_not_int.spv64 b/test_conformance/spirv_new/spirv_bin/op_not_int.spv64
new file mode 100644
index 0000000..3c6f6d8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_int.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_not_int4.spv32 b/test_conformance/spirv_new/spirv_bin/op_not_int4.spv32
new file mode 100644
index 0000000..bbc1b56
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_int4.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_not_int4.spv64 b/test_conformance/spirv_new/spirv_bin/op_not_int4.spv64
new file mode 100644
index 0000000..a40c025
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_int4.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_not_long.spv32 b/test_conformance/spirv_new/spirv_bin/op_not_long.spv32
new file mode 100644
index 0000000..b00d205
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_long.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_not_long.spv64 b/test_conformance/spirv_new/spirv_bin/op_not_long.spv64
new file mode 100644
index 0000000..13719ed
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_long.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_not_short.spv32 b/test_conformance/spirv_new/spirv_bin/op_not_short.spv32
new file mode 100644
index 0000000..fa1a88d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_short.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/op_not_short.spv64 b/test_conformance/spirv_new/spirv_bin/op_not_short.spv64
new file mode 100644
index 0000000..648f92f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_short.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/opaque.spv32 b/test_conformance/spirv_new/spirv_bin/opaque.spv32
new file mode 100644
index 0000000..3400d98
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/opaque.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/opaque.spv64 b/test_conformance/spirv_new/spirv_bin/opaque.spv64
new file mode 100644
index 0000000..8383e93
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/opaque.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/phi_2.spv32 b/test_conformance/spirv_new/spirv_bin/phi_2.spv32
new file mode 100644
index 0000000..22ae786
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/phi_2.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/phi_2.spv64 b/test_conformance/spirv_new/spirv_bin/phi_2.spv64
new file mode 100644
index 0000000..4ef16fb
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/phi_2.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/phi_3.spv32 b/test_conformance/spirv_new/spirv_bin/phi_3.spv32
new file mode 100644
index 0000000..de55f63
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/phi_3.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/phi_3.spv64 b/test_conformance/spirv_new/spirv_bin/phi_3.spv64
new file mode 100644
index 0000000..383b364
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/phi_3.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/phi_4.spv32 b/test_conformance/spirv_new/spirv_bin/phi_4.spv32
new file mode 100644
index 0000000..45e74e0
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/phi_4.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/phi_4.spv64 b/test_conformance/spirv_new/spirv_bin/phi_4.spv64
new file mode 100644
index 0000000..f5fc763
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/phi_4.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/select_if_dont_flatten.spv32 b/test_conformance/spirv_new/spirv_bin/select_if_dont_flatten.spv32
new file mode 100644
index 0000000..e454917
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_if_dont_flatten.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/select_if_dont_flatten.spv64 b/test_conformance/spirv_new/spirv_bin/select_if_dont_flatten.spv64
new file mode 100644
index 0000000..913b327
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_if_dont_flatten.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/select_if_flatten.spv32 b/test_conformance/spirv_new/spirv_bin/select_if_flatten.spv32
new file mode 100644
index 0000000..5f42dcd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_if_flatten.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/select_if_flatten.spv64 b/test_conformance/spirv_new/spirv_bin/select_if_flatten.spv64
new file mode 100644
index 0000000..f266d21
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_if_flatten.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/select_if_none.spv32 b/test_conformance/spirv_new/spirv_bin/select_if_none.spv32
new file mode 100644
index 0000000..be9ffb6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_if_none.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/select_if_none.spv64 b/test_conformance/spirv_new/spirv_bin/select_if_none.spv64
new file mode 100644
index 0000000..3719f76
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_if_none.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/select_switch_dont_flatten.spv32 b/test_conformance/spirv_new/spirv_bin/select_switch_dont_flatten.spv32
new file mode 100644
index 0000000..5caee39
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_switch_dont_flatten.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/select_switch_dont_flatten.spv64 b/test_conformance/spirv_new/spirv_bin/select_switch_dont_flatten.spv64
new file mode 100644
index 0000000..937cd4d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_switch_dont_flatten.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/select_switch_flatten.spv32 b/test_conformance/spirv_new/spirv_bin/select_switch_flatten.spv32
new file mode 100644
index 0000000..c8ed6df
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_switch_flatten.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/select_switch_flatten.spv64 b/test_conformance/spirv_new/spirv_bin/select_switch_flatten.spv64
new file mode 100644
index 0000000..579681d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_switch_flatten.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/select_switch_none.spv32 b/test_conformance/spirv_new/spirv_bin/select_switch_none.spv32
new file mode 100644
index 0000000..e92f494
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_switch_none.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/select_switch_none.spv64 b/test_conformance/spirv_new/spirv_bin/select_switch_none.spv64
new file mode 100644
index 0000000..385281a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_switch_none.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_char_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_char_simple.spv32
new file mode 100644
index 0000000..aaa7d8d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_char_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_char_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_char_simple.spv64
new file mode 100644
index 0000000..73318ea
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_char_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_double_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_double_simple.spv32
new file mode 100644
index 0000000..f527471
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_double_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_double_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_double_simple.spv64
new file mode 100644
index 0000000..8260c99
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_double_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_false_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_false_simple.spv32
new file mode 100644
index 0000000..1b56ccd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_false_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_false_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_false_simple.spv64
new file mode 100644
index 0000000..5b3c33d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_false_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_float_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_float_simple.spv32
new file mode 100644
index 0000000..fe1365a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_float_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_float_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_float_simple.spv64
new file mode 100644
index 0000000..d3a78f1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_float_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_half_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_half_simple.spv32
new file mode 100644
index 0000000..947fdc9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_half_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_half_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_half_simple.spv64
new file mode 100644
index 0000000..ef6473f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_half_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_int3_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_int3_simple.spv32
new file mode 100644
index 0000000..73ae0c2
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_int3_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_int3_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_int3_simple.spv64
new file mode 100644
index 0000000..38e6b23
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_int3_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_int4_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_int4_simple.spv32
new file mode 100644
index 0000000..a4fac3e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_int4_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_int4_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_int4_simple.spv64
new file mode 100644
index 0000000..92c785a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_int4_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_int_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_int_simple.spv32
new file mode 100644
index 0000000..244aa7e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_int_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_int_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_int_simple.spv64
new file mode 100644
index 0000000..ee152ea
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_int_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_long_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_long_simple.spv32
new file mode 100644
index 0000000..6c304c6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_long_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_long_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_long_simple.spv64
new file mode 100644
index 0000000..fe0bae6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_long_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_short_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_short_simple.spv32
new file mode 100644
index 0000000..c55c7ed
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_short_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_short_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_short_simple.spv64
new file mode 100644
index 0000000..9aa240c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_short_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_struct_int_char_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_struct_int_char_simple.spv32
new file mode 100644
index 0000000..97a0252
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_struct_int_char_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_struct_int_char_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_struct_int_char_simple.spv64
new file mode 100644
index 0000000..092cee0
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_struct_int_char_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_struct_int_float_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_struct_int_float_simple.spv32
new file mode 100644
index 0000000..912e9e8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_struct_int_float_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_struct_int_float_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_struct_int_float_simple.spv64
new file mode 100644
index 0000000..a1ef3fc
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_struct_int_float_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_struct_struct_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_struct_struct_simple.spv32
new file mode 100644
index 0000000..8d8fe39
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_struct_struct_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_struct_struct_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_struct_struct_simple.spv64
new file mode 100644
index 0000000..8030440
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_struct_struct_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_true_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_true_simple.spv32
new file mode 100644
index 0000000..08b9033
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_true_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_true_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_true_simple.spv64
new file mode 100644
index 0000000..41c2bef
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_true_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_uchar_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_uchar_simple.spv32
new file mode 100644
index 0000000..e3b3074
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_uchar_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_uchar_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_uchar_simple.spv64
new file mode 100644
index 0000000..929d352
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_uchar_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_uint_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_uint_simple.spv32
new file mode 100644
index 0000000..0073745
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_uint_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_uint_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_uint_simple.spv64
new file mode 100644
index 0000000..6e58cef
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_uint_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_ulong_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_ulong_simple.spv32
new file mode 100644
index 0000000..4a51c3c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_ulong_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_ulong_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_ulong_simple.spv64
new file mode 100644
index 0000000..4651d73
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_ulong_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_ushort_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_ushort_simple.spv32
new file mode 100644
index 0000000..6030982
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_ushort_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/undef_ushort_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_ushort_simple.spv64
new file mode 100644
index 0000000..c348a95
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_ushort_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/unreachable_simple.spv32 b/test_conformance/spirv_new/spirv_bin/unreachable_simple.spv32
new file mode 100644
index 0000000..0d4f238
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/unreachable_simple.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/unreachable_simple.spv64 b/test_conformance/spirv_new/spirv_bin/unreachable_simple.spv64
new file mode 100644
index 0000000..70596a0
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/unreachable_simple.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_char16_extract.spv32 b/test_conformance/spirv_new/spirv_bin/vector_char16_extract.spv32
new file mode 100644
index 0000000..6acf7ee
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_char16_extract.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_char16_extract.spv64 b/test_conformance/spirv_new/spirv_bin/vector_char16_extract.spv64
new file mode 100644
index 0000000..65dde38
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_char16_extract.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_char16_insert.spv32 b/test_conformance/spirv_new/spirv_bin/vector_char16_insert.spv32
new file mode 100644
index 0000000..074e2e5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_char16_insert.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_char16_insert.spv64 b/test_conformance/spirv_new/spirv_bin/vector_char16_insert.spv64
new file mode 100644
index 0000000..3e12740
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_char16_insert.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_double2_extract.spv32 b/test_conformance/spirv_new/spirv_bin/vector_double2_extract.spv32
new file mode 100644
index 0000000..19eb940
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_double2_extract.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_double2_extract.spv64 b/test_conformance/spirv_new/spirv_bin/vector_double2_extract.spv64
new file mode 100644
index 0000000..f7cf8a7
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_double2_extract.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_double2_insert.spv32 b/test_conformance/spirv_new/spirv_bin/vector_double2_insert.spv32
new file mode 100644
index 0000000..02601e5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_double2_insert.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_double2_insert.spv64 b/test_conformance/spirv_new/spirv_bin/vector_double2_insert.spv64
new file mode 100644
index 0000000..1dd7e63
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_double2_insert.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_float4_extract.spv32 b/test_conformance/spirv_new/spirv_bin/vector_float4_extract.spv32
new file mode 100644
index 0000000..a1c4888
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_float4_extract.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_float4_extract.spv64 b/test_conformance/spirv_new/spirv_bin/vector_float4_extract.spv64
new file mode 100644
index 0000000..16d0194
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_float4_extract.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_float4_insert.spv32 b/test_conformance/spirv_new/spirv_bin/vector_float4_insert.spv32
new file mode 100644
index 0000000..f6bba1e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_float4_insert.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_float4_insert.spv64 b/test_conformance/spirv_new/spirv_bin/vector_float4_insert.spv64
new file mode 100644
index 0000000..0d84bd6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_float4_insert.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_int4_extract.spv32 b/test_conformance/spirv_new/spirv_bin/vector_int4_extract.spv32
new file mode 100644
index 0000000..50d5dd4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_int4_extract.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_int4_extract.spv64 b/test_conformance/spirv_new/spirv_bin/vector_int4_extract.spv64
new file mode 100644
index 0000000..5b6b0c1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_int4_extract.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_int4_insert.spv32 b/test_conformance/spirv_new/spirv_bin/vector_int4_insert.spv32
new file mode 100644
index 0000000..006ec9a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_int4_insert.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_int4_insert.spv64 b/test_conformance/spirv_new/spirv_bin/vector_int4_insert.spv64
new file mode 100644
index 0000000..47996ed
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_int4_insert.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_long2_extract.spv32 b/test_conformance/spirv_new/spirv_bin/vector_long2_extract.spv32
new file mode 100644
index 0000000..229e3b6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_long2_extract.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_long2_extract.spv64 b/test_conformance/spirv_new/spirv_bin/vector_long2_extract.spv64
new file mode 100644
index 0000000..34d2517
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_long2_extract.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_long2_insert.spv32 b/test_conformance/spirv_new/spirv_bin/vector_long2_insert.spv32
new file mode 100644
index 0000000..e62cef9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_long2_insert.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_long2_insert.spv64 b/test_conformance/spirv_new/spirv_bin/vector_long2_insert.spv64
new file mode 100644
index 0000000..2eb8833
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_long2_insert.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_times_scalar_double.spv32 b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_double.spv32
new file mode 100644
index 0000000..8e672dd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_double.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_times_scalar_double.spv64 b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_double.spv64
new file mode 100644
index 0000000..21e4dc4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_double.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_times_scalar_float.spv32 b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_float.spv32
new file mode 100644
index 0000000..2c4d9c3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_float.spv32
Binary files differ

diff --git a/test_conformance/spirv_new/spirv_bin/vector_times_scalar_float.spv64 b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_float.spv64
new file mode 100644
index 0000000..94dff07
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_float.spv64
Binary files differ

diff --git a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp
index 9e1789c..84f8ed1 100644
--- a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp
+++ b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp

@@ -129,9 +129,13 @@
     {

         // Run the cl kernel for reference results

         clProgramWrapper prog;

-        clKernelWrapper kernel;

-        err = create_single_kernel_helper(context, &prog, &kernel, 1,

-                                          &kernelBuf, "fmath_cl");

+        err = create_single_kernel_helper_create_program(context, &prog, 1, &kernelBuf, NULL);

+        SPIRV_CHECK_ERROR(err, "Failed to create cl program");

+

+        err = clBuildProgram(prog, 1, &deviceID, NULL, NULL, NULL);

+        SPIRV_CHECK_ERROR(err, "Failed to build program");

+

+        clKernelWrapper kernel = clCreateKernel(prog, "fmath_cl", &err);

         SPIRV_CHECK_ERROR(err, "Failed to create cl kernel");

 

         clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);


diff --git a/test_conformance/spirv_new/test_decorate.cpp b/test_conformance/spirv_new/test_decorate.cpp
index ccd7431..766a6b6 100644
--- a/test_conformance/spirv_new/test_decorate.cpp
+++ b/test_conformance/spirv_new/test_decorate.cpp

@@ -310,6 +310,174 @@
 TEST_SATURATED_CONVERSION(double, long, int)
 TEST_SATURATED_CONVERSION(double, ulong, uint)
 
+// Runs the SPIR-V kernel `name` on a randomly filled 2D image and verifies
+// the buffer the kernel produces.
+//
+// A width x height CL_RGBA / CL_UNSIGNED_INT32 image is created from random
+// host data. The kernel is launched over a width x height global range with
+// the destination buffer as argument 0 and the source image as argument 1.
+// After the run, the texel at (i, j) of the source must be found at index
+// i * height + j of the destination buffer.
+//
+// Parameters:
+//   deviceID - device the program is built for
+//   context  - context used to create the image, buffer and program
+//   queue    - command queue used to launch the kernel and read the result
+//   name     - name of both the SPIR-V program and the kernel inside it
+//
+// Returns 0 when every texel matches and -1 on the first mismatch. OpenCL
+// failures are reported through SPIRV_CHECK_ERROR.
+int test_image_decorate(cl_device_id deviceID,
+                        cl_context context,
+                        cl_command_queue queue,
+                        const char *name)
+{
+    const int width = 4096;
+    const int height = 4096;
+
+    // Random reference data, one cl_uint4 per texel.
+    std::vector<cl_uint4> src(width * height);
+    RandomSeed seed(gRandomSeed);
+
+    for (auto &val : src) {
+        val = genrand<cl_uint4>(seed);
+    }
+
+    cl_image_format imageFormat;
+    imageFormat.image_channel_data_type = CL_UNSIGNED_INT32;
+    imageFormat.image_channel_order = CL_RGBA;
+
+    cl_image_desc desc;
+    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+    desc.image_width = width;
+    desc.image_height = height;
+    desc.image_depth = 0;
+    desc.image_array_size = 0;
+    desc.image_row_pitch = 0; // Automatically calculate the pitch
+    desc.image_slice_pitch = 0; // Not needed for 2D
+    desc.num_mip_levels = 0;
+    desc.num_samples = 0;
+    desc.mem_object = NULL;
+
+    cl_int err = CL_SUCCESS;
+
+    // The image is initialised from the host data at creation time.
+    clMemWrapper srcImage = clCreateImage(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+                                          &imageFormat, &desc,
+                                          &src[0], &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create image object");
+
+    size_t bytes = src.size() * sizeof(cl_uint4);
+    clMemWrapper dstBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create buffer object");
+
+    clProgramWrapper prog;
+    err = get_program_with_il(prog, deviceID, context, name);
+    SPIRV_CHECK_ERROR(err, "Failed to build program");
+
+    clKernelWrapper kernel = clCreateKernel(prog, name, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create spv kernel");
+
+    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dstBuffer);
+    SPIRV_CHECK_ERROR(err, "Failed to set arg 0 of the kernel");
+
+    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &srcImage);
+    SPIRV_CHECK_ERROR(err, "Failed to set arg 1 of the kernel");
+
+    size_t global[] = {width, height};
+    err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, NULL, 0, NULL, NULL);
+    SPIRV_CHECK_ERROR(err, "Failed to enqueue kernel");
+
+    // Blocking read (CL_TRUE): dst is fully populated once the call returns.
+    std::vector<cl_uint4> dst(src.size());
+    err = clEnqueueReadBuffer(queue, dstBuffer, CL_TRUE, 0, bytes, &dst[0], 0, NULL, NULL);
+    SPIRV_CHECK_ERROR(err, "Failed to copy data back to host");
+
+    // Source texel (i, j) is row-major in src; the result is expected at the
+    // transposed position in dst.
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; i++) {
+            int srcIdx = j * width + i;
+            int dstIdx = i * height + j;
+            if (dst[dstIdx] != src[srcIdx]) {
+                log_error("Values do not match at location (%d, %d) of src\n", i, j);
+                // Fail on the first mismatch; previously the mismatch was
+                // only logged and the test still reported success.
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
+// Defines the SPIR-V test case decorate_<type>. The generated test first
+// invokes PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) (presumably skipping the
+// test on devices without image support -- confirm against its definition)
+// and then runs test_image_decorate() with the kernel name "decorate_<type>".
+// NOTE: the last line of the macro ends with a line continuation, so the line
+// that follows the macro must stay blank.
+#define TEST_SPIRV_IMAGE_DECORATE(type)                         \
+    TEST_SPIRV_FUNC(decorate_##type)                            \
+    {                                                           \
+        PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID);                \
+        return test_image_decorate(deviceID, context, queue,    \
+                                   "decorate_" #type);          \
+    }                                                           \
+
+// Test cases that share the image-to-buffer flow of test_image_decorate().
+TEST_SPIRV_IMAGE_DECORATE(volatile)
+TEST_SPIRV_IMAGE_DECORATE(coherent)
+TEST_SPIRV_IMAGE_DECORATE(nonwritable)
+
+// Test case for the "decorate_nonreadable" SPIR-V kernel.
+//
+// Random data is uploaded into a buffer, and the kernel is launched over a
+// width x height global range with a write-only 2D image as argument 0 and
+// the buffer as argument 1. The image is created with its dimensions swapped
+// (image_width = height, image_height = width). After the run, element
+// j * width + i of the source buffer must be found at index i * height + j of
+// the data read back from the image.
+//
+// deviceID, context and queue are not declared here; they are presumably
+// supplied by TEST_SPIRV_FUNC -- confirm against the macro definition.
+//
+// Returns 0 when every texel matches and -1 on the first mismatch. OpenCL
+// failures are reported through SPIRV_CHECK_ERROR.
+TEST_SPIRV_FUNC(decorate_nonreadable)
+{
+    PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID);
+    const char *name = "decorate_nonreadable";
+    const int width = 4096;
+    const int height = 4096;
+    cl_int err = CL_SUCCESS;
+
+    // Random reference data, one cl_uint4 per element.
+    std::vector<cl_uint4> src(width * height);
+    RandomSeed seed(gRandomSeed);
+
+    for (auto &val : src) {
+        val = genrand<cl_uint4>(seed);
+    }
+
+    size_t bytes = src.size() * sizeof(cl_uint4);
+    clMemWrapper srcBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create buffer object");
+
+    // Blocking write (CL_TRUE): the host data is uploaded before continuing.
+    err = clEnqueueWriteBuffer(queue, srcBuffer, CL_TRUE, 0, bytes, &src[0], 0, NULL, NULL);
+    SPIRV_CHECK_ERROR(err, "Failed to copy data to the device");
+
+    cl_image_format imageFormat;
+    imageFormat.image_channel_data_type = CL_UNSIGNED_INT32;
+    imageFormat.image_channel_order = CL_RGBA;
+
+    // Width and height are deliberately swapped relative to the source layout.
+    cl_image_desc desc;
+    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+    desc.image_width = height;
+    desc.image_height = width;
+    desc.image_depth = 0;
+    desc.image_array_size = 0;
+    desc.image_row_pitch = 0; // Automatically calculate the pitch
+    desc.image_slice_pitch = 0; // Not needed for 2D
+    desc.num_mip_levels = 0;
+    desc.num_samples = 0;
+    desc.mem_object = NULL;
+
+
+    clMemWrapper dstImage = clCreateImage(context, CL_MEM_WRITE_ONLY,
+                                          &imageFormat, &desc,
+                                          NULL, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create image object");
+
+    clProgramWrapper prog;
+    err = get_program_with_il(prog, deviceID, context, name);
+    SPIRV_CHECK_ERROR(err, "Failed to build program");
+
+    clKernelWrapper kernel = clCreateKernel(prog, name, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create spv kernel");
+
+    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dstImage);
+    SPIRV_CHECK_ERROR(err, "Failed to set arg 0 of the kernel");
+
+    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &srcBuffer);
+    SPIRV_CHECK_ERROR(err, "Failed to set arg 1 of the kernel");
+
+    size_t global[] = {width, height};
+    err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, NULL, 0, NULL, NULL);
+    SPIRV_CHECK_ERROR(err, "Failed to enqueue kernel");
+
+    // Blocking read of the whole image; row and slice pitch of 0 request a
+    // tightly packed host layout.
+    std::vector<cl_uint4> dst(src.size());
+    size_t origin[] = {0, 0, 0};
+    size_t region[] = {height, width, 1};
+    err = clEnqueueReadImage(queue, dstImage, CL_TRUE, origin, region, 0, 0, &dst[0], 0, NULL, NULL);
+    // Previously unchecked: a failed read would leave dst zero-initialised
+    // and be reported only as a flood of value mismatches.
+    SPIRV_CHECK_ERROR(err, "Failed to read image back to host");
+
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; i++) {
+            int srcIdx = j * width + i;
+            int dstIdx = i * height + j;
+            if (dst[dstIdx] != src[srcIdx]) {
+                log_error("Values do not match at location (%d, %d) of src\n", i, j);
+                // Fail on the first mismatch; previously the mismatch was
+                // only logged and the test still reported success.
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
 template<typename Ti, typename To>
 int test_fp_rounding(cl_device_id deviceID,
                      cl_context context,

diff --git a/test_conformance/spirv_new/test_get_program_il.cpp b/test_conformance/spirv_new/test_get_program_il.cpp
deleted file mode 100644
index cf349d1..0000000
--- a/test_conformance/spirv_new/test_get_program_il.cpp
+++ /dev/null

@@ -1,105 +0,0 @@
-/******************************************************************
-Copyright (c) 2020 The Khronos Group Inc. All Rights Reserved.
-
-This code is protected by copyright laws and contains material proprietary to
-the Khronos Group, Inc. This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not
-be disclosed in whole or in part to third parties, and may not be reproduced,
-republished, distributed, transmitted, displayed, broadcast or otherwise
-exploited in any manner without the express prior written permission of Khronos
-Group. The receipt or possession of this code does not convey any rights to
-reproduce, disclose, or distribute its contents, or to manufacture, use, or sell
-anything that it may describe, in whole or in part other than under the terms of
-the Khronos Adopters Agreement or Khronos Conformance Test Source License
-Agreement as executed between Khronos and the recipient.
-******************************************************************/
-
-#include "testBase.h"
-
-const char *sample_kernel_code_single_line[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = (int)src[tid];\n"
-    "\n"
-    "}\n"
-};
-
-TEST_SPIRV_FUNC(get_program_il)
-{
-    clProgramWrapper source_program;
-    size_t il_size = -1;
-    int error;
-
-    /* If a program has been created with clCreateProgramWithIL, CL_PROGRAM_IL
-     * should return the program IL it was created with and it's size */
-    if (gCoreILProgram || is_extension_available(deviceID, "cl_khr_il_program"))
-    {
-        clProgramWrapper il_program;
-        std::string spvStr = "op_function_none";
-        const char *spvName = spvStr.c_str();
-
-        std::vector<unsigned char> spirv_binary = readSPIRV(spvName);
-
-        int file_bytes = spirv_binary.size();
-        if (file_bytes == 0)
-        {
-            test_fail("ERROR: SPIRV file %s not found!\n", spvName);
-        }
-
-        /* Create program with IL */
-        unsigned char *spirv_buffer = &spirv_binary[0];
-
-        error = get_program_with_il(il_program, deviceID, context, spvName);
-
-        SPIRV_CHECK_ERROR(error, "Unable to create program with IL.");
-        if (il_program == NULL)
-        {
-            test_fail("ERROR: Unable to create test program!\n");
-        }
-
-        /* Check program IL is the same as the source IL */
-        unsigned char *buffer = new unsigned char[file_bytes];
-        error = clGetProgramInfo(il_program, CL_PROGRAM_IL, file_bytes, buffer,
-                                 &il_size);
-        SPIRV_CHECK_ERROR(error, "Unable to get program info.");
-
-        if (il_size != file_bytes)
-        {
-            test_fail("ERROR: Returned IL size is not the same as source IL "
-                      "size (%lu "
-                      "!= %lu)!\n",
-                      il_size, file_bytes);
-        }
-
-        if (memcmp(buffer, spirv_buffer, file_bytes) != 0)
-        {
-            test_fail("ERROR: Returned IL is not the same as source IL!\n");
-        }
-
-        delete[] buffer;
-    }
-
-    /* CL_PROGRAM_IL shouldn't return IL value unless program is created with
-     * clCreateProgramWithIL */
-    error = create_single_kernel_helper_create_program(
-        context, &source_program, 1, sample_kernel_code_single_line);
-    if (source_program == NULL)
-    {
-        test_fail("ERROR: Unable to create test program!\n");
-    }
-
-    if (gCompilationMode != kSpir_v)
-    {
-        error =
-            clGetProgramInfo(source_program, CL_PROGRAM_IL, 0, NULL, &il_size);
-        SPIRV_CHECK_ERROR(error, "Unable to get program il length");
-        if (il_size != 0)
-        {
-            test_fail(
-                "ERROR: Returned length of non-IL program IL is non-zero!\n");
-        }
-    }
-
-    return 0;
-}
\ No newline at end of file

diff --git a/test_conformance/spirv_new/test_linkage.cpp b/test_conformance/spirv_new/test_linkage.cpp
index cf518c3..93972c9 100644
--- a/test_conformance/spirv_new/test_linkage.cpp
+++ b/test_conformance/spirv_new/test_linkage.cpp

@@ -33,43 +33,17 @@
     }
     unsigned char *buffer = &buffer_vec[0];
 
-    if (gCoreILProgram)
-    {
-        prog = clCreateProgramWithIL(context, buffer, file_bytes, &err);
-        SPIRV_CHECK_ERROR(
-            err, "Failed to create program with clCreateProgramWithIL");
-    }
-    else
-    {
-        cl_platform_id platform;
-        err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM,
-                              sizeof(cl_platform_id), &platform, NULL);
-        SPIRV_CHECK_ERROR(err,
-                          "Failed to get platform info with clGetDeviceInfo");
-        clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
-
-        clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                platform, "clCreateProgramWithILKHR");
-        if (clCreateProgramWithILKHR == NULL)
-        {
-            log_error(
-                "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
-            return -1;
-        }
-        prog = clCreateProgramWithILKHR(context, buffer, file_bytes, &err);
-        SPIRV_CHECK_ERROR(
-            err, "Failed to create program with clCreateProgramWithILKHR");
-    }
+    prog = clCreateProgramWithIL(context, buffer, file_bytes, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create program with clCreateProgramWithIL");
 
     err = clCompileProgram(prog, 1, &deviceID,
                            NULL, // options
-                           0, // num headers
+                           0,    // num headers
                            NULL, // input headers
                            NULL, // header include names
                            NULL, // callback
-                           NULL // User data
-    );
+                           NULL  // User data
+        );
     SPIRV_CHECK_ERROR(err, "Failed to compile spv program");
     return 0;
 }

diff --git a/test_conformance/spirv_new/test_op_fmath.cpp b/test_conformance/spirv_new/test_op_fmath.cpp
index bec0667..7250eb1 100644
--- a/test_conformance/spirv_new/test_op_fmath.cpp
+++ b/test_conformance/spirv_new/test_op_fmath.cpp

@@ -89,9 +89,13 @@
     {
         // Run the cl kernel for reference results
         clProgramWrapper prog;
-        clKernelWrapper kernel;
-        err = create_single_kernel_helper(context, &prog, &kernel, 1,
-                                          &kernelBuf, "fmath_cl");
+        err = create_single_kernel_helper_create_program(context, &prog, 1, &kernelBuf, NULL);
+        SPIRV_CHECK_ERROR(err, "Failed to create cl program");
+
+        err = clBuildProgram(prog, 1, &deviceID, NULL, NULL, NULL);
+        SPIRV_CHECK_ERROR(err, "Failed to build program");
+
+        clKernelWrapper kernel = clCreateKernel(prog, "fmath_cl", &err);
         SPIRV_CHECK_ERROR(err, "Failed to create cl kernel");
 
         clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);

diff --git a/test_conformance/spirv_new/test_op_opaque.cpp b/test_conformance/spirv_new/test_op_opaque.cpp
index 067d9e4..930e26d 100644
--- a/test_conformance/spirv_new/test_op_opaque.cpp
+++ b/test_conformance/spirv_new/test_op_opaque.cpp

@@ -28,45 +28,17 @@
     }
     unsigned char *buffer = &buffer_vec[0];
 
-    clProgramWrapper prog;
-
-    if (gCoreILProgram)
-    {
-        prog = clCreateProgramWithIL(context, buffer, file_bytes, &err);
-        SPIRV_CHECK_ERROR(
-            err, "Failed to create program with clCreateProgramWithIL");
-    }
-    else
-    {
-        cl_platform_id platform;
-        err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM,
-                              sizeof(cl_platform_id), &platform, NULL);
-        SPIRV_CHECK_ERROR(err,
-                          "Failed to get platform info with clGetDeviceInfo");
-        clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
-
-        clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                platform, "clCreateProgramWithILKHR");
-        if (clCreateProgramWithILKHR == NULL)
-        {
-            log_error(
-                "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
-            return -1;
-        }
-        prog = clCreateProgramWithILKHR(context, buffer, file_bytes, &err);
-        SPIRV_CHECK_ERROR(
-            err, "Failed to create program with clCreateProgramWithILKHR");
-    }
+    clProgramWrapper prog = clCreateProgramWithIL(context, buffer, file_bytes, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create program with clCreateProgramWithIL");
 
     err = clCompileProgram(prog, 1, &deviceID,
                            NULL, // options
-                           0, // num headers
+                           0,    // num headers
                            NULL, // input headers
                            NULL, // header include names
                            NULL, // callback
-                           NULL // User data
-    );
+                           NULL  // User data
+        );
     SPIRV_CHECK_ERROR(err, "Failed to compile spv program");
     return 0;
 }

diff --git a/test_conformance/spirv_new/test_op_spec_constant.cpp b/test_conformance/spirv_new/test_op_spec_constant.cpp
deleted file mode 100644
index a280a4f..0000000
--- a/test_conformance/spirv_new/test_op_spec_constant.cpp
+++ /dev/null

@@ -1,157 +0,0 @@
-/******************************************************************
-Copyright (c) 2020 The Khronos Group Inc. All Rights Reserved.
-
-This code is protected by copyright laws and contains material proprietary to
-the Khronos Group, Inc. This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not
-be disclosed in whole or in part to third parties, and may not be reproduced,
-republished, distributed, transmitted, displayed, broadcast or otherwise
-exploited in any manner without the express prior written permission of Khronos
-Group. The receipt or possession of this code does not convey any rights to
-reproduce, disclose, or distribute its contents, or to manufacture, use, or sell
-anything that it may describe, in whole or in part other than under the terms of
-the Khronos Adopters Agreement or Khronos Conformance Test Source License
-Agreement as executed between Khronos and the recipient.
-******************************************************************/
-
-#include "testBase.h"
-#include "types.hpp"
-
-
-template <typename T>
-int run_case(cl_device_id deviceID, cl_context context, cl_command_queue queue,
-             const char *name, T init_buffer, T spec_constant_value,
-             T final_value, bool use_spec_constant)
-{
-    clProgramWrapper prog;
-    cl_int err = CL_SUCCESS;
-    if (use_spec_constant)
-    {
-        spec_const new_spec_const =
-            spec_const(101, sizeof(T), &spec_constant_value);
-
-        err =
-            get_program_with_il(prog, deviceID, context, name, new_spec_const);
-    }
-    else
-    {
-        err = get_program_with_il(prog, deviceID, context, name);
-    }
-    SPIRV_CHECK_ERROR(err, "Failed to build program");
-
-    clKernelWrapper kernel = clCreateKernel(prog, "spec_const_kernel", &err);
-    SPIRV_CHECK_ERROR(err, "Failed to create kernel");
-    size_t bytes = sizeof(T);
-    clMemWrapper output_buffer =
-        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, bytes,
-                       &init_buffer, &err);
-    SPIRV_CHECK_ERROR(err, "Failed to create output_buffer");
-
-    err = clSetKernelArg(kernel, 0, sizeof(clMemWrapper), &output_buffer);
-    SPIRV_CHECK_ERROR(err, "Failed to set kernel argument output_buffer");
-
-    size_t work_size = 1;
-    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &work_size, NULL, 0,
-                                 NULL, NULL);
-    SPIRV_CHECK_ERROR(err, "Failed to enqueue kernel");
-
-    T device_results = 0;
-    err = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, bytes,
-                              &device_results, 0, NULL, NULL);
-    SPIRV_CHECK_ERROR(err, "Failed to copy from output_buffer");
-    T reference = 0;
-    use_spec_constant ? reference = final_value : reference = init_buffer;
-    if (device_results != reference)
-    {
-        log_error("Values do not match. Expected %d obtained %d\n", reference,
-                  device_results);
-        err = -1;
-    }
-    return err;
-}
-
-template <typename T>
-int test_spec_constant(cl_device_id deviceID, cl_context context,
-                       cl_command_queue queue, const char *name, T init_buffer,
-                       T spec_constant_value, T final_value)
-{
-    if (std::string(name).find("double") != std::string::npos)
-    {
-        if (!is_extension_available(deviceID, "cl_khr_fp64"))
-        {
-            log_info("Extension cl_khr_fp64 not supported; skipping double "
-                     "tests.\n");
-            return TEST_SKIPPED_ITSELF;
-        }
-    }
-    if (std::string(name).find("half") != std::string::npos)
-    {
-        if (!is_extension_available(deviceID, "cl_khr_fp16"))
-        {
-            log_info("Extension cl_khr_fp16 not supported; skipping half "
-                     "tests.\n");
-            return TEST_SKIPPED_ITSELF;
-        }
-    }
-    cl_int err = CL_SUCCESS;
-    err = run_case<T>(deviceID, context, queue, name, init_buffer,
-                      spec_constant_value, final_value, false);
-    err |= run_case<T>(deviceID, context, queue, name, init_buffer,
-                       spec_constant_value, final_value, true);
-
-    if (err == CL_SUCCESS)
-    {
-        return TEST_PASS;
-    }
-    else
-    {
-        return TEST_FAIL;
-    }
-}
-
-
-#define TEST_SPEC_CONSTANT(NAME, type, init_buffer, spec_constant_value)       \
-    TEST_SPIRV_FUNC_VERSION(op_spec_constant_##NAME##_simple, Version(2, 2))   \
-    {                                                                          \
-        type init_value = init_buffer;                                         \
-        type final_value = init_value + spec_constant_value;                   \
-        return test_spec_constant(                                             \
-            deviceID, context, queue, "op_spec_constant_" #NAME "_simple",     \
-            init_value, (type)spec_constant_value, final_value);               \
-    }
-
-// type name, type, value init, spec constant value
-TEST_SPEC_CONSTANT(uint, cl_uint, 25, 43)
-TEST_SPEC_CONSTANT(uchar, cl_uchar, 19, 4)
-TEST_SPEC_CONSTANT(ushort, cl_ushort, 6000, 3000)
-TEST_SPEC_CONSTANT(ulong, cl_ulong, 9223372036854775000UL, 200)
-TEST_SPEC_CONSTANT(float, cl_float, 1.5, -3.7)
-TEST_SPEC_CONSTANT(half, cl_half, 1, 2)
-TEST_SPEC_CONSTANT(double, cl_double, 14534.53453, 1.53453)
-
-// Boolean tests
-// documenation: 'If a specialization constant is a boolean
-// constant, spec_value should be a pointer to a cl_uchar value'
-
-TEST_SPIRV_FUNC_VERSION(op_spec_constant_true_simple, Version(2, 2))
-{
-    // 1-st ndrange init_value is expected value (no change)
-    // 2-nd ndrange sets spec const to 'false' so value = value + 1
-    cl_uchar value = (cl_uchar)7;
-    cl_uchar init_value = value;
-    cl_uchar final_value = value + 1;
-    return test_spec_constant<cl_uchar>(deviceID, context, queue,
-                                        "op_spec_constant_true_simple",
-                                        init_value, 0, final_value);
-}
-
-TEST_SPIRV_FUNC_VERSION(op_spec_constant_false_simple, Version(2, 2))
-{
-    // 1-st ndrange init_value is expected value (no change)
-    // 2-nd ndrange sets spec const to 'true' so value = value + 1
-    cl_uchar value = (cl_uchar)7;
-    cl_uchar init_value = value;
-    cl_uchar final_value = value + 1;
-    return test_spec_constant<cl_uchar>(deviceID, context, queue,
-                                        "op_spec_constant_false_simple",
-                                        init_value, 1, final_value);
-}

diff --git a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp
index 0a604bc..99d71f7 100644
--- a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp
+++ b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp

@@ -82,11 +82,15 @@
     {
         // Run the cl kernel for reference results
         clProgramWrapper prog;
-        clKernelWrapper kernel;
-        err = create_single_kernel_helper(context, &prog, &kernel, 1,
-                                          &kernelBuf, "vector_times_scalar");
+        err = create_single_kernel_helper_create_program(context, &prog, 1, &kernelBuf, NULL);
         SPIRV_CHECK_ERROR(err, "Failed to create cl program");
 
+        err = clBuildProgram(prog, 1, &deviceID, NULL, NULL, NULL);
+        SPIRV_CHECK_ERROR(err, "Failed to build program");
+
+        clKernelWrapper kernel = clCreateKernel(prog, "vector_times_scalar", &err);
+        SPIRV_CHECK_ERROR(err, "Failed to create cl kernel");
+
         clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, res_bytes, NULL, &err);
         SPIRV_CHECK_ERROR(err, "Failed to create ref buffer");
 

diff --git a/test_conformance/subgroups/CMakeLists.txt b/test_conformance/subgroups/CMakeLists.txt
index d48af9c..eb6a607 100644
--- a/test_conformance/subgroups/CMakeLists.txt
+++ b/test_conformance/subgroups/CMakeLists.txt

@@ -5,16 +5,8 @@
     test_barrier.cpp
     test_queries.cpp
     test_workitem.cpp
-    test_subgroup.cpp
+    test_workgroup.cpp
     test_ifp.cpp
-    test_subgroup_extended_types.cpp
-    subgroup_common_kernels.cpp
-    test_subgroup_non_uniform_vote.cpp
-    test_subgroup_non_uniform_arithmetic.cpp
-    test_subgroup_ballot.cpp
-    test_subgroup_clustered_reduce.cpp
-    test_subgroup_shuffle.cpp
-    test_subgroup_shuffle_relative.cpp
 )
 
 include(../CMakeCommon.txt)

diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp
index 44416dd..f9a9a9d 100644
--- a/test_conformance/subgroups/main.cpp
+++ b/test_conformance/subgroups/main.cpp

@@ -27,19 +27,12 @@
     ADD_TEST_VERSION(sub_group_info_core, Version(2, 1)),
     ADD_TEST_VERSION(work_item_functions_ext, Version(2, 0)),
     ADD_TEST_VERSION(work_item_functions_core, Version(2, 1)),
-    ADD_TEST_VERSION(subgroup_functions_ext, Version(2, 0)),
-    ADD_TEST_VERSION(subgroup_functions_core, Version(2, 1)),
+    ADD_TEST_VERSION(work_group_functions_ext, Version(2, 0)),
+    ADD_TEST_VERSION(work_group_functions_core, Version(2, 1)),
     ADD_TEST_VERSION(barrier_functions_ext, Version(2, 0)),
     ADD_TEST_VERSION(barrier_functions_core, Version(2, 1)),
     ADD_TEST_VERSION(ifp_ext, Version(2, 0)),
-    ADD_TEST_VERSION(ifp_core, Version(2, 1)),
-    ADD_TEST(subgroup_functions_extended_types),
-    ADD_TEST(subgroup_functions_non_uniform_vote),
-    ADD_TEST(subgroup_functions_non_uniform_arithmetic),
-    ADD_TEST(subgroup_functions_ballot),
-    ADD_TEST(subgroup_functions_clustered_reduce),
-    ADD_TEST(subgroup_functions_shuffle),
-    ADD_TEST(subgroup_functions_shuffle_relative)
+    ADD_TEST_VERSION(ifp_core, Version(2, 1))
 };
 
 const int test_num = ARRAY_SIZE(test_list);

diff --git a/test_conformance/subgroups/procs.h b/test_conformance/subgroups/procs.h
index d09e824..3ebb13b 100644
--- a/test_conformance/subgroups/procs.h
+++ b/test_conformance/subgroups/procs.h

@@ -37,12 +37,14 @@
                                          cl_context context,
                                          cl_command_queue queue,
                                          int num_elements);
-extern int test_subgroup_functions_ext(cl_device_id device, cl_context context,
-                                       cl_command_queue queue,
-                                       int num_elements);
-extern int test_subgroup_functions_core(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements);
+extern int test_work_group_functions_ext(cl_device_id device,
+                                         cl_context context,
+                                         cl_command_queue queue,
+                                         int num_elements);
+extern int test_work_group_functions_core(cl_device_id device,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements);
 extern int test_barrier_functions_ext(cl_device_id device, cl_context context,
                                       cl_command_queue queue, int num_elements);
 extern int test_barrier_functions_core(cl_device_id device, cl_context context,
@@ -54,31 +56,5 @@
                         cl_command_queue queue, int num_elements);
 extern int test_ifp_core(cl_device_id device, cl_context context,
                          cl_command_queue queue, int num_elements);
-extern int test_subgroup_functions_extended_types(cl_device_id device,
-                                                  cl_context context,
-                                                  cl_command_queue queue,
-                                                  int num_elements);
-extern int test_subgroup_functions_non_uniform_vote(cl_device_id device,
-                                                    cl_context context,
-                                                    cl_command_queue queue,
-                                                    int num_elements);
-extern int test_subgroup_functions_non_uniform_arithmetic(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_subgroup_functions_ballot(cl_device_id device,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements);
-extern int test_subgroup_functions_clustered_reduce(cl_device_id device,
-                                                    cl_context context,
-                                                    cl_command_queue queue,
-                                                    int num_elements);
-extern int test_subgroup_functions_shuffle(cl_device_id device,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_subgroup_functions_shuffle_relative(cl_device_id device,
-                                                    cl_context context,
-                                                    cl_command_queue queue,
-                                                    int num_elements);
+
 #endif /*_procs_h*/

diff --git a/test_conformance/subgroups/subgroup_common_kernels.cpp b/test_conformance/subgroups/subgroup_common_kernels.cpp
deleted file mode 100644
index f8b2445..0000000
--- a/test_conformance/subgroups/subgroup_common_kernels.cpp
+++ /dev/null

@@ -1,106 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "subgroup_common_kernels.h"
-
-const char* bcast_source =
-    "__kernel void test_bcast(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint which_sub_group_local_id = xy[gid].z;\n"
-    "    out[gid] = sub_group_broadcast(x, which_sub_group_local_id);\n"
-
-    "}\n";
-
-const char* redadd_source = "__kernel void test_redadd(const __global Type "
-                            "*in, __global int4 *xy, __global Type *out)\n"
-                            "{\n"
-                            "    int gid = get_global_id(0);\n"
-                            "    XY(xy,gid);\n"
-                            "    out[gid] = sub_group_reduce_add(in[gid]);\n"
-                            "}\n";
-
-const char* redmax_source = "__kernel void test_redmax(const __global Type "
-                            "*in, __global int4 *xy, __global Type *out)\n"
-                            "{\n"
-                            "    int gid = get_global_id(0);\n"
-                            "    XY(xy,gid);\n"
-                            "    out[gid] = sub_group_reduce_max(in[gid]);\n"
-                            "}\n";
-
-const char* redmin_source = "__kernel void test_redmin(const __global Type "
-                            "*in, __global int4 *xy, __global Type *out)\n"
-                            "{\n"
-                            "    int gid = get_global_id(0);\n"
-                            "    XY(xy,gid);\n"
-                            "    out[gid] = sub_group_reduce_min(in[gid]);\n"
-                            "}\n";
-
-const char* scinadd_source =
-    "__kernel void test_scinadd(const __global Type *in, __global int4 *xy, "
-    "__global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    out[gid] = sub_group_scan_inclusive_add(in[gid]);\n"
-    "}\n";
-
-const char* scinmax_source =
-    "__kernel void test_scinmax(const __global Type *in, __global int4 *xy, "
-    "__global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    out[gid] = sub_group_scan_inclusive_max(in[gid]);\n"
-    "}\n";
-
-const char* scinmin_source =
-    "__kernel void test_scinmin(const __global Type *in, __global int4 *xy, "
-    "__global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    out[gid] = sub_group_scan_inclusive_min(in[gid]);\n"
-    "}\n";
-
-const char* scexadd_source =
-    "__kernel void test_scexadd(const __global Type *in, __global int4 *xy, "
-    "__global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    out[gid] = sub_group_scan_exclusive_add(in[gid]);\n"
-    "}\n";
-
-const char* scexmax_source =
-    "__kernel void test_scexmax(const __global Type *in, __global int4 *xy, "
-    "__global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    out[gid] = sub_group_scan_exclusive_max(in[gid]);\n"
-    "}\n";
-
-const char* scexmin_source =
-    "__kernel void test_scexmin(const __global Type *in, __global int4 *xy, "
-    "__global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    out[gid] = sub_group_scan_exclusive_min(in[gid]);\n"
-    "}\n";

diff --git a/test_conformance/subgroups/subgroup_common_kernels.h b/test_conformance/subgroups/subgroup_common_kernels.h
deleted file mode 100644
index 8ae97d9..0000000
--- a/test_conformance/subgroups/subgroup_common_kernels.h
+++ /dev/null

@@ -1,32 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef SUBGROUPKERNELSOURCES_H
-#define SUBGROUPKERNELSOURCES_H
-#include "subhelpers.h"
-
-
-extern const char* bcast_source;
-extern const char* redadd_source;
-extern const char* redmax_source;
-extern const char* redmin_source;
-extern const char* scinadd_source;
-extern const char* scinmax_source;
-extern const char* scinmin_source;
-extern const char* scexadd_source;
-extern const char* scexmax_source;
-extern const char* scexmin_source;
-
-#endif

diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h
deleted file mode 100644
index b30c416..0000000
--- a/test_conformance/subgroups/subgroup_common_templates.h
+++ /dev/null

@@ -1,911 +0,0 @@
-//
-// Copyright (c) 2020 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef SUBGROUPCOMMONTEMPLATES_H
-#define SUBGROUPCOMMONTEMPLATES_H
-
-#include "typeWrappers.h"
-#include <bitset>
-#include "CL/cl_half.h"
-#include "subhelpers.h"
-
-#include <set>
-
-typedef std::bitset<128> bs128;
-static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
-                                  const std::string &mask_type,
-                                  cl_uint max_sub_group_size)
-{
-    bs128 mask128;
-    cl_uint4 mask;
-    cl_uint pos = subgroup_local_id;
-    if (mask_type == "eq") mask128.set(pos);
-    if (mask_type == "le" || mask_type == "lt")
-    {
-        for (cl_uint i = 0; i <= pos; i++) mask128.set(i);
-        if (mask_type == "lt") mask128.reset(pos);
-    }
-    if (mask_type == "ge" || mask_type == "gt")
-    {
-        for (cl_uint i = pos; i < max_sub_group_size; i++) mask128.set(i);
-        if (mask_type == "gt") mask128.reset(pos);
-    }
-
-    // convert std::bitset<128> to uint4
-    auto const uint_mask = bs128{ static_cast<unsigned long>(-1) };
-    mask.s0 = (mask128 & uint_mask).to_ulong();
-    mask128 >>= 32;
-    mask.s1 = (mask128 & uint_mask).to_ulong();
-    mask128 >>= 32;
-    mask.s2 = (mask128 & uint_mask).to_ulong();
-    mask128 >>= 32;
-    mask.s3 = (mask128 & uint_mask).to_ulong();
-
-    return mask;
-}
-
-// DESCRIPTION :
-// sub_group_broadcast - each work_item registers it's own value.
-// All work_items in subgroup takes one value from only one (any) work_item
-// sub_group_broadcast_first - same as type 0. All work_items in
-// subgroup takes only one value from only one chosen (the smallest subgroup ID)
-// work_item
-// sub_group_non_uniform_broadcast - same as type 0 but
-// only 4 work_items from subgroup enter the code (are active)
-template <typename Ty, SubgroupsBroadcastOp operation> struct BC
-{
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int i, ii, j, k, n;
-        int ng = test_params.global_workgroup_size;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        int d = ns > 100 ? 100 : ns;
-        int non_uniform_size = ng % nw;
-        ng = ng / nw;
-        int last_subgroup_size = 0;
-        ii = 0;
-
-        log_info("  sub_group_%s(%s)...\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        if (non_uniform_size)
-        {
-            log_info("  non uniform work group size mode ON\n");
-            ng++;
-        }
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            if (non_uniform_size && k == ng - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, nj, ns, nw,
-                                          last_subgroup_size);
-            }
-            for (j = 0; j < nj; ++j)
-            { // for each subgroup
-                ii = j * ns;
-                if (last_subgroup_size && j == nj - 1)
-                {
-                    n = last_subgroup_size;
-                }
-                else
-                {
-                    n = ii + ns > nw ? nw - ii : ns;
-                }
-                int bcast_if = 0;
-                int bcast_elseif = 0;
-                int bcast_index = (int)(genrand_int32(gMTdata) & 0x7fffffff)
-                    % (d > n ? n : d);
-                // l - calculate subgroup local id from which value will be
-                // broadcasted (one the same value for whole subgroup)
-                if (operation != SubgroupsBroadcastOp::broadcast)
-                {
-                    // reduce brodcasting index in case of non_uniform and
-                    // last workgroup last subgroup
-                    if (last_subgroup_size && j == nj - 1
-                        && last_subgroup_size < NR_OF_ACTIVE_WORK_ITEMS)
-                    {
-                        bcast_if = bcast_index % last_subgroup_size;
-                        bcast_elseif = bcast_if;
-                    }
-                    else
-                    {
-                        bcast_if = bcast_index % NR_OF_ACTIVE_WORK_ITEMS;
-                        bcast_elseif = NR_OF_ACTIVE_WORK_ITEMS
-                            + bcast_index % (n - NR_OF_ACTIVE_WORK_ITEMS);
-                    }
-                }
-
-                for (i = 0; i < n; ++i)
-                {
-                    if (operation == SubgroupsBroadcastOp::broadcast)
-                    {
-                        int midx = 4 * ii + 4 * i + 2;
-                        m[midx] = (cl_int)bcast_index;
-                    }
-                    else
-                    {
-                        if (i < NR_OF_ACTIVE_WORK_ITEMS)
-                        {
-                            // index of the third
-                            // element int the vector.
-                            int midx = 4 * ii + 4 * i + 2;
-                            // storing information about
-                            // broadcasting index -
-                            // earlier calculated
-                            m[midx] = (cl_int)bcast_if;
-                        }
-                        else
-                        { // index of the third
-                          // element int the vector.
-                            int midx = 4 * ii + 4 * i + 3;
-                            m[midx] = (cl_int)bcast_elseif;
-                        }
-                    }
-
-                    // calculate value for broadcasting
-                    cl_ulong number = genrand_int64(gMTdata);
-                    set_value(t[ii + i], number);
-                }
-            }
-            // Now map into work group using map from device
-            for (j = 0; j < nw; ++j)
-            { // for each element in work_group
-                // calculate index as number of subgroup
-                // plus subgroup local id
-                x[j] = t[j];
-            }
-            x += nw;
-            m += 4 * nw;
-        }
-    }
-
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, l, n;
-        int ng = test_params.global_workgroup_size;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        Ty tr, rr;
-        int non_uniform_size = ng % nw;
-        ng = ng / nw;
-        int last_subgroup_size = 0;
-        if (non_uniform_size) ng++;
-
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            if (non_uniform_size && k == ng - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, nj, ns, nw,
-                                          last_subgroup_size);
-            }
-            for (j = 0; j < nw; ++j)
-            { // inside the work_group
-                mx[j] = x[j]; // read host inputs for work_group
-                my[j] = y[j]; // read device outputs for work_group
-            }
-
-            for (j = 0; j < nj; ++j)
-            { // for each subgroup
-                ii = j * ns;
-                if (last_subgroup_size && j == nj - 1)
-                {
-                    n = last_subgroup_size;
-                }
-                else
-                {
-                    n = ii + ns > nw ? nw - ii : ns;
-                }
-
-                // Check result
-                if (operation == SubgroupsBroadcastOp::broadcast_first)
-                {
-                    int lowest_active_id = -1;
-                    for (i = 0; i < n; ++i)
-                    {
-
-                        lowest_active_id = i < NR_OF_ACTIVE_WORK_ITEMS
-                            ? 0
-                            : NR_OF_ACTIVE_WORK_ITEMS;
-                        //  findout if broadcasted
-                        //  value is the same
-                        tr = mx[ii + lowest_active_id];
-                        //  findout if broadcasted to all
-                        rr = my[ii + i];
-
-                        if (!compare(rr, tr))
-                        {
-                            log_error(
-                                "ERROR: sub_group_broadcast_first(%s) "
-                                "mismatch "
-                                "for local id %d in sub group %d in group "
-                                "%d\n",
-                                TypeManager<Ty>::name(), i, j, k);
-                            return TEST_FAIL;
-                        }
-                    }
-                }
-                else
-                {
-                    for (i = 0; i < n; ++i)
-                    {
-                        if (operation == SubgroupsBroadcastOp::broadcast)
-                        {
-                            int midx = 4 * ii + 4 * i + 2;
-                            l = (int)m[midx];
-                            tr = mx[ii + l];
-                        }
-                        else
-                        {
-                            if (i < NR_OF_ACTIVE_WORK_ITEMS)
-                            { // take index of array where info
-                              // which work_item will be
-                              // broadcast its value is stored
-                                int midx = 4 * ii + 4 * i + 2;
-                                // take subgroup local id of
-                                // this work_item
-                                l = (int)m[midx];
-                                // take value generated on host
-                                // for this work_item
-                                tr = mx[ii + l];
-                            }
-                            else
-                            {
-                                int midx = 4 * ii + 4 * i + 3;
-                                l = (int)m[midx];
-                                tr = mx[ii + l];
-                            }
-                        }
-                        rr = my[ii + i]; // read device outputs for
-                                         // work_item in the subgroup
-
-                        if (!compare(rr, tr))
-                        {
-                            log_error("ERROR: sub_group_%s(%s) "
-                                      "mismatch for local id %d in sub "
-                                      "group %d in group %d - got %lu "
-                                      "expected %lu\n",
-                                      operation_names(operation),
-                                      TypeManager<Ty>::name(), i, j, k, rr, tr);
-                            return TEST_FAIL;
-                        }
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-        log_info("  sub_group_%s(%s)... passed\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-};
-
-static float to_float(subgroups::cl_half x) { return cl_half_to_float(x.data); }
-
-static subgroups::cl_half to_half(float x)
-{
-    subgroups::cl_half value;
-    value.data = cl_half_from_float(x, CL_HALF_RTE);
-    return value;
-}
-
-// for integer types
-template <typename Ty> inline Ty calculate(Ty a, Ty b, ArithmeticOp operation)
-{
-    switch (operation)
-    {
-        case ArithmeticOp::add_: return a + b;
-        case ArithmeticOp::max_: return a > b ? a : b;
-        case ArithmeticOp::min_: return a < b ? a : b;
-        case ArithmeticOp::mul_: return a * b;
-        case ArithmeticOp::and_: return a & b;
-        case ArithmeticOp::or_: return a | b;
-        case ArithmeticOp::xor_: return a ^ b;
-        case ArithmeticOp::logical_and: return a && b;
-        case ArithmeticOp::logical_or: return a || b;
-        case ArithmeticOp::logical_xor: return !a ^ !b;
-        default: log_error("Unknown operation request"); break;
-    }
-    return 0;
-}
-// Specialize for floating points.
-template <>
-inline cl_double calculate(cl_double a, cl_double b, ArithmeticOp operation)
-{
-    switch (operation)
-    {
-        case ArithmeticOp::add_: {
-            return a + b;
-        }
-        case ArithmeticOp::max_: {
-            return a > b ? a : b;
-        }
-        case ArithmeticOp::min_: {
-            return a < b ? a : b;
-        }
-        case ArithmeticOp::mul_: {
-            return a * b;
-        }
-        default: log_error("Unknown operation request"); break;
-    }
-    return 0;
-}
-
-template <>
-inline cl_float calculate(cl_float a, cl_float b, ArithmeticOp operation)
-{
-    switch (operation)
-    {
-        case ArithmeticOp::add_: {
-            return a + b;
-        }
-        case ArithmeticOp::max_: {
-            return a > b ? a : b;
-        }
-        case ArithmeticOp::min_: {
-            return a < b ? a : b;
-        }
-        case ArithmeticOp::mul_: {
-            return a * b;
-        }
-        default: log_error("Unknown operation request"); break;
-    }
-    return 0;
-}
-
-template <>
-inline subgroups::cl_half calculate(subgroups::cl_half a, subgroups::cl_half b,
-                                    ArithmeticOp operation)
-{
-    switch (operation)
-    {
-        case ArithmeticOp::add_: return to_half(to_float(a) + to_float(b));
-        case ArithmeticOp::max_:
-            return to_float(a) > to_float(b) || is_half_nan(b.data) ? a : b;
-        case ArithmeticOp::min_:
-            return to_float(a) < to_float(b) || is_half_nan(b.data) ? a : b;
-        case ArithmeticOp::mul_: return to_half(to_float(a) * to_float(b));
-        default: log_error("Unknown operation request"); break;
-    }
-    return to_half(0);
-}
-
-template <typename Ty> bool is_floating_point()
-{
-    return std::is_floating_point<Ty>::value
-        || std::is_same<Ty, subgroups::cl_half>::value;
-}
-
-template <typename Ty, ArithmeticOp operation>
-void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
-{
-    int nj = (nw + ns - 1) / ns;
-
-    for (int k = 0; k < ng; ++k)
-    {
-        for (int j = 0; j < nj; ++j)
-        {
-            int ii = j * ns;
-            int n = ii + ns > nw ? nw - ii : ns;
-
-            for (int i = 0; i < n; ++i)
-            {
-                cl_ulong out_value;
-                double y;
-                if (operation == ArithmeticOp::mul_
-                    || operation == ArithmeticOp::add_)
-                {
-                    // work around to avoid overflow, do not use 0 for
-                    // multiplication
-                    out_value = (genrand_int32(gMTdata) % 4) + 1;
-                }
-                else
-                {
-                    out_value = genrand_int64(gMTdata) % (32 * n);
-                    if ((operation == ArithmeticOp::logical_and
-                         || operation == ArithmeticOp::logical_or
-                         || operation == ArithmeticOp::logical_xor)
-                        && ((out_value >> 32) & 1) == 0)
-                        out_value = 0; // increase probability of false
-                }
-                set_value(t[ii + i], out_value);
-            }
-        }
-
-        // Now map into work group using map from device
-        for (int j = 0; j < nw; ++j)
-        {
-            x[j] = t[j];
-        }
-
-        x += nw;
-        m += 4 * nw;
-    }
-}
-
-template <typename Ty, ShuffleOp operation> struct SHF
-{
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int i, ii, j, k, l, n, delta;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        int d = ns > 100 ? 100 : ns;
-        ii = 0;
-        ng = ng / nw;
-        log_info("  sub_group_%s(%s)...\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            for (j = 0; j < nj; ++j)
-            { // for each subgroup
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-                for (i = 0; i < n; ++i)
-                {
-                    int midx = 4 * ii + 4 * i + 2;
-                    l = (int)(genrand_int32(gMTdata) & 0x7fffffff)
-                        % (d > n ? n : d);
-                    switch (operation)
-                    {
-                        case ShuffleOp::shuffle:
-                        case ShuffleOp::shuffle_xor:
-                            // storing information about shuffle index
-                            m[midx] = (cl_int)l;
-                            break;
-                        case ShuffleOp::shuffle_up:
-                            delta = l; // calculate delta for shuffle up
-                            if (i - delta < 0)
-                            {
-                                delta = i;
-                            }
-                            m[midx] = (cl_int)delta;
-                            break;
-                        case ShuffleOp::shuffle_down:
-                            delta = l; // calculate delta for shuffle down
-                            if (i + delta >= n)
-                            {
-                                delta = n - 1 - i;
-                            }
-                            m[midx] = (cl_int)delta;
-                            break;
-                        default: break;
-                    }
-                    cl_ulong number = genrand_int64(gMTdata);
-                    set_value(t[ii + i], number);
-                }
-            }
-            // Now map into work group using map from device
-            for (j = 0; j < nw; ++j)
-            { // for each element in work_group
-                x[j] = t[j];
-            }
-            x += nw;
-            m += 4 * nw;
-        }
-    }
-
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, l, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        Ty tr, rr;
-        ng = ng / nw;
-
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            for (j = 0; j < nw; ++j)
-            { // inside the work_group
-                mx[j] = x[j]; // read host inputs for work_group
-                my[j] = y[j]; // read device outputs for work_group
-            }
-
-            for (j = 0; j < nj; ++j)
-            { // for each subgroup
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-
-                for (i = 0; i < n; ++i)
-                { // inside the subgroup
-                  // shuffle index storage
-                    int midx = 4 * ii + 4 * i + 2;
-                    l = (int)m[midx];
-                    rr = my[ii + i];
-                    switch (operation)
-                    {
-                        // shuffle basic - treat l as index
-                        case ShuffleOp::shuffle: tr = mx[ii + l]; break;
-                        // shuffle up - treat l as delta
-                        case ShuffleOp::shuffle_up: tr = mx[ii + i - l]; break;
-                        // shuffle up - treat l as delta
-                        case ShuffleOp::shuffle_down:
-                            tr = mx[ii + i + l];
-                            break;
-                        // shuffle xor - treat l as mask
-                        case ShuffleOp::shuffle_xor:
-                            tr = mx[ii + (i ^ l)];
-                            break;
-                        default: break;
-                    }
-
-                    if (!compare(rr, tr))
-                    {
-                        log_error("ERROR: sub_group_%s(%s) mismatch for "
-                                  "local id %d in sub group %d in group %d\n",
-                                  operation_names(operation),
-                                  TypeManager<Ty>::name(), i, j, k);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-        log_info("  sub_group_%s(%s)... passed\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-};
-
-template <typename Ty, ArithmeticOp operation> struct SCEX_NU
-{
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        ng = ng / nw;
-        std::string func_name;
-        work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive"
-                        : func_name = "sub_group_scan_exclusive";
-        log_info("  %s_%s(%s)...\n", func_name.c_str(),
-                 operation_names(operation), TypeManager<Ty>::name());
-        log_info("  test params: global size = %d local size = %d subgroups "
-                 "size = %d work item mask = 0x%x \n",
-                 test_params.global_workgroup_size, nw, ns, work_items_mask);
-        genrand<Ty, operation>(x, t, m, ns, nw, ng);
-    }
-
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        int nj = (nw + ns - 1) / ns;
-        Ty tr, rr;
-        ng = ng / nw;
-
-        std::string func_name;
-        work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive"
-                        : func_name = "sub_group_scan_exclusive";
-
-        uint32_t use_work_items_mask;
-        // for uniform case take into consideration all workitems
-        use_work_items_mask = !work_items_mask ? 0xFFFFFFFF : work_items_mask;
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            // Map to array indexed to array indexed by local ID and sub group
-            for (j = 0; j < nw; ++j)
-            { // inside the work_group
-                mx[j] = x[j]; // read host inputs for work_group
-                my[j] = y[j]; // read device outputs for work_group
-            }
-            for (j = 0; j < nj; ++j)
-            {
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-                std::set<int> active_work_items;
-                for (i = 0; i < n; ++i)
-                {
-                    uint32_t check_work_item = 1 << (i % 32);
-                    if (use_work_items_mask & check_work_item)
-                    {
-                        active_work_items.insert(i);
-                    }
-                }
-                if (active_work_items.empty())
-                {
-                    log_info("  No acitve workitems in workgroup id = %d "
-                             "subgroup id = %d - no calculation\n",
-                             k, j);
-                    continue;
-                }
-                else if (active_work_items.size() == 1)
-                {
-                    log_info("  One active workitem in workgroup id = %d "
-                             "subgroup id = %d - no calculation\n",
-                             k, j);
-                    continue;
-                }
-                else
-                {
-                    tr = TypeManager<Ty>::identify_limits(operation);
-                    int idx = 0;
-                    for (const int &active_work_item : active_work_items)
-                    {
-                        rr = my[ii + active_work_item];
-                        if (idx == 0) continue;
-
-                        if (!compare_ordered(rr, tr))
-                        {
-                            log_error(
-                                "ERROR: %s_%s(%s) "
-                                "mismatch for local id %d in sub group %d in "
-                                "group %d Expected: %d Obtained: %d\n",
-                                func_name.c_str(), operation_names(operation),
-                                TypeManager<Ty>::name(), i, j, k, tr, rr);
-                            return TEST_FAIL;
-                        }
-                        tr = calculate<Ty>(tr, mx[ii + active_work_item],
-                                           operation);
-                        idx++;
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-
-        log_info("  %s_%s(%s)... passed\n", func_name.c_str(),
-                 operation_names(operation), TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-};
-
-// Test for scan inclusive non uniform functions
-template <typename Ty, ArithmeticOp operation> struct SCIN_NU
-{
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        ng = ng / nw;
-        std::string func_name;
-        work_items_mask ? func_name = "sub_group_non_uniform_scan_inclusive"
-                        : func_name = "sub_group_scan_inclusive";
-
-        genrand<Ty, operation>(x, t, m, ns, nw, ng);
-        log_info("  %s_%s(%s)...\n", func_name.c_str(),
-                 operation_names(operation), TypeManager<Ty>::name());
-        log_info("  test params: global size = %d local size = %d subgroups "
-                 "size = %d work item mask = 0x%x \n",
-                 test_params.global_workgroup_size, nw, ns, work_items_mask);
-    }
-
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        int nj = (nw + ns - 1) / ns;
-        Ty tr, rr;
-        ng = ng / nw;
-
-        std::string func_name;
-        work_items_mask ? func_name = "sub_group_non_uniform_scan_inclusive"
-                        : func_name = "sub_group_scan_inclusive";
-
-        uint32_t use_work_items_mask;
-        // for uniform case take into consideration all workitems
-        use_work_items_mask = !work_items_mask ? 0xFFFFFFFF : work_items_mask;
-        // std::bitset<32> mask32(use_work_items_mask);
-        // for (int k) mask32.count();
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            // Map to array indexed to array indexed by local ID and sub group
-            for (j = 0; j < nw; ++j)
-            { // inside the work_group
-                mx[j] = x[j]; // read host inputs for work_group
-                my[j] = y[j]; // read device outputs for work_group
-            }
-            for (j = 0; j < nj; ++j)
-            {
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-                std::set<int> active_work_items;
-                int catch_frist_active = -1;
-
-                for (i = 0; i < n; ++i)
-                {
-                    uint32_t check_work_item = 1 << (i % 32);
-                    if (use_work_items_mask & check_work_item)
-                    {
-                        if (catch_frist_active == -1)
-                        {
-                            catch_frist_active = i;
-                        }
-                        active_work_items.insert(i);
-                    }
-                }
-                if (active_work_items.empty())
-                {
-                    log_info("  No acitve workitems in workgroup id = %d "
-                             "subgroup id = %d - no calculation\n",
-                             k, j);
-                    continue;
-                }
-                else
-                {
-                    tr = TypeManager<Ty>::identify_limits(operation);
-                    for (const int &active_work_item : active_work_items)
-                    {
-                        rr = my[ii + active_work_item];
-                        if (active_work_items.size() == 1)
-                        {
-                            tr = mx[ii + catch_frist_active];
-                        }
-                        else
-                        {
-                            tr = calculate<Ty>(tr, mx[ii + active_work_item],
-                                               operation);
-                        }
-                        if (!compare_ordered<Ty>(rr, tr))
-                        {
-                            log_error(
-                                "ERROR: %s_%s(%s) "
-                                "mismatch for local id %d in sub group %d "
-                                "in "
-                                "group %d Expected: %d Obtained: %d\n",
-                                func_name.c_str(), operation_names(operation),
-                                TypeManager<Ty>::name(), active_work_item, j, k,
-                                tr, rr);
-                            return TEST_FAIL;
-                        }
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-
-        log_info("  %s_%s(%s)... passed\n", func_name.c_str(),
-                 operation_names(operation), TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-};
-
-// Test for reduce non uniform functions
-template <typename Ty, ArithmeticOp operation> struct RED_NU
-{
-
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        ng = ng / nw;
-        std::string func_name;
-
-        work_items_mask ? func_name = "sub_group_non_uniform_reduce"
-                        : func_name = "sub_group_reduce";
-        log_info("  %s_%s(%s)...\n", func_name.c_str(),
-                 operation_names(operation), TypeManager<Ty>::name());
-        log_info("  test params: global size = %d local size = %d subgroups "
-                 "size = %d work item mask = 0x%x \n",
-                 test_params.global_workgroup_size, nw, ns, work_items_mask);
-        genrand<Ty, operation>(x, t, m, ns, nw, ng);
-    }
-
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        int nj = (nw + ns - 1) / ns;
-        ng = ng / nw;
-        Ty tr, rr;
-
-        std::string func_name;
-        work_items_mask ? func_name = "sub_group_non_uniform_reduce"
-                        : func_name = "sub_group_reduce";
-
-        for (k = 0; k < ng; ++k)
-        {
-            // Map to array indexed to array indexed by local ID and sub
-            // group
-            for (j = 0; j < nw; ++j)
-            {
-                mx[j] = x[j];
-                my[j] = y[j];
-            }
-
-            uint32_t use_work_items_mask;
-            use_work_items_mask =
-                !work_items_mask ? 0xFFFFFFFF : work_items_mask;
-
-            for (j = 0; j < nj; ++j)
-            {
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-                std::set<int> active_work_items;
-                int catch_frist_active = -1;
-                for (i = 0; i < n; ++i)
-                {
-                    uint32_t check_work_item = 1 << (i % 32);
-                    if (use_work_items_mask & check_work_item)
-                    {
-                        if (catch_frist_active == -1)
-                        {
-                            catch_frist_active = i;
-                            tr = mx[ii + i];
-                            active_work_items.insert(i);
-                            continue;
-                        }
-                        active_work_items.insert(i);
-                        tr = calculate<Ty>(tr, mx[ii + i], operation);
-                    }
-                }
-
-                if (active_work_items.empty())
-                {
-                    log_info("  No acitve workitems in workgroup id = %d "
-                             "subgroup id = %d - no calculation\n",
-                             k, j);
-                    continue;
-                }
-
-                for (const int &active_work_item : active_work_items)
-                {
-                    rr = my[ii + active_work_item];
-                    if (!compare_ordered<Ty>(rr, tr))
-                    {
-                        log_error("ERROR: %s_%s(%s) "
-                                  "mismatch for local id %d in sub group %d in "
-                                  "group %d Expected: %d Obtained: %d\n",
-                                  func_name.c_str(), operation_names(operation),
-                                  TypeManager<Ty>::name(), active_work_item, j,
-                                  k, tr, rr);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-
-        log_info("  %s_%s(%s)... passed\n", func_name.c_str(),
-                 operation_names(operation), TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-};
-
-#endif

diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h
index 93673b3..6e84ccb 100644
--- a/test_conformance/subgroups/subhelpers.h
+++ b/test_conformance/subgroups/subhelpers.h

@@ -19,176 +19,13 @@
 #include "testHarness.h"
 #include "kernelHelpers.h"
 #include "typeWrappers.h"
-#include "imageHelpers.h"
 
 #include <limits>
 #include <vector>
-#include <type_traits>
-
-#define NR_OF_ACTIVE_WORK_ITEMS 4
-
-extern MTdata gMTdata;
-
-struct WorkGroupParams
-{
-    WorkGroupParams(size_t gws, size_t lws,
-                    const std::vector<std::string> &req_ext = {},
-                    const std::vector<uint32_t> &all_wim = {})
-        : global_workgroup_size(gws), local_workgroup_size(lws),
-          required_extensions(req_ext), all_work_item_masks(all_wim)
-    {
-        subgroup_size = 0;
-        work_items_mask = 0;
-        use_core_subgroups = true;
-        dynsc = 0;
-    }
-    size_t global_workgroup_size;
-    size_t local_workgroup_size;
-    size_t subgroup_size;
-    uint32_t work_items_mask;
-    int dynsc;
-    bool use_core_subgroups;
-    std::vector<std::string> required_extensions;
-    std::vector<uint32_t> all_work_item_masks;
-};
-
-enum class SubgroupsBroadcastOp
-{
-    broadcast,
-    broadcast_first,
-    non_uniform_broadcast
-};
-
-enum class NonUniformVoteOp
-{
-    elect,
-    all,
-    any,
-    all_equal
-};
-
-enum class BallotOp
-{
-    ballot,
-    inverse_ballot,
-    ballot_bit_extract,
-    ballot_bit_count,
-    ballot_inclusive_scan,
-    ballot_exclusive_scan,
-    ballot_find_lsb,
-    ballot_find_msb,
-    eq_mask,
-    ge_mask,
-    gt_mask,
-    le_mask,
-    lt_mask,
-};
-
-enum class ShuffleOp
-{
-    shuffle,
-    shuffle_up,
-    shuffle_down,
-    shuffle_xor
-};
-
-enum class ArithmeticOp
-{
-    add_,
-    max_,
-    min_,
-    mul_,
-    and_,
-    or_,
-    xor_,
-    logical_and,
-    logical_or,
-    logical_xor
-};
-
-static const char *const operation_names(ArithmeticOp operation)
-{
-    switch (operation)
-    {
-        case ArithmeticOp::add_: return "add";
-        case ArithmeticOp::max_: return "max";
-        case ArithmeticOp::min_: return "min";
-        case ArithmeticOp::mul_: return "mul";
-        case ArithmeticOp::and_: return "and";
-        case ArithmeticOp::or_: return "or";
-        case ArithmeticOp::xor_: return "xor";
-        case ArithmeticOp::logical_and: return "logical_and";
-        case ArithmeticOp::logical_or: return "logical_or";
-        case ArithmeticOp::logical_xor: return "logical_xor";
-        default: log_error("Unknown operation request"); break;
-    }
-    return "";
-}
-
-static const char *const operation_names(BallotOp operation)
-{
-    switch (operation)
-    {
-        case BallotOp::ballot: return "ballot";
-        case BallotOp::inverse_ballot: return "inverse_ballot";
-        case BallotOp::ballot_bit_extract: return "bit_extract";
-        case BallotOp::ballot_bit_count: return "bit_count";
-        case BallotOp::ballot_inclusive_scan: return "inclusive_scan";
-        case BallotOp::ballot_exclusive_scan: return "exclusive_scan";
-        case BallotOp::ballot_find_lsb: return "find_lsb";
-        case BallotOp::ballot_find_msb: return "find_msb";
-        case BallotOp::eq_mask: return "eq";
-        case BallotOp::ge_mask: return "ge";
-        case BallotOp::gt_mask: return "gt";
-        case BallotOp::le_mask: return "le";
-        case BallotOp::lt_mask: return "lt";
-        default: log_error("Unknown operation request"); break;
-    }
-    return "";
-}
-
-static const char *const operation_names(ShuffleOp operation)
-{
-    switch (operation)
-    {
-        case ShuffleOp::shuffle: return "shuffle";
-        case ShuffleOp::shuffle_up: return "shuffle_up";
-        case ShuffleOp::shuffle_down: return "shuffle_down";
-        case ShuffleOp::shuffle_xor: return "shuffle_xor";
-        default: log_error("Unknown operation request"); break;
-    }
-    return "";
-}
-
-static const char *const operation_names(NonUniformVoteOp operation)
-{
-    switch (operation)
-    {
-        case NonUniformVoteOp::all: return "all";
-        case NonUniformVoteOp::all_equal: return "all_equal";
-        case NonUniformVoteOp::any: return "any";
-        case NonUniformVoteOp::elect: return "elect";
-        default: log_error("Unknown operation request"); break;
-    }
-    return "";
-}
-
-static const char *const operation_names(SubgroupsBroadcastOp operation)
-{
-    switch (operation)
-    {
-        case SubgroupsBroadcastOp::broadcast: return "broadcast";
-        case SubgroupsBroadcastOp::broadcast_first: return "broadcast_first";
-        case SubgroupsBroadcastOp::non_uniform_broadcast:
-            return "non_uniform_broadcast";
-        default: log_error("Unknown operation request"); break;
-    }
-    return "";
-}
 
 class subgroupsAPI {
 public:
-    subgroupsAPI(cl_platform_id platform, bool use_core_subgroups)
+    subgroupsAPI(cl_platform_id platform, bool useCoreSubgroups)
     {
         static_assert(CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE
                           == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
@@ -196,7 +33,7 @@
         static_assert(CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
                           == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
                       "Enums have to be the same");
-        if (use_core_subgroups)
+        if (useCoreSubgroups)
         {
             _clGetKernelSubGroupInfo_ptr = &clGetKernelSubGroupInfo;
             clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfo";
@@ -219,76 +56,163 @@
     clGetKernelSubGroupInfoKHR_fn _clGetKernelSubGroupInfo_ptr;
 };
 
-// Need to defined custom type for vector size = 3 and half type. This is
-// because of 3-component types are otherwise indistinguishable from the
-// 4-component types, and because the half type is indistinguishable from some
-// other 16-bit type (ushort)
-namespace subgroups {
-struct cl_char3
+// Some template helpers
+template <typename Ty> struct TypeName;
+template <> struct TypeName<cl_half>
 {
-    ::cl_char3 data;
+    static const char *val() { return "half"; }
 };
-struct cl_uchar3
+template <> struct TypeName<cl_uint>
 {
-    ::cl_uchar3 data;
+    static const char *val() { return "uint"; }
 };
-struct cl_short3
+template <> struct TypeName<cl_int>
 {
-    ::cl_short3 data;
+    static const char *val() { return "int"; }
 };
-struct cl_ushort3
+template <> struct TypeName<cl_ulong>
 {
-    ::cl_ushort3 data;
+    static const char *val() { return "ulong"; }
 };
-struct cl_int3
+template <> struct TypeName<cl_long>
 {
-    ::cl_int3 data;
+    static const char *val() { return "long"; }
 };
-struct cl_uint3
+template <> struct TypeName<float>
 {
-    ::cl_uint3 data;
+    static const char *val() { return "float"; }
 };
-struct cl_long3
+template <> struct TypeName<double>
 {
-    ::cl_long3 data;
+    static const char *val() { return "double"; }
 };
-struct cl_ulong3
+
+template <typename Ty> struct TypeDef;
+template <> struct TypeDef<cl_half>
 {
-    ::cl_ulong3 data;
+    static const char *val() { return "typedef half Type;\n"; }
 };
-struct cl_float3
+template <> struct TypeDef<cl_uint>
 {
-    ::cl_float3 data;
+    static const char *val() { return "typedef uint Type;\n"; }
 };
-struct cl_double3
+template <> struct TypeDef<cl_int>
 {
-    ::cl_double3 data;
+    static const char *val() { return "typedef int Type;\n"; }
 };
-struct cl_half
+template <> struct TypeDef<cl_ulong>
 {
-    ::cl_half data;
+    static const char *val() { return "typedef ulong Type;\n"; }
 };
-struct cl_half2
+template <> struct TypeDef<cl_long>
 {
-    ::cl_half2 data;
+    static const char *val() { return "typedef long Type;\n"; }
 };
-struct cl_half3
+template <> struct TypeDef<float>
 {
-    ::cl_half3 data;
+    static const char *val() { return "typedef float Type;\n"; }
 };
-struct cl_half4
+template <> struct TypeDef<double>
 {
-    ::cl_half4 data;
+    static const char *val() { return "typedef double Type;\n"; }
 };
-struct cl_half8
+
+template <typename Ty, int Which> struct TypeIdentity;
+// template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return
+// (cl_half)0.0; } }; template <> struct TypeIdentity<cl_half,0> { static
+// cl_half val() { return -(cl_half)65536.0; } }; template <> struct
+// TypeIdentity<cl_half,0> { static cl_half val() { return (cl_half)65536.0; }
+// };
+
+template <> struct TypeIdentity<cl_uint, 0>
 {
-    ::cl_half8 data;
+    static cl_uint val() { return (cl_uint)0; }
 };
-struct cl_half16
+template <> struct TypeIdentity<cl_uint, 1>
 {
-    ::cl_half16 data;
+    static cl_uint val() { return (cl_uint)0; }
 };
-}
+template <> struct TypeIdentity<cl_uint, 2>
+{
+    static cl_uint val() { return (cl_uint)0xffffffff; }
+};
+
+template <> struct TypeIdentity<cl_int, 0>
+{
+    static cl_int val() { return (cl_int)0; }
+};
+template <> struct TypeIdentity<cl_int, 1>
+{
+    static cl_int val() { return (cl_int)0x80000000; }
+};
+template <> struct TypeIdentity<cl_int, 2>
+{
+    static cl_int val() { return (cl_int)0x7fffffff; }
+};
+
+template <> struct TypeIdentity<cl_ulong, 0>
+{
+    static cl_ulong val() { return (cl_ulong)0; }
+};
+template <> struct TypeIdentity<cl_ulong, 1>
+{
+    static cl_ulong val() { return (cl_ulong)0; }
+};
+template <> struct TypeIdentity<cl_ulong, 2>
+{
+    static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL; }
+};
+
+template <> struct TypeIdentity<cl_long, 0>
+{
+    static cl_long val() { return (cl_long)0; }
+};
+template <> struct TypeIdentity<cl_long, 1>
+{
+    static cl_long val() { return (cl_long)0x8000000000000000ULL; }
+};
+template <> struct TypeIdentity<cl_long, 2>
+{
+    static cl_long val() { return (cl_long)0x7fffffffffffffffULL; }
+};
+
+
+template <> struct TypeIdentity<float, 0>
+{
+    static float val() { return 0.F; }
+};
+template <> struct TypeIdentity<float, 1>
+{
+    static float val() { return -std::numeric_limits<float>::infinity(); }
+};
+template <> struct TypeIdentity<float, 2>
+{
+    static float val() { return std::numeric_limits<float>::infinity(); }
+};
+
+template <> struct TypeIdentity<double, 0>
+{
+    static double val() { return 0.L; }
+};
+
+template <> struct TypeIdentity<double, 1>
+{
+    static double val() { return -std::numeric_limits<double>::infinity(); }
+};
+template <> struct TypeIdentity<double, 2>
+{
+    static double val() { return std::numeric_limits<double>::infinity(); }
+};
+
+template <typename Ty> struct TypeCheck;
+template <> struct TypeCheck<cl_uint>
+{
+    static bool val(cl_device_id) { return true; }
+};
+template <> struct TypeCheck<cl_int>
+{
+    static bool val(cl_device_id) { return true; }
+};
 
 static bool int64_ok(cl_device_id device)
 {
@@ -309,860 +233,43 @@
     return true;
 }
 
-static bool double_ok(cl_device_id device)
+template <> struct TypeCheck<cl_ulong>
 {
-    int error;
-    cl_device_fp_config c;
-    error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c),
-                            (void *)&c, NULL);
-    if (error)
+    static bool val(cl_device_id device) { return int64_ok(device); }
+};
+template <> struct TypeCheck<cl_long>
+{
+    static bool val(cl_device_id device) { return int64_ok(device); }
+};
+template <> struct TypeCheck<cl_float>
+{
+    static bool val(cl_device_id) { return true; }
+};
+template <> struct TypeCheck<cl_half>
+{
+    static bool val(cl_device_id device)
     {
-        log_info("clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n");
-        return false;
+        return is_extension_available(device, "cl_khr_fp16");
     }
-    return c != 0;
-}
-
-static bool half_ok(cl_device_id device)
+};
+template <> struct TypeCheck<double>
 {
-    int error;
-    cl_device_fp_config c;
-    error = clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG, sizeof(c),
-                            (void *)&c, NULL);
-    if (error)
+    static bool val(cl_device_id device)
     {
-        log_info("clGetDeviceInfo failed with CL_DEVICE_HALF_FP_CONFIG\n");
-        return false;
-    }
-    return c != 0;
-}
-
-template <typename Ty> struct CommonTypeManager
-{
-
-    static const char *name() { return ""; }
-    static const char *add_typedef() { return "\n"; }
-    typedef std::false_type is_vector_type;
-    typedef std::false_type is_sb_vector_size3;
-    typedef std::false_type is_sb_vector_type;
-    typedef std::false_type is_sb_scalar_type;
-    static const bool type_supported(cl_device_id) { return true; }
-    static const Ty identify_limits(ArithmeticOp operation)
-    {
-        switch (operation)
+        int error;
+        cl_device_fp_config c;
+        error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c),
+                                (void *)&c, NULL);
+        if (error)
         {
-            case ArithmeticOp::add_: return (Ty)0;
-            case ArithmeticOp::max_: return (std::numeric_limits<Ty>::min)();
-            case ArithmeticOp::min_: return (std::numeric_limits<Ty>::max)();
-            case ArithmeticOp::mul_: return (Ty)1;
-            case ArithmeticOp::and_: return (Ty)~0;
-            case ArithmeticOp::or_: return (Ty)0;
-            case ArithmeticOp::xor_: return (Ty)0;
-            default: log_error("Unknown operation request"); break;
-        }
-        return 0;
-    }
-};
-
-template <typename> struct TypeManager;
-
-template <> struct TypeManager<cl_int> : public CommonTypeManager<cl_int>
-{
-    static const char *name() { return "int"; }
-    static const char *add_typedef() { return "typedef int Type;\n"; }
-    static cl_int identify_limits(ArithmeticOp operation)
-    {
-        switch (operation)
-        {
-            case ArithmeticOp::add_: return (cl_int)0;
-            case ArithmeticOp::max_:
-                return (std::numeric_limits<cl_int>::min)();
-            case ArithmeticOp::min_:
-                return (std::numeric_limits<cl_int>::max)();
-            case ArithmeticOp::mul_: return (cl_int)1;
-            case ArithmeticOp::and_: return (cl_int)~0;
-            case ArithmeticOp::or_: return (cl_int)0;
-            case ArithmeticOp::xor_: return (cl_int)0;
-            case ArithmeticOp::logical_and: return (cl_int)1;
-            case ArithmeticOp::logical_or: return (cl_int)0;
-            case ArithmeticOp::logical_xor: return (cl_int)0;
-            default: log_error("Unknown operation request"); break;
-        }
-        return 0;
-    }
-};
-template <> struct TypeManager<cl_int2> : public CommonTypeManager<cl_int2>
-{
-    static const char *name() { return "int2"; }
-    static const char *add_typedef() { return "typedef int2 Type;\n"; }
-    typedef std::true_type is_vector_type;
-    using scalar_type = cl_int;
-};
-template <>
-struct TypeManager<subgroups::cl_int3>
-    : public CommonTypeManager<subgroups::cl_int3>
-{
-    static const char *name() { return "int3"; }
-    static const char *add_typedef() { return "typedef int3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_int;
-};
-template <> struct TypeManager<cl_int4> : public CommonTypeManager<cl_int4>
-{
-    static const char *name() { return "int4"; }
-    static const char *add_typedef() { return "typedef int4 Type;\n"; }
-    using scalar_type = cl_int;
-    typedef std::true_type is_vector_type;
-};
-template <> struct TypeManager<cl_int8> : public CommonTypeManager<cl_int8>
-{
-    static const char *name() { return "int8"; }
-    static const char *add_typedef() { return "typedef int8 Type;\n"; }
-    using scalar_type = cl_int;
-    typedef std::true_type is_vector_type;
-};
-template <> struct TypeManager<cl_int16> : public CommonTypeManager<cl_int16>
-{
-    static const char *name() { return "int16"; }
-    static const char *add_typedef() { return "typedef int16 Type;\n"; }
-    using scalar_type = cl_int;
-    typedef std::true_type is_vector_type;
-};
-// cl_uint
-template <> struct TypeManager<cl_uint> : public CommonTypeManager<cl_uint>
-{
-    static const char *name() { return "uint"; }
-    static const char *add_typedef() { return "typedef uint Type;\n"; }
-};
-template <> struct TypeManager<cl_uint2> : public CommonTypeManager<cl_uint2>
-{
-    static const char *name() { return "uint2"; }
-    static const char *add_typedef() { return "typedef uint2 Type;\n"; }
-    using scalar_type = cl_uint;
-    typedef std::true_type is_vector_type;
-};
-template <>
-struct TypeManager<subgroups::cl_uint3>
-    : public CommonTypeManager<subgroups::cl_uint3>
-{
-    static const char *name() { return "uint3"; }
-    static const char *add_typedef() { return "typedef uint3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_uint;
-};
-template <> struct TypeManager<cl_uint4> : public CommonTypeManager<cl_uint4>
-{
-    static const char *name() { return "uint4"; }
-    static const char *add_typedef() { return "typedef uint4 Type;\n"; }
-    using scalar_type = cl_uint;
-    typedef std::true_type is_vector_type;
-};
-template <> struct TypeManager<cl_uint8> : public CommonTypeManager<cl_uint8>
-{
-    static const char *name() { return "uint8"; }
-    static const char *add_typedef() { return "typedef uint8 Type;\n"; }
-    using scalar_type = cl_uint;
-    typedef std::true_type is_vector_type;
-};
-template <> struct TypeManager<cl_uint16> : public CommonTypeManager<cl_uint16>
-{
-    static const char *name() { return "uint16"; }
-    static const char *add_typedef() { return "typedef uint16 Type;\n"; }
-    using scalar_type = cl_uint;
-    typedef std::true_type is_vector_type;
-};
-// cl_short
-template <> struct TypeManager<cl_short> : public CommonTypeManager<cl_short>
-{
-    static const char *name() { return "short"; }
-    static const char *add_typedef() { return "typedef short Type;\n"; }
-};
-template <> struct TypeManager<cl_short2> : public CommonTypeManager<cl_short2>
-{
-    static const char *name() { return "short2"; }
-    static const char *add_typedef() { return "typedef short2 Type;\n"; }
-    using scalar_type = cl_short;
-    typedef std::true_type is_vector_type;
-};
-template <>
-struct TypeManager<subgroups::cl_short3>
-    : public CommonTypeManager<subgroups::cl_short3>
-{
-    static const char *name() { return "short3"; }
-    static const char *add_typedef() { return "typedef short3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_short;
-};
-template <> struct TypeManager<cl_short4> : public CommonTypeManager<cl_short4>
-{
-    static const char *name() { return "short4"; }
-    static const char *add_typedef() { return "typedef short4 Type;\n"; }
-    using scalar_type = cl_short;
-    typedef std::true_type is_vector_type;
-};
-template <> struct TypeManager<cl_short8> : public CommonTypeManager<cl_short8>
-{
-    static const char *name() { return "short8"; }
-    static const char *add_typedef() { return "typedef short8 Type;\n"; }
-    using scalar_type = cl_short;
-    typedef std::true_type is_vector_type;
-};
-template <>
-struct TypeManager<cl_short16> : public CommonTypeManager<cl_short16>
-{
-    static const char *name() { return "short16"; }
-    static const char *add_typedef() { return "typedef short16 Type;\n"; }
-    using scalar_type = cl_short;
-    typedef std::true_type is_vector_type;
-};
-// cl_ushort
-template <> struct TypeManager<cl_ushort> : public CommonTypeManager<cl_ushort>
-{
-    static const char *name() { return "ushort"; }
-    static const char *add_typedef() { return "typedef ushort Type;\n"; }
-};
-template <>
-struct TypeManager<cl_ushort2> : public CommonTypeManager<cl_ushort2>
-{
-    static const char *name() { return "ushort2"; }
-    static const char *add_typedef() { return "typedef ushort2 Type;\n"; }
-    using scalar_type = cl_ushort;
-    typedef std::true_type is_vector_type;
-};
-template <>
-struct TypeManager<subgroups::cl_ushort3>
-    : public CommonTypeManager<subgroups::cl_ushort3>
-{
-    static const char *name() { return "ushort3"; }
-    static const char *add_typedef() { return "typedef ushort3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_ushort;
-};
-template <>
-struct TypeManager<cl_ushort4> : public CommonTypeManager<cl_ushort4>
-{
-    static const char *name() { return "ushort4"; }
-    static const char *add_typedef() { return "typedef ushort4 Type;\n"; }
-    using scalar_type = cl_ushort;
-    typedef std::true_type is_vector_type;
-};
-template <>
-struct TypeManager<cl_ushort8> : public CommonTypeManager<cl_ushort8>
-{
-    static const char *name() { return "ushort8"; }
-    static const char *add_typedef() { return "typedef ushort8 Type;\n"; }
-    using scalar_type = cl_ushort;
-    typedef std::true_type is_vector_type;
-};
-template <>
-struct TypeManager<cl_ushort16> : public CommonTypeManager<cl_ushort16>
-{
-    static const char *name() { return "ushort16"; }
-    static const char *add_typedef() { return "typedef ushort16 Type;\n"; }
-    using scalar_type = cl_ushort;
-    typedef std::true_type is_vector_type;
-};
-// cl_char
-template <> struct TypeManager<cl_char> : public CommonTypeManager<cl_char>
-{
-    static const char *name() { return "char"; }
-    static const char *add_typedef() { return "typedef char Type;\n"; }
-};
-template <> struct TypeManager<cl_char2> : public CommonTypeManager<cl_char2>
-{
-    static const char *name() { return "char2"; }
-    static const char *add_typedef() { return "typedef char2 Type;\n"; }
-    using scalar_type = cl_char;
-    typedef std::true_type is_vector_type;
-};
-template <>
-struct TypeManager<subgroups::cl_char3>
-    : public CommonTypeManager<subgroups::cl_char3>
-{
-    static const char *name() { return "char3"; }
-    static const char *add_typedef() { return "typedef char3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_char;
-};
-template <> struct TypeManager<cl_char4> : public CommonTypeManager<cl_char4>
-{
-    static const char *name() { return "char4"; }
-    static const char *add_typedef() { return "typedef char4 Type;\n"; }
-    using scalar_type = cl_char;
-    typedef std::true_type is_vector_type;
-};
-template <> struct TypeManager<cl_char8> : public CommonTypeManager<cl_char8>
-{
-    static const char *name() { return "char8"; }
-    static const char *add_typedef() { return "typedef char8 Type;\n"; }
-    using scalar_type = cl_char;
-    typedef std::true_type is_vector_type;
-};
-template <> struct TypeManager<cl_char16> : public CommonTypeManager<cl_char16>
-{
-    static const char *name() { return "char16"; }
-    static const char *add_typedef() { return "typedef char16 Type;\n"; }
-    using scalar_type = cl_char;
-    typedef std::true_type is_vector_type;
-};
-// cl_uchar
-template <> struct TypeManager<cl_uchar> : public CommonTypeManager<cl_uchar>
-{
-    static const char *name() { return "uchar"; }
-    static const char *add_typedef() { return "typedef uchar Type;\n"; }
-};
-template <> struct TypeManager<cl_uchar2> : public CommonTypeManager<cl_uchar2>
-{
-    static const char *name() { return "uchar2"; }
-    static const char *add_typedef() { return "typedef uchar2 Type;\n"; }
-    using scalar_type = cl_uchar;
-    typedef std::true_type is_vector_type;
-};
-template <>
-struct TypeManager<subgroups::cl_uchar3>
-    : public CommonTypeManager<subgroups::cl_char3>
-{
-    static const char *name() { return "uchar3"; }
-    static const char *add_typedef() { return "typedef uchar3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_uchar;
-};
-template <> struct TypeManager<cl_uchar4> : public CommonTypeManager<cl_uchar4>
-{
-    static const char *name() { return "uchar4"; }
-    static const char *add_typedef() { return "typedef uchar4 Type;\n"; }
-    using scalar_type = cl_uchar;
-    typedef std::true_type is_vector_type;
-};
-template <> struct TypeManager<cl_uchar8> : public CommonTypeManager<cl_uchar8>
-{
-    static const char *name() { return "uchar8"; }
-    static const char *add_typedef() { return "typedef uchar8 Type;\n"; }
-    using scalar_type = cl_uchar;
-    typedef std::true_type is_vector_type;
-};
-template <>
-struct TypeManager<cl_uchar16> : public CommonTypeManager<cl_uchar16>
-{
-    static const char *name() { return "uchar16"; }
-    static const char *add_typedef() { return "typedef uchar16 Type;\n"; }
-    using scalar_type = cl_uchar;
-    typedef std::true_type is_vector_type;
-};
-// cl_long
-template <> struct TypeManager<cl_long> : public CommonTypeManager<cl_long>
-{
-    static const char *name() { return "long"; }
-    static const char *add_typedef() { return "typedef long Type;\n"; }
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-};
-template <> struct TypeManager<cl_long2> : public CommonTypeManager<cl_long2>
-{
-    static const char *name() { return "long2"; }
-    static const char *add_typedef() { return "typedef long2 Type;\n"; }
-    using scalar_type = cl_long;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-};
-template <>
-struct TypeManager<subgroups::cl_long3>
-    : public CommonTypeManager<subgroups::cl_long3>
-{
-    static const char *name() { return "long3"; }
-    static const char *add_typedef() { return "typedef long3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_long;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-};
-template <> struct TypeManager<cl_long4> : public CommonTypeManager<cl_long4>
-{
-    static const char *name() { return "long4"; }
-    static const char *add_typedef() { return "typedef long4 Type;\n"; }
-    using scalar_type = cl_long;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-};
-template <> struct TypeManager<cl_long8> : public CommonTypeManager<cl_long8>
-{
-    static const char *name() { return "long8"; }
-    static const char *add_typedef() { return "typedef long8 Type;\n"; }
-    using scalar_type = cl_long;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-};
-template <> struct TypeManager<cl_long16> : public CommonTypeManager<cl_long16>
-{
-    static const char *name() { return "long16"; }
-    static const char *add_typedef() { return "typedef long16 Type;\n"; }
-    using scalar_type = cl_long;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-};
-// cl_ulong
-template <> struct TypeManager<cl_ulong> : public CommonTypeManager<cl_ulong>
-{
-    static const char *name() { return "ulong"; }
-    static const char *add_typedef() { return "typedef ulong Type;\n"; }
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-};
-template <> struct TypeManager<cl_ulong2> : public CommonTypeManager<cl_ulong2>
-{
-    static const char *name() { return "ulong2"; }
-    static const char *add_typedef() { return "typedef ulong2 Type;\n"; }
-    using scalar_type = cl_ulong;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-};
-template <>
-struct TypeManager<subgroups::cl_ulong3>
-    : public CommonTypeManager<subgroups::cl_ulong3>
-{
-    static const char *name() { return "ulong3"; }
-    static const char *add_typedef() { return "typedef ulong3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_ulong;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-};
-template <> struct TypeManager<cl_ulong4> : public CommonTypeManager<cl_ulong4>
-{
-    static const char *name() { return "ulong4"; }
-    static const char *add_typedef() { return "typedef ulong4 Type;\n"; }
-    using scalar_type = cl_ulong;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-};
-template <> struct TypeManager<cl_ulong8> : public CommonTypeManager<cl_ulong8>
-{
-    static const char *name() { return "ulong8"; }
-    static const char *add_typedef() { return "typedef ulong8 Type;\n"; }
-    using scalar_type = cl_ulong;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-};
-template <>
-struct TypeManager<cl_ulong16> : public CommonTypeManager<cl_ulong16>
-{
-    static const char *name() { return "ulong16"; }
-    static const char *add_typedef() { return "typedef ulong16 Type;\n"; }
-    using scalar_type = cl_ulong;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-};
-
-// cl_float
-template <> struct TypeManager<cl_float> : public CommonTypeManager<cl_float>
-{
-    static const char *name() { return "float"; }
-    static const char *add_typedef() { return "typedef float Type;\n"; }
-    static cl_float identify_limits(ArithmeticOp operation)
-    {
-        switch (operation)
-        {
-            case ArithmeticOp::add_: return 0.0f;
-            case ArithmeticOp::max_:
-                return -std::numeric_limits<float>::infinity();
-            case ArithmeticOp::min_:
-                return std::numeric_limits<float>::infinity();
-            case ArithmeticOp::mul_: return (cl_float)1;
-            default: log_error("Unknown operation request"); break;
-        }
-        return 0;
-    }
-};
-template <> struct TypeManager<cl_float2> : public CommonTypeManager<cl_float2>
-{
-    static const char *name() { return "float2"; }
-    static const char *add_typedef() { return "typedef float2 Type;\n"; }
-    using scalar_type = cl_float;
-    typedef std::true_type is_vector_type;
-};
-template <>
-struct TypeManager<subgroups::cl_float3>
-    : public CommonTypeManager<subgroups::cl_float3>
-{
-    static const char *name() { return "float3"; }
-    static const char *add_typedef() { return "typedef float3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_float;
-};
-template <> struct TypeManager<cl_float4> : public CommonTypeManager<cl_float4>
-{
-    static const char *name() { return "float4"; }
-    static const char *add_typedef() { return "typedef float4 Type;\n"; }
-    using scalar_type = cl_float;
-    typedef std::true_type is_vector_type;
-};
-template <> struct TypeManager<cl_float8> : public CommonTypeManager<cl_float8>
-{
-    static const char *name() { return "float8"; }
-    static const char *add_typedef() { return "typedef float8 Type;\n"; }
-    using scalar_type = cl_float;
-    typedef std::true_type is_vector_type;
-};
-template <>
-struct TypeManager<cl_float16> : public CommonTypeManager<cl_float16>
-{
-    static const char *name() { return "float16"; }
-    static const char *add_typedef() { return "typedef float16 Type;\n"; }
-    using scalar_type = cl_float;
-    typedef std::true_type is_vector_type;
-};
-
-// cl_double
-template <> struct TypeManager<cl_double> : public CommonTypeManager<cl_double>
-{
-    static const char *name() { return "double"; }
-    static const char *add_typedef() { return "typedef double Type;\n"; }
-    static cl_double identify_limits(ArithmeticOp operation)
-    {
-        switch (operation)
-        {
-            case ArithmeticOp::add_: return 0.0;
-            case ArithmeticOp::max_:
-                return -std::numeric_limits<double>::infinity();
-            case ArithmeticOp::min_:
-                return std::numeric_limits<double>::infinity();
-            case ArithmeticOp::mul_: return (cl_double)1;
-            default: log_error("Unknown operation request"); break;
-        }
-        return 0;
-    }
-    static const bool type_supported(cl_device_id device)
-    {
-        return double_ok(device);
-    }
-};
-template <>
-struct TypeManager<cl_double2> : public CommonTypeManager<cl_double2>
-{
-    static const char *name() { return "double2"; }
-    static const char *add_typedef() { return "typedef double2 Type;\n"; }
-    using scalar_type = cl_double;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return double_ok(device);
-    }
-};
-template <>
-struct TypeManager<subgroups::cl_double3>
-    : public CommonTypeManager<subgroups::cl_double3>
-{
-    static const char *name() { return "double3"; }
-    static const char *add_typedef() { return "typedef double3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_double;
-    static const bool type_supported(cl_device_id device)
-    {
-        return double_ok(device);
-    }
-};
-template <>
-struct TypeManager<cl_double4> : public CommonTypeManager<cl_double4>
-{
-    static const char *name() { return "double4"; }
-    static const char *add_typedef() { return "typedef double4 Type;\n"; }
-    using scalar_type = cl_double;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return double_ok(device);
-    }
-};
-template <>
-struct TypeManager<cl_double8> : public CommonTypeManager<cl_double8>
-{
-    static const char *name() { return "double8"; }
-    static const char *add_typedef() { return "typedef double8 Type;\n"; }
-    using scalar_type = cl_double;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return double_ok(device);
-    }
-};
-template <>
-struct TypeManager<cl_double16> : public CommonTypeManager<cl_double16>
-{
-    static const char *name() { return "double16"; }
-    static const char *add_typedef() { return "typedef double16 Type;\n"; }
-    using scalar_type = cl_double;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return double_ok(device);
-    }
-};
-
-// cl_half
-template <>
-struct TypeManager<subgroups::cl_half>
-    : public CommonTypeManager<subgroups::cl_half>
-{
-    static const char *name() { return "half"; }
-    static const char *add_typedef() { return "typedef half Type;\n"; }
-    typedef std::true_type is_sb_scalar_type;
-    static subgroups::cl_half identify_limits(ArithmeticOp operation)
-    {
-        switch (operation)
-        {
-            case ArithmeticOp::add_: return { 0x0000 };
-            case ArithmeticOp::max_: return { 0xfc00 };
-            case ArithmeticOp::min_: return { 0x7c00 };
-            case ArithmeticOp::mul_: return { 0x3c00 };
-            default: log_error("Unknown operation request"); break;
-        }
-        return { 0 };
-    }
-    static const bool type_supported(cl_device_id device)
-    {
-        return half_ok(device);
-    }
-};
-template <>
-struct TypeManager<subgroups::cl_half2>
-    : public CommonTypeManager<subgroups::cl_half2>
-{
-    static const char *name() { return "half2"; }
-    static const char *add_typedef() { return "typedef half2 Type;\n"; }
-    using scalar_type = subgroups::cl_half;
-    typedef std::true_type is_sb_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return half_ok(device);
-    }
-};
-template <>
-struct TypeManager<subgroups::cl_half3>
-    : public CommonTypeManager<subgroups::cl_half3>
-{
-    static const char *name() { return "half3"; }
-    static const char *add_typedef() { return "typedef half3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = subgroups::cl_half;
-
-    static const bool type_supported(cl_device_id device)
-    {
-        return half_ok(device);
-    }
-};
-template <>
-struct TypeManager<subgroups::cl_half4>
-    : public CommonTypeManager<subgroups::cl_half4>
-{
-    static const char *name() { return "half4"; }
-    static const char *add_typedef() { return "typedef half4 Type;\n"; }
-    using scalar_type = subgroups::cl_half;
-    typedef std::true_type is_sb_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return half_ok(device);
-    }
-};
-template <>
-struct TypeManager<subgroups::cl_half8>
-    : public CommonTypeManager<subgroups::cl_half8>
-{
-    static const char *name() { return "half8"; }
-    static const char *add_typedef() { return "typedef half8 Type;\n"; }
-    using scalar_type = subgroups::cl_half;
-    typedef std::true_type is_sb_vector_type;
-
-    static const bool type_supported(cl_device_id device)
-    {
-        return half_ok(device);
-    }
-};
-template <>
-struct TypeManager<subgroups::cl_half16>
-    : public CommonTypeManager<subgroups::cl_half16>
-{
-    static const char *name() { return "half16"; }
-    static const char *add_typedef() { return "typedef half16 Type;\n"; }
-    using scalar_type = subgroups::cl_half;
-    typedef std::true_type is_sb_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return half_ok(device);
-    }
-};
-
-// set scalar value to vector of halfs
-template <typename Ty, int N = 0>
-typename std::enable_if<TypeManager<Ty>::is_sb_vector_type::value>::type
-set_value(Ty &lhs, const cl_ulong &rhs)
-{
-    const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
-    for (auto i = 0; i < size; ++i)
-    {
-        lhs.data.s[i] = rhs;
-    }
-}
-
-
-// set scalar value to vector
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_vector_type::value>::type
-set_value(Ty &lhs, const cl_ulong &rhs)
-{
-    const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
-    for (auto i = 0; i < size; ++i)
-    {
-        lhs.s[i] = rhs;
-    }
-}
-
-// set vector to vector value
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_vector_type::value>::type
-set_value(Ty &lhs, const Ty &rhs)
-{
-    lhs = rhs;
-}
-
-// set scalar value to vector size 3
-template <typename Ty, int N = 0>
-typename std::enable_if<TypeManager<Ty>::is_sb_vector_size3::value>::type
-set_value(Ty &lhs, const cl_ulong &rhs)
-{
-    for (auto i = 0; i < 3; ++i)
-    {
-        lhs.data.s[i] = rhs;
-    }
-}
-
-// set scalar value to scalar
-template <typename Ty>
-typename std::enable_if<std::is_scalar<Ty>::value>::type
-set_value(Ty &lhs, const cl_ulong &rhs)
-{
-    lhs = static_cast<Ty>(rhs);
-}
-
-// set scalar value to half scalar
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_sb_scalar_type::value>::type
-set_value(Ty &lhs, const cl_ulong &rhs)
-{
-    lhs.data = rhs;
-}
-
-// compare for common vectors
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_vector_type::value, bool>::type
-compare(const Ty &lhs, const Ty &rhs)
-{
-    const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
-    for (auto i = 0; i < size; ++i)
-    {
-        if (lhs.s[i] != rhs.s[i])
-        {
+            log_info(
+                "clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n");
             return false;
         }
+        return c != 0;
     }
-    return true;
-}
+};
 
-// compare for vectors 3
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_sb_vector_size3::value, bool>::type
-compare(const Ty &lhs, const Ty &rhs)
-{
-    for (auto i = 0; i < 3; ++i)
-    {
-        if (lhs.data.s[i] != rhs.data.s[i])
-        {
-            return false;
-        }
-    }
-    return true;
-}
-
-// compare for half vectors
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_sb_vector_type::value, bool>::type
-compare(const Ty &lhs, const Ty &rhs)
-{
-    const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
-    for (auto i = 0; i < size; ++i)
-    {
-        if (lhs.data.s[i] != rhs.data.s[i])
-        {
-            return false;
-        }
-    }
-    return true;
-}
-
-// compare for scalars
-template <typename Ty>
-typename std::enable_if<std::is_scalar<Ty>::value, bool>::type
-compare(const Ty &lhs, const Ty &rhs)
-{
-    return lhs == rhs;
-}
-
-// compare for scalar halfs
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_sb_scalar_type::value, bool>::type
-compare(const Ty &lhs, const Ty &rhs)
-{
-    return lhs.data == rhs.data;
-}
-
-template <typename Ty> inline bool compare_ordered(const Ty &lhs, const Ty &rhs)
-{
-    return lhs == rhs;
-}
-
-template <>
-inline bool compare_ordered(const subgroups::cl_half &lhs,
-                            const subgroups::cl_half &rhs)
-{
-    return cl_half_to_float(lhs.data) == cl_half_to_float(rhs.data);
-}
-
-template <typename Ty>
-inline bool compare_ordered(const subgroups::cl_half &lhs, const int &rhs)
-{
-    return cl_half_to_float(lhs.data) == rhs;
-}
 
 // Run a test kernel to compute the result of a built-in on an input
 static int run_kernel(cl_context context, cl_command_queue queue,
@@ -1211,9 +318,6 @@
                                  NULL);
     test_error(error, "clEnqueueWriteBuffer failed");
 
-    error = clEnqueueWriteBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL,
-                                 NULL);
-    test_error(error, "clEnqueueWriteBuffer failed");
     error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
                                    NULL, NULL);
     test_error(error, "clEnqueueNDRangeKernel failed");
@@ -1233,114 +337,61 @@
 }
 
 // Driver for testing a single built in function
-template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
+template <typename Ty, typename Fns, size_t GSIZE, size_t LSIZE,
+          size_t TSIZE = 0>
+struct test
 {
-    static int mrun(cl_device_id device, cl_context context,
-                    cl_command_queue queue, int num_elements, const char *kname,
-                    const char *src, WorkGroupParams test_params)
-    {
-        int error = TEST_PASS;
-        for (auto &mask : test_params.all_work_item_masks)
-        {
-            test_params.work_items_mask = mask;
-            error |= run(device, context, queue, num_elements, kname, src,
-                         test_params);
-        }
-        return error;
-    };
     static int run(cl_device_id device, cl_context context,
                    cl_command_queue queue, int num_elements, const char *kname,
-                   const char *src, WorkGroupParams test_params)
+                   const char *src, int dynscl, bool useCoreSubgroups)
     {
         size_t tmp;
         int error;
         int subgroup_size, num_subgroups;
         size_t realSize;
-        size_t global = test_params.global_workgroup_size;
-        size_t local = test_params.local_workgroup_size;
+        size_t global;
+        size_t local;
         clProgramWrapper program;
         clKernelWrapper kernel;
         cl_platform_id platform;
-        std::vector<cl_int> sgmap;
-        sgmap.resize(4 * global);
-        std::vector<Ty> mapin;
-        mapin.resize(local);
-        std::vector<Ty> mapout;
-        mapout.resize(local);
-        std::stringstream kernel_sstr;
-        if (test_params.work_items_mask != 0)
-        {
-            kernel_sstr << "#define WORK_ITEMS_MASK ";
-            kernel_sstr << "0x" << std::hex << test_params.work_items_mask
-                        << "\n";
-        }
+        cl_int sgmap[2 * GSIZE];
+        Ty mapin[LSIZE];
+        Ty mapout[LSIZE];
 
-
-        kernel_sstr << "#define NR_OF_ACTIVE_WORK_ITEMS ";
-        kernel_sstr << NR_OF_ACTIVE_WORK_ITEMS << "\n";
         // Make sure a test of type Ty is supported by the device
-        if (!TypeManager<Ty>::type_supported(device))
-        {
-            log_info("Data type not supported : %s\n", TypeManager<Ty>::name());
-            return 0;
-        }
-        else
-        {
-            if (strstr(TypeManager<Ty>::name(), "double"))
-            {
-                kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
-            }
-            else if (strstr(TypeManager<Ty>::name(), "half"))
-            {
-                kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp16: enable\n";
-            }
-        }
-
-        for (std::string extension : test_params.required_extensions)
-        {
-            if (!is_extension_available(device, extension.c_str()))
-            {
-                log_info("The extension %s not supported on this device. SKIP "
-                         "testing - kernel %s data type %s\n",
-                         extension.c_str(), kname, TypeManager<Ty>::name());
-                return TEST_PASS;
-            }
-            kernel_sstr << "#pragma OPENCL EXTENSION " + extension
-                    + ": enable\n";
-        }
+        if (!TypeCheck<Ty>::val(device)) return 0;
 
         error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
                                 (void *)&platform, NULL);
         test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");
-        if (test_params.use_core_subgroups)
+        std::stringstream kernel_sstr;
+        if (useCoreSubgroups)
         {
             kernel_sstr
                 << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
         }
         kernel_sstr << "#define XY(M,I) M[I].x = get_sub_group_local_id(); "
                        "M[I].y = get_sub_group_id();\n";
-        kernel_sstr << TypeManager<Ty>::add_typedef();
+        kernel_sstr << TypeDef<Ty>::val();
         kernel_sstr << src;
         const std::string &kernel_str = kernel_sstr.str();
         const char *kernel_src = kernel_str.c_str();
 
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            &kernel_src, kname);
+        error = create_single_kernel_helper_with_build_options(
+            context, &program, &kernel, 1, &kernel_src, kname, "-cl-std=CL2.0");
         if (error != 0) return error;
 
         // Determine some local dimensions to use for the test.
-        error = get_max_common_work_group_size(
-            context, kernel, test_params.global_workgroup_size, &local);
+        global = GSIZE;
+        error = get_max_common_work_group_size(context, kernel, GSIZE, &local);
         test_error(error, "get_max_common_work_group_size failed");
 
         // Limit it a bit so we have muliple work groups
-        // Ideally this will still be large enough to give us multiple
-        if (local > test_params.local_workgroup_size)
-            local = test_params.local_workgroup_size;
-
+        // Ideally this will still be large enough to give us multiple subgroups
+        if (local > LSIZE) local = LSIZE;
 
         // Get the sub group info
-        subgroupsAPI subgroupsApiSet(platform, test_params.use_core_subgroups);
+        subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups);
         clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr =
             subgroupsApiSet.clGetKernelSubGroupInfo_ptr();
         if (clGetKernelSubGroupInfo_ptr == NULL)
@@ -1384,9 +435,8 @@
 
         std::vector<Ty> idata;
         std::vector<Ty> odata;
-        size_t input_array_size = global;
-        size_t output_array_size = global;
-        int dynscl = test_params.dynsc;
+        size_t input_array_size = GSIZE;
+        size_t output_array_size = GSIZE;
 
         if (dynscl != 0)
         {
@@ -1399,96 +449,28 @@
         odata.resize(output_array_size);
 
         // Run the kernel once on zeroes to get the map
-        memset(idata.data(), 0, input_array_size * sizeof(Ty));
-        error = run_kernel(context, queue, kernel, global, local, idata.data(),
-                           input_array_size * sizeof(Ty), sgmap.data(),
-                           global * sizeof(cl_int4), odata.data(),
+        memset(&idata[0], 0, input_array_size * sizeof(Ty));
+        error = run_kernel(context, queue, kernel, global, local, &idata[0],
+                           input_array_size * sizeof(Ty), sgmap,
+                           global * sizeof(cl_int) * 2, &odata[0],
                            output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
-        test_error(error, "Running kernel first time failed");
+        if (error) return error;
 
         // Generate the desired input for the kernel
+        Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local,
+                 (int)global / (int)local);
 
-        test_params.subgroup_size = subgroup_size;
-        Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params);
-        error = run_kernel(context, queue, kernel, global, local, idata.data(),
-                           input_array_size * sizeof(Ty), sgmap.data(),
-                           global * sizeof(cl_int4), odata.data(),
+        error = run_kernel(context, queue, kernel, global, local, &idata[0],
+                           input_array_size * sizeof(Ty), sgmap,
+                           global * sizeof(cl_int) * 2, &odata[0],
                            output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
-        test_error(error, "Running kernel second time failed");
+        if (error) return error;
+
 
         // Check the result
-        error = Fns::chk(idata.data(), odata.data(), mapin.data(),
-                         mapout.data(), sgmap.data(), test_params);
-        test_error(error, "Data verification failed");
-        return TEST_PASS;
+        return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap,
+                        subgroup_size, (int)local, (int)global / (int)local);
     }
 };
 
-static void set_last_workgroup_params(int non_uniform_size,
-                                      int &number_of_subgroups,
-                                      int subgroup_size, int &workgroup_size,
-                                      int &last_subgroup_size)
-{
-    number_of_subgroups = 1 + non_uniform_size / subgroup_size;
-    last_subgroup_size = non_uniform_size % subgroup_size;
-    workgroup_size = non_uniform_size;
-}
-
-template <typename Ty>
-static void set_randomdata_for_subgroup(Ty *workgroup, int wg_offset,
-                                        int current_sbs)
-{
-    int randomize_data = (int)(genrand_int32(gMTdata) % 3);
-    // Initialize data matrix indexed by local id and sub group id
-    switch (randomize_data)
-    {
-        case 0:
-            memset(&workgroup[wg_offset], 0, current_sbs * sizeof(Ty));
-            break;
-        case 1: {
-            memset(&workgroup[wg_offset], 0, current_sbs * sizeof(Ty));
-            int wi_id = (int)(genrand_int32(gMTdata) % (cl_uint)current_sbs);
-            set_value(workgroup[wg_offset + wi_id], 41);
-        }
-        break;
-        case 2:
-            memset(&workgroup[wg_offset], 0xff, current_sbs * sizeof(Ty));
-            break;
-    }
-}
-
-struct RunTestForType
-{
-    RunTestForType(cl_device_id device, cl_context context,
-                   cl_command_queue queue, int num_elements,
-                   WorkGroupParams test_params)
-        : device_(device), context_(context), queue_(queue),
-          num_elements_(num_elements), test_params_(test_params)
-    {}
-    template <typename T, typename U>
-    int run_impl(const char *kernel_name, const char *source)
-    {
-        int error = TEST_PASS;
-        if (test_params_.all_work_item_masks.size() > 0)
-        {
-            error = test<T, U>::mrun(device_, context_, queue_, num_elements_,
-                                     kernel_name, source, test_params_);
-        }
-        else
-        {
-            error = test<T, U>::run(device_, context_, queue_, num_elements_,
-                                    kernel_name, source, test_params_);
-        }
-
-        return error;
-    }
-
-private:
-    cl_device_id device_;
-    cl_context context_;
-    cl_command_queue queue_;
-    int num_elements_;
-    WorkGroupParams test_params_;
-};
-
 #endif

diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp
index 47e42f6..e6ce1d2 100644
--- a/test_conformance/subgroups/test_barrier.cpp
+++ b/test_conformance/subgroups/test_barrier.cpp

@@ -59,15 +59,10 @@
 // barrier test functions
 template <int Which> struct BAR
 {
-    static void gen(cl_int *x, cl_int *t, cl_int *m,
-                    const WorkGroupParams &test_params)
+    static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng)
     {
         int i, ii, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
         int nj = (nw + ns - 1) / ns;
-        ng = ng / nw;
         int e;
 
         ii = 0;
@@ -84,7 +79,8 @@
             // Now map into work group using map from device
             for (j = 0; j < nw; ++j)
             {
-                x[j] = t[j];
+                i = m[2 * j + 1] * ns + m[2 * j];
+                x[j] = t[i];
             }
 
             x += nw;
@@ -93,14 +89,10 @@
     }
 
     static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m,
-                   const WorkGroupParams &test_params)
+                   int ns, int nw, int ng)
     {
         int ii, i, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
         int nj = (nw + ns - 1) / ns;
-        ng = ng / nw;
         cl_int tr, rr;
 
         if (Which == 0)
@@ -113,8 +105,9 @@
             // Map to array indexed to array indexed by local ID and sub group
             for (j = 0; j < nw; ++j)
             {
-                mx[j] = x[j];
-                my[j] = y[j];
+                i = m[2 * j + 1] * ns + m[2 * j];
+                mx[i] = x[j];
+                my[i] = y[j];
             }
 
             for (j = 0; j < nj; ++j)
@@ -130,9 +123,8 @@
                     if (tr != rr)
                     {
                         log_error("ERROR: sub_group_barrier mismatch for local "
-                                  "id %d in sub group %d in group %d expected "
-                                  "%d got %d\n",
-                                  i, j, k, tr, rr);
+                                  "id %d in sub group %d in group %d\n",
+                                  i, j, k);
                         return -1;
                     }
                 }
@@ -152,18 +144,18 @@
                            cl_command_queue queue, int num_elements,
                            bool useCoreSubgroups)
 {
-    int error = TEST_PASS;
+    int error;
 
     // Adjust these individually below if desired/needed
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size);
-    test_params.use_core_subgroups = useCoreSubgroups;
-    error = test<cl_int, BAR<0>>::run(device, context, queue, num_elements,
-                                      "test_lbar", lbar_source, test_params);
-    error |= test<cl_int, BAR<1>, global_work_size>::run(
-        device, context, queue, num_elements, "test_gbar", gbar_source,
-        test_params);
+#define G 2000
+#define L 200
+
+    error = test<cl_int, BAR<0>, G, L>::run(device, context, queue,
+                                            num_elements, "test_lbar",
+                                            lbar_source, 0, useCoreSubgroups);
+    error = test<cl_int, BAR<1>, G, L, G>::run(
+        device, context, queue, num_elements, "test_gbar", gbar_source, 0,
+        useCoreSubgroups);
 
     return error;
 }

diff --git a/test_conformance/subgroups/test_ifp.cpp b/test_conformance/subgroups/test_ifp.cpp
index 428f2cd..02850e5 100644
--- a/test_conformance/subgroups/test_ifp.cpp
+++ b/test_conformance/subgroups/test_ifp.cpp

@@ -46,7 +46,7 @@
     "#define INST_COUNT 0x3\n"
     "\n"
     "__kernel void\n"
-    "test_ifp(const __global int *in, __global int4 *xy, __global int *out)\n"
+    "test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n"
     "{\n"
     "    __local atomic_int loc[NUM_LOC];\n"
     "\n"
@@ -225,15 +225,10 @@
 
 struct IFP
 {
-    static void gen(cl_int *x, cl_int *t, cl_int *,
-                    const WorkGroupParams &test_params)
+    static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng)
     {
         int k;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
         int nj = (nw + ns - 1) / ns;
-        ng = ng / nw;
 
         // We need at least 2 sub groups per group for this test
         if (nj == 1) return;
@@ -245,15 +240,11 @@
         }
     }
 
-    static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *,
-                   const WorkGroupParams &test_params)
+    static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns,
+                   int nw, int ng)
     {
         int i, k;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
         int nj = (nw + ns - 1) / ns;
-        ng = ng / nw;
 
         // We need at least 2 sub groups per group for this tes
         if (nj == 1) return 0;
@@ -284,17 +275,14 @@
 int test_ifp(cl_device_id device, cl_context context, cl_command_queue queue,
              int num_elements, bool useCoreSubgroups)
 {
-    int error = TEST_PASS;
+    int error;
 
-    // Global/local work group sizes
     // Adjust these individually below if desired/needed
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size);
-    test_params.use_core_subgroups = useCoreSubgroups;
-    test_params.dynsc = NUM_LOC + 1;
-    error = test<cl_int, IFP>::run(device, context, queue, num_elements,
-                                   "test_ifp", ifp_source, test_params);
+#define G 2000
+#define L 200
+    error = test<cl_int, IFP, G, L>::run(device, context, queue, num_elements,
+                                         "test_ifp", ifp_source, NUM_LOC + 1,
+                                         useCoreSubgroups);
     return error;
 }
 

diff --git a/test_conformance/subgroups/test_queries.cpp b/test_conformance/subgroups/test_queries.cpp
index 761ca7a..2ad3d7f 100644
--- a/test_conformance/subgroups/test_queries.cpp
+++ b/test_conformance/subgroups/test_queries.cpp

@@ -67,8 +67,9 @@
 
     const std::string &kernel_str = kernel_sstr.str();
     const char *kernel_src = kernel_str.c_str();
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        &kernel_src, "query_kernel");
+    error = create_single_kernel_helper_with_build_options(
+        context, &program, &kernel, 1, &kernel_src, "query_kernel",
+        "-cl-std=CL2.0");
     if (error != 0) return error;
 
     // Determine some local dimensions to use for the test.

diff --git a/test_conformance/subgroups/test_subgroup.cpp b/test_conformance/subgroups/test_subgroup.cpp
deleted file mode 100644
index c0e4952..0000000
--- a/test_conformance/subgroups/test_subgroup.cpp
+++ /dev/null

@@ -1,217 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "procs.h"
-#include "subhelpers.h"
-#include "subgroup_common_kernels.h"
-#include "subgroup_common_templates.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-
-namespace {
-// Any/All test functions
-template <NonUniformVoteOp operation> struct AA
-{
-    static void gen(cl_int *x, cl_int *t, cl_int *m,
-                    const WorkGroupParams &test_params)
-    {
-        int i, ii, j, k, n;
-        int ng = test_params.global_workgroup_size;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        int e;
-        ng = ng / nw;
-        ii = 0;
-        log_info("  sub_group_%s...\n", operation_names(operation));
-        for (k = 0; k < ng; ++k)
-        {
-            for (j = 0; j < nj; ++j)
-            {
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-                e = (int)(genrand_int32(gMTdata) % 3);
-
-                // Initialize data matrix indexed by local id and sub group id
-                switch (e)
-                {
-                    case 0: memset(&t[ii], 0, n * sizeof(cl_int)); break;
-                    case 1:
-                        memset(&t[ii], 0, n * sizeof(cl_int));
-                        i = (int)(genrand_int32(gMTdata) % (cl_uint)n);
-                        t[ii + i] = 41;
-                        break;
-                    case 2: memset(&t[ii], 0xff, n * sizeof(cl_int)); break;
-                }
-            }
-
-            // Now map into work group using map from device
-            for (j = 0; j < nw; ++j)
-            {
-                x[j] = t[j];
-            }
-
-            x += nw;
-            m += 4 * nw;
-        }
-    }
-
-    static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, n;
-        int ng = test_params.global_workgroup_size;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        cl_int taa, raa;
-        ng = ng / nw;
-
-        for (k = 0; k < ng; ++k)
-        {
-            // Map to array indexed to array indexed by local ID and sub group
-            for (j = 0; j < nw; ++j)
-            {
-                mx[j] = x[j];
-                my[j] = y[j];
-            }
-
-            for (j = 0; j < nj; ++j)
-            {
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-
-                // Compute target
-                if (operation == NonUniformVoteOp::any)
-                {
-                    taa = 0;
-                    for (i = 0; i < n; ++i) taa |= mx[ii + i] != 0;
-                }
-
-                if (operation == NonUniformVoteOp::all)
-                {
-                    taa = 1;
-                    for (i = 0; i < n; ++i) taa &= mx[ii + i] != 0;
-                }
-
-                // Check result
-                for (i = 0; i < n; ++i)
-                {
-                    raa = my[ii + i] != 0;
-                    if (raa != taa)
-                    {
-                        log_error("ERROR: sub_group_%s mismatch for local id "
-                                  "%d in sub group %d in group %d\n",
-                                  operation_names(operation), i, j, k);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-        log_info("  sub_group_%s... passed\n", operation_names(operation));
-        return TEST_PASS;
-    }
-};
-
-static const char *any_source = "__kernel void test_any(const __global Type "
-                                "*in, __global int4 *xy, __global Type *out)\n"
-                                "{\n"
-                                "    int gid = get_global_id(0);\n"
-                                "    XY(xy,gid);\n"
-                                "    out[gid] = sub_group_any(in[gid]);\n"
-                                "}\n";
-
-static const char *all_source = "__kernel void test_all(const __global Type "
-                                "*in, __global int4 *xy, __global Type *out)\n"
-                                "{\n"
-                                "    int gid = get_global_id(0);\n"
-                                "    XY(xy,gid);\n"
-                                "    out[gid] = sub_group_all(in[gid]);\n"
-                                "}\n";
-
-
-template <typename T>
-int run_broadcast_scan_reduction_for_type(RunTestForType rft)
-{
-    int error = rft.run_impl<T, BC<T, SubgroupsBroadcastOp::broadcast>>(
-        "test_bcast", bcast_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>("test_redadd",
-                                                            redadd_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>("test_redmax",
-                                                            redmax_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>("test_redmin",
-                                                            redmin_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>("test_scinadd",
-                                                             scinadd_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>("test_scinmax",
-                                                             scinmax_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>("test_scinmin",
-                                                             scinmin_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>("test_scexadd",
-                                                             scexadd_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>("test_scexmax",
-                                                             scexmax_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>("test_scexmin",
-                                                             scexmin_source);
-    return error;
-}
-
-}
-// Entry point from main
-int test_subgroup_functions(cl_device_id device, cl_context context,
-                            cl_command_queue queue, int num_elements,
-                            bool useCoreSubgroups)
-{
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-    int error =
-        rft.run_impl<cl_int, AA<NonUniformVoteOp::any>>("test_any", any_source);
-    error |=
-        rft.run_impl<cl_int, AA<NonUniformVoteOp::all>>("test_all", all_source);
-    error |= run_broadcast_scan_reduction_for_type<cl_int>(rft);
-    error |= run_broadcast_scan_reduction_for_type<cl_uint>(rft);
-    error |= run_broadcast_scan_reduction_for_type<cl_long>(rft);
-    error |= run_broadcast_scan_reduction_for_type<cl_ulong>(rft);
-    error |= run_broadcast_scan_reduction_for_type<cl_float>(rft);
-    error |= run_broadcast_scan_reduction_for_type<cl_double>(rft);
-    error |= run_broadcast_scan_reduction_for_type<subgroups::cl_half>(rft);
-    return error;
-}
-
-int test_subgroup_functions_core(cl_device_id device, cl_context context,
-                                 cl_command_queue queue, int num_elements)
-{
-    return test_subgroup_functions(device, context, queue, num_elements, true);
-}
-
-int test_subgroup_functions_ext(cl_device_id device, cl_context context,
-                                cl_command_queue queue, int num_elements)
-{
-    bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
-
-    if (!hasExtension)
-    {
-        log_info(
-            "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-    return test_subgroup_functions(device, context, queue, num_elements, false);
-}

diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp
deleted file mode 100644
index f2e4060..0000000
--- a/test_conformance/subgroups/test_subgroup_ballot.cpp
+++ /dev/null

@@ -1,1089 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "procs.h"
-#include "subhelpers.h"
-#include "subgroup_common_templates.h"
-#include "harness/typeWrappers.h"
-#include <bitset>
-
-namespace {
-// Test for ballot functions
-template <typename Ty> struct BALLOT
-{
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        // no work here
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int non_uniform_size = gws % lws;
-        log_info("  sub_group_ballot...\n");
-        if (non_uniform_size)
-        {
-            log_info("  non uniform work group size mode ON\n");
-        }
-    }
-
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        int current_sbs = 0;
-        cl_uint expected_result, device_result;
-        int non_uniform_size = gws % lws;
-        int wg_number = gws / lws;
-        wg_number = non_uniform_size ? wg_number + 1 : wg_number;
-        int last_subgroup_size = 0;
-
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            if (non_uniform_size && wg_id == wg_number - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
-                                          last_subgroup_size);
-            }
-
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            { // inside the work_group
-                // read device outputs for work_group
-                my[wi_id] = y[wi_id];
-            }
-
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                if (last_subgroup_size && sb_id == sb_number - 1)
-                {
-                    current_sbs = last_subgroup_size;
-                }
-                else
-                {
-                    current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                }
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                {
-                    device_result = my[wg_offset + wi_id];
-                    expected_result = 1;
-                    if (!compare(device_result, expected_result))
-                    {
-                        log_error(
-                            "ERROR: sub_group_ballot mismatch for local id "
-                            "%d in sub group %d in group %d obtained {%d}, "
-                            "expected {%d} \n",
-                            wi_id, sb_id, wg_id, device_result,
-                            expected_result);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            y += lws;
-            m += 4 * lws;
-        }
-        log_info("  sub_group_ballot... passed\n");
-        return TEST_PASS;
-    }
-};
-
-// Test for bit extract ballot functions
-template <typename Ty, BallotOp operation> struct BALLOT_BIT_EXTRACT
-{
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int wi_id, sb_id, wg_id, l;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        int wg_number = gws / lws;
-        int limit_sbs = sbs > 100 ? 100 : sbs;
-        int non_uniform_size = gws % lws;
-        log_info("  sub_group_%s(%s)...\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-
-        if (non_uniform_size)
-        {
-            log_info("  non uniform work group size mode ON\n");
-        }
-
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                int current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                // rand index to bit extract
-                int index_for_odd = (int)(genrand_int32(gMTdata) & 0x7fffffff)
-                    % (limit_sbs > current_sbs ? current_sbs : limit_sbs);
-                int index_for_even = (int)(genrand_int32(gMTdata) & 0x7fffffff)
-                    % (limit_sbs > current_sbs ? current_sbs : limit_sbs);
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                {
-                    // index of the third element int the vector.
-                    int midx = 4 * wg_offset + 4 * wi_id + 2;
-                    // storing information about index to bit extract
-                    m[midx] = (cl_int)index_for_odd;
-                    m[++midx] = (cl_int)index_for_even;
-                }
-                set_randomdata_for_subgroup<Ty>(t, wg_offset, current_sbs);
-            }
-
-            // Now map into work group using map from device
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            {
-                x[wi_id] = t[wi_id];
-            }
-
-            x += lws;
-            m += 4 * lws;
-        }
-    }
-
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, l, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        int wg_number = gws / lws;
-        cl_uint4 expected_result, device_result;
-        int last_subgroup_size = 0;
-        int current_sbs = 0;
-        int non_uniform_size = gws % lws;
-
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            if (non_uniform_size && wg_id == wg_number - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
-                                          last_subgroup_size);
-            }
-            // Map to array indexed to array indexed by local ID and sub group
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            { // inside the work_group
-                // read host inputs for work_group
-                mx[wi_id] = x[wi_id];
-                // read device outputs for work_group
-                my[wi_id] = y[wi_id];
-            }
-
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                if (last_subgroup_size && sb_id == sb_number - 1)
-                {
-                    current_sbs = last_subgroup_size;
-                }
-                else
-                {
-                    current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                }
-                // take index of array where info which work_item will
-                // be broadcast its value is stored
-                int midx = 4 * wg_offset + 2;
-                // take subgroup local id of this work_item
-                int index_for_odd = (int)m[midx];
-                int index_for_even = (int)m[++midx];
-
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                { // for each subgroup
-                    int bit_value = 0;
-                    // from which value of bitfield bit
-                    // verification will be done
-                    int take_shift =
-                        (wi_id & 1) ? index_for_odd % 32 : index_for_even % 32;
-                    int bit_mask = 1 << take_shift;
-
-                    if (wi_id < 32)
-                        (mx[wg_offset + wi_id].s0 & bit_mask) > 0
-                            ? bit_value = 1
-                            : bit_value = 0;
-                    if (wi_id >= 32 && wi_id < 64)
-                        (mx[wg_offset + wi_id].s1 & bit_mask) > 0
-                            ? bit_value = 1
-                            : bit_value = 0;
-                    if (wi_id >= 64 && wi_id < 96)
-                        (mx[wg_offset + wi_id].s2 & bit_mask) > 0
-                            ? bit_value = 1
-                            : bit_value = 0;
-                    if (wi_id >= 96 && wi_id < 128)
-                        (mx[wg_offset + wi_id].s3 & bit_mask) > 0
-                            ? bit_value = 1
-                            : bit_value = 0;
-
-                    if (wi_id & 1)
-                    {
-                        bit_value ? expected_result = { 1, 0, 0, 1 }
-                                  : expected_result = { 0, 0, 0, 1 };
-                    }
-                    else
-                    {
-                        bit_value ? expected_result = { 1, 0, 0, 2 }
-                                  : expected_result = { 0, 0, 0, 2 };
-                    }
-
-                    device_result = my[wg_offset + wi_id];
-                    if (!compare(device_result, expected_result))
-                    {
-                        log_error(
-                            "ERROR: sub_group_%s mismatch for local id %d in "
-                            "sub group %d in group %d obtained {%d, %d, %d, "
-                            "%d}, expected {%d, %d, %d, %d}\n",
-                            operation_names(operation), wi_id, sb_id, wg_id,
-                            device_result.s0, device_result.s1,
-                            device_result.s2, device_result.s3,
-                            expected_result.s0, expected_result.s1,
-                            expected_result.s2, expected_result.s3);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            x += lws;
-            y += lws;
-            m += 4 * lws;
-        }
-        log_info("  sub_group_%s(%s)... passed\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-};
-
-template <typename Ty, BallotOp operation> struct BALLOT_INVERSE
-{
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int non_uniform_size = gws % lws;
-        log_info("  sub_group_inverse_ballot...\n");
-        if (non_uniform_size)
-        {
-            log_info("  non uniform work group size mode ON\n");
-        }
-        // no work here
-    }
-
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        cl_uint4 expected_result, device_result;
-        int non_uniform_size = gws % lws;
-        int wg_number = gws / lws;
-        int last_subgroup_size = 0;
-        int current_sbs = 0;
-        if (non_uniform_size) wg_number++;
-
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            if (non_uniform_size && wg_id == wg_number - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
-                                          last_subgroup_size);
-            }
-            // Map to array indexed to array indexed by local ID and sub group
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            { // inside the work_group
-                mx[wi_id] = x[wi_id]; // read host inputs for work_group
-                my[wi_id] = y[wi_id]; // read device outputs for work_group
-            }
-
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                if (last_subgroup_size && sb_id == sb_number - 1)
-                {
-                    current_sbs = last_subgroup_size;
-                }
-                else
-                {
-                    current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                }
-                // take index of array where info which work_item will
-                // be broadcast its value is stored
-                int midx = 4 * wg_offset + 2;
-                // take subgroup local id of this work_item
-                // Check result
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                { // for each subgroup work item
-
-                    wi_id & 1 ? expected_result = { 1, 0, 0, 1 }
-                              : expected_result = { 1, 0, 0, 2 };
-
-                    device_result = my[wg_offset + wi_id];
-                    if (!compare(device_result, expected_result))
-                    {
-                        log_error(
-                            "ERROR: sub_group_%s mismatch for local id %d in "
-                            "sub group %d in group %d obtained {%d, %d, %d, "
-                            "%d}, expected {%d, %d, %d, %d}\n",
-                            operation_names(operation), wi_id, sb_id, wg_id,
-                            device_result.s0, device_result.s1,
-                            device_result.s2, device_result.s3,
-                            expected_result.s0, expected_result.s1,
-                            expected_result.s2, expected_result.s3);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            x += lws;
-            y += lws;
-            m += 4 * lws;
-        }
-
-        log_info("  sub_group_inverse_ballot... passed\n");
-        return TEST_PASS;
-    }
-};
-
-
-// Test for bit count/inclusive and exclusive scan/ find lsb msb ballot function
-template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
-{
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        int non_uniform_size = gws % lws;
-        int wg_number = gws / lws;
-        int last_subgroup_size = 0;
-        int current_sbs = 0;
-
-        log_info("  sub_group_%s(%s)...\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        if (non_uniform_size)
-        {
-            log_info("  non uniform work group size mode ON\n");
-            wg_number++;
-        }
-        int e;
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            if (non_uniform_size && wg_id == wg_number - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
-                                          last_subgroup_size);
-            }
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                if (last_subgroup_size && sb_id == sb_number - 1)
-                {
-                    current_sbs = last_subgroup_size;
-                }
-                else
-                {
-                    current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                }
-                if (operation == BallotOp::ballot_bit_count
-                    || operation == BallotOp::ballot_inclusive_scan
-                    || operation == BallotOp::ballot_exclusive_scan)
-                {
-                    set_randomdata_for_subgroup<Ty>(t, wg_offset, current_sbs);
-                }
-                else if (operation == BallotOp::ballot_find_lsb
-                         || operation == BallotOp::ballot_find_msb)
-                {
-                    // Regarding to the spec, find lsb and find msb result is
-                    // undefined behavior if input value is zero, so generate
-                    // only non-zero values.
-                    for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                    {
-                        char x = (genrand_int32(gMTdata)) & 0xff;
-                        // undefined behaviour in case of 0;
-                        x = x ? x : 1;
-                        memset(&t[wg_offset + wi_id], x, sizeof(Ty));
-                    }
-                }
-                else
-                {
-                    log_error("Unknown operation...");
-                }
-            }
-
-            // Now map into work group using map from device
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            {
-                x[wi_id] = t[wi_id];
-            }
-
-            x += lws;
-            m += 4 * lws;
-        }
-    }
-
-    static bs128 getImportantBits(cl_uint sub_group_local_id,
-                                  cl_uint sub_group_size)
-    {
-        bs128 mask;
-        if (operation == BallotOp::ballot_bit_count
-            || operation == BallotOp::ballot_find_lsb
-            || operation == BallotOp::ballot_find_msb)
-        {
-            for (cl_uint i = 0; i < sub_group_size; ++i) mask.set(i);
-        }
-        else if (operation == BallotOp::ballot_inclusive_scan
-                 || operation == BallotOp::ballot_exclusive_scan)
-        {
-            for (cl_uint i = 0; i <= sub_group_local_id; ++i) mask.set(i);
-            if (operation == BallotOp::ballot_exclusive_scan)
-                mask.reset(sub_group_local_id);
-        }
-        return mask;
-    }
-
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        int non_uniform_size = gws % lws;
-        int wg_number = gws / lws;
-        wg_number = non_uniform_size ? wg_number + 1 : wg_number;
-        cl_uint4 expected_result, device_result;
-        int last_subgroup_size = 0;
-        int current_sbs = 0;
-
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            if (non_uniform_size && wg_id == wg_number - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
-                                          last_subgroup_size);
-            }
-            // Map to array indexed to array indexed by local ID and sub group
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            { // inside the work_group
-                // read host inputs for work_group
-                mx[wi_id] = x[wi_id];
-                // read device outputs for work_group
-                my[wi_id] = y[wi_id];
-            }
-
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                if (last_subgroup_size && sb_id == sb_number - 1)
-                {
-                    current_sbs = last_subgroup_size;
-                }
-                else
-                {
-                    current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                }
-                // Check result
-                expected_result = { 0, 0, 0, 0 };
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                { // for subgroup element
-                    bs128 bs;
-                    // convert cl_uint4 input into std::bitset<128>
-                    bs |= bs128(mx[wg_offset + wi_id].s0)
-                        | (bs128(mx[wg_offset + wi_id].s1) << 32)
-                        | (bs128(mx[wg_offset + wi_id].s2) << 64)
-                        | (bs128(mx[wg_offset + wi_id].s3) << 96);
-                    bs &= getImportantBits(wi_id, current_sbs);
-                    device_result = my[wg_offset + wi_id];
-                    if (operation == BallotOp::ballot_inclusive_scan
-                        || operation == BallotOp::ballot_exclusive_scan
-                        || operation == BallotOp::ballot_bit_count)
-                    {
-                        expected_result.s0 = bs.count();
-                        if (!compare(device_result, expected_result))
-                        {
-                            log_error("ERROR: sub_group_%s "
-                                      "mismatch for local id %d in sub group "
-                                      "%d in group %d obtained {%d, %d, %d, "
-                                      "%d}, expected {%d, %d, %d, %d}\n",
-                                      operation_names(operation), wi_id, sb_id,
-                                      wg_id, device_result.s0, device_result.s1,
-                                      device_result.s2, device_result.s3,
-                                      expected_result.s0, expected_result.s1,
-                                      expected_result.s2, expected_result.s3);
-                            return TEST_FAIL;
-                        }
-                    }
-                    else if (operation == BallotOp::ballot_find_lsb)
-                    {
-                        for (int id = 0; id < current_sbs; ++id)
-                        {
-                            if (bs.test(id))
-                            {
-                                expected_result.s0 = id;
-                                break;
-                            }
-                        }
-                        if (!compare(device_result, expected_result))
-                        {
-                            log_error("ERROR: sub_group_ballot_find_lsb "
-                                      "mismatch for local id %d in sub group "
-                                      "%d in group %d obtained {%d, %d, %d, "
-                                      "%d}, expected {%d, %d, %d, %d}\n",
-                                      wi_id, sb_id, wg_id, device_result.s0,
-                                      device_result.s1, device_result.s2,
-                                      device_result.s3, expected_result.s0,
-                                      expected_result.s1, expected_result.s2,
-                                      expected_result.s3);
-                            return TEST_FAIL;
-                        }
-                    }
-                    else if (operation == BallotOp::ballot_find_msb)
-                    {
-                        for (int id = current_sbs - 1; id >= 0; --id)
-                        {
-                            if (bs.test(id))
-                            {
-                                expected_result.s0 = id;
-                                break;
-                            }
-                        }
-                        if (!compare(device_result, expected_result))
-                        {
-                            log_error("ERROR: sub_group_ballot_find_msb "
-                                      "mismatch for local id %d in sub group "
-                                      "%d in group %d obtained {%d, %d, %d, "
-                                      "%d}, expected {%d, %d, %d, %d}\n",
-                                      wi_id, sb_id, wg_id, device_result.s0,
-                                      device_result.s1, device_result.s2,
-                                      device_result.s3, expected_result.s0,
-                                      expected_result.s1, expected_result.s2,
-                                      expected_result.s3);
-                            return TEST_FAIL;
-                        }
-                    }
-                }
-            }
-            x += lws;
-            y += lws;
-            m += 4 * lws;
-        }
-        log_info("  sub_group_ballot_%s(%s)... passed\n",
-                 operation_names(operation), TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-};
-
-// test mask functions
-template <typename Ty, BallotOp operation> struct SMASK
-{
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, l, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        int wg_number = gws / lws;
-        log_info("  get_sub_group_%s_mask...\n", operation_names(operation));
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                int current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                // Produce expected masks for each work item in the subgroup
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                {
-                    int midx = 4 * wg_offset + 4 * wi_id;
-                    cl_uint max_sub_group_size = m[midx + 2];
-                    cl_uint4 expected_mask = { 0 };
-                    expected_mask = generate_bit_mask(
-                        wi_id, operation_names(operation), max_sub_group_size);
-                    set_value(t[wg_offset + wi_id], expected_mask);
-                }
-            }
-
-            // Now map into work group using map from device
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            {
-                x[wi_id] = t[wi_id];
-            }
-            x += lws;
-            m += 4 * lws;
-        }
-    }
-
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        Ty expected_result, device_result;
-        int wg_number = gws / lws;
-
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            { // inside the work_group
-                mx[wi_id] = x[wi_id]; // read host inputs for work_group
-                my[wi_id] = y[wi_id]; // read device outputs for work_group
-            }
-
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            {
-                int wg_offset = sb_id * sbs;
-                int current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-
-                // Check result
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                { // inside the subgroup
-                    expected_result =
-                        mx[wg_offset + wi_id]; // read host input for subgroup
-                    device_result =
-                        my[wg_offset
-                           + wi_id]; // read device outputs for subgroup
-                    if (!compare(device_result, expected_result))
-                    {
-                        log_error("ERROR:  get_sub_group_%s_mask... mismatch "
-                                  "for local id %d in sub group %d in group "
-                                  "%d, obtained %d, expected %d\n",
-                                  operation_names(operation), wi_id, sb_id,
-                                  wg_id, device_result, expected_result);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            x += lws;
-            y += lws;
-            m += 4 * lws;
-        }
-        log_info("  get_sub_group_%s_mask... passed\n",
-                 operation_names(operation));
-        return TEST_PASS;
-    }
-};
-
-static const char *bcast_non_uniform_source =
-    "__kernel void test_bcast_non_uniform(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {\n"
-    "        out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z);\n"
-    "    } else {\n"
-    "       out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w);\n"
-    "    }\n"
-    "}\n";
-
-static const char *bcast_first_source =
-    "__kernel void test_bcast_first(const __global Type *in, __global int4 "
-    "*xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {\n"
-    "       out[gid] = sub_group_broadcast_first(x);\n"
-    "    } else {\n"
-    "       out[gid] = sub_group_broadcast_first(x);\n"
-    "    }\n"
-    "}\n";
-
-static const char *ballot_bit_count_source =
-    "__kernel void test_sub_group_ballot_bit_count(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint4 value = (uint4)(0,0,0,0);\n"
-    "    value = (uint4)(sub_group_ballot_bit_count(x),0,0,0);\n"
-    "    out[gid] = value;\n"
-    "}\n";
-
-static const char *ballot_inclusive_scan_source =
-    "__kernel void test_sub_group_ballot_inclusive_scan(const __global Type "
-    "*in, __global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint4 value = (uint4)(0,0,0,0);\n"
-    "    value = (uint4)(sub_group_ballot_inclusive_scan(x),0,0,0);\n"
-    "    out[gid] = value;\n"
-    "}\n";
-
-static const char *ballot_exclusive_scan_source =
-    "__kernel void test_sub_group_ballot_exclusive_scan(const __global Type "
-    "*in, __global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint4 value = (uint4)(0,0,0,0);\n"
-    "    value = (uint4)(sub_group_ballot_exclusive_scan(x),0,0,0);\n"
-    "    out[gid] = value;\n"
-    "}\n";
-
-static const char *ballot_find_lsb_source =
-    "__kernel void test_sub_group_ballot_find_lsb(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint4 value = (uint4)(0,0,0,0);\n"
-    "    value = (uint4)(sub_group_ballot_find_lsb(x),0,0,0);\n"
-    "    out[gid] = value;\n"
-    "}\n";
-
-static const char *ballot_find_msb_source =
-    "__kernel void test_sub_group_ballot_find_msb(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint4 value = (uint4)(0,0,0,0);"
-    "    value = (uint4)(sub_group_ballot_find_msb(x),0,0,0);"
-    "    out[gid] = value ;"
-    "}\n";
-
-static const char *get_subgroup_ge_mask_source =
-    "__kernel void test_get_sub_group_ge_mask(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].z = get_max_sub_group_size();\n"
-    "    Type x = in[gid];\n"
-    "    uint4 mask = get_sub_group_ge_mask();"
-    "    out[gid] = mask;\n"
-    "}\n";
-
-static const char *get_subgroup_gt_mask_source =
-    "__kernel void test_get_sub_group_gt_mask(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].z = get_max_sub_group_size();\n"
-    "    Type x = in[gid];\n"
-    "    uint4 mask = get_sub_group_gt_mask();"
-    "    out[gid] = mask;\n"
-    "}\n";
-
-static const char *get_subgroup_le_mask_source =
-    "__kernel void test_get_sub_group_le_mask(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].z = get_max_sub_group_size();\n"
-    "    Type x = in[gid];\n"
-    "    uint4 mask = get_sub_group_le_mask();"
-    "    out[gid] = mask;\n"
-    "}\n";
-
-static const char *get_subgroup_lt_mask_source =
-    "__kernel void test_get_sub_group_lt_mask(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].z = get_max_sub_group_size();\n"
-    "    Type x = in[gid];\n"
-    "    uint4 mask = get_sub_group_lt_mask();"
-    "    out[gid] = mask;\n"
-    "}\n";
-
-static const char *get_subgroup_eq_mask_source =
-    "__kernel void test_get_sub_group_eq_mask(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].z = get_max_sub_group_size();\n"
-    "    Type x = in[gid];\n"
-    "    uint4 mask = get_sub_group_eq_mask();"
-    "    out[gid] = mask;\n"
-    "}\n";
-
-static const char *ballot_source =
-    "__kernel void test_sub_group_ballot(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "uint4 full_ballot = sub_group_ballot(1);\n"
-    "uint divergence_mask;\n"
-    "uint4 partial_ballot;\n"
-    "uint gid = get_global_id(0);"
-    "XY(xy,gid);\n"
-    "if (get_sub_group_local_id() & 1) {\n"
-    "    divergence_mask = 0xaaaaaaaa;\n"
-    "    partial_ballot = sub_group_ballot(1);\n"
-    "} else {\n"
-    "    divergence_mask = 0x55555555;\n"
-    "    partial_ballot = sub_group_ballot(1);\n"
-    "}\n"
-    " size_t lws = get_local_size(0);\n"
-    "uint4 masked_ballot = full_ballot;\n"
-    "masked_ballot.x &= divergence_mask;\n"
-    "masked_ballot.y &= divergence_mask;\n"
-    "masked_ballot.z &= divergence_mask;\n"
-    "masked_ballot.w &= divergence_mask;\n"
-    "out[gid] = all(masked_ballot == partial_ballot);\n"
-
-    "} \n";
-
-static const char *ballot_source_inverse =
-    "__kernel void test_sub_group_ballot_inverse(const __global "
-    "Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint4 value = (uint4)(10,0,0,0);\n"
-    "    if (get_sub_group_local_id() & 1) {"
-    "        uint4 partial_ballot_mask = "
-    "(uint4)(0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA);"
-    "        if (sub_group_inverse_ballot(partial_ballot_mask)) {\n"
-    "            value = (uint4)(1,0,0,1);\n"
-    "        } else {\n"
-    "            value = (uint4)(0,0,0,1);\n"
-    "        }\n"
-    "    } else {\n"
-    "       uint4 partial_ballot_mask = "
-    "(uint4)(0x55555555,0x55555555,0x55555555,0x55555555);"
-    "        if (sub_group_inverse_ballot(partial_ballot_mask)) {\n"
-    "            value = (uint4)(1,0,0,2);\n"
-    "        } else {\n"
-    "            value = (uint4)(0,0,0,2);\n"
-    "        }\n"
-    "    }\n"
-    "    out[gid] = value;\n"
-    "}\n";
-
-static const char *ballot_bit_extract_source =
-    "__kernel void test_sub_group_ballot_bit_extract(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint index = xy[gid].z;\n"
-    "    uint4 value = (uint4)(10,0,0,0);\n"
-    "    if (get_sub_group_local_id() & 1) {"
-    "       if (sub_group_ballot_bit_extract(x, xy[gid].z)) {\n"
-    "           value = (uint4)(1,0,0,1);\n"
-    "       } else {\n"
-    "           value = (uint4)(0,0,0,1);\n"
-    "       }\n"
-    "    } else {\n"
-    "       if (sub_group_ballot_bit_extract(x, xy[gid].w)) {\n"
-    "           value = (uint4)(1,0,0,2);\n"
-    "       } else {\n"
-    "           value = (uint4)(0,0,0,2);\n"
-    "       }\n"
-    "    }\n"
-    "    out[gid] = value;\n"
-    "}\n";
-
-template <typename T> int run_non_uniform_broadcast_for_type(RunTestForType rft)
-{
-    int error =
-        rft.run_impl<T, BC<T, SubgroupsBroadcastOp::non_uniform_broadcast>>(
-            "test_bcast_non_uniform", bcast_non_uniform_source);
-    return error;
-}
-
-
-}
-
-int test_subgroup_functions_ballot(cl_device_id device, cl_context context,
-                                   cl_command_queue queue, int num_elements)
-{
-    std::vector<std::string> required_extensions = { "cl_khr_subgroup_ballot" };
-    constexpr size_t global_work_size = 170;
-    constexpr size_t local_work_size = 64;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-
-    // non uniform broadcast functions
-    int error = run_non_uniform_broadcast_for_type<cl_int>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_int2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_int3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_int4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_int8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_int16>(rft);
-
-    error |= run_non_uniform_broadcast_for_type<cl_uint>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uint2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_uint3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uint4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uint8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uint16>(rft);
-
-    error |= run_non_uniform_broadcast_for_type<cl_char>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_char2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_char3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_char4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_char8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_char16>(rft);
-
-    error |= run_non_uniform_broadcast_for_type<cl_uchar>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uchar2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_uchar3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uchar4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uchar8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uchar16>(rft);
-
-    error |= run_non_uniform_broadcast_for_type<cl_short>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_short2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_short3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_short4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_short8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_short16>(rft);
-
-    error |= run_non_uniform_broadcast_for_type<cl_ushort>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ushort2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_ushort3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ushort4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ushort8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ushort16>(rft);
-
-    error |= run_non_uniform_broadcast_for_type<cl_long>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_long2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_long3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_long4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_long8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_long16>(rft);
-
-    error |= run_non_uniform_broadcast_for_type<cl_ulong>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ulong2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_ulong3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ulong4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ulong8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ulong16>(rft);
-
-    error |= run_non_uniform_broadcast_for_type<cl_float>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_float2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_float3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_float4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_float8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_float16>(rft);
-
-    error |= run_non_uniform_broadcast_for_type<cl_double>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_double2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_double3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_double4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_double8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_double16>(rft);
-
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_half>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_half2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_half3>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_half4>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_half8>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_half16>(rft);
-
-    // broadcast first functions
-    error |=
-        rft.run_impl<cl_int, BC<cl_int, SubgroupsBroadcastOp::broadcast_first>>(
-            "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_uint,
-                          BC<cl_uint, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_long,
-                          BC<cl_long, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_ulong,
-                          BC<cl_ulong, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_short,
-                          BC<cl_short, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_ushort,
-                          BC<cl_ushort, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_char,
-                          BC<cl_char, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_uchar,
-                          BC<cl_uchar, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_float,
-                          BC<cl_float, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_double,
-                          BC<cl_double, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<
-        subgroups::cl_half,
-        BC<subgroups::cl_half, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-
-    // mask functions
-    error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::eq_mask>>(
-        "test_get_sub_group_eq_mask", get_subgroup_eq_mask_source);
-    error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::ge_mask>>(
-        "test_get_sub_group_ge_mask", get_subgroup_ge_mask_source);
-    error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::gt_mask>>(
-        "test_get_sub_group_gt_mask", get_subgroup_gt_mask_source);
-    error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::le_mask>>(
-        "test_get_sub_group_le_mask", get_subgroup_le_mask_source);
-    error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::lt_mask>>(
-        "test_get_sub_group_lt_mask", get_subgroup_lt_mask_source);
-
-    // ballot functions
-    error |= rft.run_impl<cl_uint, BALLOT<cl_uint>>("test_sub_group_ballot",
-                                                    ballot_source);
-    error |= rft.run_impl<cl_uint4,
-                          BALLOT_INVERSE<cl_uint4, BallotOp::inverse_ballot>>(
-        "test_sub_group_ballot_inverse", ballot_source_inverse);
-    error |= rft.run_impl<
-        cl_uint4, BALLOT_BIT_EXTRACT<cl_uint4, BallotOp::ballot_bit_extract>>(
-        "test_sub_group_ballot_bit_extract", ballot_bit_extract_source);
-    error |= rft.run_impl<
-        cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_bit_count>>(
-        "test_sub_group_ballot_bit_count", ballot_bit_count_source);
-    error |= rft.run_impl<
-        cl_uint4,
-        BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_inclusive_scan>>(
-        "test_sub_group_ballot_inclusive_scan", ballot_inclusive_scan_source);
-    error |= rft.run_impl<
-        cl_uint4,
-        BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_exclusive_scan>>(
-        "test_sub_group_ballot_exclusive_scan", ballot_exclusive_scan_source);
-    error |= rft.run_impl<
-        cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_find_lsb>>(
-        "test_sub_group_ballot_find_lsb", ballot_find_lsb_source);
-    error |= rft.run_impl<
-        cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_find_msb>>(
-        "test_sub_group_ballot_find_msb", ballot_find_msb_source);
-    return error;
-}

diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp
deleted file mode 100644
index 588e9ce..0000000
--- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp
+++ /dev/null

@@ -1,340 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "procs.h"
-#include "subhelpers.h"
-#include "subgroup_common_templates.h"
-#include "harness/typeWrappers.h"
-
-#define CLUSTER_SIZE 4
-#define CLUSTER_SIZE_STR "4"
-
-namespace {
-static const char *redadd_clustered_source =
-    "__kernel void test_redadd_clustered(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-
-static const char *redmax_clustered_source =
-    "__kernel void test_redmax_clustered(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-
-static const char *redmin_clustered_source =
-    "__kernel void test_redmin_clustered(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-
-static const char *redmul_clustered_source =
-    "__kernel void test_redmul_clustered(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-
-static const char *redand_clustered_source =
-    "__kernel void test_redand_clustered(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-
-static const char *redor_clustered_source =
-    "__kernel void test_redor_clustered(const __global Type *in, __global int4 "
-    "*xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-
-static const char *redxor_clustered_source =
-    "__kernel void test_redxor_clustered(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-
-static const char *redand_clustered_logical_source =
-    "__kernel void test_redand_clustered_logical(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR
-    ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = "
-    "sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR ");\n"
-    "}\n";
-
-static const char *redor_clustered_logical_source =
-    "__kernel void test_redor_clustered_logical(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR
-    ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = "
-    "sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR ");\n"
-    "}\n";
-
-static const char *redxor_clustered_logical_source =
-    "__kernel void test_redxor_clustered_logical(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if ( sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR
-    ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = "
-    "sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR ");\n"
-    "}\n";
-
-
-// DESCRIPTION:
-// Test for reduce cluster functions
-template <typename Ty, ArithmeticOp operation> struct RED_CLU
-{
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        ng = ng / nw;
-        log_info("  sub_group_clustered_reduce_%s(%s, %d bytes) ...\n",
-                 operation_names(operation), TypeManager<Ty>::name(),
-                 sizeof(Ty));
-        genrand<Ty, operation>(x, t, m, ns, nw, ng);
-    }
-
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        ng = ng / nw;
-
-        for (int k = 0; k < ng; ++k)
-        {
-            std::vector<cl_int> data_type_sizes;
-            // Map to array indexed to array indexed by local ID and sub group
-            for (int j = 0; j < nw; ++j)
-            {
-                mx[j] = x[j];
-                my[j] = y[j];
-                data_type_sizes.push_back(m[4 * j + 3]);
-            }
-
-            for (cl_int dts : data_type_sizes)
-            {
-                if (dts != sizeof(Ty))
-                {
-                    log_error("ERROR: sub_group_clustered_reduce_%s(%s) "
-                              "wrong data type size detected, expected: %d, "
-                              "used by device %d, in group %d\n",
-                              operation_names(operation),
-                              TypeManager<Ty>::name(), sizeof(Ty), dts, k);
-                    return TEST_FAIL;
-                }
-            }
-
-            for (int j = 0; j < nj; ++j)
-            {
-                int ii = j * ns;
-                int n = ii + ns > nw ? nw - ii : ns;
-                int midx = 4 * ii + 2;
-                std::vector<Ty> clusters_results;
-                int clusters_counter = ns / CLUSTER_SIZE;
-                clusters_results.resize(clusters_counter);
-
-                // Compute target
-                Ty tr = mx[ii];
-                for (int i = 0; i < n; ++i)
-                {
-                    if (i % CLUSTER_SIZE == 0)
-                        tr = mx[ii + i];
-                    else
-                        tr = calculate<Ty>(tr, mx[ii + i], operation);
-                    clusters_results[i / CLUSTER_SIZE] = tr;
-                }
-
-                // Check result
-                for (int i = 0; i < n; ++i)
-                {
-                    Ty rr = my[ii + i];
-                    tr = clusters_results[i / CLUSTER_SIZE];
-                    if (!compare(rr, tr))
-                    {
-                        log_error(
-                            "ERROR: sub_group_clustered_reduce_%s(%s) mismatch "
-                            "for local id %d in sub group %d in group %d\n",
-                            operation_names(operation), TypeManager<Ty>::name(),
-                            i, j, k);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-        log_info("  sub_group_clustered_reduce_%s(%s, %d bytes) ... passed\n",
-                 operation_names(operation), TypeManager<Ty>::name(),
-                 sizeof(Ty));
-        return TEST_PASS;
-    }
-};
-
-template <typename T>
-int run_cluster_red_add_max_min_mul_for_type(RunTestForType rft)
-{
-    int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::add_>>(
-        "test_redadd_clustered", redadd_clustered_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::max_>>(
-        "test_redmax_clustered", redmax_clustered_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::min_>>(
-        "test_redmin_clustered", redmin_clustered_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::mul_>>(
-        "test_redmul_clustered", redmul_clustered_source);
-    return error;
-}
-template <typename T> int run_cluster_and_or_xor_for_type(RunTestForType rft)
-{
-    int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::and_>>(
-        "test_redand_clustered", redand_clustered_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::or_>>(
-        "test_redor_clustered", redor_clustered_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::xor_>>(
-        "test_redxor_clustered", redxor_clustered_source);
-    return error;
-}
-template <typename T>
-int run_cluster_logical_and_or_xor_for_type(RunTestForType rft)
-{
-    int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_and>>(
-        "test_redand_clustered_logical", redand_clustered_logical_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_or>>(
-        "test_redor_clustered_logical", redor_clustered_logical_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_xor>>(
-        "test_redxor_clustered_logical", redxor_clustered_logical_source);
-
-    return error;
-}
-}
-
-int test_subgroup_functions_clustered_reduce(cl_device_id device,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements)
-{
-    std::vector<std::string> required_extensions = {
-        "cl_khr_subgroup_clustered_reduce"
-    };
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-
-    int error = run_cluster_red_add_max_min_mul_for_type<cl_int>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_uint>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_long>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_ulong>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_short>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_ushort>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_char>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_uchar>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_float>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_double>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<subgroups::cl_half>(rft);
-
-    error |= run_cluster_and_or_xor_for_type<cl_int>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_uint>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_long>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_ulong>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_short>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_ushort>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_char>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_uchar>(rft);
-
-    error |= run_cluster_logical_and_or_xor_for_type<cl_int>(rft);
-    return error;
-}

diff --git a/test_conformance/subgroups/test_subgroup_extended_types.cpp b/test_conformance/subgroups/test_subgroup_extended_types.cpp
deleted file mode 100644
index 98401b8..0000000
--- a/test_conformance/subgroups/test_subgroup_extended_types.cpp
+++ /dev/null

@@ -1,138 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "procs.h"
-#include "subhelpers.h"
-#include "subgroup_common_kernels.h"
-#include "subgroup_common_templates.h"
-#include "harness/typeWrappers.h"
-
-namespace {
-
-template <typename T> int run_broadcast_for_extended_type(RunTestForType rft)
-{
-    int error = rft.run_impl<T, BC<T, SubgroupsBroadcastOp::broadcast>>(
-        "test_bcast", bcast_source);
-    return error;
-}
-
-template <typename T> int run_scan_reduction_for_type(RunTestForType rft)
-{
-    int error = rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>("test_redadd",
-                                                               redadd_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>("test_redmax",
-                                                            redmax_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>("test_redmin",
-                                                            redmin_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>("test_scinadd",
-                                                             scinadd_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>("test_scinmax",
-                                                             scinmax_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>("test_scinmin",
-                                                             scinmin_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>("test_scexadd",
-                                                             scexadd_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>("test_scexmax",
-                                                             scexmax_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>("test_scexmin",
-                                                             scexmin_source);
-    return error;
-}
-
-
-}
-
-int test_subgroup_functions_extended_types(cl_device_id device,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements)
-{
-    std::vector<std::string> required_extensions = {
-        "cl_khr_subgroup_extended_types"
-    };
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-
-    int error = run_broadcast_for_extended_type<cl_uint2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_uint3>(rft);
-    error |= run_broadcast_for_extended_type<cl_uint4>(rft);
-    error |= run_broadcast_for_extended_type<cl_uint8>(rft);
-    error |= run_broadcast_for_extended_type<cl_uint16>(rft);
-    error |= run_broadcast_for_extended_type<cl_int2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_int3>(rft);
-    error |= run_broadcast_for_extended_type<cl_int4>(rft);
-    error |= run_broadcast_for_extended_type<cl_int8>(rft);
-    error |= run_broadcast_for_extended_type<cl_int16>(rft);
-
-    error |= run_broadcast_for_extended_type<cl_ulong2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_ulong3>(rft);
-    error |= run_broadcast_for_extended_type<cl_ulong4>(rft);
-    error |= run_broadcast_for_extended_type<cl_ulong8>(rft);
-    error |= run_broadcast_for_extended_type<cl_ulong16>(rft);
-    error |= run_broadcast_for_extended_type<cl_long2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_long3>(rft);
-    error |= run_broadcast_for_extended_type<cl_long4>(rft);
-    error |= run_broadcast_for_extended_type<cl_long8>(rft);
-    error |= run_broadcast_for_extended_type<cl_long16>(rft);
-
-    error |= run_broadcast_for_extended_type<cl_float2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_float3>(rft);
-    error |= run_broadcast_for_extended_type<cl_float4>(rft);
-    error |= run_broadcast_for_extended_type<cl_float8>(rft);
-    error |= run_broadcast_for_extended_type<cl_float16>(rft);
-
-    error |= run_broadcast_for_extended_type<cl_double2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_double3>(rft);
-    error |= run_broadcast_for_extended_type<cl_double4>(rft);
-    error |= run_broadcast_for_extended_type<cl_double8>(rft);
-    error |= run_broadcast_for_extended_type<cl_double16>(rft);
-
-    error |= run_broadcast_for_extended_type<cl_ushort2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_ushort3>(rft);
-    error |= run_broadcast_for_extended_type<cl_ushort4>(rft);
-    error |= run_broadcast_for_extended_type<cl_ushort8>(rft);
-    error |= run_broadcast_for_extended_type<cl_ushort16>(rft);
-    error |= run_broadcast_for_extended_type<cl_short2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_short3>(rft);
-    error |= run_broadcast_for_extended_type<cl_short4>(rft);
-    error |= run_broadcast_for_extended_type<cl_short8>(rft);
-    error |= run_broadcast_for_extended_type<cl_short16>(rft);
-
-    error |= run_broadcast_for_extended_type<cl_uchar2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_uchar3>(rft);
-    error |= run_broadcast_for_extended_type<cl_uchar4>(rft);
-    error |= run_broadcast_for_extended_type<cl_uchar8>(rft);
-    error |= run_broadcast_for_extended_type<cl_uchar16>(rft);
-    error |= run_broadcast_for_extended_type<cl_char2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_char3>(rft);
-    error |= run_broadcast_for_extended_type<cl_char4>(rft);
-    error |= run_broadcast_for_extended_type<cl_char8>(rft);
-    error |= run_broadcast_for_extended_type<cl_char16>(rft);
-
-    error |= run_broadcast_for_extended_type<subgroups::cl_half2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_half3>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_half4>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_half8>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_half16>(rft);
-
-    error |= run_scan_reduction_for_type<cl_uchar>(rft);
-    error |= run_scan_reduction_for_type<cl_char>(rft);
-    error |= run_scan_reduction_for_type<cl_ushort>(rft);
-    error |= run_scan_reduction_for_type<cl_short>(rft);
-    return error;
-}

diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp
deleted file mode 100644
index eb46ff0..0000000
--- a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp
+++ /dev/null

@@ -1,473 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "procs.h"
-#include "subhelpers.h"
-#include "harness/typeWrappers.h"
-#include "subgroup_common_templates.h"
-
-namespace {
-
-static const char *scinadd_non_uniform_source = R"(
-    __kernel void test_scinadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_add(in[gid]);
-            }
-    }
-)";
-
-static const char *scinmax_non_uniform_source = R"(
-    __kernel void test_scinmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_max(in[gid]);
-            }
-    }
-)";
-
-static const char *scinmin_non_uniform_source = R"(
-    __kernel void test_scinmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_min(in[gid]);
-            }
-    }
-)";
-
-static const char *scinmul_non_uniform_source = R"(
-    __kernel void test_scinmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_mul(in[gid]);
-            }
-    }
-)";
-
-static const char *scinand_non_uniform_source = R"(
-    __kernel void test_scinand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_and(in[gid]);
-            }
-    }
-)";
-
-static const char *scinor_non_uniform_source = R"(
-    __kernel void test_scinor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_or(in[gid]);
-            }
-    }
-)";
-
-static const char *scinxor_non_uniform_source = R"(
-    __kernel void test_scinxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_xor(in[gid]);
-            }
-    }
-)";
-
-static const char *scinand_non_uniform_logical_source = R"(
-    __kernel void test_scinand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_logical_and(in[gid]);
-            }
-    }
-)";
-
-static const char *scinor_non_uniform_logical_source = R"(
-    __kernel void test_scinor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_logical_or(in[gid]);
-            }
-    }
-)";
-
-static const char *scinxor_non_uniform_logical_source = R"(
-    __kernel void test_scinxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_logical_xor(in[gid]);
-            }
-    }
-)";
-
-static const char *scexadd_non_uniform_source = R"(
-    __kernel void test_scexadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_add(in[gid]);
-            }
-    }
-)";
-
-static const char *scexmax_non_uniform_source = R"(
-    __kernel void test_scexmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_max(in[gid]);
-            }
-    }
-)";
-
-static const char *scexmin_non_uniform_source = R"(
-    __kernel void test_scexmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_min(in[gid]);
-            }
-    }
-)";
-
-static const char *scexmul_non_uniform_source = R"(
-    __kernel void test_scexmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_mul(in[gid]);
-            }
-    }
-)";
-
-static const char *scexand_non_uniform_source = R"(
-    __kernel void test_scexand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_and(in[gid]);
-            }
-    }
-)";
-
-static const char *scexor_non_uniform_source = R"(
-    __kernel void test_scexor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_or(in[gid]);
-            }
-    }
-)";
-
-static const char *scexxor_non_uniform_source = R"(
-    __kernel void test_scexxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_xor(in[gid]);
-            }
-    }
-)";
-
-static const char *scexand_non_uniform_logical_source = R"(
-    __kernel void test_scexand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_logical_and(in[gid]);
-            }
-    }
-)";
-
-static const char *scexor_non_uniform_logical_source = R"(
-    __kernel void test_scexor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_logical_or(in[gid]);
-            }
-    }
-)";
-
-static const char *scexxor_non_uniform_logical_source = R"(
-    __kernel void test_scexxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_logical_xor(in[gid]);
-            }
-    }
-)";
-
-static const char *redadd_non_uniform_source = R"(
-    __kernel void test_redadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_add(in[gid]);
-            }
-    }
-)";
-
-static const char *redmax_non_uniform_source = R"(
-    __kernel void test_redmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_max(in[gid]);
-            }
-    }
-)";
-
-static const char *redmin_non_uniform_source = R"(
-    __kernel void test_redmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_min(in[gid]);
-            }
-    }
-)";
-
-static const char *redmul_non_uniform_source = R"(
-    __kernel void test_redmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_mul(in[gid]);
-            }
-    }
-)";
-
-static const char *redand_non_uniform_source = R"(
-    __kernel void test_redand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_and(in[gid]);
-            }
-    }
-)";
-
-static const char *redor_non_uniform_source = R"(
-    __kernel void test_redor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_or(in[gid]);
-            }
-    }
-)";
-
-static const char *redxor_non_uniform_source = R"(
-    __kernel void test_redxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_xor(in[gid]);
-            }
-    }
-)";
-
-static const char *redand_non_uniform_logical_source = R"(
-    __kernel void test_redand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_logical_and(in[gid]);
-            }
-    }
-)";
-
-static const char *redor_non_uniform_logical_source = R"(
-    __kernel void test_redor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_logical_or(in[gid]);
-            }
-    }
-)";
-
-static const char *redxor_non_uniform_logical_source = R"(
-    __kernel void test_redxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_logical_xor(in[gid]);
-            }
-    }
-)";
-
-template <typename T>
-int run_functions_add_mul_max_min_for_type(RunTestForType rft)
-{
-    int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>(
-        "test_scinadd_non_uniform", scinadd_non_uniform_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::mul_>>(
-        "test_scinmul_non_uniform", scinmul_non_uniform_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>(
-        "test_scinmax_non_uniform", scinmax_non_uniform_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>(
-        "test_scinmin_non_uniform", scinmin_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>(
-        "test_scexadd_non_uniform", scexadd_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::mul_>>(
-        "test_scexmul_non_uniform", scexmul_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>(
-        "test_scexmax_non_uniform", scexmax_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>(
-        "test_scexmin_non_uniform", scexmin_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>(
-        "test_redadd_non_uniform", redadd_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::mul_>>(
-        "test_redmul_non_uniform", redmul_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>(
-        "test_redmax_non_uniform", redmax_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>(
-        "test_redmin_non_uniform", redmin_non_uniform_source);
-    return error;
-}
-
-template <typename T> int run_functions_and_or_xor_for_type(RunTestForType rft)
-{
-    int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::and_>>(
-        "test_scinand_non_uniform", scinand_non_uniform_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::or_>>(
-        "test_scinor_non_uniform", scinor_non_uniform_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::xor_>>(
-        "test_scinxor_non_uniform", scinxor_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::and_>>(
-        "test_scexand_non_uniform", scexand_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::or_>>(
-        "test_scexor_non_uniform", scexor_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::xor_>>(
-        "test_scexxor_non_uniform", scexxor_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::and_>>(
-        "test_redand_non_uniform", redand_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::or_>>(
-        "test_redor_non_uniform", redor_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::xor_>>(
-        "test_redxor_non_uniform", redxor_non_uniform_source);
-    return error;
-}
-
-template <typename T>
-int run_functions_logical_and_or_xor_for_type(RunTestForType rft)
-{
-    int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_and>>(
-        "test_scinand_non_uniform_logical", scinand_non_uniform_logical_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_or>>(
-        "test_scinor_non_uniform_logical", scinor_non_uniform_logical_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_xor>>(
-        "test_scinxor_non_uniform_logical", scinxor_non_uniform_logical_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_and>>(
-        "test_scexand_non_uniform_logical", scexand_non_uniform_logical_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_or>>(
-        "test_scexor_non_uniform_logical", scexor_non_uniform_logical_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_xor>>(
-        "test_scexxor_non_uniform_logical", scexxor_non_uniform_logical_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_and>>(
-        "test_redand_non_uniform_logical", redand_non_uniform_logical_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_or>>(
-        "test_redor_non_uniform_logical", redor_non_uniform_logical_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_xor>>(
-        "test_redxor_non_uniform_logical", redxor_non_uniform_logical_source);
-    return error;
-}
-
-}
-
-int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device,
-                                                   cl_context context,
-                                                   cl_command_queue queue,
-                                                   int num_elements)
-{
-    std::vector<std::string> required_extensions = {
-        "cl_khr_subgroup_non_uniform_arithmetic"
-    };
-    std::vector<uint32_t> masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555,
-                                 0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00,
-                                 0x00ffff00, 0x80000000, 0xaaaaaaaa };
-
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions, masks);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-
-    int error = run_functions_add_mul_max_min_for_type<cl_int>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_uint>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_long>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_ulong>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_short>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_ushort>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_char>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_uchar>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_float>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_double>(rft);
-    error |= run_functions_add_mul_max_min_for_type<subgroups::cl_half>(rft);
-
-    error |= run_functions_and_or_xor_for_type<cl_int>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_uint>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_long>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_ulong>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_short>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_ushort>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_char>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_uchar>(rft);
-
-    error |= run_functions_logical_and_or_xor_for_type<cl_int>(rft);
-    return error;
-}
\ No newline at end of file

diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp
deleted file mode 100644
index 2b00b4d..0000000
--- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp
+++ /dev/null

@@ -1,303 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "procs.h"
-#include "subhelpers.h"
-#include "harness/typeWrappers.h"
-#include <set>
-
-namespace {
-
-template <typename T, NonUniformVoteOp operation> struct VOTE
-{
-    static void gen(T *x, T *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int i, ii, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        int nj = (nw + ns - 1) / ns;
-        int non_uniform_size = ng % nw;
-        ng = ng / nw;
-        int last_subgroup_size = 0;
-        ii = 0;
-
-        log_info("  sub_group_%s%s... \n",
-                 (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_",
-                 operation_names(operation));
-
-        log_info("  test params: global size = %d local size = %d subgroups "
-                 "size = %d work item mask = 0x%x data type (%s)\n",
-                 test_params.global_workgroup_size, nw, ns, work_items_mask,
-                 TypeManager<T>::name());
-        if (non_uniform_size)
-        {
-            log_info("  non uniform work group size mode ON\n");
-        }
-        if (operation == NonUniformVoteOp::elect) return;
-
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            if (non_uniform_size && k == ng - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, nj, ns, nw,
-                                          last_subgroup_size);
-            }
-            for (j = 0; j < nj; ++j)
-            { // for each subgroup
-                ii = j * ns;
-                if (last_subgroup_size && j == nj - 1)
-                {
-                    n = last_subgroup_size;
-                }
-                else
-                {
-                    n = ii + ns > nw ? nw - ii : ns;
-                }
-                int e = genrand_int32(gMTdata) % 3;
-
-                for (i = 0; i < n; i++)
-                {
-                    if (e == 2)
-                    { // set once 0 and once 1 alternately
-                        int value = i % 2;
-                        set_value(t[ii + i], value);
-                    }
-                    else
-                    { // set 0/1 for all work items in subgroup
-                        set_value(t[ii + i], e);
-                    }
-                }
-            }
-            // Now map into work group using map from device
-            for (j = 0; j < nw; ++j)
-            {
-                x[j] = t[j];
-            }
-            x += nw;
-            m += 4 * nw;
-        }
-    }
-
-    static int chk(T *x, T *y, T *mx, T *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        int nj = (nw + ns - 1) / ns;
-        cl_int tr, rr;
-        int non_uniform_size = ng % nw;
-        ng = ng / nw;
-        if (non_uniform_size) ng++;
-        int last_subgroup_size = 0;
-
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            if (non_uniform_size && k == ng - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, nj, ns, nw,
-                                          last_subgroup_size);
-            }
-            for (j = 0; j < nw; ++j)
-            { // inside the work_group
-                mx[j] = x[j]; // read host inputs for work_group
-                my[j] = y[j]; // read device outputs for work_group
-            }
-
-            for (j = 0; j < nj; ++j)
-            { // for each subgroup
-                ii = j * ns;
-                if (last_subgroup_size && j == nj - 1)
-                {
-                    n = last_subgroup_size;
-                }
-                else
-                {
-                    n = ii + ns > nw ? nw - ii : ns;
-                }
-
-                rr = 0;
-                if (operation == NonUniformVoteOp::all
-                    || operation == NonUniformVoteOp::all_equal)
-                    tr = 1;
-                if (operation == NonUniformVoteOp::any) tr = 0;
-
-                std::set<int> active_work_items;
-                for (i = 0; i < n; ++i)
-                {
-                    uint32_t check_work_item = 1 << (i % 32);
-                    if (work_items_mask & check_work_item)
-                    {
-                        active_work_items.insert(i);
-                        switch (operation)
-                        {
-                            case NonUniformVoteOp::elect: break;
-
-                            case NonUniformVoteOp::all:
-                                tr &=
-                                    !compare_ordered<T>(mx[ii + i], 0) ? 1 : 0;
-                                break;
-                            case NonUniformVoteOp::any:
-                                tr |=
-                                    !compare_ordered<T>(mx[ii + i], 0) ? 1 : 0;
-                                break;
-                            case NonUniformVoteOp::all_equal:
-                                tr &= compare_ordered<T>(
-                                          mx[ii + i],
-                                          mx[ii + *active_work_items.begin()])
-                                    ? 1
-                                    : 0;
-                                break;
-                            default:
-                                log_error("Unknown operation\n");
-                                return TEST_FAIL;
-                        }
-                    }
-                }
-                if (active_work_items.empty())
-                {
-                    log_info("  no one workitem acitve... in workgroup id = %d "
-                             "subgroup id = %d\n",
-                             k, j);
-                }
-                else
-                {
-                    auto lowest_active = active_work_items.begin();
-                    for (const int &active_work_item : active_work_items)
-                    {
-                        i = active_work_item;
-                        if (operation == NonUniformVoteOp::elect)
-                        {
-                            i == *lowest_active ? tr = 1 : tr = 0;
-                        }
-
-                        // normalize device values on host, non zero set 1.
-                        rr = compare_ordered<T>(my[ii + i], 0) ? 0 : 1;
-
-                        if (rr != tr)
-                        {
-                            log_error("ERROR: sub_group_%s() \n",
-                                      operation_names(operation));
-                            log_error(
-                                "mismatch for work item %d sub group %d in "
-                                "work group %d. Expected: %d Obtained: %d\n",
-                                i, j, k, tr, rr);
-                            return TEST_FAIL;
-                        }
-                    }
-                }
-            }
-
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-
-        log_info("  sub_group_%s%s... passed\n",
-                 (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_",
-                 operation_names(operation));
-        return TEST_PASS;
-    }
-};
-static const char *elect_source = R"(
-    __kernel void test_elect(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_elect();
-            }
-    }
-)";
-
-static const char *non_uniform_any_source = R"(
-    __kernel void test_non_uniform_any(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_any(in[gid]);
-            }
-    }
-)";
-
-static const char *non_uniform_all_source = R"(
-    __kernel void test_non_uniform_all(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_all(in[gid]);
-            }
-    }
-)";
-
-static const char *non_uniform_all_equal_source = R"(
-    __kernel void test_non_uniform_all_equal(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_all_equal(in[gid]);
-            }
-    }
-)";
-
-template <typename T> int run_vote_all_equal_for_type(RunTestForType rft)
-{
-    int error = rft.run_impl<T, VOTE<T, NonUniformVoteOp::all_equal>>(
-        "test_non_uniform_all_equal", non_uniform_all_equal_source);
-    return error;
-}
-}
-
-int test_subgroup_functions_non_uniform_vote(cl_device_id device,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements)
-{
-    std::vector<std::string> required_extensions = {
-        "cl_khr_subgroup_non_uniform_vote"
-    };
-
-    std::vector<uint32_t> masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555,
-                                 0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00,
-                                 0x00ffff00, 0x80000000 };
-    constexpr size_t global_work_size = 170;
-    constexpr size_t local_work_size = 64;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions, masks);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-
-    int error = run_vote_all_equal_for_type<cl_int>(rft);
-    error |= run_vote_all_equal_for_type<cl_uint>(rft);
-    error |= run_vote_all_equal_for_type<cl_long>(rft);
-    error |= run_vote_all_equal_for_type<cl_ulong>(rft);
-    error |= run_vote_all_equal_for_type<cl_float>(rft);
-    error |= run_vote_all_equal_for_type<cl_double>(rft);
-    error |= run_vote_all_equal_for_type<subgroups::cl_half>(rft);
-
-    error |= rft.run_impl<cl_int, VOTE<cl_int, NonUniformVoteOp::all>>(
-        "test_non_uniform_all", non_uniform_all_source);
-    error |= rft.run_impl<cl_int, VOTE<cl_int, NonUniformVoteOp::elect>>(
-        "test_elect", elect_source);
-    error |= rft.run_impl<cl_int, VOTE<cl_int, NonUniformVoteOp::any>>(
-        "test_non_uniform_any", non_uniform_any_source);
-    return error;
-}

diff --git a/test_conformance/subgroups/test_subgroup_shuffle.cpp b/test_conformance/subgroups/test_subgroup_shuffle.cpp
deleted file mode 100644
index 049f098..0000000
--- a/test_conformance/subgroups/test_subgroup_shuffle.cpp
+++ /dev/null

@@ -1,78 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "procs.h"
-#include "subhelpers.h"
-#include "subgroup_common_templates.h"
-#include "harness/typeWrappers.h"
-#include <bitset>
-
-namespace {
-
-static const char* shuffle_xor_source =
-    "__kernel void test_sub_group_shuffle_xor(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    out[gid] = sub_group_shuffle_xor(x, xy[gid].z);"
-    "}\n";
-
-static const char* shuffle_source =
-    "__kernel void test_sub_group_shuffle(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    out[gid] = sub_group_shuffle(x, xy[gid].z);"
-    "}\n";
-
-template <typename T> int run_shuffle_for_type(RunTestForType rft)
-{
-    int error = rft.run_impl<T, SHF<T, ShuffleOp::shuffle>>(
-        "test_sub_group_shuffle", shuffle_source);
-    error |= rft.run_impl<T, SHF<T, ShuffleOp::shuffle_xor>>(
-        "test_sub_group_shuffle_xor", shuffle_xor_source);
-    return error;
-}
-
-}
-
-int test_subgroup_functions_shuffle(cl_device_id device, cl_context context,
-                                    cl_command_queue queue, int num_elements)
-{
-    std::vector<std::string> required_extensions{ "cl_khr_subgroup_shuffle" };
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-
-    int error = run_shuffle_for_type<cl_int>(rft);
-    error |= run_shuffle_for_type<cl_uint>(rft);
-    error |= run_shuffle_for_type<cl_long>(rft);
-    error |= run_shuffle_for_type<cl_ulong>(rft);
-    error |= run_shuffle_for_type<cl_short>(rft);
-    error |= run_shuffle_for_type<cl_ushort>(rft);
-    error |= run_shuffle_for_type<cl_char>(rft);
-    error |= run_shuffle_for_type<cl_uchar>(rft);
-    error |= run_shuffle_for_type<cl_float>(rft);
-    error |= run_shuffle_for_type<cl_double>(rft);
-    error |= run_shuffle_for_type<subgroups::cl_half>(rft);
-
-    return error;
-}

diff --git a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp
deleted file mode 100644
index 6000c97..0000000
--- a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp
+++ /dev/null

@@ -1,81 +0,0 @@
-//
-// Copyright (c) 2021 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "procs.h"
-#include "subhelpers.h"
-#include "subgroup_common_templates.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-
-namespace {
-
-static const char* shuffle_down_source =
-    "__kernel void test_sub_group_shuffle_down(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    out[gid] = sub_group_shuffle_down(x, xy[gid].z);"
-    "}\n";
-static const char* shuffle_up_source =
-    "__kernel void test_sub_group_shuffle_up(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    out[gid] = sub_group_shuffle_up(x, xy[gid].z);"
-    "}\n";
-
-template <typename T> int run_shuffle_relative_for_type(RunTestForType rft)
-{
-    int error = rft.run_impl<T, SHF<T, ShuffleOp::shuffle_up>>(
-        "test_sub_group_shuffle_up", shuffle_up_source);
-    error |= rft.run_impl<T, SHF<T, ShuffleOp::shuffle_down>>(
-        "test_sub_group_shuffle_down", shuffle_down_source);
-    return error;
-}
-
-}
-
-int test_subgroup_functions_shuffle_relative(cl_device_id device,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements)
-{
-    std::vector<std::string> required_extensions = {
-        "cl_khr_subgroup_shuffle_relative"
-    };
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-
-    int error = run_shuffle_relative_for_type<cl_int>(rft);
-    error |= run_shuffle_relative_for_type<cl_uint>(rft);
-    error |= run_shuffle_relative_for_type<cl_long>(rft);
-    error |= run_shuffle_relative_for_type<cl_ulong>(rft);
-    error |= run_shuffle_relative_for_type<cl_short>(rft);
-    error |= run_shuffle_relative_for_type<cl_ushort>(rft);
-    error |= run_shuffle_relative_for_type<cl_char>(rft);
-    error |= run_shuffle_relative_for_type<cl_uchar>(rft);
-    error |= run_shuffle_relative_for_type<cl_float>(rft);
-    error |= run_shuffle_relative_for_type<cl_double>(rft);
-    error |= run_shuffle_relative_for_type<subgroups::cl_half>(rft);
-
-    return error;
-}

diff --git a/test_conformance/subgroups/test_workgroup.cpp b/test_conformance/subgroups/test_workgroup.cpp
new file mode 100644
index 0000000..bc9d619
--- /dev/null
+++ b/test_conformance/subgroups/test_workgroup.cpp

@@ -0,0 +1,875 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "procs.h"
+#include "subhelpers.h"
+#include "harness/conversions.h"
+#include "harness/typeWrappers.h"
+
+static const char *any_source = "__kernel void test_any(const __global Type "
+                                "*in, __global int2 *xy, __global Type *out)\n"
+                                "{\n"
+                                "    int gid = get_global_id(0);\n"
+                                "    XY(xy,gid);\n"
+                                "    out[gid] = sub_group_any(in[gid]);\n"
+                                "}\n";
+
+static const char *all_source = "__kernel void test_all(const __global Type "
+                                "*in, __global int2 *xy, __global Type *out)\n"
+                                "{\n"
+                                "    int gid = get_global_id(0);\n"
+                                "    XY(xy,gid);\n"
+                                "    out[gid] = sub_group_all(in[gid]);\n"
+                                "}\n";
+
+static const char *bcast_source =
+    "__kernel void test_bcast(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    Type x = in[gid];\n"
+    "    size_t loid = (size_t)((int)x % 100);\n"
+    "    out[gid] = sub_group_broadcast(x, loid);\n"
+    "}\n";
+
+static const char *redadd_source =
+    "__kernel void test_redadd(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_reduce_add(in[gid]);\n"
+    "}\n";
+
+static const char *redmax_source =
+    "__kernel void test_redmax(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_reduce_max(in[gid]);\n"
+    "}\n";
+
+static const char *redmin_source =
+    "__kernel void test_redmin(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_reduce_min(in[gid]);\n"
+    "}\n";
+
+static const char *scinadd_source =
+    "__kernel void test_scinadd(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_scan_inclusive_add(in[gid]);\n"
+    "}\n";
+
+static const char *scinmax_source =
+    "__kernel void test_scinmax(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_scan_inclusive_max(in[gid]);\n"
+    "}\n";
+
+static const char *scinmin_source =
+    "__kernel void test_scinmin(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_scan_inclusive_min(in[gid]);\n"
+    "}\n";
+
+static const char *scexadd_source =
+    "__kernel void test_scexadd(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_scan_exclusive_add(in[gid]);\n"
+    "}\n";
+
+static const char *scexmax_source =
+    "__kernel void test_scexmax(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_scan_exclusive_max(in[gid]);\n"
+    "}\n";
+
+static const char *scexmin_source =
+    "__kernel void test_scexmin(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_scan_exclusive_min(in[gid]);\n"
+    "}\n";
+
+
+// Any/All test functions
+template <int Which> struct AA
+{
+    static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng)
+    {
+        int i, ii, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+        int e;
+
+        ii = 0;
+        for (k = 0; k < ng; ++k)
+        {
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+                e = (int)(genrand_int32(gMTdata) % 3);
+
+                // Initialize data matrix indexed by local id and sub group id
+                switch (e)
+                {
+                    case 0: memset(&t[ii], 0, n * sizeof(cl_int)); break;
+                    case 1:
+                        memset(&t[ii], 0, n * sizeof(cl_int));
+                        i = (int)(genrand_int32(gMTdata) % (cl_uint)n);
+                        t[ii + i] = 41;
+                        break;
+                    case 2: memset(&t[ii], 0xff, n * sizeof(cl_int)); break;
+                }
+            }
+
+            // Now map into work group using map from device
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                x[j] = t[i];
+            }
+
+            x += nw;
+            m += 2 * nw;
+        }
+    }
+
+    static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m,
+                   int ns, int nw, int ng)
+    {
+        int ii, i, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+        cl_int taa, raa;
+
+        log_info("  sub_group_%s...\n", Which == 0 ? "any" : "all");
+
+        for (k = 0; k < ng; ++k)
+        {
+            // Map to array indexed to array indexed by local ID and sub group
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                mx[i] = x[j];
+                my[i] = y[j];
+            }
+
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+
+                // Compute target
+                if (Which == 0)
+                {
+                    taa = 0;
+                    for (i = 0; i < n; ++i) taa |= mx[ii + i] != 0;
+                }
+                else
+                {
+                    taa = 1;
+                    for (i = 0; i < n; ++i) taa &= mx[ii + i] != 0;
+                }
+
+                // Check result
+                for (i = 0; i < n; ++i)
+                {
+                    raa = my[ii + i] != 0;
+                    if (raa != taa)
+                    {
+                        log_error("ERROR: sub_group_%s mismatch for local id "
+                                  "%d in sub group %d in group %d\n",
+                                  Which == 0 ? "any" : "all", i, j, k);
+                        return -1;
+                    }
+                }
+            }
+
+            x += nw;
+            y += nw;
+            m += 2 * nw;
+        }
+
+        return 0;
+    }
+};
+
+// Reduce functions
+template <typename Ty, int Which> struct RED
+{
+    static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
+    {
+        int i, ii, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+
+        ii = 0;
+        for (k = 0; k < ng; ++k)
+        {
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+
+                for (i = 0; i < n; ++i)
+                    t[ii + i] = (Ty)(
+                        (int)(genrand_int32(gMTdata) & 0x7fffffff) % ns + 1);
+            }
+
+            // Now map into work group using map from device
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                x[j] = t[i];
+            }
+
+            x += nw;
+            m += 2 * nw;
+        }
+    }
+
+    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw,
+                   int ng)
+    {
+        int ii, i, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+        Ty tr, rr;
+
+        log_info("  sub_group_reduce_%s(%s)...\n",
+                 Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
+                 TypeName<Ty>::val());
+
+        for (k = 0; k < ng; ++k)
+        {
+            // Map to array indexed to array indexed by local ID and sub group
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                mx[i] = x[j];
+                my[i] = y[j];
+            }
+
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+
+                // Compute target
+                if (Which == 0)
+                {
+                    // add
+                    tr = mx[ii];
+                    for (i = 1; i < n; ++i) tr += mx[ii + i];
+                }
+                else if (Which == 1)
+                {
+                    // max
+                    tr = mx[ii];
+                    for (i = 1; i < n; ++i)
+                        tr = tr > mx[ii + i] ? tr : mx[ii + i];
+                }
+                else if (Which == 2)
+                {
+                    // min
+                    tr = mx[ii];
+                    for (i = 1; i < n; ++i)
+                        tr = tr > mx[ii + i] ? mx[ii + i] : tr;
+                }
+
+                // Check result
+                for (i = 0; i < n; ++i)
+                {
+                    rr = my[ii + i];
+                    if (rr != tr)
+                    {
+                        log_error("ERROR: sub_group_reduce_%s(%s) mismatch for "
+                                  "local id %d in sub group %d in group %d\n",
+                                  Which == 0 ? "add"
+                                             : (Which == 1 ? "max" : "min"),
+                                  TypeName<Ty>::val(), i, j, k);
+                        return -1;
+                    }
+                }
+            }
+
+            x += nw;
+            y += nw;
+            m += 2 * nw;
+        }
+
+        return 0;
+    }
+};
+
+// Scan Inclusive functions
+template <typename Ty, int Which> struct SCIN
+{
+    static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
+    {
+        int i, ii, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+
+        ii = 0;
+        for (k = 0; k < ng; ++k)
+        {
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+
+                for (i = 0; i < n; ++i)
+                    // t[ii+i] = (Ty)((int)(genrand_int32(gMTdata) & 0x7fffffff)
+                    // % ns + 1);
+                    t[ii + i] = (Ty)i;
+            }
+
+            // Now map into work group using map from device
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                x[j] = t[i];
+            }
+
+            x += nw;
+            m += 2 * nw;
+        }
+    }
+
+    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw,
+                   int ng)
+    {
+        int ii, i, j, k, n;
+        int nj = (nw + ns - 1) / ns; // ceil(nw / ns): number of ns-sized chunks
+        Ty tr, rr; // tr: expected scan value so far, rr: value taken from my
+        // Checks y against an inclusive scan of x; returns 0, or -1 on mismatch.
+        log_info("  sub_group_scan_inclusive_%s(%s)...\n",
+                 Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
+                 TypeName<Ty>::val());
+
+        for (k = 0; k < ng; ++k)
+        {
+            // Map x and y into mx and my, indexed by sub group and local ID
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j]; // m holds one index pair per item
+                mx[i] = x[j];
+                my[i] = y[j];
+            }
+
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns; // first index of chunk j
+                n = ii + ns > nw ? nw - ii : ns; // the last chunk may be short
+
+                // Check result: tr is rebuilt from mx[ii] .. mx[ii + i]
+                for (i = 0; i < n; ++i)
+                {
+                    if (Which == 0) // add
+                    {
+                        tr = i == 0 ? mx[ii] : tr + mx[ii + i];
+                    }
+                    else if (Which == 1) // max
+                    {
+                        tr = i == 0 ? mx[ii]
+                                    : (tr > mx[ii + i] ? tr : mx[ii + i]);
+                    }
+                    else // min
+                    {
+                        tr = i == 0 ? mx[ii]
+                                    : (tr > mx[ii + i] ? mx[ii + i] : tr);
+                    }
+
+                    rr = my[ii + i];
+                    if (rr != tr) // exact comparison, also for float/double Ty
+                    {
+                        log_error(
+                            "ERROR: sub_group_scan_inclusive_%s(%s) mismatch "
+                            "for local id %d in sub group %d in group %d\n",
+                            Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
+                            TypeName<Ty>::val(), i, j, k);
+                        return -1;
+                    }
+                }
+            }
+
+            x += nw; // advance to the next group's nw entries
+            y += nw;
+            m += 2 * nw; // two cl_int per entry
+        }
+
+        return 0;
+    }
+};
+
+// Scan Exclusive functions: Which selects add (0), max (1) or min (otherwise)
+template <typename Ty, int Which> struct SCEX
+{
+    static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
+    {
+        int i, ii, j, k, n;
+        int nj = (nw + ns - 1) / ns; // ceil(nw / ns): number of ns-sized chunks
+        // Fills x with random inputs for ng groups of nw items, using t as scratch.
+        ii = 0;
+        for (k = 0; k < ng; ++k)
+        {
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns; // first index of chunk j
+                n = ii + ns > nw ? nw - ii : ns; // the last chunk may be short
+
+                for (i = 0; i < n; ++i)
+                    t[ii + i] = (Ty)(
+                        (int)(genrand_int32(gMTdata) & 0x7fffffff) % ns + 1); // in [1, ns]
+            }
+
+            // Now map into work group using map from device
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j]; // m holds one index pair per item
+                x[j] = t[i];
+            }
+
+            x += nw; // advance to the next group's nw entries
+            m += 2 * nw; // two cl_int per entry
+        }
+    }
+
+    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw,
+                   int ng)
+    {
+        int ii, i, j, k, n;
+        int nj = (nw + ns - 1) / ns; // ceil(nw / ns): number of ns-sized chunks
+        Ty tr, trt, rr; // tr: expected value, trt: previous input, rr: value from my
+        // Checks y against an exclusive scan of x; returns 0, or -1 on mismatch.
+        log_info("  sub_group_scan_exclusive_%s(%s)...\n",
+                 Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
+                 TypeName<Ty>::val());
+
+        for (k = 0; k < ng; ++k)
+        {
+            // Map x and y into mx and my, indexed by sub group and local ID
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                mx[i] = x[j];
+                my[i] = y[j];
+            }
+
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns; // first index of chunk j
+                n = ii + ns > nw ? nw - ii : ns; // the last chunk may be short
+
+                // Check result: tr covers mx[ii] .. mx[ii + i - 1] only
+                for (i = 0; i < n; ++i)
+                {
+                    if (Which == 0) // add
+                    {
+                        tr = i == 0 ? TypeIdentity<Ty, Which>::val() : tr + trt;
+                    }
+                    else if (Which == 1) // max
+                    {
+                        tr = i == 0 ? TypeIdentity<Ty, Which>::val()
+                                    : (trt > tr ? trt : tr);
+                    }
+                    else // min
+                    {
+                        tr = i == 0 ? TypeIdentity<Ty, Which>::val()
+                                    : (trt > tr ? tr : trt);
+                    }
+                    trt = mx[ii + i]; // stored after tr, so it only feeds the next item
+                    rr = my[ii + i];
+
+                    if (rr != tr) // exact comparison, also for float/double Ty
+                    {
+                        log_error(
+                            "ERROR: sub_group_scan_exclusive_%s(%s) mismatch "
+                            "for local id %d in sub group %d in group %d\n",
+                            Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
+                            TypeName<Ty>::val(), i, j, k);
+                        return -1;
+                    }
+                }
+            }
+
+            x += nw; // advance to the next group's nw entries
+            y += nw;
+            m += 2 * nw; // two cl_int per entry
+        }
+
+        return 0;
+    }
+};
+
+// Broadcast functions
+template <typename Ty> struct BC
+{
+    static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
+    {
+        int i, ii, j, k, l, n;
+        int nj = (nw + ns - 1) / ns; // ceil(nw / ns): number of ns-sized chunks
+        int d = ns > 100 ? 100 : ns; // min(ns, 100): keeps l below 100
+        // Fills x so that every input of a chunk carries that chunk's index l.
+        ii = 0;
+        for (k = 0; k < ng; ++k)
+        {
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns; // first index of chunk j
+                n = ii + ns > nw ? nw - ii : ns; // the last chunk may be short
+                l = (int)(genrand_int32(gMTdata) & 0x7fffffff)
+                    % (d > n ? n : d); // random index below min(d, n)
+
+                for (i = 0; i < n; ++i)
+                    t[ii + i] = (Ty)((int)(genrand_int32(gMTdata) & 0x7fffffff)
+                                         % 100 * 100
+                                     + l); // (random % 100) * 100 + l
+            }
+
+            // Now map into work group using map from device
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j]; // m holds one index pair per item
+                x[j] = t[i];
+            }
+
+            x += nw; // advance to the next group's nw entries
+            m += 2 * nw; // two cl_int per entry
+        }
+    }
+
+    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw,
+                   int ng)
+    {
+        int ii, i, j, k, l, n;
+        int nj = (nw + ns - 1) / ns; // ceil(nw / ns): number of ns-sized chunks
+        Ty tr, rr; // tr: expected broadcast value, rr: value taken from my
+        // Checks that every y entry of a chunk equals the chunk's selected input.
+        log_info("  sub_group_broadcast(%s)...\n", TypeName<Ty>::val());
+
+        for (k = 0; k < ng; ++k)
+        {
+            // Map x and y into mx and my, indexed by sub group and local ID
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                mx[i] = x[j];
+                my[i] = y[j];
+            }
+
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns; // first index of chunk j
+                n = ii + ns > nw ? nw - ii : ns; // the last chunk may be short
+                l = (int)mx[ii] % 100; // recover l from the chunk's first input
+                tr = mx[ii + l]; // the input at offset l is the expected result
+
+                // Check result
+                for (i = 0; i < n; ++i)
+                {
+                    rr = my[ii + i];
+                    if (rr != tr) // exact comparison, also for float/double Ty
+                    {
+                        log_error("ERROR: sub_group_broadcast(%s) mismatch for "
+                                  "local id %d in sub group %d in group %d\n",
+                                  TypeName<Ty>::val(), i, j, k);
+                        return -1;
+                    }
+                }
+            }
+
+            x += nw; // advance to the next group's nw entries
+            y += nw;
+            m += 2 * nw; // two cl_int per entry
+        }
+
+        return 0;
+    }
+};
+
+
+// Entry point from main: runs each sub group test below and ORs the results
+int test_work_group_functions(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int num_elements,
+                              bool useCoreSubgroups)
+{
+    int error;
+    // error accumulates every run() result with |= and is returned unchanged.
+    // Adjust these individually below if desired/needed
+#define G 2000
+#define L 200
+    // NOTE(review): G and L are not #undef'd after their last use in this function.
+    error = test<int, AA<0>, G, L>::run(device, context, queue, num_elements,
+                                        "test_any", any_source, 0,
+                                        useCoreSubgroups);
+    error |= test<int, AA<1>, G, L>::run(device, context, queue, num_elements,
+                                         "test_all", all_source, 0,
+                                         useCoreSubgroups);
+
+    // error |= test<cl_half, BC<cl_half>, G, L>::run(device, context, queue,
+    // num_elements, "test_bcast", bcast_source);
+    error |= test<cl_uint, BC<cl_uint>, G, L>::run(
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, BC<cl_int>, G, L>::run(
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, BC<cl_ulong>, G, L>::run(
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, BC<cl_long>, G, L>::run(
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
+    error |= test<float, BC<float>, G, L>::run(
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
+    error |= test<double, BC<double>, G, L>::run(
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
+
+    // error |= test<cl_half, RED<cl_half,0>, G, L>::run(device, context, queue,
+    // num_elements, "test_redadd", redadd_source);
+    error |= test<cl_uint, RED<cl_uint, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, RED<cl_int, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, RED<cl_ulong, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, RED<cl_long, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
+    error |= test<float, RED<float, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
+    error |= test<double, RED<double, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
+
+    // error |= test<cl_half, RED<cl_half,1>, G, L>::run(device, context, queue,
+    // num_elements, "test_redmax", redmax_source);
+    error |= test<cl_uint, RED<cl_uint, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, RED<cl_int, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, RED<cl_ulong, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, RED<cl_long, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
+    error |= test<float, RED<float, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
+    error |= test<double, RED<double, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
+
+    // error |= test<cl_half, RED<cl_half,2>, G, L>::run(device, context, queue,
+    // num_elements, "test_redmin", redmin_source);
+    error |= test<cl_uint, RED<cl_uint, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, RED<cl_int, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, RED<cl_ulong, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, RED<cl_long, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
+    error |= test<float, RED<float, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
+    error |= test<double, RED<double, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
+
+    // error |= test<cl_half, SCIN<cl_half,0>, G, L>::run(device, context,
+    // queue, num_elements, "test_scinadd", scinadd_source);
+    error |= test<cl_uint, SCIN<cl_uint, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, SCIN<cl_int, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, SCIN<cl_ulong, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, SCIN<cl_long, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
+    error |= test<float, SCIN<float, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
+    error |= test<double, SCIN<double, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
+
+    // error |= test<cl_half, SCIN<cl_half,1>, G, L>::run(device, context,
+    // queue, num_elements, "test_scinmax", scinmax_source);
+    error |= test<cl_uint, SCIN<cl_uint, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, SCIN<cl_int, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, SCIN<cl_ulong, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, SCIN<cl_long, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
+    error |= test<float, SCIN<float, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
+    error |= test<double, SCIN<double, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
+
+    // error |= test<cl_half, SCIN<cl_half,2>, G, L>::run(device, context,
+    // queue, num_elements, "test_scinmin", scinmin_source);
+    error |= test<cl_uint, SCIN<cl_uint, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, SCIN<cl_int, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, SCIN<cl_ulong, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, SCIN<cl_long, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
+    error |= test<float, SCIN<float, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
+    error |= test<double, SCIN<double, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
+
+    // error |= test<cl_half, SCEX<cl_half,0>, G, L>::run(device, context,
+    // queue, num_elements, "test_scexadd", scexadd_source);
+    error |= test<cl_uint, SCEX<cl_uint, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, SCEX<cl_int, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, SCEX<cl_ulong, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, SCEX<cl_long, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
+    error |= test<float, SCEX<float, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
+    error |= test<double, SCEX<double, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
+
+    // error |= test<cl_half, SCEX<cl_half,1>, G, L>::run(device, context,
+    // queue, num_elements, "test_scexmax", scexmax_source);
+    error |= test<cl_uint, SCEX<cl_uint, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, SCEX<cl_int, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, SCEX<cl_ulong, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, SCEX<cl_long, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
+    error |= test<float, SCEX<float, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
+    error |= test<double, SCEX<double, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
+
+    // error |= test<cl_half, SCEX<cl_half,2>, G, L>::run(device, context,
+    // queue, num_elements, "test_scexmin", scexmin_source);
+    error |= test<cl_uint, SCEX<cl_uint, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, SCEX<cl_int, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, SCEX<cl_ulong, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, SCEX<cl_long, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
+    error |= test<float, SCEX<float, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
+    error |= test<double, SCEX<double, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
+    return error;
+}
+
+int test_work_group_functions_core(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements)
+{ // Runs test_work_group_functions with useCoreSubgroups set to true.
+    return test_work_group_functions(device, context, queue, num_elements,
+                                     true);
+}
+
+int test_work_group_functions_ext(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int num_elements)
+{
+    bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
+    // Without the extension the test is reported as skipped, not as failed.
+    if (!hasExtension)
+    {
+        log_info(
+            "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+    return test_work_group_functions(device, context, queue, num_elements,
+                                     false); // useCoreSubgroups = false
+}

diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp
index 7ffa6a7..b77bfe1 100644
--- a/test_conformance/subgroups/test_workitem.cpp
+++ b/test_conformance/subgroups/test_workitem.cpp

@@ -227,8 +227,9 @@
            "}";
     const std::string &kernel_str = kernel_sstr.str();
     const char *kernel_src = kernel_str.c_str();
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        &kernel_src, "get_test");
+    error = create_single_kernel_helper_with_build_options(
+        context, &program, &kernel, 1, &kernel_src, "get_test",
+        "-cl-std=CL2.0");
     if (error != 0) return error;
 
     error = get_max_allowed_work_group_size(context, kernel, &local, NULL);

diff --git a/test_conformance/submission_details_template.txt b/test_conformance/submission_details_template.txt
index 9d276a6..20554c8 100644
--- a/test_conformance/submission_details_template.txt
+++ b/test_conformance/submission_details_template.txt

@@ -1,5 +1,5 @@
 ##############################################################################
-# OpenCL submission details template
+# OpenCL 2.0 submission details template
 # $Id $
 # $URL $
 ##############################################################################
@@ -13,7 +13,8 @@
 
 ##############################################################################
 #
-# Submission information that needs to be copied onto the web submission form
+# Submission information (mostly section 7 and F.5 of Conformance Process 
+# Document) that needs to be copied onto the web submission form.
 #
 ##############################################################################
 
@@ -26,7 +27,7 @@
 
 # Version of OpenCL specification being tested
 #
-OpenCL Version: Major.Minor
+OpenCL Version: 2.0
 
 # Statement of conformance listing each conformant product (at a specific
 # version) that is covered by this implementation. List each conformant product
@@ -73,18 +74,19 @@
 
 ##############################################################################
 #
-# Further submission information
+# Further submission information (mostly section 7 of Conformance Process
+# Document).
 #
 ##############################################################################
 
-# git tag of the tests used from GitHub (e.g. vYYYY-MM-DD-XX)
+# Date of tests used, 8 digit string as given in filename.
 # 
-Tests version:
+Tests date:
 
-# Implementations that support cl_khr_icd are required to use a loader to run
-# the tests and document the loader that was used.
-#
-Loader used:
+
+# SHA-1 git identifier of the tests used from Gitlab (ex: 0a7770f98664a092c70d0a7d9a48d229b5fd8039)
+# 
+Test ID:
 
 # Date of "Khronos Conformance Process" that this submission
 # adheres to (as shown in the change history at the start of the document).
@@ -97,9 +99,47 @@
 #
 OpenCL Conformance Process Attachment date:
 
-# The conformance process document makes allowances for skipping specific tests
-# in some situations. A list of tests that were skipped in accordance to these
-# rules along with the justification must be documented here.
+# List of Khronos Bugzilla bugs filed for test bugs that have been fixed
+# by modifying the tests used in this submission. Separate bug numbers with
+# commas; if none, specify "none". It is OK to use an existing bug describing
+# the same problem. In any case, after filing this conformance submission,
+# add a comment to the bug referencing the submission tracking number
+# giving justification for the test change.
 #
-List of skipped tests in accordance with conformance process:
+Test Bugs:
+
+##############################################################################
+#
+# Tested device configuration
+#
+##############################################################################
+
+# Max compute units (CL_DEVICE_MAX_COMPUTE_UNITS)
+CL_DEVICE_MAX_COMPUTE_UNITS:
+
+# Max clock frequency (CL_DEVICE_MAX_CLOCK_FREQUENCY)
+CL_DEVICE_MAX_CLOCK_FREQUENCY:
+
+# Max memory allocation size (CL_DEVICE_MAX_MEM_ALLOC_SIZE)
+CL_DEVICE_MAX_MEM_ALLOC_SIZE:
+
+# Image support (CL_DEVICE_IMAGE_SUPPORT), 1/0
+CL_DEVICE_IMAGE_SUPPORT:
+
+# Global memory size (CL_DEVICE_GLOBAL_MEM_SIZE)
+CL_DEVICE_GLOBAL_MEM_SIZE:
+
+# Max constant buffer size (CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE)
+CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
+
+# Local memory size (CL_DEVICE_LOCAL_MEM_SIZE)
+CL_DEVICE_LOCAL_MEM_SIZE:
+
+
+##############################################################################
+#
+# Extension Queries
+#
+##############################################################################
+# Show the list of supported extensions per device tested, each extension string on a separate line.
 

diff --git a/test_conformance/thread_dimensions/main.cpp b/test_conformance/thread_dimensions/main.cpp
index 9a1ce60..bddbc0f 100644
--- a/test_conformance/thread_dimensions/main.cpp
+++ b/test_conformance/thread_dimensions/main.cpp

@@ -38,6 +38,6 @@
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
 }
 

diff --git a/test_conformance/thread_dimensions/test_thread_dimensions.cpp b/test_conformance/thread_dimensions/test_thread_dimensions.cpp
index c8d22c6..84f5708 100644
--- a/test_conformance/thread_dimensions/test_thread_dimensions.cpp
+++ b/test_conformance/thread_dimensions/test_thread_dimensions.cpp

@@ -501,8 +501,7 @@
     log_info("Memory allocation size to use is %gMB, max workgroup size is %d.\n", max_memory_size/(1024.0*1024.0), (int)max_workgroup_size);
 
     while (!found_size && memory_size >= max_memory_size/8) {
-        array =
-            clCreateBuffer(context, CL_MEM_READ_WRITE, memory_size, NULL, &err);
+        array = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), memory_size, NULL, &err);
         if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE || err == CL_OUT_OF_HOST_MEMORY) {
             memory_size -= max_memory_size/16;
             continue;

diff --git a/test_conformance/vec_align/CMakeLists.txt b/test_conformance/vec_align/CMakeLists.txt
new file mode 100644
index 0000000..f1a42e6
--- /dev/null
+++ b/test_conformance/vec_align/CMakeLists.txt

@@ -0,0 +1,11 @@
+set(MODULE_NAME VECALIGN)
+# With MODULE_NAME set as above, the list below is named VECALIGN_SOURCES.
+set(${MODULE_NAME}_SOURCES
+    globals.cpp
+        main.cpp
+        structs.cpp
+        test_vec_align.cpp
+        type_replacer.cpp
+)
+# NOTE(review): presumably CMakeCommon.txt consumes MODULE_NAME and the source list - confirm.
+include(../CMakeCommon.txt)

diff --git a/test_conformance/vec_align/defines.h b/test_conformance/vec_align/defines.h
new file mode 100644
index 0000000..f2bf9e7
--- /dev/null
+++ b/test_conformance/vec_align/defines.h

@@ -0,0 +1,42 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+#include "harness/threadTesting.h"
+#include "harness/typeWrappers.h"
+#include "harness/conversions.h"
+#include "harness/mt19937.h"
+
+
+// 1,2,3,4,8,16 or
+// 1,2,4,8,16,3
+#define NUM_VECTOR_SIZES 6
+
+extern int g_arrVecSizes[NUM_VECTOR_SIZES];
+extern int g_arrVecSteps[NUM_VECTOR_SIZES];
+extern bool g_wimpyMode;
+
+extern const char * g_arrVecSizeNames[NUM_VECTOR_SIZES];
+extern size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES];
+
+// Define the buffer size that we want to block our test with
+#define BUFFER_SIZE (1024*1024)
+#define KPAGESIZE 4096
+
+extern ExplicitType types[];
+
+extern const char *g_arrTypeNames[];
+extern const size_t g_arrTypeSizes[];

diff --git a/test_conformance/vec_align/globals.cpp b/test_conformance/vec_align/globals.cpp
new file mode 100644
index 0000000..3deecad
--- /dev/null
+++ b/test_conformance/vec_align/globals.cpp

@@ -0,0 +1,59 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "defines.h"
+
+
+// Per-vector-size tables; every table here uses the order 1,2,3,4,8,16
+// (the alternative order 1,2,4,8,16,3 is not the one used in this file)
+int g_arrVecSizes[NUM_VECTOR_SIZES] = {1,2,3,4,8,16};
+int g_arrVecSteps[NUM_VECTOR_SIZES] = {1,2,4,4,8,16}; // same as sizes, except the entry for 3 is 4
+const char * g_arrVecSizeNames[NUM_VECTOR_SIZES] = {"", "2","3","4","8","16"}; // size 1 has an empty name
+size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES] = {(size_t)0, // 1; each mask is the matching step minus one
+                           (size_t)0x1, // 2
+                           (size_t)0x3, // 3
+                           (size_t)0x3, // 4
+                           (size_t)0x7, // 8
+                           (size_t)0xf // 16
+};
+
+bool g_wimpyMode = false;
+
+ExplicitType types[] = { kChar, kUChar,
+             kShort, kUShort,
+             kInt, kUInt,
+             kLong, kULong,
+             kFloat, kDouble,
+             kNumExplicitTypes }; // presumably the end-of-list marker; confirm in the code that walks types[]
+
+// The two tables below list their entries in the same order as the first ten entries of types[].
+const char *g_arrTypeNames[] =
+    {
+    "char",  "uchar",
+    "short", "ushort",
+    "int",   "uint",
+    "long",  "ulong",
+    "float", "double"
+    };
+
+extern const size_t g_arrTypeSizes[] = // presumably sizes in bytes; defines.h also declares this array extern
+    {
+    1, 1,
+    2, 2,
+    4, 4,
+    8, 8,
+    4, 8
+    };
+

diff --git a/test_conformance/vec_align/main.cpp b/test_conformance/vec_align/main.cpp
new file mode 100644
index 0000000..6894895
--- /dev/null
+++ b/test_conformance/vec_align/main.cpp

@@ -0,0 +1,41 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include "procs.h"
+#include "harness/testHarness.h"
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#endif
+
+test_definition test_list[] = { // table of tests passed to runTestHarness in main()
+    ADD_TEST( vec_align_array ),
+    ADD_TEST( vec_align_struct ),
+    ADD_TEST( vec_align_packed_struct ),
+    ADD_TEST( vec_align_struct_arr ),
+    ADD_TEST( vec_align_packed_struct_arr ),
+};
+
+const int test_num = ARRAY_SIZE( test_list ); // presumably the number of entries in test_list
+// Forwards the command line and the test table to the harness; its result is the exit code.
+int main(int argc, const char *argv[])
+{
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
+}
+

diff --git a/test_conformance/vec_align/procs.h b/test_conformance/vec_align/procs.h
new file mode 100644
index 0000000..7b60e1e
--- /dev/null
+++ b/test_conformance/vec_align/procs.h

@@ -0,0 +1,39 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+#include "harness/threadTesting.h"
+#include "harness/typeWrappers.h"
+#include "harness/conversions.h"
+#include "harness/mt19937.h"
+
+// The number of errors to print out for each test in the shuffle tests
+#define MAX_ERRORS_TO_PRINT 1
+
+
+extern int      create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);
+
+
+int test_vec_align_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+int test_vec_align_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+
+int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

diff --git a/test_conformance/vec_align/structs.cpp b/test_conformance/vec_align/structs.cpp
new file mode 100644
index 0000000..2e15e36
--- /dev/null
+++ b/test_conformance/vec_align/structs.cpp

@@ -0,0 +1,375 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "structs.h"
+
+
+#include "defines.h"
+
+#define DEBUG_MEM_ALLOC 0
+
+/** For reference, the layout declared in structs.h:
+
+ typedef struct _bufferStruct
+ {
+ void * m_pIn;
+ void * m_pOut;
+
+ cl_mem m_outBuffer;
+ cl_mem m_inBuffer;
+
+ size_t m_bufSizeIn, m_bufSizeOut;
+
+ int       m_bufferUploaded;
+ } bufferStruct;
+ */
+
+
+/**
+ * Allocates a clState and records the device, context and queue it wraps.
+ *
+ * The handles are stored as given; nothing here retains them.  The program
+ * and kernel start out NULL and are filled in by clStateMakeProgram().
+ * Release the returned object with destroyClState().
+ *
+ * @param device   device the tests run on
+ * @param context  context used for programs and buffers
+ * @param queue    queue used for kernel launches and transfers
+ * @return the new state, or NULL if the allocation failed
+ */
+clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue)
+{
+    clState * pResult = (clState *)malloc(sizeof(clState));
+#if DEBUG_MEM_ALLOC
+    log_info("malloc clState * %x\n", pResult);
+#endif
+    // Do not write through a failed allocation.
+    if(pResult == NULL)
+    {
+        log_error("malloc failed for clState\n");
+        return NULL;
+    }
+
+    pResult->m_device = device;
+    pResult->m_context = context;
+    pResult->m_queue = queue;
+
+    pResult->m_kernel = NULL; pResult->m_program = NULL;
+    // runKernel() overwrites this; give it a defined value until then.
+    pResult->m_numThreads = 0;
+    return pResult;
+}
+
+/**
+ * Releases the program and kernel held by pState and frees the state itself.
+ *
+ * The device, context and queue handles are not released here.  Passing
+ * NULL is allowed and does nothing, mirroring free().
+ *
+ * @param pState  state created by newClState(), or NULL
+ * @return always NULL, so callers can write `p = destroyClState(p);`
+ */
+clState * destroyClState(clState * pState)
+{
+    // clStateDestroyProgramAndKernel() dereferences its argument.
+    if(pState == NULL)
+    {
+        return NULL;
+    }
+    clStateDestroyProgramAndKernel(pState);
+#if DEBUG_MEM_ALLOC
+    log_info("delete (free) clState * %x\n", pState);
+#endif
+    free(pState);
+    return NULL;
+}
+
+
+// Builds `prog` as a one-string program and creates the kernel called
+// `kernelName`, storing both handles in pState.  Returns whatever
+// create_single_kernel_helper() reports.
+int clStateMakeProgram(clState * pState, const char * prog,
+                       const char * kernelName)
+{
+    const char * sources[1] = { prog };
+    cl_program * const pProgram = &(pState->m_program);
+    cl_kernel * const pKernel = &(pState->m_kernel);
+
+    const int status = create_single_kernel_helper(pState->m_context,
+                                                   pProgram, pKernel,
+                                                   1, sources, kernelName );
+#if DEBUG_MEM_ALLOC
+    log_info("create program and kernel\n");
+#endif
+    return status;
+}
+
+// Launches the state's kernel as a 1-D range of `numThreads` work-items.
+// The count is stored in pState->m_numThreads first, because that field's
+// address is what gets handed to clEnqueueNDRangeKernel.
+// Returns 0 on success, -1 (after logging) if the enqueue fails.
+int runKernel(clState * pState, size_t numThreads) {
+    pState->m_numThreads = numThreads;
+
+    const int status = clEnqueueNDRangeKernel(pState->m_queue,
+                                              pState->m_kernel,
+                                              1, NULL,
+                                              &(pState->m_numThreads),
+                                              NULL, 0, NULL, NULL);
+    if(status == CL_SUCCESS)
+    {
+        return 0;
+    }
+
+    log_error("clEnqueueNDRangeKernel returned %d (%x)\n",
+              status, status);
+    return -1;
+}
+
+
+// Releases the kernel and then the program held by pState, if present, and
+// leaves both fields NULL so the state can be reused for the next build.
+void clStateDestroyProgramAndKernel(clState * pState)
+{
+#if DEBUG_MEM_ALLOC
+    log_info("destroy program and kernel\n");
+#endif
+    cl_kernel const oldKernel = pState->m_kernel;
+    cl_program const oldProgram = pState->m_program;
+
+    pState->m_kernel = NULL;
+    pState->m_program = NULL;
+
+    if(oldKernel != NULL) {
+        clReleaseKernel( oldKernel );
+    }
+    if(oldProgram != NULL) {
+        clReleaseProgram( oldProgram );
+    }
+}
+
+/**
+ * Allocates a bufferStruct: host staging memory plus a read-only input and a
+ * write-only output device buffer in pClState's context.
+ *
+ * Every field is initialized before any resource is acquired, so the failure
+ * paths can hand the partially built object to destroyBufferStruct() without
+ * it inspecting uninitialized handles.
+ *
+ * @param inSize    size in bytes of the input buffer (host and device)
+ * @param outSize   size in bytes of the output buffer (host and device)
+ * @param pClState  state supplying the context; must not be NULL
+ * @return the new object, or NULL if any allocation failed
+ */
+bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState * pClState) {
+    int error;
+    bufferStruct * pResult = (bufferStruct *)malloc(sizeof(bufferStruct));
+#if DEBUG_MEM_ALLOC
+    log_info("malloc bufferStruct * %x\n", pResult);
+#endif
+    if( pResult == NULL )
+    {
+        vlog_error( "malloc failed for bufferStruct\n" );
+        return NULL;
+    }
+
+    // Known state first: destroyBufferStruct() tests all four handles.
+    pResult->m_pIn = NULL;
+    pResult->m_pOut = NULL;
+    pResult->m_inBuffer = NULL;
+    pResult->m_outBuffer = NULL;
+    pResult->m_bufferUploaded = false;
+
+    pResult->m_bufSizeIn = inSize;
+    pResult->m_bufSizeOut = outSize;
+
+    pResult->m_pIn = malloc(inSize);
+    pResult->m_pOut = malloc(outSize);
+#if DEBUG_MEM_ALLOC
+    log_info("malloc m_pIn %x\n", pResult->m_pIn);
+    log_info("malloc m_pOut %x\n", pResult->m_pOut);
+#endif
+    if( pResult->m_pIn == NULL || pResult->m_pOut == NULL )
+    {
+        vlog_error( "malloc failed for host buffers\n" );
+        return destroyBufferStruct(pResult, pClState);
+    }
+
+    pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY,
+                                         inSize, NULL, &error);
+    if( pResult->m_inBuffer == NULL )
+    {
+        vlog_error( "clCreateArray failed for input (%d)\n", error );
+        return destroyBufferStruct(pResult, pClState);
+    }
+#if DEBUG_MEM_ALLOC
+    log_info("clCreateBuffer %x\n", pResult->m_inBuffer);
+#endif
+
+    pResult->m_outBuffer = clCreateBuffer( pClState->m_context,
+                                          CL_MEM_WRITE_ONLY,
+                                          outSize,
+                                          NULL,
+                                          &error );
+    if( pResult->m_outBuffer == NULL )
+    {
+        vlog_error( "clCreateArray failed for output (%d)\n", error );
+        return destroyBufferStruct(pResult, pClState);
+    }
+#if DEBUG_MEM_ALLOC
+    log_info("clCreateBuffer %x\n", pResult->m_outBuffer);
+#endif
+
+    return pResult;
+}
+
+/**
+ * Releases everything owned by a bufferStruct: both device buffers, both
+ * host allocations, and the struct itself.  Fields that are NULL are
+ * skipped, and a NULL `destroyMe` is accepted.
+ *
+ * @param destroyMe  object from newBufferStruct(), or NULL
+ * @param pClState   not used by this function
+ * @return always NULL
+ */
+bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState) {
+    if(destroyMe == NULL)
+    {
+        return NULL;
+    }
+
+    if(destroyMe->m_outBuffer != NULL) {
+#if DEBUG_MEM_ALLOC
+        log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer);
+#endif
+        clReleaseMemObject(destroyMe->m_outBuffer);
+        destroyMe->m_outBuffer = NULL;
+    }
+    if(destroyMe->m_inBuffer != NULL) {
+#if DEBUG_MEM_ALLOC
+        // Trace the handle actually being released (the input buffer).
+        log_info("clReleaseMemObject %x\n", destroyMe->m_inBuffer);
+#endif
+        clReleaseMemObject(destroyMe->m_inBuffer);
+        destroyMe->m_inBuffer = NULL;
+    }
+    if(destroyMe->m_pIn != NULL) {
+#if DEBUG_MEM_ALLOC
+        log_info("delete (free) m_pIn %x\n", destroyMe->m_pIn);
+#endif
+        free(destroyMe->m_pIn);
+        destroyMe->m_pIn = NULL;
+    }
+    if(destroyMe->m_pOut != NULL) {
+#if DEBUG_MEM_ALLOC
+        log_info("delete (free) m_pOut %x\n", destroyMe->m_pOut);
+#endif
+        free(destroyMe->m_pOut);
+        destroyMe->m_pOut = NULL;
+    }
+#if DEBUG_MEM_ALLOC
+    log_info("delete (free) bufferStruct * %x\n", destroyMe);
+#endif
+    free((void *)destroyMe);
+    return NULL;
+}
+
+// Fills the host input buffer with `countIn` elements of `typeSize` bytes
+// each, counting up from zero.  For 4-byte elements in wimpy mode, and for
+// all 8-byte elements, the sign alternates from one element to the next.
+// Any other element size is logged as an error and the buffer is left
+// untouched.  pClState and countOut are not used.
+void initContents(bufferStruct * pBufferStruct, clState * pClState,
+                  size_t typeSize,
+                  size_t countIn, size_t countOut )
+{
+    void * const hostIn = pBufferStruct->m_pIn;
+    uint64_t counter = 0;
+    size_t idx;
+
+    if(typeSize == 1)
+    {
+        uint8_t * bytes = (uint8_t *)hostIn;
+        for(idx = 0; idx < countIn; ++idx, ++counter)
+        {
+            bytes[idx] = (uint8_t)counter;
+        }
+    }
+    else if(typeSize == 2)
+    {
+        uint16_t * halves = (uint16_t *)hostIn;
+        for(idx = 0; idx < countIn; ++idx, ++counter)
+        {
+            halves[idx] = (uint16_t)counter;
+        }
+    }
+    else if(typeSize == 4)
+    {
+        if(g_wimpyMode)
+        {
+            // The short test doesn't iterate over the entire 32 bit space
+            // so we alternate between positive and negative values
+            int32_t * words = (int32_t *)hostIn;
+            int32_t sign = 1;
+            for(idx = 0; idx < countIn; ++idx, ++counter)
+            {
+                words[idx] = (int32_t)counter*sign;
+                sign = -sign;
+            }
+        }
+        else
+        {
+            uint32_t * words = (uint32_t *)hostIn;
+            for(idx = 0; idx < countIn; ++idx, ++counter)
+            {
+                words[idx] = (uint32_t)counter;
+            }
+        }
+    }
+    else if(typeSize == 8)
+    {
+        // We don't iterate over the entire space of 64 bit so for the
+        // selects, we want to test positive and negative values
+        int64_t * quads = (int64_t *)hostIn;
+        int64_t sign = 1;
+        for(idx = 0; idx < countIn; ++idx, ++counter)
+        {
+            quads[idx] = counter*sign;
+            sign = -sign;
+        }
+    }
+    else
+    {
+        log_error("invalid type size %x\n", (int)typeSize);
+    }
+}
+
+// Uploads the host input to the device (first call only, tracked through
+// m_bufferUploaded) and binds the input buffer as kernel argument 0 and the
+// output buffer as argument 1.  Returns 0 on success, -1 after logging the
+// step that failed.
+int pushArgs(bufferStruct * pBufferStruct, clState * pClState)
+{
+    cl_mem * const pInMem = &(pBufferStruct->m_inBuffer);
+    cl_mem * const pOutMem = &(pBufferStruct->m_outBuffer);
+    int status;
+
+    if( !pBufferStruct->m_bufferUploaded )
+    {
+        // Blocking write, so the host copy can be reused right away.
+        status = clEnqueueWriteBuffer(pClState->m_queue, *pInMem,
+                                      CL_TRUE, 0, pBufferStruct->m_bufSizeIn,
+                                      pBufferStruct->m_pIn, 0, NULL, NULL);
+#if DEBUG_MEM_ALLOC
+        log_info("clEnqueueWriteBuffer %x\n", pBufferStruct->m_inBuffer);
+#endif
+        if(status != CL_SUCCESS)
+        {
+            log_error("clEnqueueWriteBuffer failed\n");
+            return -1;
+        }
+        pBufferStruct->m_bufferUploaded = true;
+    }
+
+    // The argument value is the cl_mem handle, hence the size of the handle
+    // and not of the buffer contents.
+    status = clSetKernelArg(pClState->m_kernel, 0, sizeof(*pInMem), pInMem);
+    if(status != CL_SUCCESS)
+    {
+        log_error("clSetKernelArgs failed, first arg (0)\n");
+        return -1;
+    }
+
+    status = clSetKernelArg(pClState->m_kernel, 1, sizeof(*pOutMem), pOutMem);
+    if(status != CL_SUCCESS)
+    {
+        log_error("clSetKernelArgs failed, second arg (1)\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+// Blocking read of the whole output device buffer (m_bufSizeOut bytes) into
+// the host memory at m_pOut.  Returns 0 on success, -1 after logging.
+int retrieveResults(bufferStruct * pBufferStruct, clState * pClState)
+{
+    const int status = clEnqueueReadBuffer(pClState->m_queue,
+                                           pBufferStruct->m_outBuffer,
+                                           CL_TRUE, 0,
+                                           pBufferStruct->m_bufSizeOut,
+                                           pBufferStruct->m_pOut,
+                                           0, NULL, NULL);
+    if(status == CL_SUCCESS)
+    {
+        return 0;
+    }
+
+    log_error("clEnqueueReadBuffer failed\n");
+    return -1;
+}
+
+/**
+ * Verifies that each of the first m_numThreads cl_uint results in the host
+ * output buffer is a multiple of `minAlign`.
+ *
+ * @param pBufferStruct  buffers whose m_pOut holds the retrieved results
+ * @param pClState       state whose m_numThreads gives the result count
+ * @param minAlign       required alignment in bytes; must be non-zero
+ * @return 0 if every value is aligned, -1 on the first mismatch or if
+ *         minAlign is zero
+ */
+int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState,
+                     size_t minAlign)
+{
+    size_t i;
+    cl_uint * targetArr = (cl_uint *)(pBufferStruct->m_pOut);
+
+    // A zero divisor would make the modulo below undefined.
+    if(minAlign == 0)
+    {
+        vlog_error("checkCorrectness called with a zero alignment\n");
+        return -1;
+    }
+
+    for(i = 0; i < pClState->m_numThreads; ++i)
+    {
+        if((targetArr[i])%minAlign != (cl_uint)0)
+        {
+            // size_t values are cast so the arguments match the %d / %x
+            // conversions in the format string.
+            vlog_error("Error %d (of %d).  Expected a multple of %x, got %x\n",
+                       (int)i, (int)pClState->m_numThreads,
+                       (unsigned int)minAlign,
+                       targetArr[i]);
+            return -1;
+        }
+    }
+
+    /*    log_info("\n");
+     for(i = 0; i < 4; ++i) {
+     log_info("%lx, ", targetArr[i]);
+     }
+     log_info("\n");
+     fflush(stdout); */
+    return 0;
+}
+
+
+/**
+ * Verifies that each of the first m_numThreads cl_uint results in the host
+ * output buffer equals `beforeSize` plus a whole multiple of `totSize`.
+ *
+ * @param pBufferStruct  buffers whose m_pOut holds the retrieved results
+ * @param pClState       state whose m_numThreads gives the result count
+ * @param totSize        expected size in bytes of one packed struct;
+ *                       must be non-zero
+ * @param beforeSize     expected bytes preceding the vector member
+ * @return 0 if every value matches, -1 on the first mismatch or if totSize
+ *         is zero
+ */
+int checkPackedCorrectness(bufferStruct * pBufferStruct, clState * pClState,
+                           size_t totSize, size_t beforeSize)
+{
+    size_t i;
+    cl_uint * targetArr = (cl_uint *)(pBufferStruct->m_pOut);
+
+    // A zero divisor would make the modulo below undefined.
+    if(totSize == 0)
+    {
+        vlog_error("checkPackedCorrectness called with a zero struct size\n");
+        return -1;
+    }
+
+    for(i = 0; i < pClState->m_numThreads; ++i)
+    {
+        const size_t value = targetArr[i];
+
+        // A value below beforeSize can never be right.  Test it explicitly:
+        // the unsigned subtraction would wrap and could land on a multiple
+        // of totSize by accident.
+        if(value < beforeSize || (value-beforeSize)%totSize != 0)
+        {
+            // size_t values are cast so the arguments match the %d
+            // conversions in the format string.
+            vlog_error("Error %d (of %d).  Expected %d more than a multple of %d, got %d \n",
+                       (int)i, (int)pClState->m_numThreads, (int)beforeSize,
+                       (int)totSize,
+                       targetArr[i]);
+            return -1;
+        }
+    }
+
+    /*    log_info("\n");
+     for(i = 0; i < 4; ++i) {
+     log_info("%lx, ", targetArr[i]);
+     }
+     log_info("\n");
+     fflush(stdout); */
+    return 0;
+}

diff --git a/test_conformance/vec_align/structs.h b/test_conformance/vec_align/structs.h
new file mode 100644
index 0000000..e26b810
--- /dev/null
+++ b/test_conformance/vec_align/structs.h

@@ -0,0 +1,73 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _structs_h
+#define _structs_h
+
+#include "testBase.h"
+
+
+#include "harness/conversions.h"
+#include "harness/typeWrappers.h"
+
+// OpenCL objects shared by one test run.
+typedef struct _clState
+{
+    // Handles supplied by the caller of newClState(); stored as given and
+    // not released by destroyClState().
+    cl_device_id m_device;
+    cl_context m_context;
+    cl_command_queue m_queue;
+
+    // Created by clStateMakeProgram(), released by
+    // clStateDestroyProgramAndKernel(); NULL when absent.
+    cl_program m_program;
+    cl_kernel m_kernel;
+    // Global work size of the most recent runKernel() call.
+    size_t m_numThreads;
+} clState;
+
+// Allocates a state wrapping the given handles; free with destroyClState().
+clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue);
+// Releases program/kernel and frees the state.  Always returns NULL.
+clState * destroyClState(clState * pState);
+
+// Builds `prog` and creates the kernel `kernelName` in pState.
+int clStateMakeProgram(clState * pState, const char * prog,
+               const char * kernelName);
+// Releases the program and kernel in pState and resets both to NULL.
+void clStateDestroyProgramAndKernel(clState * pState);
+
+// Enqueues the kernel over `numThreads` work-items; 0 on success, -1 on error.
+int runKernel(clState * pState, size_t numThreads);
+
+// Host staging memory and the matching device buffers.
+typedef struct _bufferStruct
+{
+    // Host allocations of m_bufSizeIn / m_bufSizeOut bytes.
+    void * m_pIn;
+    void * m_pOut;
+
+    // Device buffers: output is write-only, input is read-only.
+    cl_mem m_outBuffer;
+    cl_mem m_inBuffer;
+
+    // Sizes in bytes of the input and output buffers.
+    size_t m_bufSizeIn, m_bufSizeOut;
+
+    // Non-zero once pushArgs() has copied m_pIn to m_inBuffer.
+    int       m_bufferUploaded;
+} bufferStruct;
+
+
+// Allocates host and device buffers; returns NULL on failure.
+bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState * pClState);
+
+// Releases everything owned by destroyMe.  Always returns NULL.
+bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState);
+
+// Fills the host input buffer with `countIn` elements of `typeSize` bytes.
+// The parameter list matches the definition in structs.cpp.
+void initContents(bufferStruct * pBufferStruct, clState * pClState,
+             size_t typeSize,
+             size_t countIn, size_t countOut);
+
+// Uploads the input (once) and binds both buffers as kernel arguments 0, 1.
+int pushArgs(bufferStruct * pBufferStruct, clState * pClState);
+// Reads the output device buffer back into m_pOut.
+int retrieveResults(bufferStruct * pBufferStruct, clState * pClState);
+
+// Checks that every result is a multiple of minAlign; 0 on success.
+int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState,
+             size_t minAlign);
+
+// Checks that every result is beforeSize plus a multiple of totSize;
+// 0 on success.
+int checkPackedCorrectness(bufferStruct * pBufferStruct, clState * pClState,
+               size_t totSize, size_t beforeSize);
+
+#endif // _structs_h

diff --git a/test_conformance/vec_align/testBase.h b/test_conformance/vec_align/testBase.h
new file mode 100644
index 0000000..bd72e84
--- /dev/null
+++ b/test_conformance/vec_align/testBase.h

@@ -0,0 +1,28 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _testBase_h
+#define _testBase_h
+
+#include "harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+#endif // _testBase_h

diff --git a/test_conformance/vec_align/test_vec_align.cpp b/test_conformance/vec_align/test_vec_align.cpp
new file mode 100644
index 0000000..bc666a4
--- /dev/null
+++ b/test_conformance/vec_align/test_vec_align.cpp

@@ -0,0 +1,545 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+
+
+#include "harness/conversions.h"
+#include "harness/typeWrappers.h"
+#include "harness/testHarness.h"
+
+#include "structs.h"
+
+#include "defines.h"
+
+#include "type_replacer.h"
+
+
+// Number of elements a vector of `vecSize` components is aligned to:
+// 3-component vectors count as 4, every other size maps to itself.
+size_t get_align(size_t vecSize)
+{
+    return (vecSize == 3) ? (size_t)4 : vecSize;
+}
+
+/* // Lots of conditionals means this is not gonna be an optimal min on intel. */
+/* // That's okay, make sure we only call a few times per test, not for every */
+/* // element */
+/* size_t min_of_nonzero(size_t a, size_t b) */
+/* { */
+/*     if(a != 0 && (a<=b || b==0)) */
+/*     { */
+/*     return a; */
+/*     } */
+/*     if(b != 0 && (b<a || a==0)) */
+/*     { */
+/*     return b; */
+/*     } */
+/*     return 0; */
+/* } */
+
+
+/* size_t get_min_packed_alignment(size_t preSize, size_t typeMultiplePreSize, */
+/*                 size_t postSize, size_t typeMultiplePostSize, */
+/*                 ExplicitType kType, size_t vecSize) */
+/* { */
+/*     size_t pre_min = min_of_nonzero(preSize,  */
+/*                     typeMultiplePreSize* */
+/*                     get_explicit_type_size(kType)); */
+/*     size_t post_min = min_of_nonzero(postSize,  */
+/*                     typeMultiplePostSize* */
+/*                     get_explicit_type_size(kType)); */
+/*     size_t struct_min = min_of_nonzero(pre_min, post_min); */
+/*     size_t result =  min_of_nonzero(struct_min, get_align(vecSize) */
+/*                     *get_explicit_type_size(kType)); */
+/*     return result; */
+
+/* } */
+
+
+
+/**
+ * Runs one kernel pattern for every entry of types[] (up to the
+ * kNumExplicitTypes terminator) and every vector size from index 1 of
+ * g_arrVecSizes upwards, and checks the addresses/offsets the kernel wrote.
+ *
+ * For each combination the .PRAGMA./.STATE. and .TYPE./.NUM. placeholders
+ * in `pattern` are expanded, the program is built, the kernel is run and
+ * the results are read back.  When no pre/post member sizes are given the
+ * results must be multiples of the vector alignment (checkCorrectness);
+ * otherwise they must match the packed layout (checkPackedCorrectness).
+ *
+ * @param pattern               kernel source with placeholders still in it
+ * @param testName              kernel entry point, also used in messages
+ * @param bufSize               input buffer size in bytes; the output
+ *                              buffer holds that many cl_uint values
+ * @param preSize               fixed bytes placed before the vector member
+ * @param typeMultiplePreSize   further bytes before it, in units of the
+ *                              element type size
+ * @param postSize              fixed bytes placed after the vector member
+ * @param typeMultiplePostSize  further bytes after it, in units of the
+ *                              element type size
+ * @return 0 if every combination passes, -1 on the first failure
+ */
+int test_vec_internal(cl_device_id deviceID, cl_context context,
+                      cl_command_queue queue, const char * pattern,
+                      const char * testName, size_t bufSize,
+                      size_t preSize, size_t typeMultiplePreSize,
+                      size_t postSize, size_t typeMultiplePostSize)
+{
+    int err;
+    int typeIdx, vecSizeIdx;
+
+    char tmpBuffer[2048];
+    char srcBuffer[2048];
+
+    size_t preSizeBytes, postSizeBytes, typeSize, totSize;
+
+    clState * pClState = newClState(deviceID, context, queue);
+    bufferStruct * pBuffers = NULL;
+
+    // newBufferStruct() dereferences the state, so stop here if the state
+    // itself could not be allocated.
+    if(pClState == NULL) {
+        vlog_error("%s : Could not create CL state\n", testName);
+        return -1;
+    }
+
+    pBuffers =
+    newBufferStruct(bufSize, bufSize*sizeof(cl_uint)/sizeof(cl_char), pClState);
+
+    if(pBuffers == NULL) {
+        destroyClState(pClState);
+        vlog_error("%s : Could not create buffer\n", testName);
+        return -1;
+    }
+
+    for(typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx)
+    {
+
+        // Skip doubles if it is not supported otherwise enable pragma
+        if (types[typeIdx] == kDouble) {
+            if (!is_extension_available(deviceID, "cl_khr_fp64")) {
+                continue;
+            } else {
+                doReplace(tmpBuffer, 2048, pattern,
+                          ".PRAGMA.",  "#pragma OPENCL EXTENSION cl_khr_fp64: ",
+                          ".STATE.", "enable");
+            }
+        } else {
+            // 64-bit integer types are skipped when gIsEmbedded is set.
+            if (types[typeIdx] == kLong || types[typeIdx] == kULong) {
+                if (gIsEmbedded)
+                    continue;
+            }
+
+            doReplace(tmpBuffer, 2048, pattern,
+                      ".PRAGMA.",  " ",
+                      ".STATE.", " ");
+        }
+
+        typeSize = get_explicit_type_size(types[typeIdx]);
+        preSizeBytes = preSize + typeSize*typeMultiplePreSize;
+        postSizeBytes = postSize + typeSize*typeMultiplePostSize;
+
+
+
+        // Starts at index 1, so entry 0 of the vector-size tables is never
+        // exercised here.
+        for(vecSizeIdx = 1; vecSizeIdx < NUM_VECTOR_SIZES; ++vecSizeIdx)  {
+
+            // Expected size of one packed struct: members before, members
+            // after, and the vector itself (3-vectors counted as 4).
+            totSize = preSizeBytes + postSizeBytes +
+            typeSize*get_align(g_arrVecSizes[vecSizeIdx]);
+
+            doReplace(srcBuffer, 2048, tmpBuffer,
+                      ".TYPE.",  g_arrTypeNames[typeIdx],
+                      ".NUM.", g_arrVecSizeNames[vecSizeIdx]);
+
+            if(srcBuffer[0] == '\0') {
+                vlog_error("%s: failed to fill source buf for type %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                destroyBufferStruct(pBuffers, pClState);
+                destroyClState(pClState);
+                return -1;
+            }
+
+            // log_info("Buffer is \"\n%s\n\"\n", srcBuffer);
+            // fflush(stdout);
+
+            err = clStateMakeProgram(pClState, srcBuffer, testName );
+            if (err) {
+                vlog_error("%s: Error compiling \"\n%s\n\"",
+                           testName, srcBuffer);
+                destroyBufferStruct(pBuffers, pClState);
+                destroyClState(pClState);
+                return -1;
+            }
+
+            err = pushArgs(pBuffers, pClState);
+            if(err != 0) {
+                vlog_error("%s: failed to push args %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                destroyBufferStruct(pBuffers, pClState);
+                destroyClState(pClState);
+                return -1;
+            }
+
+            // log_info("About to Run kernel\n"); fflush(stdout);
+            // now we run the kernel
+            err = runKernel(pClState,
+                            bufSize/(g_arrVecSizes[vecSizeIdx]* g_arrTypeSizes[typeIdx]));
+            if(err != 0) {
+                // m_numThreads is a size_t; cast it to match %ld.
+                vlog_error("%s: runKernel fail (%ld threads) %s%s\n",
+                           testName, (long)pClState->m_numThreads,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                destroyBufferStruct(pBuffers, pClState);
+                destroyClState(pClState);
+                return -1;
+            }
+
+            // log_info("About to retrieve results\n"); fflush(stdout);
+            err = retrieveResults(pBuffers, pClState);
+            if(err != 0) {
+                vlog_error("%s: failed to retrieve results %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                destroyBufferStruct(pBuffers, pClState);
+                destroyClState(pClState);
+                return -1;
+            }
+
+
+
+            if(preSizeBytes+postSizeBytes == 0)
+            {
+                // log_info("About to Check Correctness\n"); fflush(stdout);
+                err = checkCorrectness(pBuffers, pClState,
+                                       get_align(g_arrVecSizes[vecSizeIdx])*
+                                       typeSize);
+            }
+            else
+            {
+                // we're checking for an aligned struct
+                err = checkPackedCorrectness(pBuffers, pClState, totSize,
+                                             preSizeBytes);
+            }
+
+            if(err != 0) {
+                vlog_error("%s: incorrect results %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                vlog_error("%s: Source was \"\n%s\n\"",
+                           testName, srcBuffer);
+                destroyBufferStruct(pBuffers, pClState);
+                destroyClState(pClState);
+                return -1;
+            }
+
+            // Drop this combination's program so the next one starts clean.
+            clStateDestroyProgramAndKernel(pClState);
+
+        }
+    }
+
+    destroyBufferStruct(pBuffers, pClState);
+
+    destroyClState(pClState);
+
+
+    // vlog_error("%s : implementation incomplete : FAIL\n", testName);
+    return 0; // -1; // fails on account of not being written.
+}
+
+
+
+// Kernel source templates, one per test entry point in this file:
+//   [0] test_vec_align_array             address of source[tid]
+//   [1] test_vec_align_struct            address of test.vec
+//   [2] test_vec_align_packed_struct     offset of test.vec within test
+//   [3] test_vec_align_struct_arr        address of source[tid].vec
+//   [4] test_vec_align_packed_struct_arr offset of source[tid].vec from
+//                                        source[0]
+// Placeholders are expanded with doReplace(): .SRC_SCOPE./.DST_SCOPE. and
+// .PRE./.POST. by the entry points, .PRAGMA./.STATE. and .TYPE./.NUM. by
+// test_vec_internal().  Each kernel stores its result as a uint in dest[tid].
+const char * patterns[] = {
+    ".PRAGMA..STATE.\n"
+    "__kernel void test_vec_align_array(.SRC_SCOPE. .TYPE..NUM. *source, .DST_SCOPE. uint *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)(source+tid));\n"
+    "}\n",
+    ".PRAGMA..STATE.\n"
+    "typedef struct myUnpackedStruct { \n"
+    ".PRE."
+    "    .TYPE..NUM. vec;\n"
+    ".POST."
+    "} testStruct;\n"
+    "__kernel void test_vec_align_struct(__constant .TYPE..NUM. *source, .DST_SCOPE. uint *dest)\n"
+    "{\n"
+    "    .SRC_SCOPE. testStruct test;\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec));\n"
+    "}\n",
+    ".PRAGMA..STATE.\n"
+    "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
+    ".PRE."
+    "    .TYPE..NUM. vec;\n"
+    ".POST."
+    "} testStruct;\n"
+    "__kernel void test_vec_align_packed_struct(__constant .TYPE..NUM. *source, .DST_SCOPE. uint *dest)\n"
+    "{\n"
+    "    .SRC_SCOPE. testStruct test;\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec) - (.SRC_SCOPE. uchar *)&test);\n"
+    "}\n",
+    ".PRAGMA..STATE.\n"
+    "typedef struct myStruct { \n"
+    ".PRE."
+    "    .TYPE..NUM. vec;\n"
+    ".POST."
+    "} testStruct;\n"
+    "__kernel void test_vec_align_struct_arr(.SRC_SCOPE. testStruct *source, .DST_SCOPE. uint *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec));\n"
+    "}\n",
+    ".PRAGMA..STATE.\n"
+    "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
+    ".PRE."
+    "    .TYPE..NUM. vec;\n"
+    ".POST."
+    "} testStruct;\n"
+    "__kernel void test_vec_align_packed_struct_arr(.SRC_SCOPE.  testStruct *source, .DST_SCOPE. uint *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec) - (.SRC_SCOPE. uchar *)&(source[0]));\n"
+    "}\n",
+    // __attribute__ ((packed))
+};
+
+
+
+// Struct members substituted for .PRE. (placed before the vector member).
+// NULL-terminated; the two size tables below are indexed in parallel with
+// it and therefore have one entry per non-NULL string.
+const char * pre_substitution_arr[] = {
+    "",
+    "char c;\n",
+    "short3 s;",
+    ".TYPE.3 tPre;\n",
+    ".TYPE. arrPre[5];\n",
+    ".TYPE. arrPre[12];\n",
+    NULL
+};
+
+
+// alignments of everything in pre_substitution_arr as raw alignments
+// 0 if such a thing is meaningless
+// The packed-struct tests pass these to test_vec_internal() as preSize,
+// where they are added up as byte counts; short3 is entered as 4 shorts.
+size_t pre_align_arr[] = {
+    0,
+    sizeof(cl_char),
+    4*sizeof(cl_short),
+    0, // taken care of in type_multiple_pre_align_arr
+    0,
+    0
+};
+
+// alignments of everything in pre_substitution_arr as multiples of
+// sizeof(.TYPE.)
+// 0 if such a thing is meaningless
+// Passed as typeMultiplePreSize and multiplied by the element type size in
+// test_vec_internal(); .TYPE.3 is entered as 4 elements.
+size_t type_multiple_pre_align_arr[] = {
+    0,
+    0,
+    0,
+    4,
+    5,
+    12
+};
+
+// Struct members substituted for .POST. (placed after the vector member).
+// NULL-terminated; the two size tables below are indexed in parallel.
+const char * post_substitution_arr[] = {
+    "",
+    "char cPost;\n",
+    ".TYPE. arrPost[3];\n",
+    ".TYPE. arrPost[5];\n",
+    ".TYPE.3 arrPost;\n",
+    ".TYPE. arrPost[12];\n",
+    NULL
+};
+
+
+// alignments of everything in post_substitution_arr as raw alignments
+// 0 if such a thing is meaningless
+// Passed as postSize by the packed-struct tests and added up as bytes.
+size_t post_align_arr[] = {
+    0,
+    sizeof(cl_char),
+    0, // taken care of in type_multiple_post_align_arr
+    0,
+    0,
+    0
+};
+
+// alignments of everything in post_substitution_arr as multiples of
+// sizeof(.TYPE.)
+// 0 if such a thing is meaningless
+// Passed as typeMultiplePostSize; .TYPE.3 is entered as 4 elements.
+size_t type_multiple_post_align_arr[] = {
+    0,
+    0,
+    3,
+    5,
+    4,
+    12
+};
+
+// there should be a packed version of this?
+// Checks the alignment of vector elements in a __global array: expands the
+// scope placeholders of patterns[0] and runs it with no extra struct
+// members.  num_elements is not used.
+int test_vec_align_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    char kernelSource[2048];
+
+    log_info("Testing global\n");
+    doReplace(kernelSource, (size_t)2048, patterns[0],
+              ".SRC_SCOPE.",  "__global",
+              ".DST_SCOPE.", "__global");
+
+    return test_vec_internal(deviceID, context, queue, kernelSource,
+                             "test_vec_align_array",
+                             BUFFER_SIZE, 0, 0, 0, 0);
+}
+
+
+// Runs patterns[1] with the struct placed in `srcScope`, once for every
+// combination of .PRE. and .POST. members.  Stops at the first failure and
+// returns its code; returns 0 if all combinations pass.
+static int vec_align_struct_for_scope(cl_device_id deviceID, cl_context context,
+                                      cl_command_queue queue,
+                                      const char * srcScope)
+{
+    char scoped[2048], expanded[2048];
+    int pre, post;
+
+    doReplace(scoped, (size_t)2048, patterns[1],
+              ".SRC_SCOPE.",  srcScope,
+              ".DST_SCOPE.", "__global");
+
+    for(pre = 0; pre_substitution_arr[pre] != NULL; ++pre) {
+        for(post = 0; post_substitution_arr[post] != NULL; ++post) {
+            int status;
+
+            doReplace(expanded, (size_t)2048, scoped,
+                      ".PRE.",  pre_substitution_arr[pre],
+                      ".POST.",  post_substitution_arr[post]);
+
+            status = test_vec_internal(deviceID, context, queue, expanded,
+                                       "test_vec_align_struct",
+                                       512, 0, 0, 0, 0);
+            if(status != 0) {
+                return status;
+            }
+        }
+    }
+    return 0;
+}
+
+// Checks the alignment of a vector member inside a struct, first with the
+// struct in __private and then in __local memory.  num_elements is not used.
+int test_vec_align_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int status;
+
+    log_info("testing __private\n");
+    status = vec_align_struct_for_scope(deviceID, context, queue, "__private");
+    if(status != 0) {
+        return status;
+    }
+
+    log_info("testing __local\n");
+    return vec_align_struct_for_scope(deviceID, context, queue, "__local");
+}
+
+// Runs patterns[2] with the packed struct placed in `srcScope`, once for
+// every combination of .PRE. and .POST. members, passing the matching size
+// table entries along.  Stops at the first failure and returns its code;
+// returns 0 if all combinations pass.
+static int vec_align_packed_struct_for_scope(cl_device_id deviceID,
+                                             cl_context context,
+                                             cl_command_queue queue,
+                                             const char * srcScope)
+{
+    char scoped[2048], expanded[2048];
+    int pre, post;
+
+    doReplace(scoped, (size_t)2048, patterns[2],
+              ".SRC_SCOPE.",  srcScope,
+              ".DST_SCOPE.", "__global");
+
+    for(pre = 0; pre_substitution_arr[pre] != NULL; ++pre) {
+        for(post = 0; post_substitution_arr[post] != NULL; ++post) {
+            int status;
+
+            doReplace(expanded, (size_t)2048, scoped,
+                      ".PRE.",  pre_substitution_arr[pre],
+                      ".POST.",  post_substitution_arr[post]);
+
+            status = test_vec_internal(deviceID, context, queue, expanded,
+                                       "test_vec_align_packed_struct",
+                                       512, pre_align_arr[pre],
+                                       type_multiple_pre_align_arr[pre],
+                                       post_align_arr[post],
+                                       type_multiple_post_align_arr[post]);
+            if(status != 0) {
+                return status;
+            }
+        }
+    }
+    return 0;
+}
+
+// Checks the offset of a vector member inside a packed struct, first with
+// the struct in __private and then in __local memory.  num_elements is not
+// used.
+int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    int status;
+
+    log_info("Testing __private\n");
+    status = vec_align_packed_struct_for_scope(deviceID, context, queue,
+                                               "__private");
+    if(status != 0) {
+        return status;
+    }
+
+    log_info("testing __local\n");
+    return vec_align_packed_struct_for_scope(deviceID, context, queue,
+                                             "__local");
+}
+
+// Checks the alignment of a vector member inside the elements of a __global
+// struct array (patterns[3]) for every combination of .PRE. and .POST.
+// members.  Returns the first non-zero result, or 0 if all pass.
+// num_elements is not used.
+int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    char scoped[2048], expanded[2048];
+    int pre, post;
+
+    log_info("testing __global\n");
+    doReplace(scoped, (size_t)2048, patterns[3],
+              ".SRC_SCOPE.",  "__global",
+              ".DST_SCOPE.", "__global");
+
+    for(pre = 0; pre_substitution_arr[pre] != NULL; ++pre) {
+        const char * const preMember = pre_substitution_arr[pre];
+
+        for(post = 0; post_substitution_arr[post] != NULL; ++post) {
+            int status;
+
+            doReplace(expanded, (size_t)2048, scoped,
+                      ".PRE.",  preMember,
+                      ".POST.",  post_substitution_arr[post]);
+
+            status = test_vec_internal(deviceID, context, queue, expanded,
+                                       "test_vec_align_struct_arr",
+                                       BUFFER_SIZE, 0, 0, 0, 0);
+            if(status != 0) {
+                return status;
+            }
+        }
+    }
+    return 0;
+}
+
+// Checks the offset of a vector member inside the elements of a packed
+// __global struct array (patterns[4]) for every combination of .PRE. and
+// .POST. members, passing the matching size table entries along.  Returns
+// the first non-zero result, or 0 if all pass.  num_elements is not used.
+int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    char scoped[2048], expanded[2048];
+    int pre, post;
+
+    log_info("Testing __global\n");
+    doReplace(scoped, (size_t)2048, patterns[4],
+              ".SRC_SCOPE.",  "__global",
+              ".DST_SCOPE.", "__global");
+
+    for(pre = 0; pre_substitution_arr[pre] != NULL; ++pre) {
+        const char * const preMember = pre_substitution_arr[pre];
+
+        for(post = 0; post_substitution_arr[post] != NULL; ++post) {
+            int status;
+
+            doReplace(expanded, (size_t)2048, scoped,
+                      ".PRE.",  preMember,
+                      ".POST.",  post_substitution_arr[post]);
+
+            status = test_vec_internal(deviceID, context, queue, expanded,
+                                       "test_vec_align_packed_struct_arr",
+                                       BUFFER_SIZE, pre_align_arr[pre],
+                                       type_multiple_pre_align_arr[pre],
+                                       post_align_arr[post],
+                                       type_multiple_post_align_arr[post]);
+            if(status != 0) {
+                return status;
+            }
+        }
+    }
+    return 0;
+}
+

diff --git a/test_conformance/vec_align/type_replacer.cpp b/test_conformance/vec_align/type_replacer.cpp
new file mode 100644
index 0000000..74967b2
--- /dev/null
+++ b/test_conformance/vec_align/type_replacer.cpp

@@ -0,0 +1,115 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <string.h>
+#if !defined(_MSC_VER)
+#include <stdint.h>
+#endif // !_MSC_VER
+
+// Copies `source` into `dest`, replacing every occurrence of
+// `stringToReplace1` with `replaceWith1` and every occurrence of
+// `stringToReplace2` with `replaceWith2` in one left-to-right pass. When both
+// search strings match at the same position the second one wins.
+//
+// dest / destLength : output buffer and its capacity in bytes. The result,
+//                     including its terminating NUL, never exceeds it.
+// Returns the number of source characters copied verbatim (replacement text
+// is not counted). If the output does not fit, `dest` is set to the empty
+// string and (size_t)-1, (size_t)-2 or (size_t)-3 is returned depending on
+// whether the overflow happened while handling pattern 1, pattern 2 or the
+// trailing text. An empty search string is ignored, because it would match
+// forever without consuming input. `dest` must not overlap `source`.
+size_t doReplace(char * dest, size_t destLength, const char * source,
+          const char * stringToReplace1,  const char * replaceWith1,
+          const char * stringToReplace2, const char * replaceWith2)
+{
+    size_t copyCount = 0; // source characters copied verbatim (return value)
+    size_t written = 0;   // bytes stored in dest so far, terminator excluded
+    const char * sourcePtr = source;
+    const char * ptr1;
+    const char * ptr2;
+    size_t nJump;
+    const size_t len1 = strlen(stringToReplace1);
+    const size_t len2 = strlen(stringToReplace2);
+    const size_t lenReplace1 = strlen(replaceWith1);
+    const size_t lenReplace2 = strlen(replaceWith2);
+
+    // No room even for the terminator: nothing can be written.
+    if(destLength == 0) { return 0; }
+
+    // Invariant: written < destLength, so one byte is always left for the NUL.
+    while(*sourcePtr)
+    {
+        ptr1 = (len1 != 0) ? strstr(sourcePtr, stringToReplace1) : NULL;
+        ptr2 = (len2 != 0) ? strstr(sourcePtr, stringToReplace2) : NULL;
+        if(ptr1 != NULL && (ptr2 == NULL || ptr2 > ptr1))
+        {
+            nJump = (size_t)(ptr1 - sourcePtr);
+            // Need nJump + lenReplace1 bytes plus the terminator.
+            if(nJump + lenReplace1 >= destLength - written)
+            {
+                dest[0] = '\0';
+                return (size_t)-1;
+            }
+            memcpy(dest + written, sourcePtr, nJump);
+            memcpy(dest + written + nJump, replaceWith1, lenReplace1);
+            written += nJump + lenReplace1;
+            copyCount += nJump;
+            sourcePtr += nJump + len1;
+        }
+        else if(ptr2 != NULL && (ptr1 == NULL || ptr1 >= ptr2))
+        {
+            nJump = (size_t)(ptr2 - sourcePtr);
+            if(nJump + lenReplace2 >= destLength - written)
+            {
+                dest[0] = '\0';
+                return (size_t)-2;
+            }
+            memcpy(dest + written, sourcePtr, nJump);
+            memcpy(dest + written + nJump, replaceWith2, lenReplace2);
+            written += nJump + lenReplace2;
+            copyCount += nJump;
+            sourcePtr += nJump + len2;
+        }
+        else
+        {
+            // No further placeholders: copy the remaining text.
+            nJump = strlen(sourcePtr);
+            if(nJump >= destLength - written)
+            {
+                dest[0] = '\0';
+                return (size_t)-3;
+            }
+            memcpy(dest + written, sourcePtr, nJump);
+            written += nJump;
+            copyCount += nJump;
+            sourcePtr += nJump;
+        }
+    }
+    dest[written] = '\0';
+    return copyCount;
+}
+
+// Copies `source` into `dest`, replacing every occurrence of
+// `stringToReplace` with `replaceWith`.
+//
+// dest / destLength : output buffer and its capacity in bytes. The result,
+//                     including its terminating NUL, never exceeds it.
+// Returns the number of source characters copied verbatim (replacement text
+// is not counted). If the output does not fit, `dest` is set to the empty
+// string and (size_t)-1 (overflow while handling a match) or (size_t)-3
+// (overflow while copying the trailing text) is returned. An empty search
+// string is ignored, because it would match forever without consuming input.
+// `dest` must not overlap `source`.
+size_t doSingleReplace(char * dest, size_t destLength, const char * source,
+               const char * stringToReplace, const char * replaceWith)
+{
+    size_t copyCount = 0; // source characters copied verbatim (return value)
+    size_t written = 0;   // bytes stored in dest so far, terminator excluded
+    const char * sourcePtr = source;
+    const char * ptr;
+    size_t nJump;
+    const size_t len = strlen(stringToReplace);
+    const size_t lenReplace = strlen(replaceWith);
+
+    // No room even for the terminator: nothing can be written.
+    if(destLength == 0) { return 0; }
+
+    // Invariant: written < destLength, so one byte is always left for the NUL.
+    while(*sourcePtr)
+    {
+        ptr = (len != 0) ? strstr(sourcePtr, stringToReplace) : NULL;
+        if(ptr != NULL)
+        {
+            nJump = (size_t)(ptr - sourcePtr);
+            // Need nJump + lenReplace bytes plus the terminator.
+            if(nJump + lenReplace >= destLength - written)
+            {
+                dest[0] = '\0';
+                return (size_t)-1;
+            }
+            memcpy(dest + written, sourcePtr, nJump);
+            memcpy(dest + written + nJump, replaceWith, lenReplace);
+            written += nJump + lenReplace;
+            copyCount += nJump;
+            sourcePtr += nJump + len;
+        }
+        else
+        {
+            // No further placeholders: copy the remaining text.
+            nJump = strlen(sourcePtr);
+            if(nJump >= destLength - written)
+            {
+                dest[0] = '\0';
+                return (size_t)-3;
+            }
+            memcpy(dest + written, sourcePtr, nJump);
+            written += nJump;
+            copyCount += nJump;
+            sourcePtr += nJump;
+        }
+    }
+    dest[written] = '\0';
+    return copyCount;
+}

diff --git a/test_conformance/vec_align/type_replacer.h b/test_conformance/vec_align/type_replacer.h
new file mode 100644
index 0000000..f50b08d
--- /dev/null
+++ b/test_conformance/vec_align/type_replacer.h

@@ -0,0 +1,23 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdlib.h>
+
+// Copies `source` into `dest` while substituting two placeholders in one
+// pass: each occurrence of stringToReplace1 becomes replaceWith1 and each
+// occurrence of stringToReplace2 becomes replaceWith2. destLength is the
+// capacity of `dest` that the implementation checks the copied text against.
+// Returns the number of source characters copied verbatim; the failure paths
+// of the implementation return -1, -2 or -3 converted to size_t, so callers
+// should treat any value >= destLength as an error.
+size_t doReplace(char * dest, size_t destLength, const char * source,
+          const char * stringToReplace1,  const char * replaceWith1,
+          const char * stringToReplace2, const char * replaceWith2);
+
+// Single-placeholder variant of doReplace: every occurrence of
+// stringToReplace in `source` becomes replaceWith in `dest`. Same return
+// convention (failure paths return -1 or -3 converted to size_t).
+size_t doSingleReplace(char * dest, size_t destLength, const char * source,
+               const char * stringToReplace, const char * replaceWith);

diff --git a/test_conformance/vec_step/CMakeLists.txt b/test_conformance/vec_step/CMakeLists.txt
new file mode 100644
index 0000000..c18fc1b
--- /dev/null
+++ b/test_conformance/vec_step/CMakeLists.txt

@@ -0,0 +1,11 @@
+# Name of this conformance-test module; also the prefix of its source list.
+set(MODULE_NAME VECSTEP)
+
+# Translation units that make up the module.
+set(${MODULE_NAME}_SOURCES
+    globals.cpp
+    test_step.cpp
+    main.cpp
+    structs.cpp
+    type_replacer.cpp
+)
+
+# Shared build logic for the conformance tests (presumably consumes
+# MODULE_NAME and ${MODULE_NAME}_SOURCES -- confirm in CMakeCommon.txt).
+include(../CMakeCommon.txt)

diff --git a/test_conformance/vec_step/defines.h b/test_conformance/vec_step/defines.h
new file mode 100644
index 0000000..5f364e4
--- /dev/null
+++ b/test_conformance/vec_step/defines.h

@@ -0,0 +1,41 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+#include "harness/threadTesting.h"
+#include "harness/typeWrappers.h"
+#include "harness/conversions.h"
+#include "harness/mt19937.h"
+
+
+// Number of vector widths exercised by the test. The tables below are
+// defined in globals.cpp in the order 1,2,3,4,8,16.
+// 1,2,3,4,8,16 or
+// 1,2,4,8,16,3
+#define NUM_VECTOR_SIZES 6
+
+// Vector widths under test, indexed by vector-size index.
+extern int g_arrVecSizes[NUM_VECTOR_SIZES];
+// Step value paired with each entry of g_arrVecSizes.
+extern int g_arrVecSteps[NUM_VECTOR_SIZES];
+// When true, initContents fills 4-byte data with alternating-sign values.
+extern bool g_wimpyMode;
+
+// Suffix appended to a scalar type name to spell each vector width
+// (the first entry is the empty string).
+extern const char * g_arrVecSizeNames[NUM_VECTOR_SIZES];
+
+// Define the buffer size that we want to block our test with
+// (passed as the byte size of both the input and the output buffer).
+#define BUFFER_SIZE (1024*1024)
+// NOTE(review): not referenced by the vec_step sources visible here --
+// confirm whether it is still needed.
+#define KPAGESIZE 4096
+
+// Element types under test; the list ends with kNumExplicitTypes.
+extern ExplicitType types[];
+
+// OpenCL C type name and host byte size for each entry of types[]
+// (same index; the sentinel has no entry).
+extern const char *g_arrTypeNames[];
+extern const size_t g_arrTypeSizes[];

diff --git a/test_conformance/vec_step/globals.cpp b/test_conformance/vec_step/globals.cpp
new file mode 100644
index 0000000..029ae1c
--- /dev/null
+++ b/test_conformance/vec_step/globals.cpp

@@ -0,0 +1,52 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "defines.h"
+
+
+// Per-vector-width tables; index i of each array describes the same width.
+// Order used here: 1, 2, 3, 4, 8, 16.
+int g_arrVecSizes[NUM_VECTOR_SIZES] = { 1, 2, 3, 4, 8, 16 };
+// Step value paired with each width; the 3-wide entry is 4.
+int g_arrVecSteps[NUM_VECTOR_SIZES] = { 1, 2, 4, 4, 8, 16 };
+// Suffix that turns a scalar type name into the vector type name.
+const char * g_arrVecSizeNames[NUM_VECTOR_SIZES] = {
+    "", "2", "3", "4", "8", "16"
+};
+
+// Reduced-coverage mode switch; off by default.
+bool g_wimpyMode = false;
+
+// Element types under test, terminated by the kNumExplicitTypes sentinel.
+ExplicitType types[] = {
+    kChar,
+    kUChar,
+    kShort,
+    kUShort,
+    kInt,
+    kUInt,
+    kLong,
+    kULong,
+    kFloat,
+    kDouble,
+    kNumExplicitTypes
+};
+
+// OpenCL C spelling of each entry of types[] (same index, no sentinel).
+const char *g_arrTypeNames[] = {
+    "char",
+    "uchar",
+    "short",
+    "ushort",
+    "int",
+    "uint",
+    "long",
+    "ulong",
+    "float",
+    "double"
+};
+
+// Size in bytes of each entry of types[] (same index, no sentinel).
+extern const size_t g_arrTypeSizes[] = {
+    1, // char
+    1, // uchar
+    2, // short
+    2, // ushort
+    4, // int
+    4, // uint
+    8, // long
+    8, // ulong
+    4, // float
+    8  // double
+};
+
+

diff --git a/test_conformance/vec_step/main.cpp b/test_conformance/vec_step/main.cpp
new file mode 100644
index 0000000..9742a28
--- /dev/null
+++ b/test_conformance/vec_step/main.cpp

@@ -0,0 +1,40 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include "procs.h"
+#include "harness/testHarness.h"
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#endif
+
+// Tests exposed by this executable. ADD_TEST is a harness macro; the names
+// presumably map to the test_step_* functions declared in procs.h -- confirm
+// against harness/testHarness.h.
+test_definition test_list[] = {
+    ADD_TEST( step_type ),
+    ADD_TEST( step_var ),
+    ADD_TEST( step_typedef_type ),
+    ADD_TEST( step_typedef_var ),
+};
+
+// Number of entries in test_list.
+const int test_num = ARRAY_SIZE( test_list );
+
+// Entry point: hands the command line and the test table to the shared
+// harness and returns its result as the process exit status.
+int main(int argc, const char *argv[])
+{
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
+}
+
+

diff --git a/test_conformance/vec_step/procs.h b/test_conformance/vec_step/procs.h
new file mode 100644
index 0000000..382a36b
--- /dev/null
+++ b/test_conformance/vec_step/procs.h

@@ -0,0 +1,43 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+#include "harness/threadTesting.h"
+#include "harness/typeWrappers.h"
+#include "harness/conversions.h"
+#include "harness/mt19937.h"
+
+// The number of errors to print out for each test in the shuffle tests
+#define MAX_ERRORS_TO_PRINT 1
+
+
+// NOTE(review): no definition of this helper is visible in the vec_step
+// sources shown here -- confirm it is still provided somewhere.
+extern int      create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);
+
+
+/*
+    test_step_type,
+    test_step_var,
+    test_step_typedef_type,
+    test_step_typedef_var,
+*/
+
+// Test entry points, defined in test_step.cpp. Each returns 0 on success
+// and -1 on failure; num_elements is not used by the implementations.
+
+// vec_step applied to a type name.
+extern int test_step_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+// vec_step applied to a value read from the source buffer.
+extern int test_step_var(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+// vec_step applied to a typedef'd type name.
+extern int test_step_typedef_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+// vec_step applied to a value whose type is a typedef.
+extern int test_step_typedef_var(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

diff --git a/test_conformance/vec_step/structs.cpp b/test_conformance/vec_step/structs.cpp
new file mode 100644
index 0000000..b36e892
--- /dev/null
+++ b/test_conformance/vec_step/structs.cpp

@@ -0,0 +1,285 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "structs.h"
+
+
+#include "defines.h"
+
+/** Layout of bufferStruct, as declared in structs.h:
+ typedef struct _bufferStruct
+ {
+ void * m_pIn;
+ void * m_pOut;
+
+ cl_mem m_outBuffer;
+ cl_mem m_inBuffer;
+
+ size_t m_bufSizeIn, m_bufSizeOut;
+ } bufferStruct;
+ */
+
+
+// Allocates a clState that records the given device, context and queue.
+// The program and kernel handles start out NULL and the thread count at 0.
+// The handles are stored as given; no retain call is made on them.
+// Returns NULL (after logging) if the allocation fails; release the result
+// with destroyClState.
+clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue)
+{
+    clState * pResult = (clState *)malloc(sizeof(clState));
+    if(pResult == NULL)
+    {
+        log_error("newClState: out of memory\n");
+        return NULL;
+    }
+
+    pResult->m_device = device;
+    pResult->m_context = context;
+    pResult->m_queue = queue;
+
+    pResult->m_kernel = NULL;
+    pResult->m_program = NULL;
+    // Give the field a defined value before the first runKernel call.
+    pResult->m_numThreads = 0;
+    return pResult;
+}
+
+// Releases the program and kernel owned by pState (if any) and frees the
+// structure itself. Accepts NULL, in which case nothing is done.
+// Always returns NULL so callers can write `p = destroyClState(p);`.
+clState * destroyClState(clState * pState)
+{
+    if(pState != NULL)
+    {
+        clStateDestroyProgramAndKernel(pState);
+        free(pState);
+    }
+    return NULL;
+}
+
+
+// Builds `prog` and creates the kernel called `kernelName`, storing the
+// resulting handles in pState->m_program and pState->m_kernel.
+// Returns whatever create_single_kernel_helper returns.
+int clStateMakeProgram(clState * pState, const char * prog,
+                       const char * kernelName)
+{
+    // The helper takes an array of source strings; there is exactly one here.
+    const char * sources[1] = { prog };
+    return create_single_kernel_helper(pState->m_context,
+                                       &pState->m_program,
+                                       &pState->m_kernel,
+                                       1, sources, kernelName);
+}
+
+// Enqueues pState->m_kernel as a one-dimensional NDRange of `numThreads`
+// work-items, with no explicit local size and no event dependencies.
+// The thread count is remembered in pState->m_numThreads.
+// Returns 0 on success, -1 (after logging the CL error) on failure.
+int runKernel(clState * pState, size_t numThreads) {
+    pState->m_numThreads = numThreads;
+
+    const int status = clEnqueueNDRangeKernel(pState->m_queue,
+                                              pState->m_kernel,
+                                              1, NULL,
+                                              &(pState->m_numThreads),
+                                              NULL, 0, NULL, NULL);
+    if(status == CL_SUCCESS)
+    {
+        return 0;
+    }
+
+    log_error("clEnqueueNDRangeKernel returned %d (%x)\n",
+              status, status);
+    return -1;
+}
+
+
+// Releases the kernel and then the program held by pState, if present, and
+// resets both handles to NULL so the state can be reused or destroyed.
+void clStateDestroyProgramAndKernel(clState * pState)
+{
+    if(NULL != pState->m_kernel)
+    {
+        clReleaseKernel(pState->m_kernel);
+        pState->m_kernel = NULL;
+    }
+
+    if(NULL != pState->m_program)
+    {
+        clReleaseProgram(pState->m_program);
+        pState->m_program = NULL;
+    }
+}
+
+// Allocates a bufferStruct holding a host input block of `inSize` bytes, a
+// host output block of `outSize` bytes, and matching device buffers created
+// in pClState->m_context (input CL_MEM_READ_ONLY, output CL_MEM_WRITE_ONLY).
+// Returns the new structure, or NULL after logging if any allocation
+// fails; everything allocated so far is released on failure.
+bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState * pClState) {
+    int error;
+    bufferStruct * pResult = (bufferStruct *)malloc(sizeof(bufferStruct));
+    if(pResult == NULL)
+    {
+        vlog_error( "newBufferStruct: out of memory\n" );
+        return NULL;
+    }
+
+    // Null every owned handle first: destroyBufferStruct inspects all four
+    // fields, so none may hold garbage when a failure path below unwinds.
+    pResult->m_pIn = NULL;
+    pResult->m_pOut = NULL;
+    pResult->m_inBuffer = NULL;
+    pResult->m_outBuffer = NULL;
+
+    pResult->m_bufSizeIn = inSize;
+    pResult->m_bufSizeOut = outSize;
+
+    pResult->m_pIn = malloc(inSize);
+    pResult->m_pOut = malloc(outSize);
+    // malloc(0) may legitimately return NULL, so only non-empty requests count.
+    if( (inSize != 0 && pResult->m_pIn == NULL)
+        || (outSize != 0 && pResult->m_pOut == NULL) )
+    {
+        vlog_error( "newBufferStruct: host allocation failed\n" );
+        return destroyBufferStruct(pResult, pClState);
+    }
+
+    pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY,
+                                         inSize, NULL, &error);
+    if( pResult->m_inBuffer == NULL )
+    {
+        vlog_error( "clCreateArray failed for input (%d)\n", error );
+        return destroyBufferStruct(pResult, pClState);
+    }
+
+    pResult->m_outBuffer = clCreateBuffer( pClState->m_context,
+                                          CL_MEM_WRITE_ONLY,
+                                          outSize,
+                                          NULL,
+                                          &error );
+    if( pResult->m_outBuffer == NULL )
+    {
+        vlog_error( "clCreateArray failed for output (%d)\n", error );
+        return destroyBufferStruct(pResult, pClState);
+    }
+
+    return pResult;
+}
+
+// Releases the device buffers and host blocks owned by `destroyMe`, then the
+// structure itself. Accepts NULL. pClState is not used.
+// Always returns NULL.
+bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState) {
+    if(!destroyMe)
+    {
+        return NULL;
+    }
+
+    // Device-side objects first, output before input.
+    if(destroyMe->m_outBuffer != NULL)
+    {
+        clReleaseMemObject(destroyMe->m_outBuffer);
+    }
+    if(destroyMe->m_inBuffer != NULL)
+    {
+        clReleaseMemObject(destroyMe->m_inBuffer);
+    }
+
+    // free(NULL) is a no-op, so the host blocks need no explicit check.
+    free(destroyMe->m_pIn);
+    free(destroyMe->m_pOut);
+
+    free((void *)destroyMe);
+    return NULL;
+}
+
+// Fills the host input block (pBufferStruct->m_pIn) with `countIn` elements
+// of `typeSize` bytes each, generated from a counter that starts at 0.
+// Supported sizes are 1, 2, 4 and 8; any other value is only logged.
+// pClState and countOut are not used, and countIn is not checked against
+// m_bufSizeIn, so the caller must ensure countIn * typeSize fits the block.
+void initContents(bufferStruct * pBufferStruct, clState * pClState,
+                  size_t typeSize,
+                  size_t countIn, size_t countOut )
+{
+    size_t i;
+
+    // Running counter; each element stores it truncated to the element width.
+    uint64_t start = 0;
+
+    switch(typeSize)
+    {
+        case 1: {
+            // Ascending bytes; the cast wraps the counter modulo 256.
+            uint8_t* ub = (uint8_t *)(pBufferStruct->m_pIn);
+            for (i=0; i < countIn; ++i)
+            {
+                ub[i] = (uint8_t)start++;
+            }
+            break;
+        }
+        case 2: {
+            // Ascending 16-bit values; the cast wraps modulo 65536.
+            uint16_t* us = (uint16_t *)(pBufferStruct->m_pIn);
+            for (i=0; i < countIn; ++i)
+            {
+                us[i] = (uint16_t)start++;
+            }
+            break;
+        }
+        case 4: {
+            if (!g_wimpyMode) {
+                // Full mode: plain ascending 32-bit values.
+                uint32_t* ui = (uint32_t *)(pBufferStruct->m_pIn);
+                for (i=0; i < countIn; ++i) {
+                    ui[i] = (uint32_t)start++;
+                }
+            }
+            else {
+                // The short test doesn't iterate over the entire 32 bit space so
+                // we alternate between positive and negative values
+                // (sequence 0, -1, 2, -3, ...).
+                int32_t* ui = (int32_t *)(pBufferStruct->m_pIn);
+                int32_t sign = 1;
+                for (i=0; i < countIn; ++i, ++start) {
+                    ui[i] = (int32_t)start*sign;
+                    sign = sign * -1;
+                }
+            }
+            break;
+        }
+        case 8: {
+            // We don't iterate over the entire space of 64 bit so for the
+            // selects, we want to test positive and negative values
+            // Note: start is uint64_t, so start*sign is computed in unsigned
+            // arithmetic and then converted to int64_t on assignment.
+            int64_t* ll = (int64_t *)(pBufferStruct->m_pIn);
+            int64_t sign = 1;
+            for (i=0; i < countIn; ++i, ++start) {
+                ll[i] = start*sign;
+                sign = sign * -1;
+            }
+            break;
+        }
+        default: {
+            // Unsupported element size: the input block is left untouched.
+            log_error("invalid type size %x\n", (int)typeSize);
+        }
+    }
+    // pBufferStruct->m_bufSizeIn
+    // pBufferStruct->m_bufSizeOut
+}
+
+// Uploads the host input block to the device input buffer (blocking write)
+// and binds the input and output buffers as kernel arguments 0 and 1.
+// Returns 0 on success, -1 (after logging) on the first failing CL call.
+int pushArgs(bufferStruct * pBufferStruct, clState * pClState)
+{
+    int status = clEnqueueWriteBuffer(pClState->m_queue,
+                                      pBufferStruct->m_inBuffer,
+                                      CL_TRUE, 0,
+                                      pBufferStruct->m_bufSizeIn,
+                                      pBufferStruct->m_pIn,
+                                      0, NULL, NULL);
+    if(status != CL_SUCCESS)
+    {
+        log_error("clEnqueueWriteBuffer failed\n");
+        return -1;
+    }
+
+    // Argument 0: the source buffer handle (the handle, not its contents).
+    status = clSetKernelArg(pClState->m_kernel, 0,
+                            sizeof(pBufferStruct->m_inBuffer),
+                            &(pBufferStruct->m_inBuffer));
+    if(status != CL_SUCCESS)
+    {
+        log_error("clSetKernelArgs failed, first arg (0)\n");
+        return -1;
+    }
+
+    // Argument 1: the destination buffer handle.
+    status = clSetKernelArg(pClState->m_kernel, 1,
+                            sizeof(pBufferStruct->m_outBuffer),
+                            &(pBufferStruct->m_outBuffer));
+    if(status != CL_SUCCESS)
+    {
+        log_error("clSetKernelArgs failed, second arg (1)\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+// Reads the whole device output buffer back into the host output block with
+// a blocking read. Returns 0 on success, -1 (after logging) on failure.
+int retrieveResults(bufferStruct * pBufferStruct, clState * pClState)
+{
+    const int status = clEnqueueReadBuffer(pClState->m_queue,
+                                           pBufferStruct->m_outBuffer,
+                                           CL_TRUE, 0,
+                                           pBufferStruct->m_bufSizeOut,
+                                           pBufferStruct->m_pOut,
+                                           0, NULL, NULL);
+    if(status == CL_SUCCESS)
+    {
+        return 0;
+    }
+
+    log_error("clEnqueueReadBuffer failed\n");
+    return -1;
+}
+
+// Verifies that every work-item wrote the expected vec_step value.
+// The host output block is read as an array of cl_int with one entry per
+// work-item of the last runKernel call (pClState->m_numThreads entries).
+// The expected value is `vecWidth`, except that a width of 3 expects 4.
+// typeSize is not used.
+// Returns 0 if all entries match, -1 (after logging the first mismatch)
+// otherwise.
+int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState,
+                     size_t typeSize,
+                     size_t vecWidth)
+{
+    size_t i;
+    cl_int targetSize = (cl_int) vecWidth;
+    cl_int * targetArr = (cl_int *)(pBufferStruct->m_pOut);
+    if(targetSize == 3)
+    {
+        targetSize = 4; // hack for 4-aligned vec3 types
+    }
+    for(i = 0; i < pClState->m_numThreads; ++i)
+    {
+        if(targetArr[i] != targetSize)
+        {
+            // %ld expects long; size_t is a different type (and a different
+            // width on LLP64 targets), so convert explicitly.
+            vlog_error("Error %ld (of %ld).  Expected %d, got %d\n",
+                       (long)i, (long)pClState->m_numThreads,
+                       targetSize, targetArr[i]);
+            return -1;
+        }
+    }
+    return 0;
+}

diff --git a/test_conformance/vec_step/structs.h b/test_conformance/vec_step/structs.h
new file mode 100644
index 0000000..37e5524
--- /dev/null
+++ b/test_conformance/vec_step/structs.h

@@ -0,0 +1,67 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+
+
+#include "harness/conversions.h"
+#include "harness/typeWrappers.h"
+
+// OpenCL objects shared by one test run, plus the program/kernel currently
+// being exercised.
+typedef struct _clState
+{
+    // Handles supplied by the caller of newClState and stored as given.
+    cl_device_id m_device;
+    cl_context m_context;
+    cl_command_queue m_queue;
+
+    // Set by clStateMakeProgram; released by clStateDestroyProgramAndKernel.
+    cl_program m_program;
+    cl_kernel m_kernel;
+    // Global work size of the most recent runKernel call.
+    size_t m_numThreads;
+} clState;
+
+// Allocates a clState for the given handles; program and kernel start NULL.
+clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue);
+// Releases the program/kernel and frees pState; always returns NULL.
+clState * destroyClState(clState * pState);
+
+// Builds `prog` and creates kernel `kernelName` into pState; returns the
+// status of the underlying harness helper.
+int clStateMakeProgram(clState * pState, const char * prog,
+               const char * kernelName);
+// Releases pState's kernel and program (if set) and resets them to NULL.
+void clStateDestroyProgramAndKernel(clState * pState);
+
+// Enqueues pState's kernel over `numThreads` work-items (one dimension);
+// returns 0 on success, -1 on failure.
+int runKernel(clState * pState, size_t numThreads);
+
+// Host blocks and device buffers used as a kernel's input and output.
+typedef struct _bufferStruct
+{
+    // Host-side blocks allocated with malloc by newBufferStruct.
+    void * m_pIn;
+    void * m_pOut;
+
+    // Device buffers: output is created write-only, input read-only.
+    cl_mem m_outBuffer;
+    cl_mem m_inBuffer;
+
+    // Sizes in bytes of the input and output blocks/buffers.
+    size_t m_bufSizeIn, m_bufSizeOut;
+} bufferStruct;
+
+
+// Allocates host blocks and device buffers of the given byte sizes in
+// pClState's context; returns NULL if a device buffer cannot be created.
+bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState * pClState);
+
+// Releases everything owned by destroyMe (NULL is accepted); returns NULL.
+bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState);
+
+// Fills the host input block with countIn generated elements of typeSize
+// bytes. The parameter list matches the definition in structs.cpp, which
+// takes countIn/countOut; the previous four-parameter prototype declared a
+// function that has no definition.
+void initContents(bufferStruct * pBufferStruct, clState * pClState,
+             size_t typeSize,
+             size_t countIn, size_t countOut);
+
+// Uploads the input block and binds both buffers as kernel arguments 0/1.
+int pushArgs(bufferStruct * pBufferStruct, clState * pClState);
+// Reads the device output buffer back into the host output block.
+int retrieveResults(bufferStruct * pBufferStruct, clState * pClState);
+
+// Checks that every work-item's output equals the expected step for
+// vecWidth; returns 0 on success, -1 on the first mismatch.
+int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState,
+             size_t typeSize,
+             size_t vecWidth);

diff --git a/test_conformance/vec_step/testBase.h b/test_conformance/vec_step/testBase.h
new file mode 100644
index 0000000..bd72e84
--- /dev/null
+++ b/test_conformance/vec_step/testBase.h

@@ -0,0 +1,28 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _testBase_h
+#define _testBase_h
+
+#include "harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+#endif // _testBase_h

diff --git a/test_conformance/vec_step/test_step.cpp b/test_conformance/vec_step/test_step.cpp
new file mode 100644
index 0000000..a2c57c3
--- /dev/null
+++ b/test_conformance/vec_step/test_step.cpp

@@ -0,0 +1,252 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+
+
+#include "harness/conversions.h"
+#include "harness/typeWrappers.h"
+#include "harness/testHarness.h"
+
+#include "structs.h"
+
+#include "defines.h"
+
+#include "type_replacer.h"
+
+
+/*
+ test_step_type,
+ test_step_var,
+ test_step_typedef_type,
+ test_step_typedef_var,
+ */
+
+
+
+// Instantiates the kernel template `pattern` for every element type in
+// types[] and every vector width, runs it over 1024 work-items and checks
+// that each work-item reported the expected vec_step value.
+//
+// pattern  : kernel source containing the .EXTENSIONS., .TYPE. and .NUM.
+//            placeholders.
+// testName : name of the kernel inside `pattern`; also used in log output.
+//
+// double is skipped without cl_khr_fp64, and long/ulong are skipped on
+// embedded-profile devices without cles_khr_int64.
+// Returns 0 when every tested combination passes, -1 on the first failure.
+// All resources created here are released on every exit path.
+int test_step_internal(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char * pattern, const char * testName)
+{
+    int err;
+    int typeIdx, vecSizeIdx;
+    size_t replaced;
+
+    char tempBuffer[2048];
+
+    clState * pClState = newClState(deviceID, context, queue);
+    if(pClState == NULL) {
+        vlog_error("%s : Could not create CL state\n", testName);
+        return -1;
+    }
+
+    bufferStruct * pBuffers =
+    newBufferStruct(BUFFER_SIZE, BUFFER_SIZE, pClState);
+
+    if(pBuffers == NULL) {
+        destroyClState(pClState);
+        vlog_error("%s : Could not create buffer\n", testName);
+        return -1;
+    }
+
+    //detect whether profile of the device is embedded
+    char profile[1024] = "";
+    err = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
+    if (err)
+    {
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
+        // Release what was allocated above instead of leaking it.
+        destroyBufferStruct(pBuffers, pClState);
+        destroyClState(pClState);
+        return -1;
+    }
+    gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
+
+    for(typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx)
+    {
+        if( types[ typeIdx ] == kDouble )
+        {
+            // If we're testing doubles, we need to check for support first
+            if( !is_extension_available( deviceID, "cl_khr_fp64" ) )
+            {
+                log_info( "Not testing doubles (unsupported on this device)\n" );
+                continue;
+            }
+        }
+
+        if( types[ typeIdx ] == kLong || types[ typeIdx ] == kULong )
+        {
+            // If we're testing long/ulong, we need to check for embedded support
+            if( gIsEmbedded && !is_extension_available( deviceID, "cles_khr_int64") )
+            {
+                log_info( "Not testing longs (unsupported on this embedded device)\n" );
+                continue;
+            }
+        }
+
+        char srcBuffer[2048];
+
+        // The replacers return the number of copied source characters, which
+        // is below the buffer size on success; their failure codes are
+        // negative values converted to size_t, i.e. far above it.
+        replaced = doSingleReplace(tempBuffer, 2048, pattern,
+                        ".EXTENSIONS.", types[typeIdx] == kDouble
+                            ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
+                            : "");
+        if(replaced >= sizeof(tempBuffer)) {
+            vlog_error("%s: failed to fill source buf for type %s%s\n",
+                       testName,
+                       g_arrTypeNames[typeIdx],
+                       "");
+            destroyBufferStruct(pBuffers, pClState);
+            destroyClState(pClState);
+            return -1;
+        }
+
+        for(vecSizeIdx = 0; vecSizeIdx < NUM_VECTOR_SIZES; ++vecSizeIdx)
+        {
+            replaced = doReplace(srcBuffer, 2048, tempBuffer,
+                      ".TYPE.",  g_arrTypeNames[typeIdx],
+                      ".NUM.", g_arrVecSizeNames[vecSizeIdx]);
+
+            if(replaced >= sizeof(srcBuffer) || srcBuffer[0] == '\0') {
+                vlog_error("%s: failed to fill source buf for type %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                destroyBufferStruct(pBuffers, pClState);
+                destroyClState(pClState);
+                return -1;
+            }
+
+            err = clStateMakeProgram(pClState, srcBuffer, testName );
+            if (err)
+            {
+                vlog_error("%s: Error compiling \"\n%s\n\"",
+                           testName, srcBuffer);
+                destroyBufferStruct(pBuffers, pClState);
+                destroyClState(pClState);
+                return -1;
+            }
+
+            err = pushArgs(pBuffers, pClState);
+            if(err != 0)
+            {
+                vlog_error("%s: failed to push args %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                destroyBufferStruct(pBuffers, pClState);
+                destroyClState(pClState);
+                return -1;
+            }
+
+            // now we run the kernel
+            err = runKernel(pClState, 1024);
+            if(err != 0)
+            {
+                // %ld expects long, so the size_t count is converted.
+                vlog_error("%s: runKernel fail (%ld threads) %s%s\n",
+                           testName, (long)pClState->m_numThreads,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                destroyBufferStruct(pBuffers, pClState);
+                destroyClState(pClState);
+                return -1;
+            }
+
+            err = retrieveResults(pBuffers, pClState);
+            if(err != 0)
+            {
+                vlog_error("%s: failed to retrieve results %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                destroyBufferStruct(pBuffers, pClState);
+                destroyClState(pClState);
+                return -1;
+            }
+
+            err = checkCorrectness(pBuffers, pClState,
+                                   g_arrTypeSizes[typeIdx],
+                                   g_arrVecSizes[vecSizeIdx]);
+
+            if(err != 0)
+            {
+                vlog_error("%s: incorrect results %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                vlog_error("%s: Source was \"\n%s\n\"",
+                           testName, srcBuffer);
+                destroyBufferStruct(pBuffers, pClState);
+                destroyClState(pClState);
+                return -1;
+            }
+
+            // Release this combination's program and kernel; the next
+            // clStateMakeProgram call would otherwise overwrite the handles
+            // and leak them.
+            clStateDestroyProgramAndKernel(pClState);
+        }
+
+    }
+
+    destroyBufferStruct(pBuffers, pClState);
+
+    destroyClState(pClState);
+
+
+    // vlog_error("%s : implementation incomplete : FAIL\n", testName);
+    return 0; // -1; // fails on account of not being written.
+}
+
+// Kernel source templates, one per test. Placeholders filled in by
+// test_step_internal:
+//   .EXTENSIONS. -> the cl_khr_fp64 enable pragma for double, else empty
+//   .TYPE.       -> element type name (g_arrTypeNames)
+//   .NUM.        -> vector width suffix (g_arrVecSizeNames)
+// Each kernel stores vec_step(...) for its work-item into dest[tid].
+const char * patterns[] = {
+    // [0] test_step_type: vec_step applied to the type name.
+    ".EXTENSIONS.\n"
+    "__kernel void test_step_type(__global .TYPE..NUM. *source, __global int *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = vec_step(.TYPE..NUM.);\n"
+    "\n"
+    "}\n",
+
+    // [1] test_step_var: vec_step applied to a value of the type.
+    ".EXTENSIONS.\n"
+    "__kernel void test_step_var(__global .TYPE..NUM. *source, __global int *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = vec_step(source[tid]);\n"
+    "\n"
+    "}\n",
+
+    // [2] test_step_typedef_type: vec_step applied to a typedef name.
+    ".EXTENSIONS.\n"
+    " typedef .TYPE..NUM. TypeToTest;\n"
+    "__kernel void test_step_typedef_type(__global TypeToTest *source, __global int *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = vec_step(TypeToTest);\n"
+    "\n"
+    "}\n",
+
+    // [3] test_step_typedef_var: vec_step applied to a value of typedef type.
+    ".EXTENSIONS.\n"
+    " typedef .TYPE..NUM. TypeToTest;\n"
+    "__kernel void test_step_typedef_var(__global TypeToTest *source, __global int *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = vec_step(source[tid]);\n"
+    "\n"
+    "}\n",
+};
+
+/*
+ test_step_type,
+ test_step_var,
+ test_step_typedef_type,
+ test_step_typedef_var,
+ */
+
+// Harness entry point: vec_step applied to a type name (patterns[0]).
+// num_elements is not used. Returns the result of test_step_internal.
+int test_step_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    const char * const kernelTemplate = patterns[0];
+    return test_step_internal(deviceID, context, queue,
+                              kernelTemplate, "test_step_type");
+}
+
+// Harness entry point: vec_step applied to a buffer value (patterns[1]).
+// num_elements is not used. Returns the result of test_step_internal.
+int test_step_var(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    const char * const kernelTemplate = patterns[1];
+    return test_step_internal(deviceID, context, queue,
+                              kernelTemplate, "test_step_var");
+}
+
+// Harness entry point: vec_step applied to a typedef name (patterns[2]).
+// num_elements is not used. Returns the result of test_step_internal.
+int test_step_typedef_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    const char * const kernelTemplate = patterns[2];
+    return test_step_internal(deviceID, context, queue,
+                              kernelTemplate, "test_step_typedef_type");
+}
+
+// Harness entry point: vec_step applied to a value of typedef type
+// (patterns[3]). num_elements is not used. Returns the result of
+// test_step_internal.
+int test_step_typedef_var(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    const char * const kernelTemplate = patterns[3];
+    return test_step_internal(deviceID, context, queue,
+                              kernelTemplate, "test_step_typedef_var");
+}

diff --git a/test_conformance/vec_step/type_replacer.cpp b/test_conformance/vec_step/type_replacer.cpp
new file mode 100644
index 0000000..74967b2
--- /dev/null
+++ b/test_conformance/vec_step/type_replacer.cpp

@@ -0,0 +1,115 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <string.h>
+#if !defined(_MSC_VER)
+#include <stdint.h>
+#endif // !_MSC_VER
+
+// Copies `source` into `dest`, substituting every occurrence of
+// `stringToReplace1` with `replaceWith1` and every occurrence of
+// `stringToReplace2` with `replaceWith2`.  When both patterns match at the
+// same position the second pattern wins.  Empty search strings never match.
+//
+// `destLength` is the full size of `dest` in bytes, including the room needed
+// for the terminating NUL.
+//
+// Returns the length of the string written to `dest` (NUL excluded).  If the
+// output does not fit, `dest` holds the NUL-terminated prefix produced so far
+// and (size_t)-1, (size_t)-2 or (size_t)-3 is returned, depending on whether
+// the overflow happened at a pattern-1 match, at a pattern-2 match, or while
+// copying the unmatched tail.  (size_t)-3 is also returned when `destLength`
+// is 0, in which case `dest` is not touched.
+size_t doReplace(char * dest, size_t destLength, const char * source,
+          const char * stringToReplace1,  const char * replaceWith1,
+          const char * stringToReplace2, const char * replaceWith2)
+{
+    size_t copyCount = 0; // bytes written to dest so far, NUL excluded
+    const char * sourcePtr = source;
+    const size_t len1 = strlen(stringToReplace1);
+    const size_t len2 = strlen(stringToReplace2);
+    const size_t lenReplace1 = strlen(replaceWith1);
+    const size_t lenReplace2 = strlen(replaceWith2);
+
+    // Without room for even the terminator nothing can be written safely.
+    if(destLength == 0) { return (size_t)-3; }
+
+    while(*sourcePtr)
+    {
+        // strstr() with an empty needle matches without consuming any input,
+        // which would never advance sourcePtr; treat it as "no match".
+        const char * ptr1 = len1 ? strstr(sourcePtr, stringToReplace1) : NULL;
+        const char * ptr2 = len2 ? strstr(sourcePtr, stringToReplace2) : NULL;
+        const char * replacement = NULL;
+        size_t nJump;          // literal bytes to copy ahead of the match
+        size_t lenMatch = 0;   // bytes of source consumed by the match
+        size_t lenReplace = 0; // bytes of replacement text to emit
+        size_t errorCode;
+        size_t room;
+
+        if(ptr1 != NULL && (ptr2 == NULL || ptr2 > ptr1))
+        {
+            nJump = (size_t)(ptr1 - sourcePtr);
+            lenMatch = len1;
+            replacement = replaceWith1;
+            lenReplace = lenReplace1;
+            errorCode = (size_t)-1;
+        }
+        else if(ptr2 != NULL)
+        {
+            // Reached when only pattern 2 matches or it matches no later
+            // than pattern 1 (ties go to pattern 2).
+            nJump = (size_t)(ptr2 - sourcePtr);
+            lenMatch = len2;
+            replacement = replaceWith2;
+            lenReplace = lenReplace2;
+            errorCode = (size_t)-2;
+        }
+        else
+        {
+            // No more matches: the rest of the source is copied verbatim.
+            nJump = strlen(sourcePtr);
+            errorCode = (size_t)-3;
+        }
+
+        // One byte is always held back for the terminator; copyCount never
+        // exceeds destLength - 1, so this subtraction cannot wrap.
+        room = destLength - 1 - copyCount;
+        if(nJump > room || lenReplace > room - nJump)
+        {
+            dest[copyCount] = '\0';
+            return errorCode;
+        }
+
+        memcpy(dest + copyCount, sourcePtr, nJump);
+        copyCount += nJump;
+        if(lenReplace != 0)
+        {
+            memcpy(dest + copyCount, replacement, lenReplace);
+            copyCount += lenReplace;
+        }
+        sourcePtr += nJump + lenMatch;
+    }
+    dest[copyCount] = '\0';
+    return copyCount;
+}
+
+// Copies `source` into `dest`, substituting every occurrence of
+// `stringToReplace` with `replaceWith`.  An empty search string never
+// matches.
+//
+// `destLength` is the full size of `dest` in bytes, including the room needed
+// for the terminating NUL.
+//
+// Returns the length of the string written to `dest` (NUL excluded).  If the
+// output does not fit, `dest` holds the NUL-terminated prefix produced so far
+// and (size_t)-1 is returned when the overflow happened at a match, or
+// (size_t)-3 when it happened while copying the unmatched tail.  (size_t)-3
+// is also returned when `destLength` is 0, in which case `dest` is not
+// touched.
+size_t doSingleReplace(char * dest, size_t destLength, const char * source,
+               const char * stringToReplace, const char * replaceWith)
+{
+    size_t copyCount = 0; // bytes written to dest so far, NUL excluded
+    const char * sourcePtr = source;
+    const size_t len = strlen(stringToReplace);
+    const size_t lenReplaceWith = strlen(replaceWith);
+
+    // Without room for even the terminator nothing can be written safely.
+    if(destLength == 0) { return (size_t)-3; }
+
+    while(*sourcePtr)
+    {
+        // strstr() with an empty needle matches without consuming any input,
+        // which would never advance sourcePtr; treat it as "no match".
+        const char * ptr = len ? strstr(sourcePtr, stringToReplace) : NULL;
+        size_t nJump;          // literal bytes to copy ahead of the match
+        size_t lenMatch = 0;   // bytes of source consumed by the match
+        size_t lenReplace = 0; // bytes of replacement text to emit
+        size_t errorCode;
+        size_t room;
+
+        if(ptr != NULL)
+        {
+            nJump = (size_t)(ptr - sourcePtr);
+            lenMatch = len;
+            lenReplace = lenReplaceWith;
+            errorCode = (size_t)-1;
+        }
+        else
+        {
+            // No more matches: the rest of the source is copied verbatim.
+            nJump = strlen(sourcePtr);
+            errorCode = (size_t)-3;
+        }
+
+        // One byte is always held back for the terminator; copyCount never
+        // exceeds destLength - 1, so this subtraction cannot wrap.
+        room = destLength - 1 - copyCount;
+        if(nJump > room || lenReplace > room - nJump)
+        {
+            dest[copyCount] = '\0';
+            return errorCode;
+        }
+
+        memcpy(dest + copyCount, sourcePtr, nJump);
+        copyCount += nJump;
+        if(lenReplace != 0)
+        {
+            memcpy(dest + copyCount, replaceWith, lenReplace);
+            copyCount += lenReplace;
+        }
+        sourcePtr += nJump + lenMatch;
+    }
+    dest[copyCount] = '\0';
+    return copyCount;
+}

diff --git a/test_conformance/vec_step/type_replacer.h b/test_conformance/vec_step/type_replacer.h
new file mode 100644
index 0000000..f50b08d
--- /dev/null
+++ b/test_conformance/vec_step/type_replacer.h

@@ -0,0 +1,23 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdlib.h>
+
+// Builds in `dest` a NUL-terminated copy of `source` in which occurrences of
+// `stringToReplace1` are replaced by `replaceWith1` and occurrences of
+// `stringToReplace2` by `replaceWith2`.  `destLength` is the capacity the
+// implementation uses to bound the copy.
+//
+// Returns a count of copied characters on success.  Failures are reported as
+// small negative numbers converted to size_t, so they show up as very large
+// values; compare against (size_t)-1 .. (size_t)-3 rather than testing "< 0".
+// Defined in type_replacer.cpp.
+size_t doReplace(char * dest, size_t destLength, const char * source,
+          const char * stringToReplace1,  const char * replaceWith1,
+          const char * stringToReplace2, const char * replaceWith2);
+
+// Single-pattern variant of doReplace: builds in `dest` a NUL-terminated copy
+// of `source` in which occurrences of `stringToReplace` are replaced by
+// `replaceWith`.  `destLength` is the capacity the implementation uses to
+// bound the copy.
+//
+// Returns a count of copied characters on success.  Failures are reported as
+// small negative numbers converted to size_t, so they show up as very large
+// values; compare against (size_t)-1 / (size_t)-3 rather than testing "< 0".
+// Defined in type_replacer.cpp.
+size_t doSingleReplace(char * dest, size_t destLength, const char * source,
+               const char * stringToReplace, const char * replaceWith);

diff --git a/test_conformance/vectors/CMakeLists.txt b/test_conformance/vectors/CMakeLists.txt
deleted file mode 100644
index 278de20..0000000
--- a/test_conformance/vectors/CMakeLists.txt
+++ /dev/null

@@ -1,12 +0,0 @@
-set(MODULE_NAME VECTORS)
-
-set(${MODULE_NAME}_SOURCES
-    globals.cpp
-        main.cpp
-        structs.cpp
-        test_step.cpp
-        test_vec_align.cpp
-        type_replacer.cpp
-)
-
-include(../CMakeCommon.txt)

diff --git a/test_conformance/vectors/defines.h b/test_conformance/vectors/defines.h
deleted file mode 100644
index c96c3da..0000000
--- a/test_conformance/vectors/defines.h
+++ /dev/null

@@ -1,42 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/errorHelpers.h"
-#include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
-#include "harness/typeWrappers.h"
-#include "harness/conversions.h"
-#include "harness/mt19937.h"
-
-
-// 1,2,3,4,8,16 or
-// 1,2,4,8,16,3
-#define NUM_VECTOR_SIZES 6
-
-extern int g_arrVecSizes[NUM_VECTOR_SIZES];
-extern int g_arrVecSteps[NUM_VECTOR_SIZES];
-extern bool g_wimpyMode;
-
-extern const char *g_arrVecSizeNames[NUM_VECTOR_SIZES];
-extern size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES];
-
-// Define the buffer size that we want to block our test with
-#define BUFFER_SIZE (1024 * 1024)
-#define KPAGESIZE 4096
-
-extern ExplicitType types[];
-
-extern const char *g_arrTypeNames[];
-extern const size_t g_arrTypeSizes[];

diff --git a/test_conformance/vectors/globals.cpp b/test_conformance/vectors/globals.cpp
deleted file mode 100644
index 6dee6d9..0000000
--- a/test_conformance/vectors/globals.cpp
+++ /dev/null

@@ -1,46 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "defines.h"
-
-
-// 1,2,3,4,8,16 or
-// 1,2,4,8,16,3
-int g_arrVecSizes[NUM_VECTOR_SIZES] = { 1, 2, 3, 4, 8, 16 };
-int g_arrVecSteps[NUM_VECTOR_SIZES] = { 1, 2, 4, 4, 8, 16 };
-const char *g_arrVecSizeNames[NUM_VECTOR_SIZES] = {
-    "", "2", "3", "4", "8", "16"
-};
-size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES] = {
-    (size_t)0,
-    (size_t)0x1, // 2
-    (size_t)0x3, // 3
-    (size_t)0x3, // 4
-    (size_t)0x7, // 8
-    (size_t)0xf // 16
-};
-
-bool g_wimpyMode = false;
-
-ExplicitType types[] = {
-    kChar,  kUChar, kShort,  kUShort,          kInt, kUInt, kLong,
-    kULong, kFloat, kDouble, kNumExplicitTypes
-};
-
-
-const char *g_arrTypeNames[] = { "char", "uchar", "short", "ushort", "int",
-                                 "uint", "long",  "ulong", "float",  "double" };
-
-extern const size_t g_arrTypeSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };

diff --git a/test_conformance/vectors/main.cpp b/test_conformance/vectors/main.cpp
deleted file mode 100644
index e499faf..0000000
--- a/test_conformance/vectors/main.cpp
+++ /dev/null

@@ -1,44 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include "procs.h"
-#include "harness/testHarness.h"
-
-#if !defined(_WIN32)
-#include <unistd.h>
-#endif
-
-test_definition test_list[] = {
-    ADD_TEST(step_type),
-    ADD_TEST(step_var),
-    ADD_TEST(step_typedef_type),
-    ADD_TEST(step_typedef_var),
-    ADD_TEST(vec_align_array),
-    ADD_TEST(vec_align_struct),
-    ADD_TEST(vec_align_packed_struct),
-    ADD_TEST(vec_align_struct_arr),
-    ADD_TEST(vec_align_packed_struct_arr),
-};
-
-const int test_num = ARRAY_SIZE(test_list);
-
-int main(int argc, const char *argv[])
-{
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
-}

diff --git a/test_conformance/vectors/procs.h b/test_conformance/vectors/procs.h
deleted file mode 100644
index db423a6..0000000
--- a/test_conformance/vectors/procs.h
+++ /dev/null

@@ -1,55 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/errorHelpers.h"
-#include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
-#include "harness/typeWrappers.h"
-#include "harness/conversions.h"
-#include "harness/mt19937.h"
-
-// The number of errors to print out for each test in the shuffle tests
-#define MAX_ERRORS_TO_PRINT 1
-
-
-extern int create_program_and_kernel(const char *source,
-                                     const char *kernel_name,
-                                     cl_program *program_ret,
-                                     cl_kernel *kernel_ret);
-
-extern int test_step_type(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements);
-extern int test_step_var(cl_device_id deviceID, cl_context context,
-                         cl_command_queue queue, int num_elements);
-extern int test_step_typedef_type(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements);
-extern int test_step_typedef_var(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements);
-
-int test_vec_align_array(cl_device_id deviceID, cl_context context,
-                         cl_command_queue queue, int num_elements);
-
-int test_vec_align_struct(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements);
-
-int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements);
-
-
-int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements);
-
-int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements);

diff --git a/test_conformance/vectors/structs.cpp b/test_conformance/vectors/structs.cpp
deleted file mode 100644
index 9bfa389..0000000
--- a/test_conformance/vectors/structs.cpp
+++ /dev/null

@@ -1,405 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "structs.h"
-
-
-#include "defines.h"
-
-#define DEBUG_MEM_ALLOC 0
-
-/** typedef struct _bufferStruct
- {
- void * m_pIn;
- void * m_pOut;
-
- cl_mem m_outBuffer;
- cl_mem m_inBuffer;
-
- size_t m_bufSize;
- } bufferStruct;
- */
-
-
-clState *newClState(cl_device_id device, cl_context context,
-                    cl_command_queue queue)
-{
-    clState *pResult = (clState *)malloc(sizeof(clState));
-#if DEBUG_MEM_ALLOC
-    log_info("malloc clState * %x\n", pResult);
-#endif
-
-    pResult->m_device = device;
-    pResult->m_context = context;
-    pResult->m_queue = queue;
-
-    pResult->m_kernel = NULL;
-    pResult->m_program = NULL;
-    return pResult;
-}
-
-clState *destroyClState(clState *pState)
-{
-    clStateDestroyProgramAndKernel(pState);
-#if DEBUG_MEM_ALLOC
-    log_info("delete (free) clState * %x\n", pState);
-#endif
-    free(pState);
-    return NULL;
-}
-
-
-int clStateMakeProgram(clState *pState, const char *prog,
-                       const char *kernelName)
-{
-    const char *srcArr[1] = { NULL };
-    srcArr[0] = prog;
-    int err =
-        create_single_kernel_helper(pState->m_context, &(pState->m_program),
-                                    &(pState->m_kernel), 1, srcArr, kernelName);
-#if DEBUG_MEM_ALLOC
-    log_info("create program and kernel\n");
-#endif
-    return err;
-}
-
-int runKernel(clState *pState, size_t numThreads)
-{
-    int err;
-    pState->m_numThreads = numThreads;
-    err = clEnqueueNDRangeKernel(pState->m_queue, pState->m_kernel, 1, NULL,
-                                 &(pState->m_numThreads), NULL, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueNDRangeKernel returned %d (%x)\n", err, err);
-        return -1;
-    }
-    return 0;
-}
-
-
-void clStateDestroyProgramAndKernel(clState *pState)
-{
-#if DEBUG_MEM_ALLOC
-    log_info("destroy program and kernel\n");
-#endif
-    if (pState->m_kernel != NULL)
-    {
-        clReleaseKernel(pState->m_kernel);
-        pState->m_kernel = NULL;
-    }
-    if (pState->m_program != NULL)
-    {
-        clReleaseProgram(pState->m_program);
-        pState->m_program = NULL;
-    }
-}
-
-bufferStruct *newBufferStruct(size_t inSize, size_t outSize, clState *pClState)
-{
-    int error;
-    bufferStruct *pResult = (bufferStruct *)malloc(sizeof(bufferStruct));
-#if DEBUG_MEM_ALLOC
-    log_info("malloc bufferStruct * %x\n", pResult);
-#endif
-
-    pResult->m_bufSizeIn = inSize;
-    pResult->m_bufSizeOut = outSize;
-
-    pResult->m_pIn = malloc(inSize);
-    pResult->m_pOut = malloc(outSize);
-#if DEBUG_MEM_ALLOC
-    log_info("malloc m_pIn %x\n", pResult->m_pIn);
-    log_info("malloc m_pOut %x\n", pResult->m_pOut);
-#endif
-
-    pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY,
-                                         inSize, NULL, &error);
-    if (pResult->m_inBuffer == NULL)
-    {
-        vlog_error("clCreateArray failed for input (%d)\n", error);
-        return destroyBufferStruct(pResult, pClState);
-    }
-#if DEBUG_MEM_ALLOC
-    log_info("clCreateBuffer %x\n", pResult->m_inBuffer);
-#endif
-
-    pResult->m_outBuffer = clCreateBuffer(
-        pClState->m_context, CL_MEM_WRITE_ONLY, outSize, NULL, &error);
-    if (pResult->m_outBuffer == NULL)
-    {
-        vlog_error("clCreateArray failed for output (%d)\n", error);
-        return destroyBufferStruct(pResult, pClState);
-    }
-#if DEBUG_MEM_ALLOC
-    log_info("clCreateBuffer %x\n", pResult->m_outBuffer);
-#endif
-
-    pResult->m_bufferUploaded = false;
-
-    return pResult;
-}
-
-bufferStruct *destroyBufferStruct(bufferStruct *destroyMe, clState *pClState)
-{
-    if (destroyMe)
-    {
-        if (destroyMe->m_outBuffer != NULL)
-        {
-#if DEBUG_MEM_ALLOC
-            log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer);
-#endif
-            clReleaseMemObject(destroyMe->m_outBuffer);
-            destroyMe->m_outBuffer = NULL;
-        }
-        if (destroyMe->m_inBuffer != NULL)
-        {
-#if DEBUG_MEM_ALLOC
-            log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer);
-#endif
-            clReleaseMemObject(destroyMe->m_inBuffer);
-            destroyMe->m_inBuffer = NULL;
-        }
-        if (destroyMe->m_pIn != NULL)
-        {
-#if DEBUG_MEM_ALLOC
-            log_info("delete (free) m_pIn %x\n", destroyMe->m_pIn);
-#endif
-            free(destroyMe->m_pIn);
-            destroyMe->m_pIn = NULL;
-        }
-        if (destroyMe->m_pOut != NULL)
-        {
-#if DEBUG_MEM_ALLOC
-            log_info("delete (free) m_pOut %x\n", destroyMe->m_pOut);
-#endif
-            free(destroyMe->m_pOut);
-            destroyMe->m_pOut = NULL;
-        }
-#if DEBUG_MEM_ALLOC
-        log_info("delete (free) bufferStruct * %x\n", destroyMe);
-#endif
-        free((void *)destroyMe);
-        destroyMe = NULL;
-    }
-    return destroyMe;
-}
-
-void initContents(bufferStruct *pBufferStruct, clState *pClState,
-                  size_t typeSize, size_t countIn, size_t countOut)
-{
-    size_t i;
-
-    uint64_t start = 0;
-
-    switch (typeSize)
-    {
-        case 1: {
-            uint8_t *ub = (uint8_t *)(pBufferStruct->m_pIn);
-            for (i = 0; i < countIn; ++i)
-            {
-                ub[i] = (uint8_t)start++;
-            }
-            break;
-        }
-        case 2: {
-            uint16_t *us = (uint16_t *)(pBufferStruct->m_pIn);
-            for (i = 0; i < countIn; ++i)
-            {
-                us[i] = (uint16_t)start++;
-            }
-            break;
-        }
-        case 4: {
-            if (!g_wimpyMode)
-            {
-                uint32_t *ui = (uint32_t *)(pBufferStruct->m_pIn);
-                for (i = 0; i < countIn; ++i)
-                {
-                    ui[i] = (uint32_t)start++;
-                }
-            }
-            else
-            {
-                // The short test doesn't iterate over the entire 32 bit space
-                // so we alternate between positive and negative values
-                int32_t *ui = (int32_t *)(pBufferStruct->m_pIn);
-                int32_t sign = 1;
-                for (i = 0; i < countIn; ++i, ++start)
-                {
-                    ui[i] = (int32_t)start * sign;
-                    sign = sign * -1;
-                }
-            }
-            break;
-        }
-        case 8: {
-            // We don't iterate over the entire space of 64 bit so for the
-            // selects, we want to test positive and negative values
-            int64_t *ll = (int64_t *)(pBufferStruct->m_pIn);
-            int64_t sign = 1;
-            for (i = 0; i < countIn; ++i, ++start)
-            {
-                ll[i] = start * sign;
-                sign = sign * -1;
-            }
-            break;
-        }
-        default: {
-            log_error("invalid type size %x\n", (int)typeSize);
-        }
-    }
-    // pBufferStruct->m_bufSizeIn
-    // pBufferStruct->m_bufSizeOut
-}
-
-int pushArgs(bufferStruct *pBufferStruct, clState *pClState)
-{
-    int err;
-    if (!pBufferStruct->m_bufferUploaded)
-    {
-        err = clEnqueueWriteBuffer(pClState->m_queue, pBufferStruct->m_inBuffer,
-                                   CL_TRUE, 0, pBufferStruct->m_bufSizeIn,
-                                   pBufferStruct->m_pIn, 0, NULL, NULL);
-#if DEBUG_MEM_ALLOC
-        log_info("clEnqueueWriteBuffer %x\n", pBufferStruct->m_inBuffer);
-#endif
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueWriteBuffer failed\n");
-            return -1;
-        }
-        pBufferStruct->m_bufferUploaded = true;
-    }
-
-    err = clSetKernelArg(
-        pClState->m_kernel, 0,
-        sizeof(pBufferStruct->m_inBuffer), // pBufferStruct->m_bufSizeIn,
-        &(pBufferStruct->m_inBuffer));
-#if DEBUG_MEM_ALLOC
-    // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_inBuffer);
-#endif
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed, first arg (0)\n");
-        return -1;
-    }
-
-    err = clSetKernelArg(
-        pClState->m_kernel, 1,
-        sizeof(pBufferStruct->m_outBuffer), // pBufferStruct->m_bufSizeOut,
-        &(pBufferStruct->m_outBuffer));
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed, second arg (1)\n");
-        return -1;
-    }
-
-#if DEBUG_MEM_ALLOC
-    // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_outBuffer);
-#endif
-
-    return 0;
-}
-
-int retrieveResults(bufferStruct *pBufferStruct, clState *pClState)
-{
-    int err;
-    err = clEnqueueReadBuffer(pClState->m_queue, pBufferStruct->m_outBuffer,
-                              CL_TRUE, 0, pBufferStruct->m_bufSizeOut,
-                              pBufferStruct->m_pOut, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueReadBuffer failed\n");
-        return -1;
-    }
-    return 0;
-}
-
-// vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames
-// and g_arrVecSizes
-int checkCorrectnessAlign(bufferStruct *pBufferStruct, clState *pClState,
-                          size_t minAlign)
-{
-    size_t i;
-    cl_uint *targetArr = (cl_uint *)(pBufferStruct->m_pOut);
-    for (i = 0; i < pClState->m_numThreads; ++i)
-    {
-        if ((targetArr[i]) % minAlign != (cl_uint)0)
-        {
-            vlog_error("Error %d (of %d).  Expected a multple of %x, got %x\n",
-                       i, pClState->m_numThreads, minAlign, targetArr[i]);
-            return -1;
-        }
-    }
-
-    /*    log_info("\n");
-     for(i = 0; i < 4; ++i) {
-     log_info("%lx, ", targetArr[i]);
-     }
-     log_info("\n");
-     fflush(stdout); */
-    return 0;
-}
-
-int checkCorrectnessStep(bufferStruct *pBufferStruct, clState *pClState,
-                         size_t typeSize, size_t vecWidth)
-{
-    size_t i;
-    cl_int targetSize = (cl_int)vecWidth;
-    cl_int *targetArr = (cl_int *)(pBufferStruct->m_pOut);
-    if (targetSize == 3)
-    {
-        targetSize = 4; // hack for 4-aligned vec3 types
-    }
-    for (i = 0; i < pClState->m_numThreads; ++i)
-    {
-        if (targetArr[i] != targetSize)
-        {
-            vlog_error("Error %ld (of %ld).  Expected %d, got %d\n", i,
-                       pClState->m_numThreads, targetSize, targetArr[i]);
-            return -1;
-        }
-    }
-    return 0;
-}
-
-// vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames
-// and g_arrVecSizes
-int checkPackedCorrectness(bufferStruct *pBufferStruct, clState *pClState,
-                           size_t totSize, size_t beforeSize)
-{
-    size_t i;
-    cl_uint *targetArr = (cl_uint *)(pBufferStruct->m_pOut);
-    for (i = 0; i < pClState->m_numThreads; ++i)
-    {
-        if ((targetArr[i] - beforeSize) % totSize != (cl_uint)0)
-        {
-            vlog_error("Error %d (of %d).  Expected %d more than a multple of "
-                       "%d, got %d \n",
-                       i, pClState->m_numThreads, beforeSize, totSize,
-                       targetArr[i]);
-            return -1;
-        }
-    }
-
-    /*    log_info("\n");
-     for(i = 0; i < 4; ++i) {
-     log_info("%lx, ", targetArr[i]);
-     }
-     log_info("\n");
-     fflush(stdout); */
-    return 0;
-}

diff --git a/test_conformance/vectors/structs.h b/test_conformance/vectors/structs.h
deleted file mode 100644
index c6a1725..0000000
--- a/test_conformance/vectors/structs.h
+++ /dev/null

@@ -1,75 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-
-
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-
-typedef struct _clState
-{
-    cl_device_id m_device;
-    cl_context m_context;
-    cl_command_queue m_queue;
-
-    cl_program m_program;
-    cl_kernel m_kernel;
-    size_t m_numThreads;
-} clState;
-
-clState* newClState(cl_device_id device, cl_context context,
-                    cl_command_queue queue);
-clState* destroyClState(clState* pState);
-
-int clStateMakeProgram(clState* pState, const char* prog,
-                       const char* kernelName);
-void clStateDestroyProgramAndKernel(clState* pState);
-
-int runKernel(clState* pState, size_t numThreads);
-
-typedef struct _bufferStruct
-{
-    void* m_pIn;
-    void* m_pOut;
-
-    cl_mem m_outBuffer;
-    cl_mem m_inBuffer;
-
-    size_t m_bufSizeIn, m_bufSizeOut;
-
-    int m_bufferUploaded;
-} bufferStruct;
-
-
-bufferStruct* newBufferStruct(size_t inSize, size_t outSize, clState* pClState);
-
-bufferStruct* destroyBufferStruct(bufferStruct* destroyMe, clState* pClState);
-
-void initContents(bufferStruct* pBufferStruct, clState* pClState,
-                  size_t typeSize, size_t vecWidth);
-
-int pushArgs(bufferStruct* pBufferStruct, clState* pClState);
-int retrieveResults(bufferStruct* pBufferStruct, clState* pClState);
-
-int checkCorrectnessStep(bufferStruct* pBufferStruct, clState* pClState,
-                         size_t typeSize, size_t vecWidth);
-// vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames
-// and g_arrVecSizes
-int checkCorrectnessAlign(bufferStruct* pBufferStruct, clState* pClState,
-                          size_t minAlign);
-
-int checkPackedCorrectness(bufferStruct* pBufferStruct, clState* pClState,
-                           size_t totSize, size_t beforeSize);

diff --git a/test_conformance/vectors/testBase.h b/test_conformance/vectors/testBase.h
deleted file mode 100644
index 63086d7..0000000
--- a/test_conformance/vectors/testBase.h
+++ /dev/null

@@ -1,28 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef _testBase_h
-#define _testBase_h
-
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-#endif // _testBase_h

diff --git a/test_conformance/vectors/test_step.cpp b/test_conformance/vectors/test_step.cpp
deleted file mode 100644
index 2f6ad18..0000000
--- a/test_conformance/vectors/test_step.cpp
+++ /dev/null

@@ -1,260 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-
-
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-#include "harness/testHarness.h"
-
-#include "structs.h"
-
-#include "defines.h"
-
-#include "type_replacer.h"
-
-
-/*
- test_step_type,
- test_step_var,
- test_step_typedef_type,
- test_step_typedef_var,
- */
-
-
-int test_step_internal(cl_device_id deviceID, cl_context context,
-                       cl_command_queue queue, const char* pattern,
-                       const char* testName)
-{
-    int err;
-    int typeIdx, vecSizeIdx;
-
-    char tempBuffer[2048];
-
-    clState* pClState = newClState(deviceID, context, queue);
-    bufferStruct* pBuffers =
-        newBufferStruct(BUFFER_SIZE, BUFFER_SIZE, pClState);
-
-    if (pBuffers == NULL)
-    {
-        destroyClState(pClState);
-        vlog_error("%s : Could not create buffer\n", testName);
-        return -1;
-    }
-
-    // detect whether profile of the device is embedded
-    char profile[1024] = "";
-    err = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile,
-                          NULL);
-    if (err)
-    {
-        print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n");
-        return -1;
-    }
-    gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
-
-    for (typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx)
-    {
-        if (types[typeIdx] == kDouble)
-        {
-            // If we're testing doubles, we need to check for support first
-            if (!is_extension_available(deviceID, "cl_khr_fp64"))
-            {
-                log_info("Not testing doubles (unsupported on this device)\n");
-                continue;
-            }
-        }
-
-        if (types[typeIdx] == kLong || types[typeIdx] == kULong)
-        {
-            // If we're testing long/ulong, we need to check for embedded
-            // support
-            if (gIsEmbedded
-                && !is_extension_available(deviceID, "cles_khr_int64"))
-            {
-                log_info("Not testing longs (unsupported on this embedded "
-                         "device)\n");
-                continue;
-            }
-        }
-
-        char srcBuffer[2048];
-
-        doSingleReplace(tempBuffer, 2048, pattern, ".EXTENSIONS.",
-                        types[typeIdx] == kDouble
-                            ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
-                            : "");
-
-        for (vecSizeIdx = 0; vecSizeIdx < NUM_VECTOR_SIZES; ++vecSizeIdx)
-        {
-            doReplace(srcBuffer, 2048, tempBuffer, ".TYPE.",
-                      g_arrTypeNames[typeIdx], ".NUM.",
-                      g_arrVecSizeNames[vecSizeIdx]);
-
-            if (srcBuffer[0] == '\0')
-            {
-                vlog_error("%s: failed to fill source buf for type %s%s\n",
-                           testName, g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-
-            err = clStateMakeProgram(pClState, srcBuffer, testName);
-            if (err)
-            {
-                vlog_error("%s: Error compiling \"\n%s\n\"", testName,
-                           srcBuffer);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-
-            err = pushArgs(pBuffers, pClState);
-            if (err != 0)
-            {
-                vlog_error("%s: failed to push args %s%s\n", testName,
-                           g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-
-            // now we run the kernel
-            err = runKernel(pClState, 1024);
-            if (err != 0)
-            {
-                vlog_error("%s: runKernel fail (%ld threads) %s%s\n", testName,
-                           pClState->m_numThreads, g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-
-            err = retrieveResults(pBuffers, pClState);
-            if (err != 0)
-            {
-                vlog_error("%s: failed to retrieve results %s%s\n", testName,
-                           g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-
-            err = checkCorrectnessStep(pBuffers, pClState,
-                                       g_arrTypeSizes[typeIdx],
-                                       g_arrVecSizes[vecSizeIdx]);
-
-            if (err != 0)
-            {
-                vlog_error("%s: incorrect results %s%s\n", testName,
-                           g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                vlog_error("%s: Source was \"\n%s\n\"", testName, srcBuffer);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-        }
-    }
-
-    destroyBufferStruct(pBuffers, pClState);
-
-    destroyClState(pClState);
-
-
-    // vlog_error("%s : implementation incomplete : FAIL\n", testName);
-    return 0; // -1; // fails on account of not being written.
-}
-
-static const char* patterns[] = {
-    ".EXTENSIONS.\n"
-    "__kernel void test_step_type(__global .TYPE..NUM. *source, __global int "
-    "*dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = vec_step(.TYPE..NUM.);\n"
-    "\n"
-    "}\n",
-
-    ".EXTENSIONS.\n"
-    "__kernel void test_step_var(__global .TYPE..NUM. *source, __global int "
-    "*dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = vec_step(source[tid]);\n"
-    "\n"
-    "}\n",
-
-    ".EXTENSIONS.\n"
-    " typedef .TYPE..NUM. TypeToTest;\n"
-    "__kernel void test_step_typedef_type(__global TypeToTest *source, "
-    "__global int *dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = vec_step(TypeToTest);\n"
-    "\n"
-    "}\n",
-
-    ".EXTENSIONS.\n"
-    " typedef .TYPE..NUM. TypeToTest;\n"
-    "__kernel void test_step_typedef_var(__global TypeToTest *source, __global "
-    "int *dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = vec_step(source[tid]);\n"
-    "\n"
-    "}\n",
-};
-
-/*
- test_step_type,
- test_step_var,
- test_step_typedef_type,
- test_step_typedef_var,
- */
-
-int test_step_type(cl_device_id deviceID, cl_context context,
-                   cl_command_queue queue, int num_elements)
-{
-    return test_step_internal(deviceID, context, queue, patterns[0],
-                              "test_step_type");
-}
-
-int test_step_var(cl_device_id deviceID, cl_context context,
-                  cl_command_queue queue, int num_elements)
-{
-    return test_step_internal(deviceID, context, queue, patterns[1],
-                              "test_step_var");
-}
-
-int test_step_typedef_type(cl_device_id deviceID, cl_context context,
-                           cl_command_queue queue, int num_elements)
-{
-    return test_step_internal(deviceID, context, queue, patterns[2],
-                              "test_step_typedef_type");
-}
-
-int test_step_typedef_var(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements)
-{
-    return test_step_internal(deviceID, context, queue, patterns[3],
-                              "test_step_typedef_var");
-}

diff --git a/test_conformance/vectors/test_vec_align.cpp b/test_conformance/vectors/test_vec_align.cpp
deleted file mode 100644
index 2f392f5..0000000
--- a/test_conformance/vectors/test_vec_align.cpp
+++ /dev/null

@@ -1,540 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-
-
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-#include "harness/testHarness.h"
-
-#include "structs.h"
-
-#include "defines.h"
-
-#include "type_replacer.h"
-
-
-size_t get_align(size_t vecSize)
-{
-    if (vecSize == 3)
-    {
-        return 4;
-    }
-    return vecSize;
-}
-
-/* // Lots of conditionals means this is not gonna be an optimal min on intel.
- */
-/* // That's okay, make sure we only call a few times per test, not for every */
-/* // element */
-/* size_t min_of_nonzero(size_t a, size_t b) */
-/* { */
-/*     if(a != 0 && (a<=b || b==0)) */
-/*     { */
-/*     return a; */
-/*     } */
-/*     if(b != 0 && (b<a || a==0)) */
-/*     { */
-/*     return b; */
-/*     } */
-/*     return 0; */
-/* } */
-
-
-/* size_t get_min_packed_alignment(size_t preSize, size_t typeMultiplePreSize,
- */
-/*                 size_t postSize, size_t typeMultiplePostSize, */
-/*                 ExplicitType kType, size_t vecSize) */
-/* { */
-/*     size_t pre_min = min_of_nonzero(preSize,  */
-/*                     typeMultiplePreSize* */
-/*                     get_explicit_type_size(kType)); */
-/*     size_t post_min = min_of_nonzero(postSize,  */
-/*                     typeMultiplePostSize* */
-/*                     get_explicit_type_size(kType)); */
-/*     size_t struct_min = min_of_nonzero(pre_min, post_min); */
-/*     size_t result =  min_of_nonzero(struct_min, get_align(vecSize) */
-/*                     *get_explicit_type_size(kType)); */
-/*     return result; */
-
-/* } */
-
-
-int test_vec_internal(cl_device_id deviceID, cl_context context,
-                      cl_command_queue queue, const char* pattern,
-                      const char* testName, size_t bufSize, size_t preSize,
-                      size_t typeMultiplePreSize, size_t postSize,
-                      size_t typeMultiplePostSize)
-{
-    int err;
-    int typeIdx, vecSizeIdx;
-
-    char tmpBuffer[2048];
-    char srcBuffer[2048];
-
-    size_t preSizeBytes, postSizeBytes, typeSize, totSize;
-
-    clState* pClState = newClState(deviceID, context, queue);
-    bufferStruct* pBuffers = newBufferStruct(
-        bufSize, bufSize * sizeof(cl_uint) / sizeof(cl_char), pClState);
-
-    if (pBuffers == NULL)
-    {
-        destroyClState(pClState);
-        vlog_error("%s : Could not create buffer\n", testName);
-        return -1;
-    }
-
-    for (typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx)
-    {
-
-        // Skip doubles if it is not supported otherwise enable pragma
-        if (types[typeIdx] == kDouble)
-        {
-            if (!is_extension_available(deviceID, "cl_khr_fp64"))
-            {
-                continue;
-            }
-            else
-            {
-                doReplace(tmpBuffer, 2048, pattern, ".PRAGMA.",
-                          "#pragma OPENCL EXTENSION cl_khr_fp64: ", ".STATE.",
-                          "enable");
-            }
-        }
-        else
-        {
-            if (types[typeIdx] == kLong || types[typeIdx] == kULong)
-            {
-                if (gIsEmbedded) continue;
-            }
-
-            doReplace(tmpBuffer, 2048, pattern, ".PRAGMA.", " ", ".STATE.",
-                      " ");
-        }
-
-        typeSize = get_explicit_type_size(types[typeIdx]);
-        preSizeBytes = preSize + typeSize * typeMultiplePreSize;
-        postSizeBytes = postSize + typeSize * typeMultiplePostSize;
-
-
-        for (vecSizeIdx = 1; vecSizeIdx < NUM_VECTOR_SIZES; ++vecSizeIdx)
-        {
-
-            totSize = preSizeBytes + postSizeBytes
-                + typeSize * get_align(g_arrVecSizes[vecSizeIdx]);
-
-            doReplace(srcBuffer, 2048, tmpBuffer, ".TYPE.",
-                      g_arrTypeNames[typeIdx], ".NUM.",
-                      g_arrVecSizeNames[vecSizeIdx]);
-
-            if (srcBuffer[0] == '\0')
-            {
-                vlog_error("%s: failed to fill source buf for type %s%s\n",
-                           testName, g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-
-            // log_info("Buffer is \"\n%s\n\"\n", srcBuffer);
-            // fflush(stdout);
-
-            err = clStateMakeProgram(pClState, srcBuffer, testName);
-            if (err)
-            {
-                vlog_error("%s: Error compiling \"\n%s\n\"", testName,
-                           srcBuffer);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-
-            err = pushArgs(pBuffers, pClState);
-            if (err != 0)
-            {
-                vlog_error("%s: failed to push args %s%s\n", testName,
-                           g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-
-            // log_info("About to Run kernel\n"); fflush(stdout);
-            // now we run the kernel
-            err = runKernel(
-                pClState,
-                bufSize
-                    / (g_arrVecSizes[vecSizeIdx] * g_arrTypeSizes[typeIdx]));
-            if (err != 0)
-            {
-                vlog_error("%s: runKernel fail (%ld threads) %s%s\n", testName,
-                           pClState->m_numThreads, g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-
-            // log_info("About to retrieve results\n"); fflush(stdout);
-            err = retrieveResults(pBuffers, pClState);
-            if (err != 0)
-            {
-                vlog_error("%s: failed to retrieve results %s%s\n", testName,
-                           g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-
-
-            if (preSizeBytes + postSizeBytes == 0)
-            {
-                // log_info("About to Check Correctness\n"); fflush(stdout);
-                err = checkCorrectnessAlign(pBuffers, pClState,
-                                            get_align(g_arrVecSizes[vecSizeIdx])
-                                                * typeSize);
-            }
-            else
-            {
-                // we're checking for an aligned struct
-                err = checkPackedCorrectness(pBuffers, pClState, totSize,
-                                             preSizeBytes);
-            }
-
-            if (err != 0)
-            {
-                vlog_error("%s: incorrect results %s%s\n", testName,
-                           g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                vlog_error("%s: Source was \"\n%s\n\"", testName, srcBuffer);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-
-            clStateDestroyProgramAndKernel(pClState);
-        }
-    }
-
-    destroyBufferStruct(pBuffers, pClState);
-
-    destroyClState(pClState);
-
-
-    // vlog_error("%s : implementation incomplete : FAIL\n", testName);
-    return 0; // -1; // fails on account of not being written.
-}
-
-
-static const char* patterns[] = {
-    ".PRAGMA..STATE.\n"
-    "__kernel void test_vec_align_array(.SRC_SCOPE. .TYPE..NUM. *source, "
-    ".DST_SCOPE. uint *dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)(source+tid));\n"
-    "}\n",
-    ".PRAGMA..STATE.\n"
-    "typedef struct myUnpackedStruct { \n"
-    ".PRE."
-    "    .TYPE..NUM. vec;\n"
-    ".POST."
-    "} testStruct;\n"
-    "__kernel void test_vec_align_struct(__constant .TYPE..NUM. *source, "
-    ".DST_SCOPE. uint *dest)\n"
-    "{\n"
-    "    .SRC_SCOPE. testStruct test;\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec));\n"
-    "}\n",
-    ".PRAGMA..STATE.\n"
-    "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
-    ".PRE."
-    "    .TYPE..NUM. vec;\n"
-    ".POST."
-    "} testStruct;\n"
-    "__kernel void test_vec_align_packed_struct(__constant .TYPE..NUM. "
-    "*source, .DST_SCOPE. uint *dest)\n"
-    "{\n"
-    "    .SRC_SCOPE. testStruct test;\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec) - (.SRC_SCOPE. "
-    "uchar *)&test);\n"
-    "}\n",
-    ".PRAGMA..STATE.\n"
-    "typedef struct myStruct { \n"
-    ".PRE."
-    "    .TYPE..NUM. vec;\n"
-    ".POST."
-    "} testStruct;\n"
-    "__kernel void test_vec_align_struct_arr(.SRC_SCOPE. testStruct *source, "
-    ".DST_SCOPE. uint *dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec));\n"
-    "}\n",
-    ".PRAGMA..STATE.\n"
-    "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
-    ".PRE."
-    "    .TYPE..NUM. vec;\n"
-    ".POST."
-    "} testStruct;\n"
-    "__kernel void test_vec_align_packed_struct_arr(.SRC_SCOPE.  testStruct "
-    "*source, .DST_SCOPE. uint *dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec) - "
-    "(.SRC_SCOPE. uchar *)&(source[0]));\n"
-    "}\n",
-    // __attribute__ ((packed))
-};
-
-
-const char* pre_substitution_arr[] = { "",
-                                       "char c;\n",
-                                       "short3 s;",
-                                       ".TYPE.3 tPre;\n",
-                                       ".TYPE. arrPre[5];\n",
-                                       ".TYPE. arrPre[12];\n",
-                                       NULL };
-
-
-// alignments of everything in pre_substitution_arr as raw alignments
-// 0 if such a thing is meaningless
-size_t pre_align_arr[] = { 0,
-                           sizeof(cl_char),
-                           4 * sizeof(cl_short),
-                           0, // taken care of in type_multiple_pre_align_arr
-                           0,
-                           0 };
-
-// alignments of everything in pre_substitution_arr as multiples of
-// sizeof(.TYPE.)
-// 0 if such a thing is meaningless
-size_t type_multiple_pre_align_arr[] = { 0, 0, 0, 4, 5, 12 };
-
-const char* post_substitution_arr[] = { "",
-                                        "char cPost;\n",
-                                        ".TYPE. arrPost[3];\n",
-                                        ".TYPE. arrPost[5];\n",
-                                        ".TYPE.3 arrPost;\n",
-                                        ".TYPE. arrPost[12];\n",
-                                        NULL };
-
-
-// alignments of everything in post_substitution_arr as raw alignments
-// 0 if such a thing is meaningless
-size_t post_align_arr[] = { 0, sizeof(cl_char),
-                            0, // taken care of in type_multiple_post_align_arr
-                            0, 0,
-                            0 };
-
-// alignments of everything in post_substitution_arr as multiples of
-// sizeof(.TYPE.)
-// 0 if such a thing is meaningless
-size_t type_multiple_post_align_arr[] = { 0, 0, 3, 5, 4, 12 };
-
-// there hsould be a packed version of this?
-int test_vec_align_array(cl_device_id deviceID, cl_context context,
-                         cl_command_queue queue, int num_elements)
-{
-    char tmp[2048];
-    int result;
-
-    log_info("Testing global\n");
-    doReplace(tmp, (size_t)2048, patterns[0], ".SRC_SCOPE.", "__global",
-              ".DST_SCOPE.", "__global"); //
-    result = test_vec_internal(deviceID, context, queue, tmp,
-                               "test_vec_align_array", BUFFER_SIZE, 0, 0, 0, 0);
-    return result;
-}
-
-
-int test_vec_align_struct(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements)
-{
-    char tmp1[2048], tmp2[2048];
-    int result = 0;
-    int preIdx, postIdx;
-
-    log_info("testing __private\n");
-    doReplace(tmp2, (size_t)2048, patterns[1], ".SRC_SCOPE.", "__private",
-              ".DST_SCOPE.", "__global"); //
-
-    for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
-    {
-        for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
-        {
-            doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
-                      pre_substitution_arr[preIdx], ".POST.",
-                      post_substitution_arr[postIdx]);
-
-            result =
-                test_vec_internal(deviceID, context, queue, tmp1,
-                                  "test_vec_align_struct", 512, 0, 0, 0, 0);
-            if (result != 0)
-            {
-                return result;
-            }
-        }
-    }
-
-    log_info("testing __local\n");
-    doReplace(tmp2, (size_t)2048, patterns[1], ".SRC_SCOPE.", "__local",
-              ".DST_SCOPE.", "__global"); //
-
-    for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
-    {
-        for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
-        {
-            doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
-                      pre_substitution_arr[preIdx], ".POST.",
-                      post_substitution_arr[postIdx]);
-
-            result =
-                test_vec_internal(deviceID, context, queue, tmp1,
-                                  "test_vec_align_struct", 512, 0, 0, 0, 0);
-            if (result != 0)
-            {
-                return result;
-            }
-        }
-    }
-    return 0;
-}
-
-int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements)
-{
-    char tmp1[2048], tmp2[2048];
-    int result = 0;
-    int preIdx, postIdx;
-
-
-    log_info("Testing __private\n");
-    doReplace(tmp2, (size_t)2048, patterns[2], ".SRC_SCOPE.", "__private",
-              ".DST_SCOPE.", "__global"); //
-
-    for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
-    {
-        for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
-        {
-            doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
-                      pre_substitution_arr[preIdx], ".POST.",
-                      post_substitution_arr[postIdx]);
-
-            result = test_vec_internal(
-                deviceID, context, queue, tmp1, "test_vec_align_packed_struct",
-                512, pre_align_arr[preIdx], type_multiple_pre_align_arr[preIdx],
-                post_align_arr[postIdx], type_multiple_post_align_arr[postIdx]);
-            if (result != 0)
-            {
-                return result;
-            }
-        }
-    }
-
-    log_info("testing __local\n");
-    doReplace(tmp2, (size_t)2048, patterns[2], ".SRC_SCOPE.", "__local",
-              ".DST_SCOPE.", "__global"); //
-
-    for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
-    {
-        for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
-        {
-            doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
-                      pre_substitution_arr[preIdx], ".POST.",
-                      post_substitution_arr[postIdx]);
-
-            result = test_vec_internal(
-                deviceID, context, queue, tmp1, "test_vec_align_packed_struct",
-                512, pre_align_arr[preIdx], type_multiple_pre_align_arr[preIdx],
-                post_align_arr[postIdx], type_multiple_post_align_arr[postIdx]);
-            if (result != 0)
-            {
-                return result;
-            }
-        }
-    }
-    return 0;
-}
-
-int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements)
-{
-    char tmp1[2048], tmp2[2048];
-    int result = 0;
-    int preIdx, postIdx;
-
-
-    log_info("testing __global\n");
-    doReplace(tmp2, (size_t)2048, patterns[3], ".SRC_SCOPE.", "__global",
-              ".DST_SCOPE.", "__global"); //
-
-    for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
-    {
-        for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
-        {
-            doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
-                      pre_substitution_arr[preIdx], ".POST.",
-                      post_substitution_arr[postIdx]);
-
-            result = test_vec_internal(deviceID, context, queue, tmp1,
-                                       "test_vec_align_struct_arr", BUFFER_SIZE,
-                                       0, 0, 0, 0);
-            if (result != 0)
-            {
-                return result;
-            }
-        }
-    }
-    return 0;
-}
-
-int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements)
-{
-    char tmp1[2048], tmp2[2048];
-    int result = 0;
-    int preIdx, postIdx;
-
-
-    log_info("Testing __global\n");
-    doReplace(tmp2, (size_t)2048, patterns[4], ".SRC_SCOPE.", "__global",
-              ".DST_SCOPE.", "__global"); //
-
-    for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
-    {
-        for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
-        {
-            doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
-                      pre_substitution_arr[preIdx], ".POST.",
-                      post_substitution_arr[postIdx]);
-
-            result = test_vec_internal(
-                deviceID, context, queue, tmp1,
-                "test_vec_align_packed_struct_arr", BUFFER_SIZE,
-                pre_align_arr[preIdx], type_multiple_pre_align_arr[preIdx],
-                post_align_arr[postIdx], type_multiple_post_align_arr[postIdx]);
-            if (result != 0) return result;
-        }
-    }
-    return 0;
-}

diff --git a/test_conformance/vectors/type_replacer.cpp b/test_conformance/vectors/type_replacer.cpp
deleted file mode 100644
index 39c6194..0000000
--- a/test_conformance/vectors/type_replacer.cpp
+++ /dev/null

@@ -1,132 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <string.h>
-#if !defined(_MSC_VER)
-#include <stdint.h>
-#endif // !_MSC_VER
-
-size_t doReplace(char* dest, size_t destLength, const char* source,
-                 const char* stringToReplace1, const char* replaceWith1,
-                 const char* stringToReplace2, const char* replaceWith2)
-{
-    size_t copyCount = 0;
-    const char* sourcePtr = source;
-    char* destPtr = dest;
-    const char* ptr1;
-    const char* ptr2;
-    size_t nJump;
-    size_t len1, len2;
-    size_t lenReplace1, lenReplace2;
-    len1 = strlen(stringToReplace1);
-    len2 = strlen(stringToReplace2);
-    lenReplace1 = strlen(replaceWith1);
-    lenReplace2 = strlen(replaceWith2);
-    for (; copyCount < destLength && *sourcePtr;)
-    {
-        ptr1 = strstr(sourcePtr, stringToReplace1);
-        ptr2 = strstr(sourcePtr, stringToReplace2);
-        if (ptr1 != NULL && (ptr2 == NULL || ptr2 > ptr1))
-        {
-            nJump = ptr1 - sourcePtr;
-            if (((uintptr_t)ptr1 - (uintptr_t)sourcePtr)
-                > destLength - copyCount)
-            {
-                return -1;
-            }
-            copyCount += nJump;
-            strncpy(destPtr, sourcePtr, nJump);
-            destPtr += nJump;
-            sourcePtr += nJump + len1;
-            strcpy(destPtr, replaceWith1);
-            destPtr += lenReplace1;
-        }
-        else if (ptr2 != NULL && (ptr1 == NULL || ptr1 >= ptr2))
-        {
-            nJump = ptr2 - sourcePtr;
-            if (nJump > destLength - copyCount)
-            {
-                return -2;
-            }
-            copyCount += nJump;
-            strncpy(destPtr, sourcePtr, nJump);
-            destPtr += nJump;
-            sourcePtr += nJump + len2;
-            strcpy(destPtr, replaceWith2);
-            destPtr += lenReplace2;
-        }
-        else
-        {
-            nJump = strlen(sourcePtr);
-            if (nJump > destLength - copyCount)
-            {
-                return -3;
-            }
-            copyCount += nJump;
-            strcpy(destPtr, sourcePtr);
-            destPtr += nJump;
-            sourcePtr += nJump;
-        }
-    }
-    *destPtr = '\0';
-    return copyCount;
-}
-
-size_t doSingleReplace(char* dest, size_t destLength, const char* source,
-                       const char* stringToReplace, const char* replaceWith)
-{
-    size_t copyCount = 0;
-    const char* sourcePtr = source;
-    char* destPtr = dest;
-    const char* ptr;
-    size_t nJump;
-    size_t len;
-    size_t lenReplace;
-    len = strlen(stringToReplace);
-    lenReplace = strlen(replaceWith);
-    for (; copyCount < destLength && *sourcePtr;)
-    {
-        ptr = strstr(sourcePtr, stringToReplace);
-        if (ptr != NULL)
-        {
-            nJump = ptr - sourcePtr;
-            if (((uintptr_t)ptr - (uintptr_t)sourcePtr)
-                > destLength - copyCount)
-            {
-                return -1;
-            }
-            copyCount += nJump;
-            strncpy(destPtr, sourcePtr, nJump);
-            destPtr += nJump;
-            sourcePtr += nJump + len;
-            strcpy(destPtr, replaceWith);
-            destPtr += lenReplace;
-        }
-        else
-        {
-            nJump = strlen(sourcePtr);
-            if (nJump > destLength - copyCount)
-            {
-                return -3;
-            }
-            copyCount += nJump;
-            strcpy(destPtr, sourcePtr);
-            destPtr += nJump;
-            sourcePtr += nJump;
-        }
-    }
-    *destPtr = '\0';
-    return copyCount;
-}

diff --git a/test_conformance/vectors/type_replacer.h b/test_conformance/vectors/type_replacer.h
deleted file mode 100644
index d7eb7ef..0000000
--- a/test_conformance/vectors/type_replacer.h
+++ /dev/null

@@ -1,23 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdlib.h>
-
-size_t doReplace(char* dest, size_t destLength, const char* source,
-                 const char* stringToReplace1, const char* replaceWith1,
-                 const char* stringToReplace2, const char* replaceWith2);
-
-size_t doSingleReplace(char* dest, size_t destLength, const char* source,
-                       const char* stringToReplace, const char* replaceWith);

diff --git a/test_conformance/workgroups/main.cpp b/test_conformance/workgroups/main.cpp
index 41ffa74..1b47420 100644
--- a/test_conformance/workgroups/main.cpp
+++ b/test_conformance/workgroups/main.cpp

@@ -47,9 +47,7 @@
     auto expected_min_version = Version(2, 0);
     if (version < expected_min_version)
     {
-        version_expected_info("Test", "OpenCL",
-                              expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
+        version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
         return TEST_SKIP;
     }
 

diff --git a/test_conformance/workgroups/test_wg_all.cpp b/test_conformance/workgroups/test_wg_all.cpp
index ccf17b6..33ebe99 100644
--- a/test_conformance/workgroups/test_wg_all.cpp
+++ b/test_conformance/workgroups/test_wg_all.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -79,8 +79,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_all_kernel_code, "test_wg_all");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_all_kernel_code, "test_wg_all", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -92,17 +91,14 @@
 
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * (num_elements+1));
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * (num_elements+1));
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_float) * (num_elements + 1), NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * (num_elements+1), NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/workgroups/test_wg_any.cpp b/test_conformance/workgroups/test_wg_any.cpp
index 4785ad5..cd1ebff 100644
--- a/test_conformance/workgroups/test_wg_any.cpp
+++ b/test_conformance/workgroups/test_wg_any.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -79,8 +79,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_any_kernel_code, "test_wg_any");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_any_kernel_code, "test_wg_any", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -92,17 +91,14 @@
 
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * (num_elements+1));
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * (num_elements+1));
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_float) * (num_elements + 1), NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * (num_elements+1), NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/workgroups/test_wg_broadcast.cpp b/test_conformance/workgroups/test_wg_broadcast.cpp
index 3555947..df4263b 100644
--- a/test_conformance/workgroups/test_wg_broadcast.cpp
+++ b/test_conformance/workgroups/test_wg_broadcast.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -174,9 +174,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_broadcast_1D_kernel_code,
-                                      "test_wg_broadcast_1D");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_broadcast_1D_kernel_code, "test_wg_broadcast_1D", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -188,16 +186,14 @@
 
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -283,9 +279,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_broadcast_2D_kernel_code,
-                                      "test_wg_broadcast_2D");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_broadcast_2D_kernel_code, "test_wg_broadcast_2D", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -317,16 +311,14 @@
 
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -410,9 +402,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_broadcast_3D_kernel_code,
-                                      "test_wg_broadcast_3D");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_broadcast_3D_kernel_code, "test_wg_broadcast_3D", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -445,16 +435,14 @@
 
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/workgroups/test_wg_reduce.cpp b/test_conformance/workgroups/test_wg_reduce.cpp
index eb26f49..92a5165 100644
--- a/test_conformance/workgroups/test_wg_reduce.cpp
+++ b/test_conformance/workgroups/test_wg_reduce.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -176,9 +176,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_add_kernel_code_int,
-                                      "test_wg_reduce_add_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_int, "test_wg_reduce_add_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -190,16 +188,14 @@
 
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -281,9 +277,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_add_kernel_code_uint,
-                                      "test_wg_reduce_add_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_uint, "test_wg_reduce_add_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -295,16 +289,14 @@
 
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -385,9 +377,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_add_kernel_code_long,
-                                      "test_wg_reduce_add_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_long, "test_wg_reduce_add_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -399,16 +389,14 @@
 
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -490,9 +478,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_add_kernel_code_ulong,
-                                      "test_wg_reduce_add_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_ulong, "test_wg_reduce_add_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -504,16 +490,14 @@
 
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/workgroups/test_wg_reduce_max.cpp b/test_conformance/workgroups/test_wg_reduce_max.cpp
index 3bbd3f2..7f37b5a 100644
--- a/test_conformance/workgroups/test_wg_reduce_max.cpp
+++ b/test_conformance/workgroups/test_wg_reduce_max.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -177,9 +177,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_max_kernel_code_int,
-                                      "test_wg_reduce_max_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_max_kernel_code_int, "test_wg_reduce_max_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -199,16 +197,14 @@
 
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -291,9 +287,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_max_kernel_code_uint,
-                                      "test_wg_reduce_max_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_max_kernel_code_uint, "test_wg_reduce_max_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -313,16 +307,14 @@
 
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -404,9 +396,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_max_kernel_code_long,
-                                      "test_wg_reduce_max_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_max_kernel_code_long, "test_wg_reduce_max_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -426,16 +416,14 @@
 
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -518,9 +506,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_max_kernel_code_ulong,
-                                      "test_wg_reduce_max_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_max_kernel_code_ulong, "test_wg_reduce_max_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -540,16 +526,14 @@
 
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/workgroups/test_wg_reduce_min.cpp b/test_conformance/workgroups/test_wg_reduce_min.cpp
index 7b1b22e..9d929c8 100644
--- a/test_conformance/workgroups/test_wg_reduce_min.cpp
+++ b/test_conformance/workgroups/test_wg_reduce_min.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -177,9 +177,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_min_kernel_code_int,
-                                      "test_wg_reduce_min_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_int, "test_wg_reduce_min_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -199,16 +197,14 @@
 
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -291,9 +287,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_min_kernel_code_uint,
-                                      "test_wg_reduce_min_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_uint, "test_wg_reduce_min_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -313,16 +307,14 @@
 
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -404,9 +396,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_min_kernel_code_long,
-                                      "test_wg_reduce_min_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_long, "test_wg_reduce_min_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -426,16 +416,14 @@
 
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -518,9 +506,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_min_kernel_code_ulong,
-                                      "test_wg_reduce_min_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_ulong, "test_wg_reduce_min_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -540,16 +526,14 @@
 
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp
index e695a16..40c50c8 100644
--- a/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp
+++ b/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -184,9 +184,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_add_kernel_code_int,
-                                      "test_wg_scan_exclusive_add_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_add_kernel_code_int, "test_wg_scan_exclusive_add_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -198,16 +196,14 @@
 
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -289,9 +285,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_add_kernel_code_uint,
-                                      "test_wg_scan_exclusive_add_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_add_kernel_code_uint, "test_wg_scan_exclusive_add_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -303,16 +297,14 @@
 
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -393,9 +385,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_add_kernel_code_long,
-                                      "test_wg_scan_exclusive_add_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_add_kernel_code_long, "test_wg_scan_exclusive_add_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -407,16 +397,14 @@
 
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -498,9 +486,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_add_kernel_code_ulong,
-                                      "test_wg_scan_exclusive_add_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_add_kernel_code_ulong, "test_wg_scan_exclusive_add_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -512,16 +498,14 @@
 
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp
index 12338b6..7f37acd 100644
--- a/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp
+++ b/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -176,9 +176,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_max_kernel_code_int,
-                                      "test_wg_scan_exclusive_max_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_int, "test_wg_scan_exclusive_max_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -198,16 +196,14 @@
 
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -290,9 +286,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_max_kernel_code_uint,
-                                      "test_wg_scan_exclusive_max_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_uint, "test_wg_scan_exclusive_max_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -312,16 +306,14 @@
 
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -403,9 +395,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_max_kernel_code_long,
-                                      "test_wg_scan_exclusive_max_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_long, "test_wg_scan_exclusive_max_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -425,16 +415,14 @@
 
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -517,9 +505,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_max_kernel_code_ulong,
-                                      "test_wg_scan_exclusive_max_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_ulong, "test_wg_scan_exclusive_max_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -539,16 +525,14 @@
 
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp
index f4e6bf9..6111053 100644
--- a/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp
+++ b/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -177,9 +177,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_min_kernel_code_int,
-                                      "test_wg_scan_exclusive_min_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_int, "test_wg_scan_exclusive_min_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -199,16 +197,14 @@
 
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -291,9 +287,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_min_kernel_code_uint,
-                                      "test_wg_scan_exclusive_min_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_uint, "test_wg_scan_exclusive_min_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -313,16 +307,14 @@
 
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -404,9 +396,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_min_kernel_code_long,
-                                      "test_wg_scan_exclusive_min_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_long, "test_wg_scan_exclusive_min_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -426,16 +416,14 @@
 
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -518,9 +506,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_min_kernel_code_ulong,
-                                      "test_wg_scan_exclusive_min_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_ulong, "test_wg_scan_exclusive_min_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -540,16 +526,14 @@
 
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp
index 51c98a4..9546794 100644
--- a/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp
+++ b/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -173,9 +173,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_add_kernel_code_int,
-                                      "test_wg_scan_inclusive_add_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_add_kernel_code_int, "test_wg_scan_inclusive_add_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -187,16 +185,14 @@
 
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -278,9 +274,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_add_kernel_code_uint,
-                                      "test_wg_scan_inclusive_add_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_add_kernel_code_uint, "test_wg_scan_inclusive_add_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -292,16 +286,14 @@
 
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -382,9 +374,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_add_kernel_code_long,
-                                      "test_wg_scan_inclusive_add_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_add_kernel_code_long, "test_wg_scan_inclusive_add_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -396,16 +386,14 @@
 
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -487,9 +475,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_add_kernel_code_ulong,
-                                      "test_wg_scan_inclusive_add_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_add_kernel_code_ulong, "test_wg_scan_inclusive_add_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -501,16 +487,14 @@
 
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp
index 44ebf80..23d518c 100644
--- a/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp
+++ b/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -175,9 +175,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_max_kernel_code_int,
-                                      "test_wg_scan_inclusive_max_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_int, "test_wg_scan_inclusive_max_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -189,16 +187,14 @@
 
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -280,9 +276,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_max_kernel_code_uint,
-                                      "test_wg_scan_inclusive_max_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_uint, "test_wg_scan_inclusive_max_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -294,16 +288,14 @@
 
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -384,9 +376,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_max_kernel_code_long,
-                                      "test_wg_scan_inclusive_max_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_long, "test_wg_scan_inclusive_max_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -398,16 +388,14 @@
 
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -489,9 +477,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_max_kernel_code_ulong,
-                                      "test_wg_scan_inclusive_max_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_ulong, "test_wg_scan_inclusive_max_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -503,16 +489,14 @@
 
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp
index f2f0578..f4c788f 100644
--- a/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp
+++ b/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp

@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-//
+// 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -175,9 +175,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_min_kernel_code_int,
-                                      "test_wg_scan_inclusive_min_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_min_kernel_code_int, "test_wg_scan_inclusive_min_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -189,16 +187,14 @@
 
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -280,9 +276,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_min_kernel_code_uint,
-                                      "test_wg_scan_inclusive_min_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_min_kernel_code_uint, "test_wg_scan_inclusive_min_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -294,16 +288,14 @@
 
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -384,9 +376,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_min_kernel_code_long,
-                                      "test_wg_scan_inclusive_min_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_min_kernel_code_long, "test_wg_scan_inclusive_min_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -398,16 +388,14 @@
 
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");
@@ -489,9 +477,7 @@
     int          i;
     MTdata       d;
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_min_kernel_code_ulong,
-                                      "test_wg_scan_inclusive_min_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_min_kernel_code_ulong, "test_wg_scan_inclusive_min_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
 
@@ -503,16 +489,14 @@
 
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
     {
         log_error("clCreateBuffer failed\n");
         return -1;
     }
 
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
     {
         log_error("clCreateBuffer failed\n");

diff --git a/test_extensions/CMakeLists.txt b/test_extensions/CMakeLists.txt
new file mode 100644
index 0000000..3c48e18
--- /dev/null
+++ b/test_extensions/CMakeLists.txt

@@ -0,0 +1,2 @@
+# Name of the test harness library.
+# NOTE(review): presumably consumed by the per-module CMake files - confirm.
+set(HARNESS_LIB harness)
+# Pull in the media surface sharing extension tests.
+add_subdirectory( media_sharing )

diff --git a/test_extensions/media_sharing/CMakeLists.txt b/test_extensions/media_sharing/CMakeLists.txt
new file mode 100644
index 0000000..9fdde1c
--- /dev/null
+++ b/test_extensions/media_sharing/CMakeLists.txt

@@ -0,0 +1,25 @@
+# The media surface sharing tests link against the Direct3D 9 and DXVA2
+# libraries, so the module is only configured on Windows.
+if(WIN32)
+list(APPEND CLConform_LIBRARIES d3d9.lib dxva2.lib )
+set(MODULE_NAME MEDIA_SURFACE_SHARING)
+
+set(${MODULE_NAME}_SOURCES
+        main.cpp
+        wrappers.cpp
+        utils.cpp
+        test_create_context.cpp
+        test_functions_api.cpp
+        test_functions_kernel.cpp
+        test_get_device_ids.cpp
+        test_interop_sync.cpp
+        test_memory_access.cpp
+        test_other_data_types.cpp
+    )
+
+# Expand the list variable itself: the bare "${MODULE_NAME}_SOURCES" only
+# yields the variable's name, which would be treated as a single file name.
+set_source_files_properties(
+        ${${MODULE_NAME}_SOURCES}
+        PROPERTIES LANGUAGE CXX)
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+include(../../test_conformance/CMakeCommon.txt)
+endif(WIN32)

diff --git a/test_extensions/media_sharing/main.cpp b/test_extensions/media_sharing/main.cpp
new file mode 100644
index 0000000..98b766a
--- /dev/null
+++ b/test_extensions/media_sharing/main.cpp

@@ -0,0 +1,204 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "harness/testHarness.h"
+#include "utils.h"
+#include "procs.h"
+
+
+// Tests registered with the harness; the list is handed to runTestHarness()
+// from main().
+test_definition test_list[] = {
+ADD_TEST( context_create ),
+ADD_TEST( get_device_ids ),
+ADD_TEST( api ),
+ADD_TEST( kernel ),
+ADD_TEST( other_data_types ),
+ADD_TEST( memory_access ),
+ADD_TEST( interop_user_sync )
+};
+
+// Number of entries in test_list.
+const int test_num = ARRAY_SIZE(test_list);
+
+// Extension entry points; NULL until MediaSurfaceSharingExtensionInit()
+// resolves them for the detected platform.
+clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR = NULL;
+clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR = NULL;
+clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR = NULL;
+clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR = NULL;
+
+// Platform and device chosen by DetectPlatformAndDevice().
+cl_platform_id gPlatformIDdetected;
+cl_device_id gDeviceIDdetected;
+// Device type to search for; may be overridden by CmdlineParse().
+cl_device_type gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT;
+
+bool MediaSurfaceSharingExtensionInit()
+{
+  clGetDeviceIDsFromDX9MediaAdapterKHR = (clGetDeviceIDsFromDX9MediaAdapterKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clGetDeviceIDsFromDX9MediaAdapterKHR");
+  if (clGetDeviceIDsFromDX9MediaAdapterKHR == NULL)
+  {
+    log_error("clGetExtensionFunctionAddressForPlatform(clGetDeviceIDsFromDX9MediaAdapterKHR) returned NULL.\n");
+    return false;
+  }
+
+  clCreateFromDX9MediaSurfaceKHR = (clCreateFromDX9MediaSurfaceKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clCreateFromDX9MediaSurfaceKHR");
+  if (clCreateFromDX9MediaSurfaceKHR == NULL)
+  {
+    log_error("clGetExtensionFunctionAddressForPlatform(clCreateFromDX9MediaSurfaceKHR) returned NULL.\n");
+    return false;
+  }
+
+  clEnqueueAcquireDX9MediaSurfacesKHR = (clEnqueueAcquireDX9MediaSurfacesKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clEnqueueAcquireDX9MediaSurfacesKHR");
+  if (clEnqueueAcquireDX9MediaSurfacesKHR == NULL)
+  {
+    log_error("clGetExtensionFunctionAddressForPlatform(clEnqueueAcquireDX9MediaSurfacesKHR) returned NULL.\n");
+    return false;
+  }
+
+  clEnqueueReleaseDX9MediaSurfacesKHR = (clEnqueueReleaseDX9MediaSurfacesKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clEnqueueReleaseDX9MediaSurfacesKHR");
+  if (clEnqueueReleaseDX9MediaSurfacesKHR == NULL)
+  {
+    log_error("clGetExtensionFunctionAddressForPlatform(clEnqueueReleaseDX9MediaSurfacesKHR) returned NULL.\n");
+    return false;
+  }
+
+  return true;
+}
+
+// Searches the available platforms for a device of type gDeviceTypeSelected
+// that reports the cl_khr_dx9_media_sharing extension. The first match is
+// stored in gPlatformIDdetected / gDeviceIDdetected.
+// Returns true when a device was found; false when a platform/device query
+// fails or no device exposes the extension.
+bool DetectPlatformAndDevice()
+{
+  cl_uint platformsNum = 0;
+  cl_int error = clGetPlatformIDs(0, 0, &platformsNum);
+  if (error != CL_SUCCESS)
+  {
+    print_error(error, "clGetPlatformIDs failed\n");
+    return false;
+  }
+
+  // With no platforms there is nothing to search; this also keeps
+  // &platforms[0] below from indexing an empty vector.
+  if (platformsNum == 0)
+  {
+    log_info("Test was not run, because the media surface sharing extension is not supported for any devices.\n");
+    return false;
+  }
+
+  std::vector<cl_platform_id> platforms(platformsNum);
+  error = clGetPlatformIDs(platformsNum, &platforms[0], 0);
+  if (error != CL_SUCCESS)
+  {
+    print_error(error, "clGetPlatformIDs failed\n");
+    return false;
+  }
+
+  bool found = false;
+  // Stop at the first platform that yields a match, so a later platform can
+  // neither overwrite the selection nor turn a successful detection into a
+  // failure.
+  for (size_t i = 0; i < platformsNum && !found; ++i)
+  {
+    cl_uint devicesNum = 0;
+    error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, 0, 0, &devicesNum);
+    // A platform without devices of the selected type is not an error for
+    // the search; move on to the next platform.
+    if (error == CL_DEVICE_NOT_FOUND || (error == CL_SUCCESS && devicesNum == 0))
+      continue;
+
+    if (error != CL_SUCCESS)
+    {
+      print_error(error, "clGetDeviceIDs failed\n");
+      return false;
+    }
+
+    std::vector<cl_device_id> devices(devicesNum);
+    error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, devicesNum, &devices[0], 0);
+    if (error != CL_SUCCESS)
+    {
+      print_error(error, "clGetDeviceIDs failed\n");
+      return false;
+    }
+
+    for (size_t j = 0; j < devicesNum; ++j)
+    {
+      if (is_extension_available(devices[j], "cl_khr_dx9_media_sharing"))
+      {
+        gPlatformIDdetected = platforms[i];
+        gDeviceIDdetected = devices[j];
+        found = true;
+        break;
+      }
+    }
+  }
+
+  if (!found)
+  {
+    log_info("Test was not run, because the media surface sharing extension is not supported for any devices.\n");
+    return false;
+  }
+
+  return true;
+}
+
+// Maps a device type name (short form or CL_DEVICE_TYPE_* spelling) to the
+// matching cl_device_type. Returns false and leaves 'type' untouched when the
+// name is not recognised.
+static bool DeviceTypeFromName(const char *name, cl_device_type &type)
+{
+  if (strcmp(name, "gpu") == 0 || strcmp(name, "CL_DEVICE_TYPE_GPU") == 0)
+    type = CL_DEVICE_TYPE_GPU;
+  else if (strcmp(name, "cpu") == 0 || strcmp(name, "CL_DEVICE_TYPE_CPU") == 0)
+    type = CL_DEVICE_TYPE_CPU;
+  else if (strcmp(name, "accelerator") == 0 || strcmp(name, "CL_DEVICE_TYPE_ACCELERATOR") == 0)
+    type = CL_DEVICE_TYPE_ACCELERATOR;
+  else if (strcmp(name, "default") == 0 || strcmp(name, "CL_DEVICE_TYPE_DEFAULT") == 0)
+    type = CL_DEVICE_TYPE_DEFAULT;
+  else
+    return false;
+
+  return true;
+}
+
+// Chooses the device type and acceleration mode for the run.
+// The CL_DEVICE_TYPE environment variable is applied first; device type
+// names on the command line are applied afterwards and therefore win.
+// "sw" / "software" switches CDeviceWrapper to software acceleration.
+// The environment variable and the command line accept the same device type
+// names. Arguments that match nothing are ignored here.
+// Returns false only when CL_DEVICE_TYPE is set to an unknown value.
+bool CmdlineParse(int argc, const char *argv[])
+{
+  const char *env_mode = getenv( "CL_DEVICE_TYPE" );
+  if (env_mode != NULL && !DeviceTypeFromName(env_mode, gDeviceTypeSelected))
+  {
+    log_error("Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode);
+    return false;
+  }
+
+  for (int i = 0; i < argc; ++i)
+  {
+    if (DeviceTypeFromName(argv[i], gDeviceTypeSelected))
+      continue;
+
+    if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0)
+    {
+      CDeviceWrapper::AccelerationType(CDeviceWrapper::ACCELERATION_SW);
+    }
+  }
+
+  return true;
+}
+
+// Program entry point: applies the command line / environment options,
+// locates a device exposing the media surface sharing extension, resolves the
+// extension entry points and finally hands control to the test harness.
+int main(int argc, const char *argv[])
+{
+  const bool optionsAccepted = CmdlineParse(argc, argv);
+  if (!optionsAccepted)
+  {
+    return TEST_FAIL;
+  }
+
+  const bool deviceDetected = DetectPlatformAndDevice();
+  if (!deviceDetected)
+  {
+    // Detection returned false, so the run is reported as skipped.
+    log_info("Test was not run, because the media surface sharing extension is not supported\n");
+    return TEST_SKIP;
+  }
+
+  const bool entryPointsLoaded = MediaSurfaceSharingExtensionInit();
+  if (!entryPointsLoaded)
+  {
+    return TEST_FAIL;
+  }
+
+  return runTestHarness(argc, argv, test_num, test_list, false, true, 0);
+}

diff --git a/test_extensions/media_sharing/procs.h b/test_extensions/media_sharing/procs.h
new file mode 100644
index 0000000..6b57799
--- /dev/null
+++ b/test_extensions/media_sharing/procs.h

@@ -0,0 +1,31 @@
+//
+// Copyright (c) 2019 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+
+#ifndef __MEDIA_SHARING_PROCS_H__
+#define __MEDIA_SHARING_PROCS_H__
+
+
+// Entry points of the media sharing tests. All share the same signature:
+// (device, context, command queue, element count) and return an int status.
+// NOTE(review): this header uses the OpenCL handle types without including a
+// CL header itself, so it relies on the includer having done so - confirm.
+extern int test_context_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_memory_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+
+#endif    // #ifndef __MEDIA_SHARING_PROCS_H__ 
\ No newline at end of file

diff --git a/test_extensions/media_sharing/test_create_context.cpp b/test_extensions/media_sharing/test_create_context.cpp
new file mode 100644
index 0000000..5637bc5
--- /dev/null
+++ b/test_extensions/media_sharing/test_create_context.cpp

@@ -0,0 +1,318 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "utils.h"
+
+// Exercises OpenCL context creation with a DX9 media adapter.
+//
+// For every adapter enumerated by the device wrapper the function creates a
+// media surface filled with generated YUV data, creates an OpenCL context
+// with the function selected by 'functionCreate', wraps each surface plane in
+// an OpenCL image, acquires the images, reads them back and compares them
+// with the input, releases them and finally checks that the surface content
+// still matches the input.
+//
+// deviceID, context, queue and num_elements are not used by the body; the
+// test works on gPlatformIDdetected / gDeviceIDdetected and on the context
+// and queue it creates itself.
+//
+// width, height    surface size in pixels
+// functionCreate   clCreateContext or clCreateContextFromType variant
+// adapterType      media adapter type passed to the extension calls
+// surfaceFormat    surface pixel format under test
+// sharedHandle     whether the surface is created with a shared handle
+//
+// Returns the accumulated CResult value (result.Result()).
+int context_create(cl_device_id deviceID, cl_context context, cl_command_queue queue,
+                   int num_elements, unsigned int width, unsigned int height,
+                   TContextFuncType functionCreate, cl_dx9_media_adapter_type_khr adapterType,
+                   TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
+{
+  CResult result;
+
+  //create device
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+
+  //generate input data
+  std::vector<cl_uchar> bufferIn(width * height * 3 / 2, 0);
+  if(!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+
+  while (deviceWrapper->AdapterNext())
+  {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+
+    if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
+      return result.Result();
+    }
+
+    void *objectSharedHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surface;
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSharedHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+
+    cl_context_properties contextProperties[] = {
+      CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
+      AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
+      0,
+    };
+
+    clContextWrapper ctx;
+    switch(functionCreate)
+    {
+    case CONTEXT_CREATE_DEFAULT:
+      ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
+      break;
+    case CONTEXT_CREATE_FROM_TYPE:
+      ctx = clCreateContextFromType(&contextProperties[0], gDeviceTypeSelected, NULL, NULL, &error);
+      break;
+    default:
+      log_error("Unknown context creation function enum\n");
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+      break;
+    }
+
+    if (error != CL_SUCCESS)
+    {
+      std::string functionName;
+      FunctionContextCreateToString(functionCreate, functionName);
+      log_error("%s failed: %s\n", functionName.c_str(), IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height))
+    {
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+
+#if defined(_WIN32)
+    cl_dx9_surface_info_khr surfaceInfo;
+    surfaceInfo.resource = *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
+    surfaceInfo.shared_handle = objectSharedHandle;
+#else
+    void *surfaceInfo = 0;
+    return TEST_NOT_IMPLEMENTED;
+#endif
+
+    //one OpenCL image per surface plane
+    std::vector<cl_mem> memObjList;
+    unsigned int planesNum = PlanesNum(surfaceFormat);
+    std::vector<clMemWrapper> planesList(planesNum);
+    for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+    {
+      planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      memObjList.push_back(planesList[planeIdx]);
+    }
+
+    clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error );
+    if (error != CL_SUCCESS)
+    {
+      log_error("Unable to create command queue: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, objectSharedHandle))
+    {
+      log_error("Image info verification failed\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    cl_event event = NULL;
+    error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()),
+      &memObjList.at(0), 0, NULL, &event);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    cl_uint eventType = 0;
+    error = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), &eventType, NULL);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clGetEventInfo failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    if(eventType != CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR)
+    {
+      log_error("Invalid event != CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    clReleaseEvent(event);
+
+    //read every plane back into one contiguous frame buffer
+    size_t origin[3] = {0,0,0};
+    size_t offset = 0;
+    size_t frameSize = width * height * 3 / 2;
+    std::vector<cl_uchar> out( frameSize, 0 );
+    for (size_t i = 0; i < memObjList.size(); ++i)
+    {
+      //plane 0 is full size, the remaining planes are half size in each dimension
+      size_t planeWidth = (i == 0) ? width: width / 2;
+      size_t planeHeight = (i == 0) ? height: height / 2;
+      size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+
+      error = clEnqueueReadImage(cmdQueue, memObjList.at(i), CL_TRUE, origin, regionPlane, 0, 0, &out.at(offset), 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+
+      offset += planeWidth * planeHeight;
+    }
+
+    if (!YUVCompare(surfaceFormat, out, bufferIn, width, height))
+    {
+      log_error("OCL object verification failed - clEnqueueReadImage\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()),
+      &memObjList.at(0), 0, NULL, &event);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      //'event' still refers to the handle released after the acquire step,
+      //so it must not be queried or released again
+      return result.Result();
+    }
+
+    eventType = 0;
+    error = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), &eventType, NULL);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clGetEventInfo failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    if(eventType != CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR)
+    {
+      log_error("Invalid event != CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    clReleaseEvent(event);
+
+    //object verification
+    std::vector<cl_uchar> bufferOut(frameSize, 0);
+    if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height))
+    {
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height))
+    {
+      log_error("Media surface is different than expected\n");
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+  }
+
+  //the adapter loop ended; report why if the device wrapper did not finish cleanly
+  if (deviceWrapper->Status() != DEVICE_PASS)
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
+      log_error("%s init failed\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+  }
+
+  return result.Result();
+}
+
+// Runs context_create() on a 256x256 surface for every combination of adapter
+// type, context creation function, surface format and shared handle mode.
+// The D3D9 adapter is not run with an enabled shared handle.
+// Each failing combination is logged and recorded as TEST_FAIL; the
+// accumulated result is returned.
+int test_context_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+  const unsigned int WIDTH = 256;
+  const unsigned int HEIGHT = 256;
+
+  //adapter types and the shared handle mode are only populated on Windows
+  std::vector<cl_dx9_media_adapter_type_khr> adapterTypes;
+  std::vector<TSharedHandleType> sharedHandleTypes;
+  sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED);
+#if defined(_WIN32)
+  adapterTypes.push_back(CL_ADAPTER_D3D9_KHR);
+  adapterTypes.push_back(CL_ADAPTER_D3D9EX_KHR);
+  adapterTypes.push_back(CL_ADAPTER_DXVA_KHR);
+  sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED);
+#endif
+
+  std::vector<TContextFuncType> contextFuncs;
+  contextFuncs.push_back(CONTEXT_CREATE_DEFAULT);
+  contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE);
+
+  std::vector<TSurfaceFormat> formats;
+  formats.push_back(SURFACE_FORMAT_NV12);
+  formats.push_back(SURFACE_FORMAT_YV12);
+
+  CResult result;
+  for (size_t a = 0; a < adapterTypes.size(); ++a)
+  {
+    const cl_dx9_media_adapter_type_khr adapter = adapterTypes[a];
+
+    //every context creation function
+    for (size_t c = 0; c < contextFuncs.size(); ++c)
+    {
+      const TContextFuncType createFunc = contextFuncs[c];
+
+      //every surface format
+      for (size_t f = 0; f < formats.size(); ++f)
+      {
+        const TSurfaceFormat format = formats[f];
+
+        //with and without a shared handle
+        for (size_t h = 0; h < sharedHandleTypes.size(); ++h)
+        {
+          const TSharedHandleType handleMode = sharedHandleTypes[h];
+          const bool handleEnabled = (handleMode == SHARED_HANDLE_ENABLED);
+
+          if (handleEnabled && adapter == CL_ADAPTER_D3D9_KHR)
+            continue;
+
+          const int status = context_create(deviceID, context, queue, num_elements, WIDTH, HEIGHT,
+            createFunc, adapter, format, handleMode);
+          if (status == 0)
+            continue;
+
+          std::string sharedHandle = handleEnabled ? "shared handle" : "no shared handle";
+          std::string formatStr;
+          std::string adapterTypeStr;
+          SurfaceFormatToString(format, formatStr);
+          AdapterToString(adapter, adapterTypeStr);
+
+          log_error("\nTest case - clCreateContext (%s, %s, %s) failed\n\n", adapterTypeStr.c_str(), formatStr.c_str(), sharedHandle.c_str());
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+    }
+  }
+
+  return result.Result();
+}

diff --git a/test_extensions/media_sharing/test_functions_api.cpp b/test_extensions/media_sharing/test_functions_api.cpp
new file mode 100644
index 0000000..cdc6ce8
--- /dev/null
+++ b/test_extensions/media_sharing/test_functions_api.cpp

@@ -0,0 +1,617 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "utils.h"
+
+int api_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
+                  unsigned int iterationNum, unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType,
+                  TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
+{
+  const unsigned int FRAME_NUM = 2;
+  const cl_uchar MAX_VALUE = 255 / 2;
+  CResult result;
+
+  //create device
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+
+  //generate input and expected data
+  std::vector<std::vector<cl_uchar> > bufferRef1(FRAME_NUM);
+  std::vector<std::vector<cl_uchar> > bufferRef2(FRAME_NUM);
+  std::vector<std::vector<cl_uchar> > bufferRef3(FRAME_NUM);
+  size_t frameSize = width * height * 3 / 2;
+  cl_uchar step = MAX_VALUE / FRAME_NUM;
+  for (size_t i = 0; i < FRAME_NUM; ++i)
+  {
+    if (!YUVGenerate(surfaceFormat, bufferRef1[i], width, height, static_cast<cl_uchar>(step * i), static_cast<cl_uchar>(step * (i + 1))) ||
+        !YUVGenerate(surfaceFormat, bufferRef2[i], width, height, static_cast<cl_uchar>(step * i), static_cast<cl_uchar>(step * (i + 1)), 0.2) ||
+        !YUVGenerate(surfaceFormat, bufferRef3[i], width, height, static_cast<cl_uchar>(step * i), static_cast<cl_uchar>(step * (i + 1)), 0.4))
+    {
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+  }
+
+  //iterates through all devices
+  while (deviceWrapper->AdapterNext())
+  {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+
+    if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
+      return result.Result();
+    }
+
+    void *objectSharedHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surface;
+
+    //create surface
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSharedHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+
+    cl_context_properties contextProperties[] = {
+      CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
+      AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
+      0,
+    };
+
+    clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clCreateContext failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+#if defined(_WIN32)
+    cl_dx9_surface_info_khr surfaceInfo;
+    surfaceInfo.resource = *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
+    surfaceInfo.shared_handle = objectSharedHandle;
+#else
+    void *surfaceInfo = 0;
+    return TEST_NOT_IMPLEMENTED;
+#endif
+
+    std::vector<cl_mem> memObjList;
+    unsigned int planesNum = PlanesNum(surfaceFormat);
+    std::vector<clMemWrapper> planesList(planesNum);
+    for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+    {
+      planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      memObjList.push_back(planesList[planeIdx]);
+    }
+
+    clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error );
+    if (error != CL_SUCCESS)
+    {
+      log_error("Unable to create command queue: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, objectSharedHandle))
+    {
+      log_error("Image info verification failed\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx)
+    {
+      if (!YUVSurfaceSet(surfaceFormat, surface, bufferRef1[frameIdx % FRAME_NUM], width, height))
+      {
+        result.ResultSub(CResult::TEST_ERROR);
+        return result.Result();
+      }
+
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+
+      { //read operation
+        std::vector<cl_uchar> out( frameSize, 0 );
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+
+          error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0,
+            &out[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          offset += planeWidth * planeHeight;
+        }
+
+        if (!YUVCompare(surfaceFormat, out, bufferRef1[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, OCL image is different then shared OCL object: clEnqueueReadImage\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+
+      { //write operation
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+
+          error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane,
+            0, 0, &bufferRef2[frameIdx % FRAME_NUM][offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          offset += planeWidth * planeHeight;
+        }
+      }
+
+      { //read operation
+        std::vector<cl_uchar> out( frameSize, 0 );
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+
+          error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0,
+            &out[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          offset += planeWidth * planeHeight;
+        }
+
+        if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, Shared OCL image verification after clEnqueueWriteImage failed\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+
+      { //copy operation (shared OCL to OCL)
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        std::vector<cl_uchar> out( frameSize, 0 );
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+
+          cl_image_format formatPlane;
+          formatPlane.image_channel_data_type = CL_UNORM_INT8;
+          formatPlane.image_channel_order = (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? CL_RG: CL_R;
+
+          cl_image_desc imageDesc = {0};
+          imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
+          imageDesc.image_width = planeWidth;
+          imageDesc.image_height = planeHeight;
+
+          clMemWrapper planeOCL = clCreateImage(ctx, CL_MEM_READ_WRITE, &formatPlane, &imageDesc, 0, &error);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clCreateImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          error = clEnqueueCopyImage(cmdQueue, memObjList[i], planeOCL, origin, origin, regionPlane, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueCopyImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          error = clEnqueueReadImage(cmdQueue, planeOCL, CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          offset += planeWidth * planeHeight;
+        }
+
+        if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, OCL image verification after clEnqueueCopyImage (from shared OCL to OCL) failed\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+
+      { //copy operation (OCL to shared OCL)
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        std::vector<cl_uchar> out( frameSize, 0 );
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          size_t pitchSize = ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? width: planeWidth) * sizeof(cl_uchar);
+
+          cl_image_format formatPlane;
+          formatPlane.image_channel_data_type = CL_UNORM_INT8;
+          formatPlane.image_channel_order = (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? CL_RG: CL_R;
+
+          cl_image_desc imageDesc = {0};
+          imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
+          imageDesc.image_width = planeWidth;
+          imageDesc.image_height = planeHeight;
+          imageDesc.image_row_pitch = pitchSize;
+
+          clMemWrapper planeOCL = clCreateImage(ctx, CL_MEM_COPY_HOST_PTR, &formatPlane, &imageDesc, &bufferRef1[frameIdx % FRAME_NUM][offset], &error);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clCreateImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          error = clEnqueueCopyImage(cmdQueue, planeOCL, memObjList[i], origin, origin, regionPlane, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueCopyImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          offset += planeWidth * planeHeight;
+        }
+
+        if (!YUVCompare(surfaceFormat, out, bufferRef1[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, OCL image verification after clEnqueueCopyImage (from OCL to shared OCL) failed\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+
+      { //copy from image to buffer
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        size_t bufferSize = sizeof(cl_uchar) * frameSize;
+        clMemWrapper buffer = clCreateBuffer( ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clCreateBuffer failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+
+          error = clEnqueueCopyImageToBuffer(cmdQueue, memObjList[i], buffer, origin, regionPlane, offset, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueCopyImageToBuffer failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          offset += planeWidth * planeHeight * sizeof(cl_uchar);
+        }
+
+        std::vector<cl_uchar> out( frameSize, 0 );
+        error = clEnqueueReadBuffer( cmdQueue, buffer, CL_TRUE, 0, bufferSize, &out[0], 0, NULL, NULL );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to read buffer");
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        if (!YUVCompare(surfaceFormat, out, bufferRef1[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, OCL buffer verification after clEnqueueCopyImageToBuffer (from shared OCL image to OCL buffer) failed\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+
+      { //copy buffer to image
+        size_t bufferSize = sizeof(cl_uchar) * frameSize;
+        clMemWrapper buffer = clCreateBuffer( ctx, CL_MEM_COPY_HOST_PTR, bufferSize, &bufferRef2[frameIdx % FRAME_NUM][0], &error);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clCreateBuffer failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        std::vector<cl_uchar> out( frameSize, 0 );
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+
+          error = clEnqueueCopyBufferToImage(cmdQueue, buffer, memObjList[i], offset, origin, regionPlane, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueCopyBufferToImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          offset += planeWidth * planeHeight * sizeof(cl_uchar);
+        }
+
+        if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, OCL image verification after clEnqueueCopyBufferToImage (from OCL buffer to shared OCL image) failed\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+
+      { //map operation to read
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        std::vector<cl_uchar> out( frameSize, 0 );
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          size_t pitchSize = ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? width: planeWidth);
+
+          size_t rowPitch = 0;
+          size_t slicePitch = 0;
+          void *mapPtr = clEnqueueMapImage(cmdQueue, memObjList[i], CL_TRUE, CL_MAP_READ, origin, regionPlane,
+            &rowPitch, &slicePitch, 0, 0, 0, &error);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueMapImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          for (size_t y = 0; y < planeHeight; ++y)
+            memcpy(&out[offset + y * pitchSize], static_cast<cl_uchar *>(mapPtr) + y * rowPitch / sizeof(cl_uchar), pitchSize * sizeof(cl_uchar));
+
+          error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i], mapPtr, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueUnmapMemObject failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          offset += pitchSize * planeHeight;
+        }
+
+        if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, Mapped shared OCL image is different then expected\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+
+      { //map operation to write
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          size_t pitchSize = ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? width: planeWidth);
+
+          size_t rowPitch = 0;
+          size_t slicePitch = 0;
+          void *mapPtr = clEnqueueMapImage(cmdQueue, memObjList[i], CL_TRUE, CL_MAP_WRITE, origin, regionPlane,
+            &rowPitch, &slicePitch, 0, 0, 0, &error);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueMapImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          for (size_t y = 0; y < planeHeight; ++y)
+            memcpy(static_cast<cl_uchar *>(mapPtr) + y * rowPitch / sizeof(cl_uchar), &bufferRef3[frameIdx % FRAME_NUM][offset + y * pitchSize], pitchSize * sizeof(cl_uchar));
+
+          error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i], mapPtr, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueUnmapMemObject failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          offset += pitchSize * planeHeight;
+        }
+      }
+
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+
+      std::vector<cl_uchar> bufferOut(frameSize, 0);
+      if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height))
+      {
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+
+      if (!YUVCompare(surfaceFormat, bufferOut, bufferRef3[frameIdx % FRAME_NUM], width, height))
+      {
+        log_error("Frame idx: %i, media surface is different than expected\n", frameIdx);
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+  }
+
+  if (deviceWrapper->Status() != DEVICE_PASS)
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
+      log_error("%s init failed\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+  }
+
+  return result.Result();
+}
+
+// Runs api_functions once for every scenario in the table below (adapter type,
+// surface format, shared-handle mode, frame size and iteration count).
+// All scenarios are executed even when an earlier one fails; each failing
+// scenario is logged and recorded as TEST_FAIL in the aggregate result.
+// When _WIN32 is not defined nothing is run and TEST_NOT_IMPLEMENTED is returned.
+int test_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+  CResult result;
+
+#if defined(_WIN32)
+  // One row per scenario, listed in execution order.
+  struct TScenario
+  {
+    unsigned int iterationNum;
+    unsigned int width;
+    unsigned int height;
+    cl_dx9_media_adapter_type_khr adapterType;
+    TSurfaceFormat surfaceFormat;
+    TSharedHandleType sharedHandle;
+    const char *failureMsg; // complete message logged when the scenario fails
+  };
+
+  static const TScenario scenarios[] = {
+    //D3D9
+    { 10, 256, 256, CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (D3D9, NV12, no shared handle) failed\n\n" },
+    { 3, 512, 256, CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (D3D9, YV12, no shared handle) failed\n\n" },
+
+    //D3D9EX
+    { 5, 256, 512, CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (D3D9EX, NV12, no shared handle) failed\n\n" },
+    { 7, 512, 256, CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED,
+      "\nTest case (D3D9EX, NV12, shared handle) failed\n\n" },
+    { 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (D3D9EX, YV12, no shared handle) failed\n\n" },
+    { 15, 128, 128, CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED,
+      "\nTest case (D3D9EX, YV12, shared handle) failed\n\n" },
+
+    //DXVA
+    { 20, 128, 128, CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (DXVA, NV12, no shared handle) failed\n\n" },
+    { 40, 64, 64, CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED,
+      "\nTest case (DXVA, NV12, shared handle) failed\n\n" },
+    { 5, 512, 512, CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (DXVA, YV12, no shared handle) failed\n\n" },
+    { 2, 1024, 1024, CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED,
+      "\nTest case (DXVA, YV12, shared handle) failed\n\n" },
+  };
+
+  const size_t scenariosNum = sizeof(scenarios) / sizeof(scenarios[0]);
+  for (size_t idx = 0; idx < scenariosNum; ++idx)
+  {
+    const TScenario &scenario = scenarios[idx];
+    if (api_functions(deviceID, context, queue, num_elements,
+                      scenario.iterationNum, scenario.width, scenario.height,
+                      scenario.adapterType, scenario.surfaceFormat, scenario.sharedHandle) == 0)
+      continue;
+
+    // The message carries its own newlines; print it verbatim.
+    log_error("%s", scenario.failureMsg);
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+#else
+  return TEST_NOT_IMPLEMENTED;
+#endif
+
+  return result.Result();
+}

diff --git a/test_extensions/media_sharing/test_functions_kernel.cpp b/test_extensions/media_sharing/test_functions_kernel.cpp
new file mode 100644
index 0000000..f5c3e2d
--- /dev/null
+++ b/test_extensions/media_sharing/test_functions_kernel.cpp

@@ -0,0 +1,446 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+
+#include "utils.h"
+
+// Kernel-access test for DX9 media surface sharing.
+//
+// For every adapter reported by the device wrapper this creates a source and a
+// destination media surface, wraps each plane of both surfaces as an OpenCL
+// image via clCreateFromDX9MediaSurfaceKHR, and then for each of iterationNum
+// frames:
+//   1. fills the source surface with a generated reference frame,
+//   2. acquires both surfaces for OpenCL,
+//   3. runs TestFunction on every plane (source plane -> destination plane),
+//   4. checks the image dimensions reported by the kernel and the pixels read
+//      back with clEnqueueReadImage,
+//   5. releases both surfaces and checks the destination surface contents.
+//
+// Parameters:
+//   deviceID, context, queue, num_elements - standard test arguments; not read
+//       here (the function builds its own context and queue on gDeviceIDdetected).
+//   iterationNum  - number of frames to process per adapter.
+//   width, height - size of the first plane; the remaining planes are
+//       processed at width / 2 x height / 2.
+//   adapterType   - media adapter type to test.
+//   surfaceFormat - surface format to test.
+//   sharedHandle  - whether the surfaces are created with a shared handle.
+//
+// Returns result.Result(); test_kernel treats any non-zero value as a failure.
+int kernel_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
+                     unsigned int iterationNum, unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType,
+                     TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
+{
+  const unsigned int FRAME_NUM = 2;
+  const cl_uchar MAX_VALUE = 255 / 2;
+  // The kernel samples planeIn twice (integer and float coordinates), adds 0.2f
+  // to each sample and writes the first sample to planeOut; if the two samples
+  // differ it writes 0.5 instead. Work-item (0,0) additionally stores the width
+  // of planeIn and the height of planeOut in planeRes.
+  const std::string PROGRAM_STR =
+    "__kernel void TestFunction( read_only image2d_t planeIn, write_only image2d_t planeOut, "
+    NL "                            sampler_t sampler, __global int *planeRes)"
+    NL "{"
+    NL "  int w = get_global_id(0);"
+    NL "  int h = get_global_id(1);"
+    NL "  int width = get_image_width(planeIn);"
+    NL "  int height = get_image_height(planeOut);"
+    NL "  float4 color0 = read_imagef(planeIn, sampler, (int2)(w,h)) + 0.2f;"
+    NL "  float4 color1 = read_imagef(planeIn, sampler, (float2)(w,h)) + 0.2f;"
+    NL "  color0 = (color0 == color1) ? color0: (float4)(0.5, 0.5, 0.5, 0.5);"
+    NL "  write_imagef(planeOut, (int2)(w,h), color0);"
+    NL "  if(w == 0 && h == 0)"
+    NL "  {"
+    NL "    planeRes[0] = width;"
+    NL "    planeRes[1] = height;"
+    NL "  }"
+    NL "}";
+
+  CResult result;
+
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+
+  // Reference input frames and the frames expected after the kernel has run.
+  // NOTE(review): the trailing 0.2 passed to YUVGenerate presumably mirrors the
+  // "+ 0.2f" applied by the kernel - confirm against YUVGenerate.
+  std::vector<std::vector<cl_uchar> > bufferIn(FRAME_NUM);
+  std::vector<std::vector<cl_uchar> > bufferExp(FRAME_NUM);
+  // One full-size plane plus half-size chroma data: 1.5 bytes per pixel.
+  size_t frameSize = width * height * 3 / 2;
+  cl_uchar step = MAX_VALUE / FRAME_NUM;
+  for (size_t i = 0; i < FRAME_NUM; ++i)
+  {
+    if (!YUVGenerate(surfaceFormat, bufferIn[i], width, height, static_cast<cl_uchar>(step * i), static_cast<cl_uchar>(step * (i + 1))) ||
+        !YUVGenerate(surfaceFormat, bufferExp[i], width, height, static_cast<cl_uchar>(step * i), static_cast<cl_uchar>(step * (i + 1)), 0.2))
+    {
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+  }
+
+  while (deviceWrapper->AdapterNext())
+  {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+
+    // The format support check is skipped for NV12; any other unsupported
+    // format ends the test case without recording a failure.
+    if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
+      return result.Result();
+    }
+
+    // Source surface: written by the host, read by the kernel.
+    void *objectSrcHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surfaceSrc;
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceSrc,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSrcHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+
+    // Destination surface: written by the kernel, read back by the host.
+    void *objectDstHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surfaceDst;
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceDst,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectDstHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+
+    // The context is bound to the media device so that surfaces can be shared.
+    cl_context_properties contextProperties[] = {
+      CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
+      AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
+      0,
+    };
+
+    clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clCreateContext failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+#if defined(_WIN32)
+    cl_dx9_surface_info_khr surfaceInfoSrc;
+    surfaceInfoSrc.resource = *(static_cast<CD3D9SurfaceWrapper *>(surfaceSrc.get()));
+    surfaceInfoSrc.shared_handle = objectSrcHandle;
+
+    cl_dx9_surface_info_khr surfaceInfoDst;
+    surfaceInfoDst.resource = *(static_cast<CD3D9SurfaceWrapper *>(surfaceDst.get()));
+    surfaceInfoDst.shared_handle = objectDstHandle;
+#else
+    void *surfaceInfoSrc = 0;
+    void *surfaceInfoDst = 0;
+    return TEST_NOT_IMPLEMENTED;
+#endif
+
+    // The wrappers own the image objects; the plain cl_mem lists are what the
+    // acquire/release and kernel-argument calls below take.
+    std::vector<cl_mem> memObjSrcList;
+    std::vector<cl_mem> memObjDstList;
+    unsigned int planesNum = PlanesNum(surfaceFormat);
+    std::vector<clMemWrapper> planeSrcList(planesNum);
+    std::vector<clMemWrapper> planeDstList(planesNum);
+    for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+    {
+      planeSrcList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoSrc, planeIdx, &error);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      memObjSrcList.push_back(planeSrcList[planeIdx]);
+
+      planeDstList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoDst, planeIdx, &error);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      memObjDstList.push_back(planeDstList[planeIdx]);
+    }
+
+    clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error );
+    if (error != CL_SUCCESS)
+    {
+      log_error("Unable to create command queue: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    if (!ImageInfoVerify(adapterType, memObjSrcList, width, height, surfaceSrc, objectSrcHandle))
+    {
+      log_error("Image info verification failed\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx)
+    {
+      // Frames alternate between the FRAME_NUM generated reference frames.
+      if (!YUVSurfaceSet(surfaceFormat, surfaceSrc, bufferIn[frameIdx % FRAME_NUM], width, height))
+      {
+        result.ResultSub(CResult::TEST_ERROR);
+        return result.Result();
+      }
+
+      // Both surfaces must be acquired before OpenCL commands may use them.
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjSrcList.size()), &memObjSrcList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjDstList.size()), &memObjDstList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+
+      // Unnormalized coordinates, no addressing mode, nearest filtering.
+      clSamplerWrapper sampler = clCreateSampler( ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error );
+      if(error != CL_SUCCESS)
+      {
+        log_error("Unable to create sampler\n");
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+
+      clProgramWrapper program;
+      clKernelWrapper kernel;
+      const char *progPtr = PROGRAM_STR.c_str();
+      if(create_single_kernel_helper(ctx, &program, &kernel, 1, (const char **)&progPtr, "TestFunction"))
+        result.ResultSub(CResult::TEST_FAIL);
+
+      // Two ints: the image width and height reported by the kernel.
+      size_t bufferSize = sizeof(cl_int) * 2;
+      clMemWrapper imageRes = clCreateBuffer( ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clCreateBuffer failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+
+      // The planes are read back one after another into a single frame buffer.
+      size_t offset = 0;
+      size_t origin[3] = {0,0,0};
+      std::vector<cl_uchar> out( frameSize, 0 );
+      for (size_t i = 0; i < memObjSrcList.size(); ++i)
+      {
+        // Plane 0 is full size; every further plane is half size in each dimension.
+        size_t planeWidth = (i == 0) ? width: width / 2;
+        size_t planeHeight = (i == 0) ? height: height / 2;
+        size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+        size_t threads[ 2 ] = { planeWidth, planeHeight };
+
+        error = clSetKernelArg( kernel, 0, sizeof( memObjSrcList[i] ), &memObjSrcList[i] );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments\n" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        error = clSetKernelArg( kernel, 1, sizeof( memObjDstList[i] ), &memObjDstList[i] );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments\n" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        error = clSetKernelArg( kernel, 2, sizeof( sampler ), &sampler );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments\n" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        error = clSetKernelArg( kernel, 3, sizeof( imageRes ), &imageRes );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments\n" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        size_t localThreads[ 2 ];
+        error = get_max_common_2D_work_group_size( ctx, kernel, threads, localThreads );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to get work group size to use\n" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        // One work-item per pixel of the current plane.
+        error = clEnqueueNDRangeKernel( cmdQueue, kernel, 2, NULL, threads, localThreads, 0, NULL, NULL );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to execute test kernel\n" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        // Blocking read of the dimensions written by work-item (0,0).
+        std::vector<cl_uint> imageResOut(2, 0);
+        error = clEnqueueReadBuffer( cmdQueue, imageRes, CL_TRUE, 0, bufferSize, &imageResOut[0], 0, NULL, NULL );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to read buffer\n");
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        // size_t must not be passed to an int conversion; cast to match %u.
+        if(imageResOut[0] != planeWidth)
+        {
+          log_error("Invalid width value, test = %u, expected = %u\n", imageResOut[0], static_cast<unsigned int>(planeWidth));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        if(imageResOut[1] != planeHeight)
+        {
+          log_error("Invalid height value, test = %u, expected = %u\n", imageResOut[1], static_cast<unsigned int>(planeHeight));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        error = clEnqueueReadImage(cmdQueue, memObjDstList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        offset += planeWidth * planeHeight;
+      }
+
+      // First check: pixels read back through OpenCL.
+      if (!YUVCompare(surfaceFormat, out, bufferExp[frameIdx % FRAME_NUM], width, height))
+      {
+        log_error("Frame idx: %u, OCL objects are different than expected\n", static_cast<unsigned int>(frameIdx));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjSrcList.size()), &memObjSrcList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjDstList.size()), &memObjDstList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+
+      // Second check: destination surface contents read after the release.
+      std::vector<cl_uchar> bufferOut(frameSize, 0);
+      if (!YUVSurfaceGet(surfaceFormat, surfaceDst, bufferOut, width, height))
+      {
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+
+      if (!YUVCompare(surfaceFormat, bufferOut, bufferExp[frameIdx % FRAME_NUM], width, height))
+      {
+        log_error("Frame idx: %u, media surface is different than expected\n", static_cast<unsigned int>(frameIdx));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+  }
+
+  // Report a device wrapper that did not finish in the DEVICE_PASS state.
+  if (deviceWrapper->Status() != DEVICE_PASS)
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
+      log_error("%s init failed\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+  }
+
+  return result.Result();
+}
+
+// Runs kernel_functions once for every scenario in the table below (adapter
+// type, surface format, shared-handle mode, frame size and iteration count).
+// All scenarios are executed even when an earlier one fails; each failing
+// scenario is logged and recorded as TEST_FAIL in the aggregate result.
+// When _WIN32 is not defined nothing is run and TEST_NOT_IMPLEMENTED is returned.
+int test_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+  CResult result;
+
+#if defined(_WIN32)
+  // One row per scenario, listed in execution order.
+  struct TScenario
+  {
+    unsigned int iterationNum;
+    unsigned int width;
+    unsigned int height;
+    cl_dx9_media_adapter_type_khr adapterType;
+    TSurfaceFormat surfaceFormat;
+    TSharedHandleType sharedHandle;
+    const char *failureMsg; // complete message logged when the scenario fails
+  };
+
+  static const TScenario scenarios[] = {
+    //D3D9
+    { 10, 256, 256, CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (D3D9, NV12, no shared handle) failed\n\n" },
+    { 3, 256, 256, CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (D3D9, YV12, no shared handle) failed\n\n" },
+
+    //D3D9EX
+    { 5, 256, 512, CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (D3D9EX, NV12, no shared handle) failed\n\n" },
+    { 7, 512, 256, CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED,
+      "\nTest case (D3D9EX, NV12, shared handle) failed\n\n" },
+    { 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (D3D9EX, YV12, no shared handle) failed\n\n" },
+    { 15, 128, 128, CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED,
+      "\nTest case (D3D9EX, YV12, shared handle) failed\n\n" },
+
+    //DXVA
+    { 20, 128, 128, CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (DXVA, NV12, no shared handle) failed\n\n" },
+    { 40, 64, 64, CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED,
+      "\nTest case (DXVA, NV12, shared handle) failed\n\n" },
+    { 5, 512, 512, CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (DXVA, YV12, no shared handle) failed\n\n" },
+    { 2, 1024, 1024, CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED,
+      "\nTest case (DXVA, YV12, shared handle) failed\n\n" },
+  };
+
+  const size_t scenariosNum = sizeof(scenarios) / sizeof(scenarios[0]);
+  for (size_t idx = 0; idx < scenariosNum; ++idx)
+  {
+    const TScenario &scenario = scenarios[idx];
+    if (kernel_functions(deviceID, context, queue, num_elements,
+                         scenario.iterationNum, scenario.width, scenario.height,
+                         scenario.adapterType, scenario.surfaceFormat, scenario.sharedHandle) == 0)
+      continue;
+
+    // The message carries its own newlines; print it verbatim.
+    log_error("%s", scenario.failureMsg);
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+#else
+  return TEST_NOT_IMPLEMENTED;
+#endif
+
+  return result.Result();
+}

diff --git a/test_extensions/media_sharing/test_get_device_ids.cpp b/test_extensions/media_sharing/test_get_device_ids.cpp
new file mode 100644
index 0000000..f8947ea
--- /dev/null
+++ b/test_extensions/media_sharing/test_get_device_ids.cpp

@@ -0,0 +1,196 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "utils.h"
+
+// Checks clGetDeviceIDsFromDX9MediaAdapterKHR for every adapter of the given type:
+//  - the preferred device list must be contained in the "all devices" list of the adapter,
+//  - the "all devices" list must be contained in the devices clGetDeviceIDs reports
+//    for the detected platform.
+// deviceID, context, queue and num_elements are not used by this function.
+// Returns the accumulated CResult value (callers in this file treat non-zero as failure).
+int get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
+                   cl_dx9_media_adapter_type_khr adapterType)
+{
+  CResult result;
+
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+
+  //reference list: every device reported for the detected platform
+  cl_uint platformDevicesNum = 0;
+  cl_int error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL, 0, 0, &platformDevicesNum);
+  if (error != CL_SUCCESS || platformDevicesNum < 1)
+  {
+    log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error));
+    result.ResultSub(CResult::TEST_FAIL);
+    return result.Result();
+  }
+
+  std::vector<cl_device_id> platformDevices(platformDevicesNum);
+  error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL, platformDevicesNum, &platformDevices[0], 0);
+  if (error != CL_SUCCESS)
+  {
+    log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error));
+    result.ResultSub(CResult::TEST_FAIL);
+    return result.Result();
+  }
+
+  while (deviceWrapper->AdapterNext())
+  {
+    //single-entry adapter type and media device lists passed to the queries below
+    cl_dx9_media_adapter_type_khr mediaAdapterType = adapterType;
+    void *mediaDevice = deviceWrapper->Device();
+
+    //check if the test can be run on the adapter
+    error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result);
+    if (error != CL_SUCCESS)
+    {
+      return result.Result();
+    }
+
+    //all devices for the adapter: query the count first, then the IDs
+    cl_uint allNum = 0;
+    error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterType, &mediaDevice,
+      CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &allNum);
+    if (!(error == CL_SUCCESS || error == CL_DEVICE_NOT_FOUND))
+    {
+      log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    std::vector<cl_device_id> allDevices;
+    if (allNum > 0)
+    {
+      allDevices.resize(allNum);
+      error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterType, &mediaDevice,
+        CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, allNum, &allDevices[0], 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+    }
+
+    //preferred devices for the adapter: same two-step query
+    cl_uint preferredNum = 0;
+    error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterType, &mediaDevice,
+      CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &preferredNum);
+    if (!(error == CL_SUCCESS || error == CL_DEVICE_NOT_FOUND))
+    {
+      log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    std::vector<cl_device_id> preferredDevices;
+    if (preferredNum > 0)
+    {
+      preferredDevices.resize(preferredNum);
+      error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterType, &mediaDevice,
+        CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, preferredNum, &preferredDevices[0], 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+    }
+
+    if (preferredNum > allNum)
+    {
+      log_error("Invalid number of preferred devices. It should be a subset of all devices\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    //every preferred device has to appear in the list of all devices for the adapter
+    for (cl_uint prefIdx = 0; prefIdx < preferredNum; ++prefIdx)
+    {
+      bool listed = false;
+      for (cl_uint allIdx = 0; allIdx < allNum && !listed; ++allIdx)
+      {
+        listed = (preferredDevices[prefIdx] == allDevices[allIdx]);
+      }
+
+      if (!listed)
+      {
+        log_error("Preferred device is not a subset of all devices\n");
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+
+    //every adapter device has to appear in the platform device list
+    for (cl_uint allIdx = 0; allIdx < allNum; ++allIdx)
+    {
+      bool listed = false;
+      for (cl_uint platIdx = 0; platIdx < platformDevicesNum && !listed; ++platIdx)
+      {
+        listed = (allDevices[allIdx] == platformDevices[platIdx]);
+      }
+
+      if (!listed)
+      {
+        log_error("CL_ALL_DEVICES_FOR_MEDIA_ADAPTER_KHR should be a subset of all devices for selected platform\n");
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+  }
+
+  //nothing more to report when the adapter enumeration finished cleanly
+  if (deviceWrapper->Status() == DEVICE_PASS)
+  {
+    return result.Result();
+  }
+
+  std::string adapterName;
+  AdapterToString(adapterType, adapterName);
+  if (deviceWrapper->Status() == DEVICE_FAIL)
+  {
+    log_error("%s init failed\n", adapterName.c_str());
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  else
+  {
+    log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+    result.ResultSub(CResult::TEST_NOTSUPPORTED);
+  }
+
+  return result.Result();
+}
+
+// Test entry point: runs get_device_ids once per DX9 media adapter type (D3D9, D3D9EX, DXVA)
+// on Windows and records TEST_FAIL for each adapter type whose run returns non-zero.
+// On other platforms it returns TEST_NOT_IMPLEMENTED.
+int test_get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+  CResult result;
+
+#if defined(_WIN32)
+  //adapter type paired with the message logged when its run fails
+  struct AdapterCase
+  {
+    cl_dx9_media_adapter_type_khr type;
+    const char *failureMessage;
+  };
+
+  const AdapterCase adapterCases[] = {
+    { CL_ADAPTER_D3D9_KHR, "\nTest case (D3D9) failed\n\n" },
+    { CL_ADAPTER_D3D9EX_KHR, "\nTest case (D3D9EX) failed\n\n" },
+    { CL_ADAPTER_DXVA_KHR, "\nTest case (DXVA) failed\n\n" },
+  };
+  const size_t adapterCasesNum = sizeof(adapterCases) / sizeof(adapterCases[0]);
+
+  for (size_t caseIdx = 0; caseIdx < adapterCasesNum; ++caseIdx)
+  {
+    const int status = get_device_ids(deviceID, context, queue, num_elements, adapterCases[caseIdx].type);
+    if (status == 0)
+      continue;
+
+    log_error("%s", adapterCases[caseIdx].failureMessage);
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+#else
+  return TEST_NOT_IMPLEMENTED;
+#endif
+
+  return result.Result();
+}

diff --git a/test_extensions/media_sharing/test_interop_sync.cpp b/test_extensions/media_sharing/test_interop_sync.cpp
new file mode 100644
index 0000000..6831a14
--- /dev/null
+++ b/test_extensions/media_sharing/test_interop_sync.cpp

@@ -0,0 +1,357 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "utils.h"
+
+// Runs a single CL_CONTEXT_INTEROP_USER_SYNC test case.
+//
+// For every adapter of the requested type the function:
+//  1. creates a media surface and fills it with generated YUV data,
+//  2. creates an OpenCL context (via functionCreate) with CL_CONTEXT_INTEROP_USER_SYNC = userSync,
+//  3. wraps each surface plane with clCreateFromDX9MediaSurfaceKHR,
+//  4. when userSync is CL_TRUE, waits for pending Direct3D work before acquiring the surfaces,
+//  5. reads the planes back through OpenCL and compares them with the input,
+//  6. releases the surfaces (followed by clFinish when userSync is CL_TRUE) and verifies that
+//     the media surface still holds the input data.
+//
+// deviceID, context, queue and num_elements are not used by this function.
+// Returns the accumulated CResult value; the caller in this file treats non-zero as failure.
+// Returns TEST_NOT_IMPLEMENTED on non-Windows builds once surface access is required.
+int interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queue queue,
+                   int num_elements, unsigned int width, unsigned int height,
+                   TContextFuncType functionCreate, cl_dx9_media_adapter_type_khr adapterType,
+                   TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle, cl_bool userSync)
+{
+  CResult result;
+
+  //create device
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+
+  //generate input data (width * height * 3 / 2 bytes per frame)
+  std::vector<cl_uchar> bufferIn(width * height * 3 / 2, 0);
+  if(!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+
+  while (deviceWrapper->AdapterNext())
+  {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+
+    //formats other than NV12 are only tested when the device reports support for them
+    if (surfaceFormat != SURFACE_FORMAT_NV12 &&
+      !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string syncStr = (userSync == CL_TRUE) ? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s, user sync: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str(), syncStr.c_str());
+      return result.Result();
+    }
+
+    void *objectSharedHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surface;
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSharedHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+
+    cl_context_properties contextProperties[] = {
+      CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
+      AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
+      CL_CONTEXT_INTEROP_USER_SYNC, userSync,
+      0,
+    };
+
+
+    clContextWrapper ctx;
+    switch(functionCreate)
+    {
+    case CONTEXT_CREATE_DEFAULT:
+      ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
+      break;
+    case CONTEXT_CREATE_FROM_TYPE:
+      ctx = clCreateContextFromType(&contextProperties[0], gDeviceTypeSelected, NULL, NULL, &error);
+      break;
+    default:
+      log_error("Unknown context creation function enum\n");
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+      break;
+    }
+
+    if (error != CL_SUCCESS)
+    {
+      std::string functionName;
+      FunctionContextCreateToString(functionCreate, functionName);
+      log_error("%s failed: %s\n", functionName.c_str(), IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height))
+    {
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+
+#if defined(_WIN32)
+    cl_dx9_surface_info_khr surfaceInfo;
+    surfaceInfo.resource = *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
+    surfaceInfo.shared_handle = objectSharedHandle;
+#else
+    void *surfaceInfo = 0;
+    return TEST_NOT_IMPLEMENTED;
+#endif
+
+    //one OpenCL image per surface plane; planesList owns the objects, memObjList is the raw list for the API calls
+    std::vector<cl_mem> memObjList;
+    unsigned int planesNum = PlanesNum(surfaceFormat);
+    std::vector<clMemWrapper> planesList(planesNum);
+    for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+    {
+      planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      memObjList.push_back(planesList[planeIdx]);
+    }
+
+    clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error );
+    if (error != CL_SUCCESS)
+    {
+      log_error("Unable to create command queue: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, objectSharedHandle))
+    {
+      log_error("Image info verification failed\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    if (userSync == CL_TRUE)
+    {
+#if defined(_WIN32)
+      //pick the Direct3D device that backs the current adapter
+      LPDIRECT3DDEVICE9 d3dDevice = NULL;
+      switch (adapterType)
+      {
+      case CL_ADAPTER_D3D9_KHR:
+        d3dDevice = (LPDIRECT3DDEVICE9)deviceWrapper->Device();
+        break;
+      case CL_ADAPTER_D3D9EX_KHR:
+        d3dDevice = (LPDIRECT3DDEVICE9EX)deviceWrapper->Device();
+        break;
+      case CL_ADAPTER_DXVA_KHR:
+        {
+          CDXVAWrapper *DXVADevice = dynamic_cast<CDXVAWrapper *>(&(*deviceWrapper));
+          if (DXVADevice == NULL)
+          {
+            log_error("Device wrapper is not a DXVA wrapper\n");
+            result.ResultSub(CResult::TEST_ERROR);
+            return result.Result();
+          }
+          d3dDevice = (LPDIRECT3DDEVICE9EX)(DXVADevice->D3D9()).Device();
+        }
+        break;
+      default:
+        //report an error instead of returning 0, which the caller would count as a pass
+        log_error("Unknown adapter type\n");
+        result.ResultSub(CResult::TEST_ERROR);
+        return result.Result();
+      }
+
+      //issue an event query and spin until it is signaled, flushing the D3D command stream
+      IDirect3DQuery9* eventQuery = NULL;
+      if (d3dDevice == NULL ||
+        FAILED(d3dDevice->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery)) ||
+        eventQuery == NULL)
+      {
+        log_error("Unable to create D3D event query\n");
+        result.ResultSub(CResult::TEST_ERROR);
+        return result.Result();
+      }
+
+      eventQuery->Issue(D3DISSUE_END);
+      while (S_FALSE == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH))
+        ;
+      eventQuery->Release();
+#else
+      return TEST_NOT_IMPLEMENTED;
+#endif
+    }
+
+    error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList.at(0), 0, 0, 0);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    //read every plane back into one contiguous frame buffer
+    size_t origin[3] = {0,0,0};
+    size_t offset = 0;
+    size_t frameSize = width * height * 3 / 2;
+    std::vector<cl_uchar> out( frameSize, 0 );
+    for (size_t i = 0; i < memObjList.size(); ++i)
+    {
+      //plane 0 is full resolution, the remaining planes are half width and half height
+      size_t planeWidth = (i == 0) ? width: width / 2;
+      size_t planeHeight = (i == 0) ? height: height / 2;
+      size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+
+      error = clEnqueueReadImage(cmdQueue, memObjList.at(i), CL_TRUE, origin, regionPlane, 0, 0, &out.at(offset), 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+
+      offset += planeWidth * planeHeight;
+    }
+
+    if (!YUVCompare(surfaceFormat, out, bufferIn, width, height))
+    {
+      log_error("OCL object verification failed - clEnqueueReadImage\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList.at(0), 0, 0, 0);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    //with user sync enabled, finish the queue explicitly before the surface is read on the media side
+    if (userSync == CL_TRUE)
+    {
+      error = clFinish(cmdQueue);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clFinish failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+
+    //shared object verification
+    std::vector<cl_uchar> bufferOut(frameSize, 0);
+    if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height))
+    {
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height))
+    {
+      log_error("Media surface is different than expected\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+  }
+
+  //report adapter enumeration problems once the loop is over
+  if (deviceWrapper->Status() != DEVICE_PASS)
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
+      log_error("%s init failed\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+  }
+
+  return result.Result();
+}
+
+// Test entry point: runs interop_user_sync on a 256x256 surface for every combination of
+// adapter type, context creation function, YUV format, shared-handle mode and user-sync
+// setting. The D3D9 adapter is never run with shared handles enabled.
+// Records TEST_FAIL for each combination that returns non-zero and returns the accumulated
+// CResult value. Returns TEST_NOT_IMPLEMENTED on non-Windows builds.
+int test_interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+  const unsigned int WIDTH = 256;
+  const unsigned int HEIGHT = 256;
+
+  std::vector<cl_dx9_media_adapter_type_khr> adapters;
+#if defined(_WIN32)
+  adapters.push_back(CL_ADAPTER_D3D9_KHR);
+  adapters.push_back(CL_ADAPTER_D3D9EX_KHR);
+  adapters.push_back(CL_ADAPTER_DXVA_KHR);
+#else
+  return TEST_NOT_IMPLEMENTED;
+#endif
+
+  std::vector<TContextFuncType> contextFuncs;
+  contextFuncs.push_back(CONTEXT_CREATE_DEFAULT);
+  contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE);
+
+  std::vector<TSurfaceFormat> formats;
+  formats.push_back(SURFACE_FORMAT_NV12);
+  formats.push_back(SURFACE_FORMAT_YV12);
+
+  std::vector<TSharedHandleType> sharedHandleTypes;
+  sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED);
+  sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED);
+
+  std::vector<cl_bool> sync;
+  sync.push_back(CL_FALSE);
+  sync.push_back(CL_TRUE);
+
+  CResult result;
+  for (size_t adapterIdx = 0; adapterIdx < adapters.size(); ++adapterIdx)
+  {
+    //iteration through all create context functions
+    for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size(); ++contextFuncIdx)
+    {
+      //iteration through YUV formats
+      for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx)
+      {
+        //shared handle enabled or disabled
+        for (size_t sharedHandleIdx = 0; sharedHandleIdx < sharedHandleTypes.size(); ++sharedHandleIdx)
+        {
+          //the D3D9 adapter is not tested with shared handles; the check does not depend on
+          //the sync setting, so it is done once per shared-handle mode
+          if (adapters[adapterIdx] == CL_ADAPTER_D3D9_KHR && sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED)
+            continue;
+
+          //user sync interop disabled or enabled
+          for (size_t syncIdx = 0; syncIdx < sync.size(); ++syncIdx)
+          {
+            if(interop_user_sync(deviceID, context, queue, num_elements, WIDTH, HEIGHT,
+              contextFuncs[contextFuncIdx], adapters[adapterIdx], formats[formatIdx],
+              sharedHandleTypes[sharedHandleIdx], sync[syncIdx]) != 0)
+            {
+              std::string syncStr = (sync[syncIdx] == CL_TRUE) ? "user sync enabled": "user sync disabled";
+              std::string sharedHandle = (sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED)? "shared handle": "no shared handle";
+              std::string adapterStr;
+              std::string formatStr;
+              std::string functionName;
+              SurfaceFormatToString(formats[formatIdx], formatStr);
+              AdapterToString(adapters[adapterIdx], adapterStr);
+              //name the context creation function that was actually exercised instead of
+              //always reporting clCreateContext
+              FunctionContextCreateToString(contextFuncs[contextFuncIdx], functionName);
+
+              log_error("\nTest case - %s (%s, %s, %s, %s) failed\n\n", functionName.c_str(), adapterStr.c_str(), formatStr.c_str(), sharedHandle.c_str(), syncStr.c_str());
+              result.ResultSub(CResult::TEST_FAIL);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return result.Result();
+}

diff --git a/test_extensions/media_sharing/test_memory_access.cpp b/test_extensions/media_sharing/test_memory_access.cpp
new file mode 100644
index 0000000..5aabaf6
--- /dev/null
+++ b/test_extensions/media_sharing/test_memory_access.cpp

@@ -0,0 +1,468 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "utils.h"
+
+int memory_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
+                  unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType,
+                  TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
+{
+  CResult result;
+
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  //creates device
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+
+  //generate input and expected data
+  size_t frameSize = width * height * 3 / 2;
+  std::vector<cl_uchar> bufferRef0(frameSize, 0);
+  std::vector<cl_uchar> bufferRef1(frameSize, 0);
+  std::vector<cl_uchar> bufferRef2(frameSize, 0);
+  if (!YUVGenerate(surfaceFormat, bufferRef0, width, height, 0, 90) ||
+    !YUVGenerate(surfaceFormat, bufferRef1, width, height, 91, 180) ||
+    !YUVGenerate(surfaceFormat, bufferRef2, width, height, 181, 255))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+
+  //iterates through all devices
+  while (deviceWrapper->AdapterNext())
+  {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+
+    if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
+      return result.Result();
+    }
+
+    void *objectSharedHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surface;
+
+    //creates surface
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSharedHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+
+    if (!YUVSurfaceSet(surfaceFormat, surface, bufferRef0, width, height))
+    {
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+
+    cl_context_properties contextProperties[] = {
+      CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
+      AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
+      0,
+    };
+
+    clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clCreateContext failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error );
+    if (error != CL_SUCCESS)
+    {
+      log_error("Unable to create command queue: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    { //memory access write
+#if defined(_WIN32)
+      cl_dx9_surface_info_khr surfaceInfo;
+      surfaceInfo.resource = *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
+      surfaceInfo.shared_handle = objectSharedHandle;
+#else
+      void *surfaceInfo = 0;
+      return TEST_NOT_IMPLEMENTED;
+#endif
+
+      std::vector<cl_mem> memObjList;
+      unsigned int planesNum = PlanesNum(surfaceFormat);
+      std::vector<clMemWrapper> planesList(planesNum);
+      for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+      {
+        planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_WRITE_ONLY, adapterType, &surfaceInfo, planeIdx, &error);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clCreateFromDX9MediaSurfaceKHR failed for WRITE_ONLY plane %i: %s\n", planeIdx, IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+          return result.Result();
+        }
+        memObjList.push_back(planesList[planeIdx]);
+      }
+
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+
+      size_t offset = 0;
+      size_t origin[3] = {0,0,0};
+      for (size_t i = 0; i < memObjList.size(); ++i)
+      {
+        size_t planeWidth = (i == 0) ? width: width / 2;
+        size_t planeHeight = (i == 0) ? height: height / 2;
+        size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+
+        error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane,
+          0, 0, &bufferRef1[offset], 0, 0, 0);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        offset += planeWidth * planeHeight;
+      }
+
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+
+    std::vector<cl_uchar> bufferOut0(frameSize, 0);
+    if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut0, width, height))
+    {
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    if (!YUVCompare(surfaceFormat, bufferOut0, bufferRef1, width, height))
+    {
+      log_error("Media surface is different than expected\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    { //memory access read
+#if defined(_WIN32)
+      cl_dx9_surface_info_khr surfaceInfo;
+      surfaceInfo.resource = *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
+      surfaceInfo.shared_handle = objectSharedHandle;
+#else
+      void *surfaceInfo = 0;
+      return TEST_NOT_IMPLEMENTED;
+#endif
+
+      std::vector<cl_mem> memObjList;
+      unsigned int planesNum = PlanesNum(surfaceFormat);
+      std::vector<clMemWrapper> planesList(planesNum);
+      for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+      {
+        planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_ONLY, adapterType, &surfaceInfo, planeIdx, &error);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clCreateFromDX9MediaSurfaceKHR failed for READ_ONLY plane %i: %s\n", planeIdx, IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+          return result.Result();
+        }
+        memObjList.push_back(planesList[planeIdx]);
+      }
+
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+
+      std::vector<cl_uchar> out( frameSize, 0 );
+      size_t offset = 0;
+      size_t origin[3] = {0,0,0};
+
+      for (size_t i = 0; i < memObjList.size(); ++i)
+      {
+        size_t planeWidth = (i == 0) ? width: width / 2;
+        size_t planeHeight = (i == 0) ? height: height / 2;
+        size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+
+        error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0,
+          &out[offset], 0, 0, 0);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        offset += planeWidth * planeHeight;
+      }
+
+      if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height))
+      {
+        log_error("OCL image (READ_ONLY) is different then expected\n");
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+
+    std::vector<cl_uchar> bufferOut1(frameSize, 0);
+    if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut1, width, height))
+    {
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    if (!YUVCompare(surfaceFormat, bufferOut1, bufferRef1, width, height))
+    {
+      log_error("Media surface is different than expected\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    { //memory access read write
+#if defined(_WIN32)
+      cl_dx9_surface_info_khr surfaceInfo;
+      surfaceInfo.resource = *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
+      surfaceInfo.shared_handle = objectSharedHandle;
+#else
+      void *surfaceInfo = 0;
+      return TEST_NOT_IMPLEMENTED;
+#endif
+
+      std::vector<cl_mem> memObjList;
+      unsigned int planesNum = PlanesNum(surfaceFormat);
+      std::vector<clMemWrapper> planesList(planesNum);
+      for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+      {
+        planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clCreateFromDX9MediaSurfaceKHR failed for READ_WRITE plane %i: %s\n", planeIdx, IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+          return result.Result();
+        }
+        memObjList.push_back(planesList[planeIdx]);
+      }
+
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+
+      { //read
+        std::vector<cl_uchar> out( frameSize, 0 );
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+
+          error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0,
+            &out[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          offset += planeWidth * planeHeight;
+        }
+
+        if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height))
+        {
+          log_error("OCL image (READ_WRITE) is different then expected\n");
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+
+      { //write
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+
+          error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane,
+            0, 0, &bufferRef2[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          offset += planeWidth * planeHeight;
+        }
+      }
+
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+
+    std::vector<cl_uchar> bufferOut2(frameSize, 0);
+    if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut2, width, height))
+    {
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    if (!YUVCompare(surfaceFormat, bufferOut2, bufferRef2, width, height))
+    {
+      log_error("Media surface is different than expected\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+  }
+
+  if (deviceWrapper->Status() != DEVICE_PASS)
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
+    log_error("%s init failed\n", adapterName.c_str());
+    result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+  }
+
+  return result.Result();
+}
+
+// Test entry point: calls memory_access() once per adapter type / surface
+// format / shared-handle combination listed in the table below and folds
+// every non-zero return into a single CResult. On non-Windows builds the
+// function returns TEST_NOT_IMPLEMENTED without running anything.
+int test_memory_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+  CResult result;
+
+#if defined(_WIN32)
+  // One row per test case; failureMessage is logged verbatim when the case
+  // returns a non-zero status.
+  struct TMemoryAccessCase
+  {
+    unsigned int width;
+    unsigned int height;
+    cl_dx9_media_adapter_type_khr adapterType;
+    TSurfaceFormat surfaceFormat;
+    TSharedHandleType sharedHandle;
+    const char *failureMessage;
+  };
+
+  const TMemoryAccessCase testCases[] =
+  {
+    //D3D9
+    { 256, 256, CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (D3D9, NV12, no shared handle) failed\n\n" },
+    { 512, 256, CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (D3D9, YV12, no shared handle) failed\n\n" },
+    //D3D9EX
+    { 256, 512, CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (D3D9EX, NV12, no shared handle) failed\n\n" },
+    { 512, 256, CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED,
+      "\nTest case (D3D9EX, NV12, shared handle) failed\n\n" },
+    { 256, 256, CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (D3D9EX, YV12, no shared handle) failed\n\n" },
+    { 128, 128, CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED,
+      "\nTest case (D3D9EX, YV12, shared handle) failed\n\n" },
+    //DXVA
+    { 128, 128, CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (DXVA, NV12, no shared handle) failed\n\n" },
+    { 64, 64, CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED,
+      "\nTest case (DXVA, NV12, shared handle) failed\n\n" },
+    { 512, 512, CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED,
+      "\nTest case (DXVA, YV12, no shared handle) failed\n\n" },
+    { 1024, 1024, CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED,
+      "\nTest case (DXVA, YV12, shared handle) failed\n\n" },
+  };
+  const size_t testCaseCount = sizeof(testCases) / sizeof(testCases[0]);
+
+  // Cases run in table order; a failing case does not stop the later ones.
+  for (size_t caseIdx = 0; caseIdx < testCaseCount; ++caseIdx)
+  {
+    const TMemoryAccessCase &testCase = testCases[caseIdx];
+    const int status = memory_access(deviceID, context, queue, num_elements,
+      testCase.width, testCase.height, testCase.adapterType,
+      testCase.surfaceFormat, testCase.sharedHandle);
+    if (status == 0)
+      continue;
+
+    log_error("%s", testCase.failureMessage);
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+#else
+  return TEST_NOT_IMPLEMENTED;
+#endif
+
+  return result.Result();
+}

diff --git a/test_extensions/media_sharing/test_other_data_types.cpp b/test_extensions/media_sharing/test_other_data_types.cpp
new file mode 100644
index 0000000..3a66725
--- /dev/null
+++ b/test_extensions/media_sharing/test_other_data_types.cpp

@@ -0,0 +1,1022 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+
+#include "utils.h"
+
+// Exercises OpenCL access to DX9 media surfaces of a single (non-planar)
+// surface format. For every adapter reported by the device wrapper it creates
+// a source and a destination surface, wraps plane 0 of each as a shared OpenCL
+// image and, for iterationNum frames:
+//   - fills the source surface from bufferIn through LockRect,
+//   - acquires both images, reads the source back and compares with bufferIn,
+//   - overwrites the source with bufferExp through clEnqueueWriteImage,
+//   - runs TestFunction (source -> destination) and checks the image size it
+//     reports,
+//   - maps the destination, compares it with bufferIn, then stores bufferExp
+//     through the mapping,
+//   - releases the images and checks the destination surface against
+//     bufferExp.
+//
+// T is the host type of one channel value (sizeof(T) * channel count is used
+// as the element size). The deviceID, context, queue and num_elements
+// arguments are not used by the body; the context and queue are created
+// locally from gPlatformIDdetected / gDeviceIDdetected.
+//
+// Returns result.Result() with every failure recorded on the way, or
+// TEST_NOT_IMPLEMENTED on non-Windows builds once an adapter has been
+// selected and surface access would be required.
+template<typename T>
+int other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
+                    unsigned int iterationNum, unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType,
+                    TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
+{
+  const unsigned int FRAME_NUM = 2;
+  const float MAX_VALUE = 0.6f;
+  // Kernel: samples imageIn with integer and float coordinates, subtracts
+  // 0.2f, writes the value (or 0.5 when the two samples disagree) to imageOut
+  // and stores the image width/height seen by the device in imageRes.
+  const std::string PROGRAM_STR =
+    "__kernel void TestFunction( read_only image2d_t imageIn, write_only image2d_t imageOut, "
+    NL "                            sampler_t sampler, __global int *imageRes)"
+    NL "{"
+    NL "  int w = get_global_id(0);"
+    NL "  int h = get_global_id(1);"
+    NL "  int width = get_image_width(imageIn);"
+    NL "  int height = get_image_height(imageOut);"
+    NL "  float4 color0 = read_imagef(imageIn, sampler, (int2)(w,h)) - 0.2f;"
+    NL "  float4 color1 = read_imagef(imageIn, sampler, (float2)(w,h)) - 0.2f;"
+    NL "  color0 = (color0 == color1) ? color0: (float4)(0.5, 0.5, 0.5, 0.5);"
+    NL "  write_imagef(imageOut, (int2)(w,h), color0);"
+    NL "  if(w == 0 && h == 0)"
+    NL "  {"
+    NL "    imageRes[0] = width;"
+    NL "    imageRes[1] = height;"
+    NL "  }"
+    NL "}";
+
+  CResult result;
+
+  cl_image_format format;
+  if(!SurfaceFormatToOCL(surfaceFormat, format))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+
+  while (deviceWrapper->AdapterNext())
+  {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+
+    cl_context_properties contextProperties[] = {
+      CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
+      AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
+      0,
+    };
+
+    clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clCreateContext failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error );
+    if (error != CL_SUCCESS)
+    {
+      log_error("Unable to create command queue: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    // An unsupported format is a skip, not a failure: the result is returned
+    // as accumulated so far.
+    if (!SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
+      return result.Result();
+    }
+
+    if(!ImageFormatCheck(ctx, CL_MEM_OBJECT_IMAGE2D, format))
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by OCL (adapter type: %s, format: %s, shared handle: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
+      return result.Result();
+    }
+
+    if (format.image_channel_data_type == CL_HALF_FLOAT)
+    {
+      if (DetectFloatToHalfRoundingMode(cmdQueue))
+      {
+        log_error("Unable to detect rounding mode\n");
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+    }
+
+    // FRAME_NUM input frames and their expected counterparts; frames are
+    // reused cyclically when iterationNum exceeds FRAME_NUM.
+    std::vector<std::vector<T> > bufferIn(FRAME_NUM);
+    std::vector<std::vector<T> > bufferExp(FRAME_NUM);
+    float step = MAX_VALUE / static_cast<float>(FRAME_NUM);
+    unsigned int planeNum = ChannelNum(surfaceFormat);
+    for (size_t i = 0; i < FRAME_NUM; ++i)
+    {
+      DataGenerate(surfaceFormat, format.image_channel_data_type, bufferIn[i], width, height, planeNum, step * i, step * (i + 1));
+      DataGenerate(surfaceFormat, format.image_channel_data_type, bufferExp[i], width, height, planeNum, step * i, step * (i + 1), 0.2f);
+    }
+
+    void *objectSrcHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surfaceSrc;
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceSrc,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSrcHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+
+    void *objectDstHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surfaceDst;
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceDst,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectDstHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+
+#if defined(_WIN32)
+    cl_dx9_surface_info_khr surfaceSrcInfo;
+    CD3D9SurfaceWrapper *dx9SurfaceSrc = (static_cast<CD3D9SurfaceWrapper *>(surfaceSrc.get()));
+    surfaceSrcInfo.resource = *dx9SurfaceSrc;
+    surfaceSrcInfo.shared_handle = objectSrcHandle;
+
+    cl_dx9_surface_info_khr surfaceDstInfo;
+    CD3D9SurfaceWrapper *dx9SurfaceDst = (static_cast<CD3D9SurfaceWrapper *>(surfaceDst.get()));
+    surfaceDstInfo.resource = *dx9SurfaceDst;
+    surfaceDstInfo.shared_handle = objectDstHandle;
+#else
+    // Placeholders keep the code below compiling; it is never reached.
+    void *surfaceSrcInfo = 0;
+    void *surfaceDstInfo = 0;
+    return TEST_NOT_IMPLEMENTED;
+#endif
+
+    //create OCL shared object
+    clMemWrapper objectSrcShared = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceSrcInfo, 0, &error);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    clMemWrapper objectDstShared = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceDstInfo, 0, &error);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+
+    // Both images are acquired and released together each frame.
+    std::vector<cl_mem> memObjList;
+    memObjList.push_back(objectSrcShared);
+    memObjList.push_back(objectDstShared);
+
+    if (!GetMemObjInfo(objectSrcShared, adapterType, surfaceSrc, objectSrcHandle))
+    {
+      log_error("Invalid memory object info\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    if (!GetImageInfo(objectSrcShared, format, sizeof(T) * planeNum,
+      width * sizeof(T) * planeNum,  0, width, height, 0, 0))
+    {
+      log_error("clGetImageInfo failed\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+
+    for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx)
+    {
+      //surface set
+#if defined(_WIN32)
+      D3DLOCKED_RECT rect;
+      if (FAILED((*dx9SurfaceSrc)->LockRect(&rect, NULL, 0)))
+      {
+        log_error("Surface lock failed\n");
+        result.ResultSub(CResult::TEST_ERROR);
+        return result.Result();
+      }
+
+      // rect.Pitch is in bytes; pitch is the row stride in units of T.
+      size_t pitch = rect.Pitch / sizeof(T);
+      size_t lineSize = width * planeNum * sizeof(T);
+      T *ptr = static_cast<T *>(rect.pBits);
+
+      for (size_t y = 0; y < height; ++y)
+        memcpy(ptr + y * pitch, &bufferIn[frameIdx % FRAME_NUM][y * width * planeNum], lineSize);
+
+      (*dx9SurfaceSrc)->UnlockRect();
+#else
+      return TEST_NOT_IMPLEMENTED;
+#endif
+
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireMediaSurfaceKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+
+      size_t origin[3] = {0,0,0};
+      size_t region[3] = {width, height, 1};
+
+      { //read operation
+        std::vector<T> out( planeNum * width * height, 0 );
+        error = clEnqueueReadImage(cmdQueue, objectSrcShared, CL_TRUE, origin, region, 0, 0, &out[0], 0, 0, 0);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, bufferIn[frameIdx % FRAME_NUM], width, height, planeNum))
+        {
+          // frameIdx is size_t; cast so the argument matches the %i conversion.
+          log_error("Frame idx: %i, OCL object is different then expected\n", static_cast<int>(frameIdx));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+
+      { //write operation
+        error = clEnqueueWriteImage(cmdQueue, objectSrcShared, CL_TRUE, origin, region,
+          0, 0, &bufferExp[frameIdx % FRAME_NUM][0], 0, 0, 0);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+
+      { //kernel operations
+        clSamplerWrapper sampler = clCreateSampler( ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error );
+        if(error != CL_SUCCESS)
+        {
+          log_error("Unable to create sampler\n");
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        size_t threads[ 2 ] = { width, height };
+        clProgramWrapper program;
+        clKernelWrapper kernel;
+        const char *progPtr = PROGRAM_STR.c_str();
+        if(create_single_kernel_helper(ctx, &program, &kernel, 1, (const char **)&progPtr, "TestFunction"))
+          result.ResultSub(CResult::TEST_FAIL);
+
+        error = clSetKernelArg( kernel, 0, sizeof( objectSrcShared ), &(objectSrcShared) );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        error = clSetKernelArg( kernel, 1, sizeof( objectDstShared ), &(objectDstShared) );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        error = clSetKernelArg( kernel, 2, sizeof( sampler ), &sampler );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        size_t bufferSize = sizeof(cl_int) * 2;
+        clMemWrapper imageRes = clCreateBuffer( ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clCreateBuffer failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        // Checked like the other arguments; otherwise the status would be
+        // overwritten by the work-group size query below.
+        error = clSetKernelArg( kernel, 3, sizeof( imageRes ), &imageRes );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        size_t localThreads[ 2 ];
+        error = get_max_common_2D_work_group_size( ctx, kernel, threads, localThreads );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to get work group size to use" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        error = clEnqueueNDRangeKernel( cmdQueue, kernel, 2, NULL, threads, localThreads, 0, NULL, NULL );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to execute test kernel" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        // Blocking read: also waits for the kernel enqueued above.
+        std::vector<cl_uint> imageResOut(2, 0);
+        error = clEnqueueReadBuffer( cmdQueue, imageRes, CL_TRUE, 0, bufferSize, &imageResOut[0], 0, NULL, NULL );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to read buffer");
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        if(imageResOut[0] != width)
+        {
+          log_error("Invalid width value, test = %i, expected = %i\n", imageResOut[0], width);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+
+        if(imageResOut[1] != height)
+        {
+          log_error("Invalid height value, test = %i, expected = %i\n", imageResOut[1], height);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+
+      { //map operation
+        size_t mapOrigin[3] = {0,0,0};
+        size_t mapRegion[3] = {width, height, 1};
+
+        std::vector<T> out( width * height * planeNum, 0 );
+        size_t rowPitch = 0;
+        size_t slicePitch = 0;
+        void *mapPtr = clEnqueueMapImage(cmdQueue, objectDstShared, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, mapOrigin, mapRegion,
+          &rowPitch, &slicePitch, 0, 0, 0, &error);
+        if (error != CL_SUCCESS || mapPtr == NULL)
+        {
+          // Without a valid mapping there is nothing to copy, compare or
+          // unmap; record the failure and go on to the release step.
+          log_error("clEnqueueMapImage failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        else
+        {
+          // rowPitch is in bytes, hence the division when indexing in T units.
+          for (size_t y = 0; y < height; ++y)
+            memcpy(&out[y * width * planeNum], static_cast<T *>(mapPtr) + y * rowPitch / sizeof(T),
+            width * planeNum * sizeof(T));
+
+          // NOTE(review): presumably bufferExp is bufferIn shifted by the 0.2f
+          // passed to DataGenerate, so the kernel's "- 0.2f" output should
+          // match bufferIn - confirm against DataGenerate.
+          if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, bufferIn[frameIdx % FRAME_NUM], width, height, planeNum))
+          {
+            log_error("Frame idx: %i, Mapped OCL object is different then expected\n", static_cast<int>(frameIdx));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+
+          for (size_t y = 0; y < height; ++y)
+            memcpy(static_cast<T *>(mapPtr) + y * rowPitch / sizeof(T), &bufferExp[frameIdx % FRAME_NUM][y * width * planeNum],
+            width * planeNum * sizeof(T));
+
+          error = clEnqueueUnmapMemObject(cmdQueue, objectDstShared, mapPtr, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueUnmapMemObject failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+        }
+      }
+
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseMediaSurfaceKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+
+      std::vector<T> out(width * height * planeNum, 0);
+      //surface get
+#if defined(_WIN32)
+      if (FAILED((*dx9SurfaceDst)->LockRect(&rect, NULL, 0)))
+      {
+        log_error("Surface lock failed\n");
+        result.ResultSub(CResult::TEST_ERROR);
+        return result.Result();
+      }
+
+      pitch = rect.Pitch / sizeof(T);
+      lineSize = width * planeNum * sizeof(T);
+      ptr = static_cast<T *>(rect.pBits);
+      for (size_t y = 0; y < height; ++y)
+        memcpy(&out[y * width * planeNum], ptr + y * pitch, lineSize);
+
+      (*dx9SurfaceDst)->UnlockRect();
+#else
+      return TEST_NOT_IMPLEMENTED;
+#endif
+
+      if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, bufferExp[frameIdx % FRAME_NUM], width, height, planeNum))
+      {
+        log_error("Frame idx: %i, media object is different then expected\n", static_cast<int>(frameIdx));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+  }
+
+  // The adapter loop has ended; report a device wrapper that did not finish
+  // in the PASS state.
+  if (deviceWrapper->Status() != DEVICE_PASS)
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
+      log_error("%s init failed\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+  }
+
+  return result.Result();
+}
+
+int test_other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+  CResult result;
+
+#if defined(_WIN32)
+  //D3D9
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, L16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, A8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, L8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, G32R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, G16R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, G16R16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, A8L8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, A32B32G32R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, A16B16G16R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, A16B16G16R16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, A8B8G8R8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, X8B8G8R8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, A8R8G8B8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR,
+    SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9, X8R8G8B8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  //D3D9EX
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_R32F, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, R32F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_R16F, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, R16F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, L16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_L16, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, L16, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, L8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_L8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, L8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, G32R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_G32R32F, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, G32R32F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, G16R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_G16R16F, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, G16R16F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, G16R16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_G16R16, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, G16R16, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A8L8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A8L8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A8L8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A32B32G32R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A32B32G32R32F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A16B16G16R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A16B16G16R16F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A16B16G16R16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A16B16G16R16, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A8B8G8R8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A8B8G8R8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, X8B8G8R8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, X8B8G8R8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A8R8G8B8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, A8R8G8B8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, X8R8G8B8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR,
+    SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (D3D9EX, X8R8G8B8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  //DXVA
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_R32F, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, R32F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_R16F, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, R16F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, L16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_L16, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, L16, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, L8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_L8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, L8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, G32R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_G32R32F, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, G32R32F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, G16R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_G16R16F, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, G16R16F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, G16R16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_G16R16, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, G16R16, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A8L8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A8L8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A8L8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A32B32G32R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A32B32G32R32F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A16B16G16R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A16B16G16R16F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A16B16G16R16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A16B16G16R16, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A8B8G8R8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A8B8G8R8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, X8B8G8R8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, X8B8G8R8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A8R8G8B8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, A8R8G8B8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_DISABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, X8R8G8B8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR,
+    SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_ENABLED) != 0)
+  {
+    log_error("\nTest case (DXVA, X8R8G8B8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+
+#else
+  return TEST_NOT_IMPLEMENTED;
+#endif
+
+  return result.Result();
+}

diff --git a/test_extensions/media_sharing/utils.cpp b/test_extensions/media_sharing/utils.cpp
new file mode 100644
index 0000000..f1f0f54
--- /dev/null
+++ b/test_extensions/media_sharing/utils.cpp

@@ -0,0 +1,1672 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "utils.h"
+
+#include "harness/errorHelpers.h"
+#include "harness/rounding_mode.h"
+
+#include <math.h>
+
+static RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode;
+
+
+// Starts with an aggregated result of TEST_PASS and no sub-result recorded
+// yet (TEST_NORESULT).
+CResult::CResult()
+  : _result(TEST_PASS)
+  , _resultLast(TEST_NORESULT)
+{
+}
+
+// No cleanup is performed here.
+CResult::~CResult() {}
+
+// Returns the stored last sub-result: the value handed to the latest
+// ResultSub() call, or TEST_NORESULT right after construction.
+CResult::TTestResult CResult::ResultLast() const
+{
+  return this->_resultLast;
+}
+
+// Converts the aggregated result into an integer status code:
+//   TEST_NORESULT / TEST_NOTSUPPORTED / TEST_PASS -> 0
+//   TEST_FAIL                                     -> 1
+//   TEST_ERROR                                    -> 2
+//   any other value                               -> -1
+int CResult::Result() const
+{
+  if (_result == TEST_FAIL)
+    return 1;
+
+  if (_result == TEST_ERROR)
+    return 2;
+
+  const bool countsAsSuccess = (_result == TEST_NORESULT)
+    || (_result == TEST_NOTSUPPORTED)
+    || (_result == TEST_PASS);
+
+  return countsAsSuccess ? 0 : -1;
+}
+
+// Records `result` as the latest sub-result and raises the aggregated result
+// to it when its enum value is numerically greater than the current one.
+void CResult::ResultSub( TTestResult result )
+{
+  const int incoming = static_cast<int>(result);
+  const int current = static_cast<int>(_result);
+
+  _resultLast = result;
+
+  if (incoming > current)
+  {
+    _result = result;
+  }
+}
+
+// Writes a human-readable name for `contextCreateFunction` into
+// `contextFunction`. Unrecognized values yield "Unknown" and are reported
+// through log_error.
+void FunctionContextCreateToString(TContextFuncType contextCreateFunction, std::string &contextFunction)
+{
+  switch(contextCreateFunction)
+  {
+  case CONTEXT_CREATE_DEFAULT:
+    contextFunction = "CreateContext";
+    break;
+  case CONTEXT_CREATE_FROM_TYPE:
+    contextFunction = "CreateContextFromType";
+    break;
+  default:
+    contextFunction = "Unknown";
+    // Newline-terminated so the next log entry does not run into this one.
+    log_error("FunctionContextCreateToString(): Unknown create function enum!\n");
+    break;
+  }
+}
+
+// Writes a short printable name ("D3D9", "D3D9EX" or "DXVA") for
+// `adapterType` into `adapter`. Unrecognized values yield "Unknown" and are
+// reported through log_error.
+void AdapterToString(cl_dx9_media_adapter_type_khr adapterType, std::string &adapter)
+{
+  switch(adapterType)
+  {
+  case CL_ADAPTER_D3D9_KHR:
+    adapter = "D3D9";
+    break;
+  case CL_ADAPTER_D3D9EX_KHR:
+    adapter = "D3D9EX";
+    break;
+  case CL_ADAPTER_DXVA_KHR:
+    adapter = "DXVA";
+    break;
+  default:
+    adapter = "Unknown";
+    // Newline-terminated so the next log entry does not run into this one.
+    log_error("AdapterToString(): Unknown adapter type!\n");
+    break;
+  }
+}
+
+// Maps a media adapter type to the matching CL_CONTEXT_ADAPTER_*_KHR
+// constant. Unrecognized adapter types are reported through log_error and
+// mapped to 0.
+cl_context_info AdapterTypeToContextInfo( cl_dx9_media_adapter_type_khr adapterType )
+{
+  switch (adapterType)
+  {
+  case CL_ADAPTER_D3D9_KHR:
+    return CL_CONTEXT_ADAPTER_D3D9_KHR;
+  case CL_ADAPTER_D3D9EX_KHR:
+    return CL_CONTEXT_ADAPTER_D3D9EX_KHR;
+  case CL_ADAPTER_DXVA_KHR:
+    return CL_CONTEXT_ADAPTER_DXVA_KHR;
+  default:
+    // Newline-terminated so the next log entry does not run into this one.
+    log_error("AdapterTypeToContextInfo(): Unknown adapter type!\n");
+    return 0;
+  }
+}
+
+// Fills `yuv` with a synthetic gradient laid out as a width x height Y plane
+// followed by height / 2 rows of interleaved chroma byte pairs
+// (width * height * 3 / 2 bytes in total).
+//
+// Values ramp from valueMin towards valueMax along both axes; valueAdd is
+// scaled by 255 and added to every sample.
+void YUVGenerateNV12( std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height,
+                     cl_uchar valueMin, cl_uchar valueMax, double valueAdd )
+{
+  // Drop any previous contents and zero-fill the whole buffer.
+  yuv.assign(width * height * 3 / 2, 0);
+
+  const double lowest = static_cast<double>(valueMin);
+  const double highest = static_cast<double>(valueMax);
+  const double bias = static_cast<double>(valueAdd * 255.0);
+  const double columnStep = (highest - lowest) / static_cast<double>(width);
+  const double rowStep = (highest - lowest) / static_cast<double>(height);
+
+  // Y plane: one sample per pixel.
+  for (unsigned int row = 0; row < height; ++row)
+  {
+    const unsigned int rowStart = row * width;
+    const double rowTerm = static_cast<double>(rowStep * row) / 2;
+    for (unsigned int col = 0; col < width; ++col)
+    {
+      const double colTerm = static_cast<double>(columnStep * col) / 2;
+      yuv.at(rowStart + col) = static_cast<cl_uchar>(lowest + colTerm + rowTerm + bias);
+    }
+  }
+
+  // Interleaved chroma rows, stored directly after the Y plane.
+  const unsigned int halfWidth = width / 2;
+  const unsigned int halfHeight = height / 2;
+  for (unsigned int row = 0; row < halfHeight; ++row)
+  {
+    const unsigned int rowStart = width * height + row * width;
+    const double firstRowTerm = static_cast<double>(rowStep * row) / 2;
+    const double secondRowTerm = static_cast<double>(rowStep * (height / 2 + row)) / 2;
+    for (unsigned int col = 0; col < halfWidth; ++col)
+    {
+      const double firstColTerm = static_cast<double>(columnStep * col) / 2;
+      const double secondColTerm = static_cast<double>(columnStep * (width / 2 + col)) / 2;
+
+      yuv.at(rowStart + col * 2) = static_cast<cl_uchar>(lowest + firstColTerm + firstRowTerm + bias);
+      yuv.at(rowStart + col * 2 + 1) = static_cast<cl_uchar>(lowest + secondColTerm + secondRowTerm + bias);
+    }
+  }
+}
+
+// Fills `yuv` with a synthetic gradient laid out as three consecutive planes:
+// a width x height Y plane, then two (width / 2) x (height / 2) chroma planes
+// (V first, then U), width * height * 3 / 2 bytes in total.
+//
+// Values ramp from valueMin towards valueMax along both axes; valueAdd is
+// scaled by 255 and added to every sample.
+void YUVGenerateYV12( std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height, cl_uchar valueMin, cl_uchar valueMax, double valueAdd /*= 0.0*/ )
+{
+  // Drop any previous contents and zero-fill the whole buffer.
+  yuv.assign(width * height * 3 / 2, 0);
+
+  const double lowest = static_cast<double>(valueMin);
+  const double highest = static_cast<double>(valueMax);
+  const double bias = static_cast<double>(valueAdd * 255.0);
+  const double columnStep = (highest - lowest) / static_cast<double>(width);
+  const double rowStep = (highest - lowest) / static_cast<double>(height);
+
+  // Start offsets of the two chroma planes inside the buffer.
+  const unsigned int vPlaneBase = width * height;
+  const unsigned int uPlaneBase = vPlaneBase + width * height / 4;
+
+  // Y plane
+  for (unsigned int row = 0; row < height; ++row)
+  {
+    const unsigned int rowStart = row * width;
+    const double rowTerm = static_cast<double>(rowStep * row) / 2;
+    for (unsigned int col = 0; col < width; ++col)
+    {
+      const double colTerm = static_cast<double>(columnStep * col) / 2;
+      yuv.at(rowStart + col) = static_cast<cl_uchar>(lowest + colTerm + rowTerm + bias);
+    }
+  }
+
+  // V plane
+  for (unsigned int row = 0; row < height / 2; ++row)
+  {
+    const unsigned int rowStart = vPlaneBase + row * width / 2;
+    const double rowTerm = static_cast<double>(rowStep * row) / 2;
+    for (unsigned int col = 0; col < width / 2; ++col)
+    {
+      const double colTerm = static_cast<double>(columnStep * col) / 2;
+      yuv.at(rowStart + col) = static_cast<cl_uchar>(lowest + colTerm + rowTerm + bias);
+    }
+  }
+
+  // U plane: same column ramp, row ramp shifted by height / 2
+  for (unsigned int row = 0; row < height / 2; ++row)
+  {
+    const unsigned int rowStart = uPlaneBase + row * width / 2;
+    const double rowTerm = static_cast<double>(rowStep * (height / 2 + row)) / 2;
+    for (unsigned int col = 0; col < width / 2; ++col)
+    {
+      const double colTerm = static_cast<double>(columnStep * col) / 2;
+      yuv.at(rowStart + col) = static_cast<cl_uchar>(lowest + colTerm + rowTerm + bias);
+    }
+  }
+}
+
+
+// Generates test image data for the given surface format by dispatching to
+// the NV12 or YV12 generator. Returns false (after logging) for any other
+// format, true otherwise.
+bool YUVGenerate( TSurfaceFormat surfaceFormat, std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height, cl_uchar valueMin, cl_uchar valueMax, double valueAdd /*= 0.0*/ )
+{
+  if (surfaceFormat == SURFACE_FORMAT_NV12)
+  {
+    YUVGenerateNV12(yuv, width, height, valueMin, valueMax, valueAdd);
+    return true;
+  }
+
+  if (surfaceFormat == SURFACE_FORMAT_YV12)
+  {
+    YUVGenerateYV12(yuv, width, height, valueMin, valueMax, valueAdd);
+    return true;
+  }
+
+  log_error("YUVGenerate(): Invalid surface type\n");
+  return false;
+}
+
+// Copies NV12 data from `yuv` (tightly packed, `width` bytes per row) into the
+// locked D3D9 surface, honouring the surface pitch: `height` rows of Y
+// followed by height / 2 rows of interleaved chroma.
+// Returns false if the surface cannot be locked; always returns false on
+// non-Windows builds.
+bool YUVSurfaceSetNV12( std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv,
+                       unsigned int width, unsigned int height )
+{
+#if defined(_WIN32)
+  CD3D9SurfaceWrapper *wrapper = static_cast<CD3D9SurfaceWrapper *>(surface.get());
+  D3DLOCKED_RECT locked;
+  if (FAILED((*wrapper)->LockRect(&locked, NULL, 0)))
+  {
+    log_error("YUVSurfaceSetNV12(): Surface lock failed\n");
+    return false;
+  }
+
+  const size_t rowStride = locked.Pitch / sizeof(cl_uchar);
+  const size_t rowBytes = width * sizeof(cl_uchar);
+  cl_uchar *const base = static_cast<cl_uchar *>(locked.pBits);
+
+  // Y plane
+  for (size_t row = 0; row < height; ++row)
+  {
+    cl_uchar *dstRow = base + row * rowStride;
+    memcpy(dstRow, &yuv.at(row * width), rowBytes);
+  }
+
+  // Interleaved chroma rows, placed right after the Y plane in the surface
+  for (size_t row = 0; row < height / 2; ++row)
+  {
+    cl_uchar *dstRow = base + height * rowStride + row * rowStride;
+    memcpy(dstRow, &yuv.at(width * height + row * width), rowBytes);
+  }
+
+  (*wrapper)->UnlockRect();
+
+  return true;
+
+#else
+  return false;
+#endif
+}
+
+// Copies YV12 data from `yuv` (tightly packed planes) into the locked D3D9
+// surface: `height` Y rows at full pitch, then two chroma planes of
+// height / 2 rows each, written at half the surface pitch.
+// Returns false if the surface cannot be locked; always returns false on
+// non-Windows builds.
+bool YUVSurfaceSetYV12( std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv,
+                       unsigned int width, unsigned int height )
+{
+#if defined(_WIN32)
+  CD3D9SurfaceWrapper *wrapper = static_cast<CD3D9SurfaceWrapper *>(surface.get());
+  D3DLOCKED_RECT locked;
+  if (FAILED((*wrapper)->LockRect(&locked, NULL, 0)))
+  {
+    log_error("YUVSurfaceSetYV12(): Surface lock failed!\n");
+    return false;
+  }
+
+  const size_t rowStride = locked.Pitch / sizeof(cl_uchar);
+  const size_t halfStride = rowStride / 2;
+  const size_t rowBytes = width * sizeof(cl_uchar);
+  const size_t halfRowBytes = rowBytes / 2;
+  cl_uchar *const base = static_cast<cl_uchar *>(locked.pBits);
+
+  // Start offsets of the two chroma planes, in the surface and in `yuv`.
+  const size_t firstChromaDst = height * rowStride;
+  const size_t secondChromaDst = firstChromaDst + halfStride * height / 2;
+  const size_t firstChromaSrc = width * height;
+  const size_t secondChromaSrc = firstChromaSrc + width * height / 4;
+
+  // Y plane
+  for (size_t row = 0; row < height; ++row)
+  {
+    memcpy(base + row * rowStride, &yuv.at(row * width), rowBytes);
+  }
+
+  // first chroma plane
+  for (size_t row = 0; row < height / 2; ++row)
+  {
+    memcpy(base + firstChromaDst + row * halfStride, &yuv.at(firstChromaSrc + row * halfRowBytes), halfRowBytes);
+  }
+
+  // second chroma plane
+  for (size_t row = 0; row < height / 2; ++row)
+  {
+    memcpy(base + secondChromaDst + row * halfStride, &yuv.at(secondChromaSrc + row * halfRowBytes), halfRowBytes);
+  }
+
+  (*wrapper)->UnlockRect();
+
+  return true;
+
+#else
+  return false;
+#endif
+}
+
+// Uploads `yuv` into `surface` using the writer that matches `surfaceFormat`
+// (NV12 or YV12). Returns the writer's result; any other format is logged
+// and yields false.
+bool YUVSurfaceSet(TSurfaceFormat surfaceFormat, std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height )
+{
+  switch (surfaceFormat)
+  {
+  case SURFACE_FORMAT_NV12:
+    return YUVSurfaceSetNV12(surface, yuv, width, height);
+  case SURFACE_FORMAT_YV12:
+    return YUVSurfaceSetYV12(surface, yuv, width, height);
+  default:
+    log_error("YUVSurfaceSet(): Invalid surface type!\n");
+    return false;
+  }
+}
+
+// Reads NV12 data back from the locked D3D9 surface into `yuv` as tightly
+// packed rows of `width` bytes: `height` rows of Y followed by height / 2
+// rows of interleaved chroma. `yuv` is written through at(), so it must
+// already be large enough to hold the image.
+// Returns false if the surface cannot be locked; always returns false on
+// non-Windows builds.
+bool YUVSurfaceGetNV12( std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv,
+                       unsigned int width, unsigned int height )
+{
+#if defined(_WIN32)
+  CD3D9SurfaceWrapper *wrapper = static_cast<CD3D9SurfaceWrapper *>(surface.get());
+  D3DLOCKED_RECT locked;
+  if (FAILED((*wrapper)->LockRect(&locked, NULL, 0)))
+  {
+    log_error("YUVSurfaceGetNV12(): Surface lock failed!\n");
+    return false;
+  }
+
+  const size_t rowStride = locked.Pitch / sizeof(cl_uchar);
+  const size_t rowBytes = width * sizeof(cl_uchar);
+  const cl_uchar *const base = static_cast<cl_uchar *>(locked.pBits);
+
+  // Start of the chroma rows, in `yuv` and in the surface.
+  const size_t chromaDst = width * height;
+  const size_t chromaSrc = rowStride * height;
+
+  // Y plane
+  for (size_t row = 0; row < height; ++row)
+  {
+    memcpy(&yuv.at(row * width), base + row * rowStride, rowBytes);
+  }
+
+  // interleaved chroma rows
+  for (size_t row = 0; row < height / 2; ++row)
+  {
+    memcpy(&yuv.at(chromaDst + row * width), base + chromaSrc + row * rowStride, rowBytes);
+  }
+
+  (*wrapper)->UnlockRect();
+
+  return true;
+
+#else
+  return false;
+#endif
+}
+
+// Reads YV12 data back from the locked D3D9 surface into `yuv` as tightly
+// packed planes: `height` Y rows read at full pitch, then two chroma planes
+// of height / 2 rows each, read at half the surface pitch. `yuv` is written
+// through at(), so it must already be large enough to hold the image.
+// Returns false if the surface cannot be locked; always returns false on
+// non-Windows builds.
+bool YUVSurfaceGetYV12( std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height )
+{
+#if defined(_WIN32)
+  CD3D9SurfaceWrapper *wrapper = static_cast<CD3D9SurfaceWrapper *>(surface.get());
+  D3DLOCKED_RECT locked;
+  if (FAILED((*wrapper)->LockRect(&locked, NULL, 0)))
+  {
+    log_error("YUVSurfaceGetYV12(): Surface lock failed!\n");
+    return false;
+  }
+
+  const size_t rowStride = locked.Pitch / sizeof(cl_uchar);
+  const size_t halfStride = rowStride / 2;
+  const size_t rowBytes = width * sizeof(cl_uchar);
+  const size_t halfRowBytes = rowBytes / 2;
+  const cl_uchar *const base = static_cast<cl_uchar *>(locked.pBits);
+
+  // Start offsets of the two chroma planes, in the surface and in `yuv`.
+  const size_t firstChromaSrc = rowStride * height;
+  const size_t secondChromaSrc = firstChromaSrc + halfStride * height / 2;
+  const size_t firstChromaDst = width * height;
+  const size_t secondChromaDst = firstChromaDst + width * height / 4;
+
+  // Y plane
+  for (size_t row = 0; row < height; ++row)
+  {
+    memcpy(&yuv.at(row * width), base + row * rowStride, rowBytes);
+  }
+
+  // first chroma plane
+  for (size_t row = 0; row < height / 2; ++row)
+  {
+    memcpy(&yuv.at(firstChromaDst + row * halfRowBytes), base + firstChromaSrc + row * halfStride, halfRowBytes);
+  }
+
+  // second chroma plane
+  for (size_t row = 0; row < height / 2; ++row)
+  {
+    memcpy(&yuv.at(secondChromaDst + row * halfRowBytes), base + secondChromaSrc + row * halfStride, halfRowBytes);
+  }
+
+  (*wrapper)->UnlockRect();
+
+  return true;
+
+#else
+  return false;
+#endif
+}
+
+// Downloads the contents of `surface` into `yuv` using the reader that
+// matches `surfaceFormat` (NV12 or YV12). Returns the reader's result; any
+// other format is logged and yields false.
+bool YUVSurfaceGet(TSurfaceFormat surfaceFormat, std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv,
+                   unsigned int width, unsigned int height )
+{
+  switch (surfaceFormat)
+  {
+  case SURFACE_FORMAT_NV12:
+    return YUVSurfaceGetNV12(surface, yuv, width, height);
+  case SURFACE_FORMAT_YV12:
+    return YUVSurfaceGetYV12(surface, yuv, width, height);
+  default:
+    log_error("YUVSurfaceGet(): Invalid surface type!\n");
+    return false;
+  }
+}
+
+// Compares two buffers laid out as a width x height Y plane followed by
+// height / 2 rows of interleaved chroma byte pairs. Logs the first mismatch
+// (plane, reference value, test value, coordinates) and returns false;
+// returns true when every compared byte matches.
+// Elements are fetched with at(), so a buffer that is too small for the given
+// dimensions raises std::out_of_range.
+bool YUVCompareNV12( const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
+                    unsigned int width, unsigned int height )
+{
+  //plane 0 verification
+  size_t offset = 0;
+  for (size_t y = 0; y < height; ++y)
+  {
+    size_t plane0Offset = offset + width * y;
+    for (size_t x = 0; x < width; ++x)
+    {
+      // Bounds-checked access, matching the chroma checks below.
+      if (yuvTest.at(plane0Offset + x) != yuvRef.at(plane0Offset + x))
+      {
+        // x and y are size_t; convert them so the arguments match the %i conversions.
+        log_error("Plane 0 (Y) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
+          yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], static_cast<int>(x), static_cast<int>(y));
+        return false;
+      }
+    }
+  }
+
+  //plane 1 and 2 verification
+  offset += width * height;
+  for (size_t y = 0; y < height / 2; ++y)
+  {
+    size_t plane12Offset = offset + width * y;
+    for (size_t x = 0; x < width / 2; ++x)
+    {
+      // Even byte of each interleaved pair.
+      if (yuvTest.at(plane12Offset + 2 * x) != yuvRef.at(plane12Offset + 2 * x))
+      {
+        log_error("Plane 1 (U) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
+          yuvRef[plane12Offset + 2 * x], yuvTest[plane12Offset + 2 * x], static_cast<int>(x), static_cast<int>(y));
+        return false;
+      }
+
+      // Odd byte of each interleaved pair.
+      if (yuvTest.at(plane12Offset + 2 * x + 1) != yuvRef.at(plane12Offset + 2 * x + 1))
+      {
+        log_error("Plane 2 (V) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
+          yuvRef[plane12Offset + 2 * x + 1], yuvTest[plane12Offset + 2 * x + 1], static_cast<int>(x), static_cast<int>(y));
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+// Compares two buffers laid out as three consecutive planes: a
+// width x height Y plane followed by two (width / 2) x (height / 2) chroma
+// planes. Logs the first mismatch (plane, reference value, test value,
+// coordinates) and returns false; returns true when every compared byte
+// matches.
+// Elements are fetched with at(), so a buffer that is too small for the given
+// dimensions raises std::out_of_range.
+bool YUVCompareYV12( const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
+                    unsigned int width, unsigned int height )
+{
+  //plane 0 verification
+  size_t offset = 0;
+  for (size_t y = 0; y < height; ++y)
+  {
+    size_t plane0Offset = width * y;
+    for (size_t x = 0; x < width; ++x)
+    {
+      if (yuvTest.at(plane0Offset + x) != yuvRef.at(plane0Offset + x))
+      {
+        // x and y are size_t; convert them so the arguments match the %i conversions.
+        log_error("Plane 0 (Y) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
+          yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], static_cast<int>(x), static_cast<int>(y));
+        return false;
+      }
+    }
+  }
+
+  //plane 1 verification
+  offset += width * height;
+  for (size_t y = 0; y < height / 2; ++y)
+  {
+    size_t plane1Offset = offset + width * y / 2;
+    for (size_t x = 0; x < width / 2; ++x)
+    {
+      if (yuvTest.at(plane1Offset + x) != yuvRef.at(plane1Offset + x))
+      {
+        log_error("Plane 1 (V) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
+          yuvRef[plane1Offset + x], yuvTest[plane1Offset + x], static_cast<int>(x), static_cast<int>(y));
+        return false;
+      }
+    }
+  }
+
+  //plane 2 verification
+  offset += width * height / 4;
+  for (size_t y = 0; y < height / 2; ++y)
+  {
+    size_t plane2Offset = offset + width * y / 2;
+    for (size_t x = 0; x < width / 2; ++x)
+    {
+      if (yuvTest.at(plane2Offset + x) != yuvRef.at(plane2Offset + x))
+      {
+        log_error("Plane 2 (U) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
+          yuvRef[plane2Offset + x], yuvTest[plane2Offset + x], static_cast<int>(x), static_cast<int>(y));
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+// Compares a test image against a reference image using the comparator that
+// matches `surfaceFormat` (NV12 or YV12). Logs and returns false on a
+// mismatch or on any other format; returns true when the images match.
+bool YUVCompare( TSurfaceFormat surfaceFormat, const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
+                unsigned int width, unsigned int height )
+{
+  bool identical = false;
+
+  switch (surfaceFormat)
+  {
+  case SURFACE_FORMAT_NV12:
+    identical = YUVCompareNV12(yuvTest, yuvRef, width, height);
+    break;
+  case SURFACE_FORMAT_YV12:
+    identical = YUVCompareYV12(yuvTest, yuvRef, width, height);
+    break;
+  default:
+    log_error("YUVCompare(): Invalid surface type!\n");
+    return false;
+  }
+
+  if (identical)
+    return true;
+
+  log_error("OCL object is different than expected!\n");
+  return false;
+}
+
+// Fills `data` with width * height pixels of `channelNum` float channels each
+// (1, 2 or 4), forming gradients that ramp from cmin towards cmax across the
+// columns (X) and rows (Y), with `add` added to every value:
+//   1 channel : cmin + X/2 + Y/2
+//   2 channels: (cmin + X, cmin + Y)
+//   4 channels: (cmin + X, cmin + Y, cmin + X/2, cmin + Y/2)
+// Any other channel count is reported through log_error and generation stops.
+// `surfaceFormat` and `type` are not used by this overload.
+void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<float> &data, unsigned int width, unsigned int height,
+                  unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ )
+{
+  data.clear();
+  data.reserve(width * height * channelNum);
+
+  double valueMin = static_cast<double>(cmin);
+  double valueMax = static_cast<double>(cmax);
+  double stepX = (valueMax - valueMin) / static_cast<double>(width);
+  double stepY = (valueMax - valueMin) /static_cast<double>(height);
+  double valueAdd = static_cast<double>(add);
+  for (unsigned int i = 0; i < height; ++i)
+  {
+    double valueY = static_cast<double>(stepY * i);
+    for (unsigned int j = 0; j < width; ++j)
+    {
+      double valueX = static_cast<double>(stepX * j);
+      switch (channelNum)
+      {
+      case 1:
+        data.push_back(static_cast<float>(valueMin + valueX / 2 + valueY / 2 + valueAdd));
+        break;
+      case 2:
+        data.push_back(static_cast<float>(valueMin + valueX + valueAdd));
+        data.push_back(static_cast<float>(valueMin + valueY + valueAdd));
+        break;
+      case 4:
+        data.push_back(static_cast<float>(valueMin + valueX + valueAdd));
+        data.push_back(static_cast<float>(valueMin + valueY + valueAdd));
+        data.push_back(static_cast<float>(valueMin + valueX / 2 + valueAdd));
+        data.push_back(static_cast<float>(valueMin + valueY / 2 + valueAdd));
+        break;
+      default:
+        // Newline-terminated so the next log entry does not run into this one.
+        log_error("DataGenerate(): invalid channel number!\n");
+        return;
+      }
+    }
+  }
+}
+
+// Fills `data` with a deterministic 16-bit gradient: width * height pixels,
+// channelNum values per pixel (1, 2 or 4 are accepted).
+//  - CL_HALF_FLOAT:  each value is computed as a float and converted with
+//                    convert_float_to_half().
+//  - CL_UNORM_INT16: the ramps and `add` are multiplied by 65535 and the sum
+//                    is converted to cl_ushort.
+// Any other `type` logs an error and returns with `data` empty. An unsupported
+// channelNum logs an error at the first pixel and stops generation.
+// surfaceFormat is not used by this overload.
+// NOTE(review): in the CL_UNORM_INT16 path cmin is added without the 65535
+// scale that the ramps and `add` receive - confirm this is intended for
+// callers that pass a non-zero cmin.
+void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<cl_half> &data, unsigned int width, unsigned int height,
+                  unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ )
+{
+  data.clear();
+  data.reserve(width * height * channelNum);
+
+  const double base = static_cast<double>(cmin);
+  const double top = static_cast<double>(cmax);
+  const double perColumn = (top - base) / static_cast<double>(width);
+  const double perRow = (top - base) / static_cast<double>(height);
+
+  if (type == CL_HALF_FLOAT)
+  {
+    const double bias = static_cast<double>(add);
+
+    for (unsigned int row = 0; row < height; ++row)
+    {
+      const double rampY = perRow * row;
+      for (unsigned int col = 0; col < width; ++col)
+      {
+        const double rampX = perColumn * col;
+
+        if (channelNum == 1)
+        {
+          data.push_back(convert_float_to_half(static_cast<float>(base + rampX / 2 + rampY / 2 + bias)));
+        }
+        else if (channelNum == 2)
+        {
+          data.push_back(convert_float_to_half(static_cast<float>(base + rampX + bias)));
+          data.push_back(convert_float_to_half(static_cast<float>(base + rampY + bias)));
+        }
+        else if (channelNum == 4)
+        {
+          data.push_back(convert_float_to_half(static_cast<float>(base + rampX + bias)));
+          data.push_back(convert_float_to_half(static_cast<float>(base + rampY + bias)));
+          data.push_back(convert_float_to_half(static_cast<float>(base + rampX / 2 + bias)));
+          data.push_back(convert_float_to_half(static_cast<float>(base + rampY / 2 + bias)));
+        }
+        else
+        {
+          log_error("DataGenerate(): invalid channel number!");
+          return;
+        }
+      }
+    }
+  }
+  else if (type == CL_UNORM_INT16)
+  {
+    const double range = 65535;
+    const double bias = static_cast<double>(add * range);
+
+    for (unsigned int row = 0; row < height; ++row)
+    {
+      const double rampY = perRow * row * range;
+      for (unsigned int col = 0; col < width; ++col)
+      {
+        const double rampX = perColumn * col * range;
+
+        if (channelNum == 1)
+        {
+          data.push_back(static_cast<cl_ushort>(base + rampX / 2 + rampY / 2 + bias));
+        }
+        else if (channelNum == 2)
+        {
+          data.push_back(static_cast<cl_ushort>(base + rampX + bias));
+          data.push_back(static_cast<cl_ushort>(base + rampY + bias));
+        }
+        else if (channelNum == 4)
+        {
+          data.push_back(static_cast<cl_ushort>(base + rampX + bias));
+          data.push_back(static_cast<cl_ushort>(base + rampY + bias));
+          data.push_back(static_cast<cl_ushort>(base + rampX / 2 + bias));
+          data.push_back(static_cast<cl_ushort>(base + rampY / 2 + bias));
+        }
+        else
+        {
+          log_error("DataGenerate(): invalid channel number!");
+          return;
+        }
+      }
+    }
+  }
+  else
+  {
+    log_error("DataGenerate(): unknown data type!");
+    return;
+  }
+}
+
+// Fills `data` with a deterministic 8-bit gradient: width * height pixels,
+// channelNum values per pixel (1, 2 or 4 are accepted).
+// The ramps and `add` are multiplied by 255 and the sum is converted to
+// cl_uchar. For SURFACE_FORMAT_X8R8G8B8 the fourth channel is always 0xff.
+// `type` is not used by this overload. An unsupported channelNum logs an
+// error at the first pixel and stops generation, leaving `data` empty.
+void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<cl_uchar> &data, unsigned int width, unsigned int height,
+                  unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ )
+{
+  data.clear();
+  data.reserve(width * height * channelNum);
+
+  const double base = static_cast<double>(cmin);
+  const double top = static_cast<double>(cmax);
+  const double perColumn = (top - base) / static_cast<double>(width);
+  const double perRow = (top - base) / static_cast<double>(height);
+
+  const double range = 255;
+  const double bias = static_cast<double>(add * range);
+  const bool opaqueFourthChannel = (surfaceFormat == SURFACE_FORMAT_X8R8G8B8);
+
+  for (unsigned int row = 0; row < height; ++row)
+  {
+    const double rampY = perRow * row * range;
+    for (unsigned int col = 0; col < width; ++col)
+    {
+      const double rampX = perColumn * col * range;
+
+      if (channelNum == 1)
+      {
+        data.push_back(static_cast<cl_uchar>(base + rampX / 2 + rampY / 2 + bias));
+      }
+      else if (channelNum == 2)
+      {
+        data.push_back(static_cast<cl_uchar>(base + rampX + bias));
+        data.push_back(static_cast<cl_uchar>(base + rampY + bias));
+      }
+      else if (channelNum == 4)
+      {
+        data.push_back(static_cast<cl_uchar>(base + rampX + bias));
+        data.push_back(static_cast<cl_uchar>(base + rampY + bias));
+        data.push_back(static_cast<cl_uchar>(base + rampX / 2 + bias));
+        if (opaqueFourthChannel)
+        {
+          data.push_back(static_cast<cl_uchar>(0xff));
+        }
+        else
+        {
+          data.push_back(static_cast<cl_uchar>(base + rampY / 2 + bias));
+        }
+      }
+      else
+      {
+        log_error("DataGenerate(): invalid channel number!");
+        return;
+      }
+    }
+  }
+}
+
+// Compares two interleaved float images element by element.
+//   dataTest / dataExp - tested and reference images, channelNum values per
+//                        pixel, rows stored one after another
+//   width, height      - image size in pixels
+//   channelNum         - number of values per pixel
+// Returns true when every pair of values differs by at most 1e-6. The first
+// larger difference is logged and false is returned. Elements are read with
+// at(), so a buffer that is too short raises std::out_of_range.
+// surfaceFormat and type are not used by this overload.
+bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<float> &dataTest, const std::vector<float> &dataExp,
+                 unsigned int width, unsigned int height, unsigned int channelNum)
+{
+  const float epsilon = 0.000001f;
+  for (unsigned int i = 0; i < height; ++i)
+  {
+    const unsigned int offset = i * width * channelNum;
+    for (unsigned int j = 0; j < width; ++j)
+    {
+      for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
+      {
+        const unsigned int index = offset + j * channelNum + planeIdx;
+        const float test = dataTest.at(index);
+        const float ref = dataExp.at(index);
+
+        // fabsf() keeps the difference in floating point. An unqualified
+        // abs() may resolve to the integer overload, which truncates any
+        // difference below 1.0 to zero and hides real mismatches.
+        if (fabsf(test - ref) > epsilon)
+        {
+          log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %f, expected value = %f\n",
+            j, i, planeIdx, test, ref);
+          return false;
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+// Compares two interleaved 16-bit images element by element.
+//   type == CL_HALF_FLOAT  : values are converted with convert_half_to_float()
+//                            and may differ by at most 0.001
+//   type == CL_UNORM_INT16 : raw values may differ by at most 1
+// Any other `type` logs an error and returns false.
+// Returns true when all values are within tolerance. The first value outside
+// the tolerance is logged and false is returned. Elements are read with at(),
+// so a buffer that is too short raises std::out_of_range.
+// surfaceFormat is not used by this overload.
+bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_half> &dataTest, const std::vector<cl_half> &dataExp,
+                 unsigned int width, unsigned int height, unsigned int channelNum)
+{
+  switch(type)
+  {
+  case CL_HALF_FLOAT:
+    {
+      const float epsilon = 0.001f;
+      for (unsigned int i = 0; i < height; ++i)
+      {
+        const unsigned int offset = i * width * channelNum;
+        for (unsigned int j = 0; j < width; ++j)
+        {
+          for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
+          {
+            const unsigned int index = offset + j * channelNum + planeIdx;
+            const float test = convert_half_to_float(dataTest.at(index));
+            const float ref = convert_half_to_float(dataExp.at(index));
+
+            // fabsf() keeps the difference in floating point. An unqualified
+            // abs() may resolve to the integer overload and truncate the
+            // difference to zero.
+            if (fabsf(test - ref) > epsilon)
+            {
+              log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %f, expected value = %f\n",
+                j, i, planeIdx, test, ref);
+              return false;
+            }
+          }
+        }
+      }
+    }
+    break;
+  case CL_UNORM_INT16:
+    {
+      const int epsilon = 1;
+      for (unsigned int i = 0; i < height; ++i)
+      {
+        const unsigned int offset = i * width * channelNum;
+        for (unsigned int j = 0; j < width; ++j)
+        {
+          for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
+          {
+            const unsigned int index = offset + j * channelNum + planeIdx;
+            const cl_ushort test = dataTest.at(index);
+            const cl_ushort ref = dataExp.at(index);
+
+            // Explicit integer distance, independent of which abs() overloads
+            // happen to be visible.
+            const int diff = static_cast<int>(test) - static_cast<int>(ref);
+            const int distance = (diff < 0) ? -diff : diff;
+            if (distance > epsilon)
+            {
+              log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %i, expected value = %i\n", j, i, planeIdx, test, ref);
+              return false;
+            }
+          }
+        }
+      }
+    }
+    break;
+  default:
+    log_error("DataCompare(): Invalid data format!");
+    return false;
+    break;
+  }
+
+  return true;
+}
+
+// Compares two interleaved 8-bit images for exact equality.
+//   dataTest / dataExp - tested and reference images, planeNum values per
+//                        pixel, rows stored one after another
+//   width, height      - image size in pixels
+//   planeNum           - number of values per pixel
+// For SURFACE_FORMAT_X8R8G8B8 the value at index 3 of every pixel is skipped.
+// Returns true when all compared values match. The first mismatch is logged
+// and false is returned. Elements are read with at(), so a buffer that is too
+// short raises std::out_of_range. `type` is not used by this overload.
+bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_uchar> &dataTest, const std::vector<cl_uchar> &dataExp,
+                 unsigned int width, unsigned int height, unsigned int planeNum )
+{
+  const bool skipFourthValue = (surfaceFormat == SURFACE_FORMAT_X8R8G8B8);
+
+  for (unsigned int row = 0; row < height; ++row)
+  {
+    for (unsigned int col = 0; col < width; ++col)
+    {
+      // Index of the first value belonging to pixel (col, row).
+      const unsigned int pixelBase = (row * width + col) * planeNum;
+
+      for (unsigned int channel = 0; channel < planeNum; ++channel)
+      {
+        if (skipFourthValue && channel == 3)
+        {
+          continue;
+        }
+
+        const cl_uchar actual = dataTest.at(pixelBase + channel);
+        const cl_uchar expected = dataExp.at(pixelBase + channel);
+        if (actual == expected)
+        {
+          continue;
+        }
+
+        log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %i, expected value = %i\n",
+          col, row, channel, actual, expected);
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+// Queries an image object with clGetImageInfo() and checks every reported
+// property against the expected value.
+//   object         - image to query
+//   formatExp      - expected channel order and channel data type
+//   elementSizeExp - expected CL_IMAGE_ELEMENT_SIZE
+//   rowPitchExp    - expected CL_IMAGE_ROW_PITCH: 0 requires the reported
+//                    pitch to be 0, otherwise the reported pitch must be at
+//                    least this value
+//   slicePitchExp  - expected CL_IMAGE_SLICE_PITCH, same rule as rowPitchExp
+//   widthExp, heightExp, depthExp - expected image dimensions
+//   planeExp       - expected CL_IMAGE_DX9_MEDIA_PLANE_KHR
+// Every failed query or mismatch is logged and checking continues, so one
+// call reports all problems. Returns true only when nothing was logged.
+bool GetImageInfo( cl_mem object, cl_image_format formatExp, size_t elementSizeExp, size_t rowPitchExp,
+                  size_t slicePitchExp, size_t widthExp, size_t heightExp, size_t depthExp , unsigned int planeExp)
+{
+  bool result = true;
+
+  // Zero-initialised so the comparison below never reads indeterminate
+  // memory when the query itself fails.
+  cl_image_format format = { 0, 0 };
+  if (clGetImageInfo(object, CL_IMAGE_FORMAT, sizeof(cl_image_format), &format, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_FORMAT) failed\n");
+    result = false;
+  }
+
+  if (formatExp.image_channel_order != format.image_channel_order || formatExp.image_channel_data_type != format.image_channel_data_type)
+  {
+    log_error("Value of CL_IMAGE_FORMAT is different than expected\n");
+    result = false;
+  }
+
+  // size_t values are cast to int before logging: passing a size_t to "%i"
+  // is a varargs type mismatch wherever size_t is wider than int.
+  size_t elementSize = 0;
+  if (clGetImageInfo(object, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elementSize, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_ELEMENT_SIZE) failed\n");
+    result = false;
+  }
+
+  if (elementSizeExp != elementSize)
+  {
+    log_error("Value of CL_IMAGE_ELEMENT_SIZE is different than expected (size: %i, exp size: %i)\n",
+      static_cast<int>(elementSize), static_cast<int>(elementSizeExp));
+    result = false;
+  }
+
+  size_t rowPitch = 0;
+  if (clGetImageInfo(object, CL_IMAGE_ROW_PITCH, sizeof(size_t), &rowPitch, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_ROW_PITCH) failed\n");
+    result = false;
+  }
+
+  // Expected 0 must match exactly; a non-zero expectation is a lower bound.
+  if ((rowPitchExp == 0 && rowPitchExp != rowPitch) || (rowPitchExp > 0 && rowPitchExp > rowPitch))
+  {
+    log_error("Value of CL_IMAGE_ROW_PITCH is different than expected (size: %i, exp size: %i)\n",
+      static_cast<int>(rowPitch), static_cast<int>(rowPitchExp));
+    result = false;
+  }
+
+  size_t slicePitch = 0;
+  if (clGetImageInfo(object, CL_IMAGE_SLICE_PITCH, sizeof(size_t), &slicePitch, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_SLICE_PITCH) failed\n");
+    result = false;
+  }
+
+  // Same rule as for the row pitch.
+  if ((slicePitchExp == 0 && slicePitchExp != slicePitch) || (slicePitchExp > 0 && slicePitchExp > slicePitch))
+  {
+    log_error("Value of CL_IMAGE_SLICE_PITCH is different than expected (size: %i, exp size: %i)\n",
+      static_cast<int>(slicePitch), static_cast<int>(slicePitchExp));
+    result = false;
+  }
+
+  size_t width = 0;
+  if (clGetImageInfo(object, CL_IMAGE_WIDTH, sizeof(size_t), &width, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_WIDTH) failed\n");
+    result = false;
+  }
+
+  if (widthExp != width)
+  {
+    log_error("Value of CL_IMAGE_WIDTH is different than expected (size: %i, exp size: %i)\n",
+      static_cast<int>(width), static_cast<int>(widthExp));
+    result = false;
+  }
+
+  size_t height = 0;
+  if (clGetImageInfo(object, CL_IMAGE_HEIGHT, sizeof(size_t), &height, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_HEIGHT) failed\n");
+    result = false;
+  }
+
+  if (heightExp != height)
+  {
+    log_error("Value of CL_IMAGE_HEIGHT is different than expected (size: %i, exp size: %i)\n",
+      static_cast<int>(height), static_cast<int>(heightExp));
+    result = false;
+  }
+
+  size_t depth = 0;
+  if (clGetImageInfo(object, CL_IMAGE_DEPTH, sizeof(size_t), &depth, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_DEPTH) failed\n");
+    result = false;
+  }
+
+  if (depthExp != depth)
+  {
+    log_error("Value of CL_IMAGE_DEPTH is different than expected (size: %i, exp size: %i)\n",
+      static_cast<int>(depth), static_cast<int>(depthExp));
+    result = false;
+  }
+
+  // 99 is a sentinel that stays in place when the query does not write a
+  // plane index. The messages name the token that is actually queried.
+  unsigned int plane = 99;
+  size_t paramSize = 0;
+  if (clGetImageInfo(object, CL_IMAGE_DX9_MEDIA_PLANE_KHR, sizeof(unsigned int), &plane, &paramSize) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_DX9_MEDIA_PLANE_KHR) failed\n");
+    result = false;
+  }
+
+  if (planeExp != plane)
+  {
+    log_error("Value of CL_IMAGE_DX9_MEDIA_PLANE_KHR is different than expected (plane: %i, exp plane: %i)\n", plane, planeExp);
+    result = false;
+  }
+
+  return result;
+}
+
+// Queries the DX9 media-sharing properties of a memory object with
+// clGetMemObjectInfo() and checks them against the surface it was created
+// from.
+//   object         - memory object to query
+//   adapterType    - expected CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR value
+//   surface        - wrapper of the surface the object was created from
+//   shareHandleExp - expected shared handle
+// On Windows every failed query or mismatch is logged and checking continues;
+// the function returns true only when nothing was logged. On other platforms
+// it returns false immediately for the three known adapter types. An unknown
+// adapter type logs an error and returns false.
+bool GetMemObjInfo( cl_mem object, cl_dx9_media_adapter_type_khr adapterType,  std::auto_ptr<CSurfaceWrapper> &surface, void *shareHandleExp )
+{
+  bool result = true;
+  switch(adapterType)
+  {
+  case CL_ADAPTER_D3D9_KHR:
+  case CL_ADAPTER_D3D9EX_KHR:
+  case CL_ADAPTER_DXVA_KHR:
+    {
+#if defined(_WIN32)
+      // Value-initialised so the checks below never read indeterminate
+      // memory when the query itself fails.
+      cl_dx9_surface_info_khr surfaceInfo = {};
+#else
+      void *surfaceInfo = 0;
+      return false;
+#endif
+      size_t paramSize = 0;
+      if(clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR, sizeof(surfaceInfo), &surfaceInfo, &paramSize) != CL_SUCCESS)
+      {
+        log_error("clGetMemObjectInfo(CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR) failed\n");
+        result = false;
+      }
+
+#if defined(_WIN32)
+      CD3D9SurfaceWrapper *d3d9Surface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
+      if (*d3d9Surface != surfaceInfo.resource)
+      {
+        log_error("Invalid resource for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n");
+        result = false;
+      }
+
+      if (shareHandleExp != surfaceInfo.shared_handle)
+      {
+        log_error("Invalid shared handle for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n");
+        result = false;
+      }
+#else
+      return false;
+#endif
+
+      // size_t values are cast to int before logging: passing a size_t to
+      // "%i" is a varargs type mismatch wherever size_t is wider than int.
+      if (paramSize != sizeof(surfaceInfo))
+      {
+        log_error("Invalid CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR parameter size: %i, expected: %i\n",
+          static_cast<int>(paramSize), static_cast<int>(sizeof(surfaceInfo)));
+        result = false;
+      }
+
+      paramSize = 0;
+      cl_dx9_media_adapter_type_khr mediaAdapterType = 0;
+      if(clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR, sizeof(mediaAdapterType), &mediaAdapterType, &paramSize) != CL_SUCCESS)
+      {
+        log_error("clGetMemObjectInfo(CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR) failed\n");
+        result = false;
+      }
+
+      if (adapterType != mediaAdapterType)
+      {
+        log_error("Invalid media adapter type for CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR\n");
+        result = false;
+      }
+
+      if (paramSize != sizeof(mediaAdapterType))
+      {
+        log_error("Invalid CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR parameter size: %i, expected: %i\n",
+          static_cast<int>(paramSize), static_cast<int>(sizeof(mediaAdapterType)));
+        result = false;
+      }
+    }
+    break;
+  default:
+    log_error("GetMemObjInfo(): Unknown adapter type!\n");
+    return false;
+    break;
+  }
+
+  return result;
+}
+
+// Verifies the image and memory-object properties of the per-plane images
+// created from one planar media surface.
+//   adapterType   - adapter type the objects were created for
+//   memObjList    - one image per plane; must hold 2 or 3 entries
+//   width, height - size of plane 0 in pixels
+//   surface       - wrapper of the source surface
+//   sharedHandle  - expected shared handle of the source surface
+// Plane 0 is expected to be CL_R / CL_UNORM_INT8 at full size. With two
+// entries plane 1 is expected to be CL_RG / CL_UNORM_INT8 at half size in
+// both dimensions. With three entries planes 1 and 2 are expected to be
+// CL_R / CL_UNORM_INT8 at half size in both dimensions.
+// Returns false after logging at the first check that fails.
+bool ImageInfoVerify( cl_dx9_media_adapter_type_khr adapterType, const std::vector<cl_mem> &memObjList, unsigned int width, unsigned int height,
+                     std::auto_ptr<CSurfaceWrapper> &surface, void *sharedHandle)
+{
+  if (memObjList.size() != 2 && memObjList.size() != 3)
+  {
+    log_error("ImageInfoVerify(): Invalid object list parameter\n");
+    return false;
+  }
+
+  cl_image_format formatPlane;
+  formatPlane.image_channel_data_type = CL_UNORM_INT8;
+  formatPlane.image_channel_order = CL_R;
+
+  //plane 0 verification
+  if (!GetImageInfo(memObjList[0], formatPlane, sizeof(cl_uchar),
+    width * sizeof(cl_uchar),
+    0,
+    width, height, 0, 0))
+  {
+    log_error("clGetImageInfo failed\n");
+    return false;
+  }
+
+  switch (memObjList.size())
+  {
+  case 2:
+    {
+      // Two-plane layout: the second plane holds two interleaved 8-bit
+      // channels, so half the pixels occupy a row of `width` bytes.
+      formatPlane.image_channel_data_type = CL_UNORM_INT8;
+      formatPlane.image_channel_order = CL_RG;
+      if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar) * 2,
+        width * sizeof(cl_uchar),
+        0,
+        width / 2, height / 2, 0, 1))
+      {
+        log_error("clGetImageInfo failed\n");
+        return false;
+      }
+    }
+    break;
+  case 3:
+    {
+      // Three-plane layout: two separate single-channel half-size planes.
+      if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar),
+        width * sizeof(cl_uchar) / 2,
+        0,
+        width / 2, height / 2, 0, 1))
+      {
+        log_error("clGetImageInfo failed\n");
+        return false;
+      }
+
+      if (!GetImageInfo(memObjList[2], formatPlane, sizeof(cl_uchar),
+        width * sizeof(cl_uchar) / 2,
+        0,
+        width / 2, height / 2, 0, 2))
+      {
+        log_error("clGetImageInfo failed\n");
+        return false;
+      }
+    }
+    break;
+  default:
+    log_error("ImageInfoVerify(): Invalid object list parameter\n");
+    return false;
+    break;
+  }
+
+  for (size_t i = 0; i < memObjList.size(); ++i)
+  {
+    if (!GetMemObjInfo(memObjList[i], adapterType, surface, sharedHandle))
+    {
+      // The index is cast to int: passing a size_t to "%i" is a varargs
+      // type mismatch wherever size_t is wider than int.
+      log_error("clGetMemObjInfo(%i) failed\n", static_cast<int>(i));
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Reports whether `imageFormatCheck` is among the formats that
+// clGetSupportedImageFormats() returns for CL_MEM_READ_WRITE images of type
+// `imageType` in `context`.
+// Returns false, after logging, when either query fails or the reported
+// number of formats is zero. Returns false without logging when the format
+// is simply not in the list.
+bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType, const cl_image_format imageFormatCheck)
+{
+  // First call: ask only for the number of supported formats.
+  cl_uint formatCount = 0;
+  if (clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, 0, 0, &formatCount) != CL_SUCCESS)
+  {
+    log_error("clGetSupportedImageFormats failed\n");
+    return false;
+  }
+
+  if (formatCount < 1)
+  {
+    log_error("Invalid image format number returned by clGetSupportedImageFormats\n");
+    return false;
+  }
+
+  // Second call: fetch the list itself.
+  std::vector<cl_image_format> supported(formatCount);
+  if (clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, formatCount, &supported[0], 0) != CL_SUCCESS)
+  {
+    log_error("clGetSupportedImageFormats failed\n");
+    return false;
+  }
+
+  for (std::vector<cl_image_format>::const_iterator it = supported.begin(); it != supported.end(); ++it)
+  {
+    const bool sameDataType = (it->image_channel_data_type == imageFormatCheck.image_channel_data_type);
+    const bool sameOrder = (it->image_channel_order == imageFormatCheck.image_channel_order);
+    if (sameDataType && sameOrder)
+    {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Returns the number of channels of a surface format: 1, 2, 3 (NV12 and
+// YV12) or 4. An unrecognised format logs an error and yields 0.
+unsigned int ChannelNum( TSurfaceFormat surfaceFormat )
+{
+  unsigned int channels = 0;
+
+  switch(surfaceFormat)
+  {
+  case SURFACE_FORMAT_R32F:
+  case SURFACE_FORMAT_R16F:
+  case SURFACE_FORMAT_L16:
+  case SURFACE_FORMAT_A8:
+  case SURFACE_FORMAT_L8:
+    channels = 1;
+    break;
+
+  case SURFACE_FORMAT_G32R32F:
+  case SURFACE_FORMAT_G16R16F:
+  case SURFACE_FORMAT_G16R16:
+  case SURFACE_FORMAT_A8L8:
+    channels = 2;
+    break;
+
+  case SURFACE_FORMAT_NV12:
+  case SURFACE_FORMAT_YV12:
+    channels = 3;
+    break;
+
+  case SURFACE_FORMAT_A32B32G32R32F:
+  case SURFACE_FORMAT_A16B16G16R16F:
+  case SURFACE_FORMAT_A16B16G16R16:
+  case SURFACE_FORMAT_A8B8G8R8:
+  case SURFACE_FORMAT_X8B8G8R8:
+  case SURFACE_FORMAT_A8R8G8B8:
+  case SURFACE_FORMAT_X8R8G8B8:
+    channels = 4;
+    break;
+
+  default:
+    log_error("ChannelNum(): unknown surface format!\n");
+    break;
+  }
+
+  return channels;
+}
+
+// Returns the number of planes of a surface format: 2 for NV12, 3 for YV12
+// and 1 for every other known format. An unrecognised format logs an error
+// and yields 0.
+unsigned int PlanesNum( TSurfaceFormat surfaceFormat )
+{
+  unsigned int planes = 0;
+
+  switch(surfaceFormat)
+  {
+  case SURFACE_FORMAT_NV12:
+    planes = 2;
+    break;
+
+  case SURFACE_FORMAT_YV12:
+    planes = 3;
+    break;
+
+  case SURFACE_FORMAT_R32F:
+  case SURFACE_FORMAT_R16F:
+  case SURFACE_FORMAT_L16:
+  case SURFACE_FORMAT_A8:
+  case SURFACE_FORMAT_L8:
+  case SURFACE_FORMAT_G32R32F:
+  case SURFACE_FORMAT_G16R16F:
+  case SURFACE_FORMAT_G16R16:
+  case SURFACE_FORMAT_A8L8:
+  case SURFACE_FORMAT_A32B32G32R32F:
+  case SURFACE_FORMAT_A16B16G16R16F:
+  case SURFACE_FORMAT_A16B16G16R16:
+  case SURFACE_FORMAT_A8B8G8R8:
+  case SURFACE_FORMAT_X8B8G8R8:
+  case SURFACE_FORMAT_A8R8G8B8:
+  case SURFACE_FORMAT_X8R8G8B8:
+    planes = 1;
+    break;
+
+  default:
+    log_error("PlanesNum(): unknown surface format!\n");
+    break;
+  }
+
+  return planes;
+}
+
+#if defined(_WIN32)
+// Maps a TSurfaceFormat to the matching D3DFORMAT. NV12 and YV12 are
+// expressed as FOURCC codes. An unrecognised format logs an error and falls
+// back to D3DFMT_R32F.
+D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat)
+{
+  // Fallback value, returned unchanged for an unknown format.
+  D3DFORMAT d3dFormat = D3DFMT_R32F;
+
+  switch(surfaceFormat)
+  {
+  case SURFACE_FORMAT_R32F:
+    d3dFormat = D3DFMT_R32F;
+    break;
+  case SURFACE_FORMAT_R16F:
+    d3dFormat = D3DFMT_R16F;
+    break;
+  case SURFACE_FORMAT_L16:
+    d3dFormat = D3DFMT_L16;
+    break;
+  case SURFACE_FORMAT_A8:
+    d3dFormat = D3DFMT_A8;
+    break;
+  case SURFACE_FORMAT_L8:
+    d3dFormat = D3DFMT_L8;
+    break;
+  case SURFACE_FORMAT_G32R32F:
+    d3dFormat = D3DFMT_G32R32F;
+    break;
+  case SURFACE_FORMAT_G16R16F:
+    d3dFormat = D3DFMT_G16R16F;
+    break;
+  case SURFACE_FORMAT_G16R16:
+    d3dFormat = D3DFMT_G16R16;
+    break;
+  case SURFACE_FORMAT_A8L8:
+    d3dFormat = D3DFMT_A8L8;
+    break;
+  case SURFACE_FORMAT_A32B32G32R32F:
+    d3dFormat = D3DFMT_A32B32G32R32F;
+    break;
+  case SURFACE_FORMAT_A16B16G16R16F:
+    d3dFormat = D3DFMT_A16B16G16R16F;
+    break;
+  case SURFACE_FORMAT_A16B16G16R16:
+    d3dFormat = D3DFMT_A16B16G16R16;
+    break;
+  case SURFACE_FORMAT_A8B8G8R8:
+    d3dFormat = D3DFMT_A8B8G8R8;
+    break;
+  case SURFACE_FORMAT_X8B8G8R8:
+    d3dFormat = D3DFMT_X8B8G8R8;
+    break;
+  case SURFACE_FORMAT_A8R8G8B8:
+    d3dFormat = D3DFMT_A8R8G8B8;
+    break;
+  case SURFACE_FORMAT_X8R8G8B8:
+    d3dFormat = D3DFMT_X8R8G8B8;
+    break;
+  case SURFACE_FORMAT_NV12:
+    d3dFormat = static_cast<D3DFORMAT>(MAKEFOURCC('N', 'V', '1', '2'));
+    break;
+  case SURFACE_FORMAT_YV12:
+    d3dFormat = static_cast<D3DFORMAT>(MAKEFOURCC('Y', 'V', '1', '2'));
+    break;
+  default:
+    log_error("SurfaceFormatToD3D(): unknown surface format!\n");
+    break;
+  }
+
+  return d3dFormat;
+}
+#endif
+
+// Creates the device wrapper that matches `adapterType` and stores it in
+// `device`, replacing whatever it held before.
+// Returns the new wrapper's Status(). An unknown adapter type logs an error,
+// leaves `device` untouched and returns false. The known adapter types are
+// only compiled in on Windows, so elsewhere every type counts as unknown.
+bool DeviceCreate( cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr<CDeviceWrapper> &device )
+{
+  switch (adapterType)
+  {
+#if defined(_WIN32)
+  case CL_ADAPTER_D3D9_KHR:
+    device.reset(new CD3D9Wrapper());
+    break;
+  case CL_ADAPTER_D3D9EX_KHR:
+    device.reset(new CD3D9ExWrapper());
+    break;
+  case CL_ADAPTER_DXVA_KHR:
+    device.reset(new CDXVAWrapper());
+    break;
+#endif
+  default:
+    log_error("DeviceCreate(): Unknown adapter type!\n");
+    return false;
+  }
+
+  return device->Status();
+}
+
+// Reports whether the adapter behind `device` can create surfaces of
+// `surfaceFormat`.
+//   adapterType   - adapter type of `device`
+//   device        - wrapper that provides the IDirect3D9 object and the
+//                   adapter index
+//   surfaceFormat - format to check
+// Returns true when IDirect3D9::CheckDeviceFormat() accepts the format for
+// the adapter's current display mode. Returns false when the format is
+// rejected, when the display mode cannot be read (logged), or when the
+// adapter type is unknown (logged). The known adapter types are only
+// compiled in on Windows.
+bool SurfaceFormatCheck( cl_dx9_media_adapter_type_khr adapterType, const CDeviceWrapper &device, TSurfaceFormat surfaceFormat )
+{
+  switch (adapterType)
+  {
+#if defined(_WIN32)
+  case CL_ADAPTER_D3D9_KHR:
+  case CL_ADAPTER_D3D9EX_KHR:
+  case CL_ADAPTER_DXVA_KHR:
+    {
+      D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
+      LPDIRECT3D9 d3d9 = static_cast<LPDIRECT3D9>(device.D3D());
+      D3DDISPLAYMODE d3ddm;
+
+      // d3ddm is only filled in on success, so the result must be checked
+      // before d3ddm.Format is used.
+      if( FAILED(d3d9->GetAdapterDisplayMode(device.AdapterIdx(), &d3ddm)) )
+      {
+        log_error("SurfaceFormatCheck(): GetAdapterDisplayMode failed\n");
+        return false;
+      }
+
+      // Check the same adapter the display mode was read from, not
+      // D3DADAPTER_DEFAULT, so the answer stays correct when the device
+      // lives on a non-default adapter.
+      if( FAILED(d3d9->CheckDeviceFormat(device.AdapterIdx(), D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_SURFACE, d3dFormat)) )
+        return false;
+    }
+    break;
+#endif
+  default:
+    log_error("SurfaceFormatCheck(): Unknown adapter type!\n");
+    return false;
+    break;
+  }
+
+  return true;
+}
+
+// Maps a TSurfaceFormat to the OpenCL image format (channel order plus
+// channel data type) used for it and writes the result to `format`.
+// Formats that share a mapping are grouped under one case. NV12 and YV12 map
+// to CL_R / CL_UNORM_INT8.
+// Returns true on success. An unrecognised format logs an error, leaves
+// `format` untouched and returns false.
+bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format)
+{
+  cl_image_format mapped;
+
+  switch(surfaceFormat)
+  {
+  // single channel
+  case SURFACE_FORMAT_R32F:
+    mapped.image_channel_order = CL_R;
+    mapped.image_channel_data_type = CL_FLOAT;
+    break;
+  case SURFACE_FORMAT_R16F:
+    mapped.image_channel_order = CL_R;
+    mapped.image_channel_data_type = CL_HALF_FLOAT;
+    break;
+  case SURFACE_FORMAT_L16:
+    mapped.image_channel_order = CL_R;
+    mapped.image_channel_data_type = CL_UNORM_INT16;
+    break;
+  case SURFACE_FORMAT_A8:
+    mapped.image_channel_order = CL_A;
+    mapped.image_channel_data_type = CL_UNORM_INT8;
+    break;
+  case SURFACE_FORMAT_L8:
+  case SURFACE_FORMAT_NV12:
+  case SURFACE_FORMAT_YV12:
+    mapped.image_channel_order = CL_R;
+    mapped.image_channel_data_type = CL_UNORM_INT8;
+    break;
+
+  // two channels
+  case SURFACE_FORMAT_G32R32F:
+    mapped.image_channel_order = CL_RG;
+    mapped.image_channel_data_type = CL_FLOAT;
+    break;
+  case SURFACE_FORMAT_G16R16F:
+    mapped.image_channel_order = CL_RG;
+    mapped.image_channel_data_type = CL_HALF_FLOAT;
+    break;
+  case SURFACE_FORMAT_G16R16:
+    mapped.image_channel_order = CL_RG;
+    mapped.image_channel_data_type = CL_UNORM_INT16;
+    break;
+  case SURFACE_FORMAT_A8L8:
+    mapped.image_channel_order = CL_RG;
+    mapped.image_channel_data_type = CL_UNORM_INT8;
+    break;
+
+  // four channels
+  case SURFACE_FORMAT_A32B32G32R32F:
+    mapped.image_channel_order = CL_RGBA;
+    mapped.image_channel_data_type = CL_FLOAT;
+    break;
+  case SURFACE_FORMAT_A16B16G16R16F:
+    mapped.image_channel_order = CL_RGBA;
+    mapped.image_channel_data_type = CL_HALF_FLOAT;
+    break;
+  case SURFACE_FORMAT_A16B16G16R16:
+    mapped.image_channel_order = CL_RGBA;
+    mapped.image_channel_data_type = CL_UNORM_INT16;
+    break;
+  case SURFACE_FORMAT_A8B8G8R8:
+  case SURFACE_FORMAT_X8B8G8R8:
+    mapped.image_channel_order = CL_RGBA;
+    mapped.image_channel_data_type = CL_UNORM_INT8;
+    break;
+  case SURFACE_FORMAT_A8R8G8B8:
+  case SURFACE_FORMAT_X8R8G8B8:
+    mapped.image_channel_order = CL_BGRA;
+    mapped.image_channel_data_type = CL_UNORM_INT8;
+    break;
+
+  default:
+    log_error("SurfaceFormatToOCL(): Unknown surface format!\n");
+    return false;
+  }
+
+  format.image_channel_order = mapped.image_channel_order;
+  format.image_channel_data_type = mapped.image_channel_data_type;
+  return true;
+}
+
+// Writes the short textual name of `surfaceFormat` (for example "NV12" or
+// "A8R8G8B8") to `str`. An unrecognised format logs an error and yields
+// "unknown".
+void SurfaceFormatToString( TSurfaceFormat surfaceFormat, std::string &str )
+{
+  // Name used when no case below matches.
+  const char *formatName = "unknown";
+
+  switch(surfaceFormat)
+  {
+  case SURFACE_FORMAT_R32F:          formatName = "R32F";          break;
+  case SURFACE_FORMAT_R16F:          formatName = "R16F";          break;
+  case SURFACE_FORMAT_L16:           formatName = "L16";           break;
+  case SURFACE_FORMAT_A8:            formatName = "A8";            break;
+  case SURFACE_FORMAT_L8:            formatName = "L8";            break;
+  case SURFACE_FORMAT_G32R32F:       formatName = "G32R32F";       break;
+  case SURFACE_FORMAT_G16R16F:       formatName = "G16R16F";       break;
+  case SURFACE_FORMAT_G16R16:        formatName = "G16R16";        break;
+  case SURFACE_FORMAT_A8L8:          formatName = "A8L8";          break;
+  case SURFACE_FORMAT_A32B32G32R32F: formatName = "A32B32G32R32F"; break;
+  case SURFACE_FORMAT_A16B16G16R16F: formatName = "A16B16G16R16F"; break;
+  case SURFACE_FORMAT_A16B16G16R16:  formatName = "A16B16G16R16";  break;
+  case SURFACE_FORMAT_A8B8G8R8:      formatName = "A8B8G8R8";      break;
+  case SURFACE_FORMAT_X8B8G8R8:      formatName = "X8B8G8R8";      break;
+  case SURFACE_FORMAT_A8R8G8B8:      formatName = "A8R8G8B8";      break;
+  case SURFACE_FORMAT_X8R8G8B8:      formatName = "X8R8G8B8";      break;
+  case SURFACE_FORMAT_NV12:          formatName = "NV12";          break;
+  case SURFACE_FORMAT_YV12:          formatName = "YV12";          break;
+  default:
+    log_error("SurfaceFormatToString(): unknown surface format!\n");
+    break;
+  }
+
+  str = formatName;
+}
+
+// Creates a media surface of the given size and format on `device` and
+// stores its wrapper in `surface`.
+//   adapterType        - selects the creation API: D3D9 and D3D9Ex use
+//                        CreateOffscreenPlainSurface(), DXVA uses
+//                        CreateVideoSurface()
+//   width, height      - surface size
+//   surfaceFormat      - format, translated with SurfaceFormatToD3D()
+//   device             - wrapper that provides the native device pointer
+//   surface            - receives a new CD3D9SurfaceWrapper
+//   sharedHandle       - when true, objectSharedHandle is passed to the
+//                        creation call; otherwise a null pointer is passed
+//   objectSharedHandle - storage for the shared handle
+// Returns true on success. A failed creation call or an unknown adapter type
+// logs an error and returns false. The known adapter types are only compiled
+// in on Windows.
+bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType, unsigned int width, unsigned int height, TSurfaceFormat surfaceFormat,
+                        CDeviceWrapper &device, std::auto_ptr<CSurfaceWrapper> &surface, bool sharedHandle, void **objectSharedHandle)
+{
+  switch (adapterType)
+  {
+#if defined(_WIN32)
+  case CL_ADAPTER_D3D9_KHR:
+    {
+      // `surface` takes ownership; the raw pointer is kept for the call below.
+      CD3D9SurfaceWrapper *d3dSurface = new CD3D9SurfaceWrapper;
+      surface.reset(d3dSurface);
+
+      const D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
+      LPDIRECT3DDEVICE9 d3d9Device = (LPDIRECT3DDEVICE9)device.Device();
+      void **handleArg = sharedHandle ? objectSharedHandle : 0;
+
+      const HRESULT hr = d3d9Device->CreateOffscreenPlainSurface(width, height, d3dFormat, D3DPOOL_DEFAULT,
+        &(*d3dSurface), handleArg);
+      if (FAILED(hr))
+      {
+        log_error("CreateOffscreenPlainSurface failed\n");
+        return false;
+      }
+    }
+    break;
+  case CL_ADAPTER_D3D9EX_KHR:
+    {
+      CD3D9SurfaceWrapper *d3dSurface = new CD3D9SurfaceWrapper;
+      surface.reset(d3dSurface);
+
+      const D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
+      LPDIRECT3DDEVICE9EX d3d9ExDevice = (LPDIRECT3DDEVICE9EX)device.Device();
+      void **handleArg = sharedHandle ? objectSharedHandle : 0;
+
+      const HRESULT hr = d3d9ExDevice->CreateOffscreenPlainSurface(width, height, d3dFormat, D3DPOOL_DEFAULT,
+        &(*d3dSurface), handleArg);
+      if (FAILED(hr))
+      {
+        log_error("CreateOffscreenPlainSurface failed\n");
+        return false;
+      }
+    }
+    break;
+  case CL_ADAPTER_DXVA_KHR:
+    {
+      CD3D9SurfaceWrapper *d3dSurface = new CD3D9SurfaceWrapper;
+      surface.reset(d3dSurface);
+
+      const D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
+      IDXVAHD_Device *dxvaDevice = (IDXVAHD_Device *)device.Device();
+      void **handleArg = sharedHandle ? objectSharedHandle : 0;
+
+      const HRESULT hr = dxvaDevice->CreateVideoSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, 0,
+        DXVAHD_SURFACE_TYPE_VIDEO_INPUT,  1, &(*d3dSurface), handleArg);
+      if (FAILED(hr))
+      {
+        log_error("CreateVideoSurface failed\n");
+        return false;
+      }
+    }
+    break;
+#endif
+  default:
+    log_error("MediaSurfaceCreate(): Unknown adapter type!\n");
+    return false;
+  }
+
+  return true;
+}
+
+// Converts a single-precision float to the 16-bit pattern of a
+// half-precision value. The _rte suffix and the tie handling below indicate
+// round-to-nearest-even.
+// The result is built from the sign bit (bit 15) OR-ed with the magnitude
+// bits chosen by the range checks below. MAKE_HEX_FLOAT is defined elsewhere;
+// its first argument shows the intended threshold as a hex float literal.
+cl_ushort float2half_rte( float f )
+{
+  union{ float f; cl_uint u; } bits = {f};
+  const cl_uint signBit = (bits.u >> 16) & 0x8000;
+  float magnitude = fabsf(f);
+
+  // NaN is the only value that compares unequal to itself.
+  if( magnitude != magnitude )
+  {
+    bits.u >>= (24-11);
+    bits.u &= 0x7fff;
+    bits.u |= 0x0200;      // silence the NaN
+    return bits.u | signBit;
+  }
+
+  // Overflow: the result is a signed infinity (0x7c00).
+  if( magnitude >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
+    return 0x7c00 | signBit;
+
+  // Underflow: the result is a signed zero.
+  // The halfway case can return 0x0001 or 0. 0 is even.
+  if( magnitude <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
+    return signBit;
+
+  // Very small: the result is the pattern with only the lowest bit set.
+  if( magnitude < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
+    return signBit | 1;
+
+  // Half denormal: after the scaling, the float's own bit pattern is used
+  // directly as the result's low bits.
+  // NOTE(review): this presumably relies on the float multiply rounding in
+  // the default (nearest-even) mode - confirm.
+  if( magnitude < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+  {
+    bits.f = magnitude * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);
+    return signBit | bits.u;
+  }
+
+  // Normal range. The statement order below matters: a power of two is built
+  // from the input's exponent, added to and subtracted from the magnitude so
+  // the float addition performs the rounding, and the result is rescaled
+  // before its bits are shifted into half-precision position.
+  bits.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
+  bits.u &= 0x7f800000;
+  magnitude += bits.f;
+  bits.f = magnitude - bits.f;
+  bits.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);
+
+  return (bits.u >> (24-11)) | signBit;
+}
+
+// Converts a single-precision float to the 16-bit pattern of a
+// half-precision value. The _rtz suffix and the truncating steps below
+// indicate round-toward-zero.
+// The result is built from the sign bit (bit 15) OR-ed with the magnitude
+// bits chosen by the range checks below. MAKE_HEX_FLOAT is defined elsewhere;
+// its first argument shows the intended threshold as a hex float literal.
+cl_ushort float2half_rtz( float f )
+{
+  union{ float f; cl_uint u; } bits = {f};
+  const cl_uint signBit = (bits.u >> 16) & 0x8000;
+  float magnitude = fabsf(f);
+
+  // NaN is the only value that compares unequal to itself.
+  if( magnitude != magnitude )
+  {
+    bits.u >>= (24-11);
+    bits.u &= 0x7fff;
+    bits.u |= 0x0200;      // silence the NaN
+    return bits.u | signBit;
+  }
+
+  // Overflow: only a true infinity maps to infinity (0x7c00); any other
+  // value at or above the threshold maps to 0x7bff.
+  if( magnitude >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
+  {
+    if( magnitude == INFINITY )
+      return 0x7c00 | signBit;
+
+    return 0x7bff | signBit;
+  }
+
+  // Underflow: anything below the threshold becomes a signed zero.
+  if( magnitude < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
+    return signBit;
+
+  // Half denormal: scale up and truncate to an integer that is used directly
+  // as the result's low bits.
+  if( magnitude < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+  {
+    magnitude *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
+    return (cl_ushort)((int) magnitude | signBit);
+  }
+
+  // Normal range: clear the low 13 bits of the float pattern (truncation),
+  // subtract 0x38000000 to adjust the exponent field, then shift the bits
+  // into half-precision position.
+  bits.u &= 0xFFFFE000U;
+  bits.u -= 0x38000000U;
+
+  return (bits.u >> (24-11)) | signBit;
+}
+
+// Asks clGetDeviceIDsFromDX9MediaAdapterKHR() for the number of preferred
+// devices for the given media adapter and records the outcome in `result`.
+//   platform            - platform to query
+//   media_adapters_type - type of the adapter
+//   media_adapters      - native adapter pointer
+//   result              - receives TEST_NOTSUPPORTED when the call reports
+//                         CL_DEVICE_NOT_FOUND and TEST_ERROR for any other
+//                         failure; left untouched on success
+//   sharedHandle        - only used in the "skipping" log message
+// Returns the error code of the OpenCL call unchanged.
+cl_int deviceExistForCLTest(cl_platform_id platform,
+     cl_dx9_media_adapter_type_khr media_adapters_type,
+     void *media_adapters,
+     CResult &result,
+     TSharedHandleType sharedHandle /*default SHARED_HANDLE_ENABLED*/
+     )
+{
+    const std::string sharedHandleText = (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
+    std::string adapterName;
+    AdapterToString(media_adapters_type, adapterName);
+
+    // Only the device count is requested; no device list is retrieved.
+    cl_uint deviceCount = 0;
+    const cl_int status = clGetDeviceIDsFromDX9MediaAdapterKHR(platform, 1,
+        &media_adapters_type, &media_adapters, CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &deviceCount);
+
+    if (status == CL_SUCCESS)
+    {
+        return status;
+    }
+
+    if (status == CL_DEVICE_NOT_FOUND)
+    {
+        log_info("Skipping test case, device type is not supported by a device (adapter type: %s, shared handle: %s)\n", adapterName.c_str(), sharedHandleText.c_str());
+        result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+    else
+    {
+        log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(status));
+        result.ResultSub(CResult::TEST_ERROR);
+    }
+
+    return status;
+}

diff --git a/test_extensions/media_sharing/utils.h b/test_extensions/media_sharing/utils.h
new file mode 100644
index 0000000..38908ab
--- /dev/null
+++ b/test_extensions/media_sharing/utils.h

@@ -0,0 +1,174 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef __UTILS_KHR_MEDIA_H
+#define __UTILS_KHR_MEDIA_H
+
+#include <string>
+#include <iostream>
+#include <memory>
+#include <vector>
+#include "wrappers.h"
+#include "CL/cl_dx9_media_sharing.h"
+
+#include "harness/typeWrappers.h"
+
+
+
+
+
+extern clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR;
+extern clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR;
+extern clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR;
+extern clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR;
+
+extern cl_platform_id gPlatformIDdetected;
+extern cl_device_id gDeviceIDdetected;
+extern cl_device_type gDeviceTypeSelected;
+
+#define NL "\n"
+#define TEST_NOT_IMPLEMENTED -1
+#define TEST_NOT_SUPPORTED -2
+
+// Surface pixel formats the media-sharing tests can request.
+// NV12 and YV12 have dedicated YUV* helpers declared in this header; the
+// remaining names follow Direct3D channel-order naming and presumably map to
+// D3DFMT_* values through SurfaceFormatToD3D() — confirm in utils.cpp.
+enum TSurfaceFormat
+{
+  SURFACE_FORMAT_NV12,
+  SURFACE_FORMAT_YV12,
+  SURFACE_FORMAT_R32F,
+  SURFACE_FORMAT_R16F,
+  SURFACE_FORMAT_L16,
+  SURFACE_FORMAT_A8,
+  SURFACE_FORMAT_L8,
+  SURFACE_FORMAT_G32R32F,
+  SURFACE_FORMAT_G16R16F,
+  SURFACE_FORMAT_G16R16,
+  SURFACE_FORMAT_A8L8,
+  SURFACE_FORMAT_A32B32G32R32F,
+  SURFACE_FORMAT_A16B16G16R16F,
+  SURFACE_FORMAT_A16B16G16R16,
+  SURFACE_FORMAT_A8B8G8R8,
+  SURFACE_FORMAT_X8B8G8R8,
+  SURFACE_FORMAT_A8R8G8B8,
+  SURFACE_FORMAT_X8R8G8B8,
+};
+
+// Selects which context-creation path a test exercises.
+// NOTE(review): presumably clCreateContext vs. clCreateContextFromType —
+// confirm against the callers; see FunctionContextCreateToString().
+enum TContextFuncType
+{
+  CONTEXT_CREATE_DEFAULT,
+  CONTEXT_CREATE_FROM_TYPE,
+};
+
+// Whether a test case runs with a shared handle for the media surface.
+// SHARED_HANDLE_ENABLED is the zero value; deviceExistForCLTest() defaults to
+// SHARED_HANDLE_DISABLED.
+enum TSharedHandleType
+{
+  SHARED_HANDLE_ENABLED,
+  SHARED_HANDLE_DISABLED,
+};
+
+// Collects the outcome of a test that reports several sub-results.
+// Only the declaration is visible here; the member functions are defined
+// elsewhere (presumably utils.cpp).
+class CResult {
+public:
+  // Possible outcomes of a single sub-test.
+  enum TTestResult {
+    TEST_NORESULT,
+    TEST_NOTSUPPORTED,
+    TEST_PASS,
+    TEST_FAIL,
+    TEST_ERROR,
+  };
+
+  CResult();
+  ~CResult();
+
+  // Reports one sub-result (e.g. TEST_ERROR or TEST_NOTSUPPORTED).
+  void ResultSub(TTestResult result);
+  // NOTE(review): presumably returns the value most recently passed to
+  // ResultSub() — confirm in the implementation.
+  TTestResult ResultLast() const;
+  // NOTE(review): presumably converts the aggregated outcome to the int code
+  // returned by the test entry points — confirm in the implementation.
+  int Result() const;
+
+private:
+  TTestResult _result;      // presumably the aggregated outcome — TODO confirm
+  TTestResult _resultLast;  // presumably the last reported sub-result — TODO confirm
+};
+
+void FunctionContextCreateToString(TContextFuncType contextCreateFunction, std::string &contextFunction);
+void AdapterToString(cl_dx9_media_adapter_type_khr adapterType, std::string &adapter);
+cl_context_info AdapterTypeToContextInfo(cl_dx9_media_adapter_type_khr adapterType);
+
+//YUV utils
+void YUVGenerateNV12(std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height,
+                     cl_uchar valueMin, cl_uchar valueMax, double valueAdd = 0.0);
+void YUVGenerateYV12(std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height,
+                     cl_uchar valueMin, cl_uchar valueMax, double valueAdd = 0.0);
+bool YUVGenerate(TSurfaceFormat surfaceFormat, std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height,
+                 cl_uchar valueMin, cl_uchar valueMax, double valueAdd = 0.0);
+bool YUVSurfaceSetNV12(std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv,
+                       unsigned int width, unsigned int height);
+bool YUVSurfaceSetYV12(std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv,
+                       unsigned int width, unsigned int height);
+bool YUVSurfaceSet(TSurfaceFormat surfaceFormat, std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv,
+                   unsigned int width, unsigned int height);
+bool YUVSurfaceGetNV12(std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv,
+                       unsigned int width, unsigned int height);
+bool YUVSurfaceGetYV12(std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv,
+                       unsigned int width, unsigned int height);
+bool YUVSurfaceGet(TSurfaceFormat surfaceFormat, std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv,
+                   unsigned int width, unsigned int height);
+bool YUVCompareNV12(const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
+                    unsigned int width, unsigned int height);
+bool YUVCompareYV12(const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
+                    unsigned int width, unsigned int height);
+bool YUVCompare(TSurfaceFormat surfaceFormat, const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
+                unsigned int width, unsigned int height);
+
+//other types utils
+void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<float> &data, unsigned int width, unsigned int height,
+                  unsigned int channelNum, float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f);
+void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<cl_half> &data, unsigned int width, unsigned int height,
+                  unsigned int channelNum, float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f);
+void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<cl_uchar> &data, unsigned int width, unsigned int height,
+                  unsigned int channelNum, float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f);
+bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_float> &dataTest, const std::vector<cl_float> &dataExp,
+                 unsigned int width, unsigned int height, unsigned int channelNum);
+bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_half> &dataTest, const std::vector<cl_half> &dataExp,
+                 unsigned int width, unsigned int height, unsigned int channelNum);
+bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_uchar> &dataTest, const std::vector<cl_uchar> &dataExp,
+                 unsigned int width, unsigned int height, unsigned int channelNum);
+
+bool GetImageInfo(cl_mem object, cl_image_format formatExp, size_t elementSizeExp,
+                  size_t rowPitchExp, size_t slicePitchExp, size_t widthExp,
+                  size_t heightExp, size_t depthExp, unsigned int planeExp);
+bool GetMemObjInfo(cl_mem object, cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr<CSurfaceWrapper> &surface, void *shareHandleExp);
+bool ImageInfoVerify(cl_dx9_media_adapter_type_khr adapterType, const std::vector<cl_mem> &memObjList, unsigned int width, unsigned int height,
+                     std::auto_ptr<CSurfaceWrapper> &surface, void *sharedHandle);
+bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType, const cl_image_format imageFormatCheck);
+unsigned int ChannelNum(TSurfaceFormat surfaceFormat);
+unsigned int PlanesNum(TSurfaceFormat surfaceFormat);
+
+#if defined(_WIN32)
+D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat);
+#endif
+
+bool DeviceCreate(cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr<CDeviceWrapper> &device);
+bool SurfaceFormatCheck(cl_dx9_media_adapter_type_khr adapterType, const CDeviceWrapper &device, TSurfaceFormat surfaceFormat);
+bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format);
+void SurfaceFormatToString(TSurfaceFormat surfaceFormat, std::string &str );
+bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType, unsigned int width, unsigned int height, TSurfaceFormat surfaceFormat,
+                      CDeviceWrapper &device, std::auto_ptr<CSurfaceWrapper> &surface, bool sharedHandle, void **objectSharedHandle);
+
+//imported from image helpers
+cl_ushort float2half_rte( float f );
+cl_ushort float2half_rtz( float f );
+cl_ushort convert_float_to_half( float f );
+float convert_half_to_float( unsigned short halfValue );
+int DetectFloatToHalfRoundingMode( cl_command_queue );
+
+cl_int deviceExistForCLTest(cl_platform_id platform,cl_dx9_media_adapter_type_khr media_adapters_type,void *media_adapters,CResult &result,TSharedHandleType sharedHandle=SHARED_HANDLE_DISABLED);
+#endif  // __UTILS_KHR_MEDIA_H

diff --git a/test_extensions/media_sharing/wrappers.cpp b/test_extensions/media_sharing/wrappers.cpp
new file mode 100644
index 0000000..e7eb5b2
--- /dev/null
+++ b/test_extensions/media_sharing/wrappers.cpp

@@ -0,0 +1,562 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "wrappers.h"
+#include "harness/errorHelpers.h"
+
+// Shared settings for the window created by CDeviceWrapper::WindowInit():
+// WINDOW_TITLE doubles as the window-class name and the caption, and
+// WINDOW_WIDTH / WINDOW_HEIGHT are reused as the back-buffer size by the
+// Direct3D wrappers below.
+// NOTE(review): LPCTSTR and _T come from the Windows headers, yet these
+// definitions sit outside the `#if defined(_WIN32)` guard used for the rest of
+// the Windows-only code.
+LPCTSTR CDeviceWrapper::WINDOW_TITLE = _T( "cl_khr_dx9_media_sharing" );
+const int CDeviceWrapper::WINDOW_WIDTH = 256;
+const int CDeviceWrapper::WINDOW_HEIGHT = 256;
+// Process-wide vertex-processing mode; hardware by default.
+CDeviceWrapper::TAccelerationType CDeviceWrapper::accelerationType = CDeviceWrapper::ACCELERATION_HW;
+
+#if defined(_WIN32)
+// DXVA constants. Only VIDEO_FPS is referenced in this file (DXVAHDInit).
+const D3DFORMAT CDXVAWrapper::RENDER_TARGET_FORMAT = D3DFMT_X8R8G8B8;
+const D3DFORMAT CDXVAWrapper::VIDEO_FORMAT = D3DFMT_X8R8G8B8;
+const unsigned int CDXVAWrapper::VIDEO_FPS = 60;
+#endif
+
+#if defined(_WIN32)
+// Window procedure for the test window: posts a quit message on WM_DESTROY,
+// validates the whole client area on WM_PAINT, and forwards everything else
+// to DefWindowProc.
+static LRESULT WINAPI WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
+{
+  if (WM_DESTROY == msg)
+  {
+    PostQuitMessage(0);
+    return 0;
+  }
+
+  if (WM_PAINT == msg)
+  {
+    ValidateRect(hWnd, 0);
+    return 0;
+  }
+
+  return DefWindowProc(hWnd, msg, wParam, lParam);
+}
+#endif
+
+// Starts with no module handle and no window; the window is only created when
+// a derived class calls WindowInit().
+CDeviceWrapper::CDeviceWrapper()
+#if defined(_WIN32)
+:_hInstance(NULL),_hWnd(NULL)
+#endif
+{
+
+}
+
+// Registers the window class and creates/shows the window that the Direct3D
+// devices render into. On failure _hWnd stays NULL, which the Init() methods
+// of the derived wrappers detect through WindowHandle().
+// No-op on non-Windows builds.
+void CDeviceWrapper::WindowInit()
+{
+#if defined(_WIN32)
+  _hInstance = GetModuleHandle(NULL);
+  // Function-local static: initialised once, on the first call.
+  static WNDCLASSEX wc =
+  {
+    sizeof(WNDCLASSEX),
+    CS_CLASSDC,
+    WndProc,
+    0L,
+    0L,
+    _hInstance,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    WINDOW_TITLE,   // class name
+    NULL
+  };
+
+  // The return value is intentionally ignored: the class may already have
+  // been registered by an earlier wrapper instance.
+  RegisterClassEx(&wc);
+
+  _hWnd = CreateWindow(
+    WINDOW_TITLE,
+    WINDOW_TITLE,
+    WS_OVERLAPPEDWINDOW,
+    0, 0,
+    WINDOW_WIDTH, WINDOW_HEIGHT,
+    NULL,
+    NULL,
+    wc.hInstance,
+    NULL);
+
+  if (!_hWnd)
+  {
+    // Newline-terminated like every other log_error in this file, so the next
+    // log line does not get glued to this one.
+    log_error("Failed to create window\n");
+    return;
+  }
+
+  ShowWindow(_hWnd,SW_SHOWDEFAULT);
+  UpdateWindow(_hWnd);
+#endif
+}
+
+// Destroys the window created by WindowInit(), if any, and clears the handle.
+// No-op on non-Windows builds.
+void CDeviceWrapper::WindowDestroy()
+{
+#if defined(_WIN32)
+  if (NULL != _hWnd)
+  {
+    DestroyWindow(_hWnd);
+    _hWnd = NULL;
+  }
+#endif
+}
+
+#if defined(_WIN32)
+// Returns the window handle, or NULL when no window has been created
+// (WindowInit() not called or CreateWindow failed).
+HWND CDeviceWrapper::WindowHandle() const
+{
+  return _hWnd;
+}
+#endif
+
+// Returns the fixed window width (WINDOW_WIDTH).
+int CDeviceWrapper::WindowWidth() const
+{
+  return WINDOW_WIDTH;
+}
+
+// Returns the fixed window height (WINDOW_HEIGHT).
+int CDeviceWrapper::WindowHeight() const
+{
+  return WINDOW_HEIGHT;
+}
+
+// Returns the acceleration mode shared by all wrappers (static state).
+CDeviceWrapper::TAccelerationType CDeviceWrapper::AccelerationType()
+{
+  return accelerationType;
+}
+
+// Sets the acceleration mode shared by all wrappers; it is read by the Init()
+// methods when a device is created.
+void CDeviceWrapper::AccelerationType( TAccelerationType accelerationTypeNew )
+{
+  accelerationType = accelerationTypeNew;
+}
+
+// Destroys the window, if one was created.
+CDeviceWrapper::~CDeviceWrapper()
+{
+  WindowDestroy();
+}
+
+#if defined(_WIN32)
+// Creates the window and the IDirect3D9 interface. No device exists until
+// AdapterNext() selects an adapter. If the interface cannot be created the
+// wrapper is put into the DEVICE_FAIL state.
+CD3D9Wrapper::CD3D9Wrapper():
+_d3d9(NULL), _d3dDevice(NULL), _status(DEVICE_PASS), _adapterIdx(0), _adapterFound(false)
+{
+  WindowInit();
+
+  _d3d9 = Direct3DCreate9(D3D_SDK_VERSION);
+  if (NULL != _d3d9)
+    return;
+
+  log_error("Direct3DCreate9 failed\n");
+  _status = DEVICE_FAIL;
+}
+
+// Releases the device first, then the IDirect3D9 interface.
+CD3D9Wrapper::~CD3D9Wrapper()
+{
+  Destroy();
+
+  if (NULL != _d3d9)
+  {
+    _d3d9->Release();
+    _d3d9 = 0;
+  }
+}
+
+// Releases the current Direct3D device, if any, and clears the pointer.
+void CD3D9Wrapper::Destroy()
+{
+  if (NULL != _d3dDevice)
+  {
+    _d3dDevice->Release();
+    _d3dDevice = 0;
+  }
+}
+
+// Creates a windowed HAL device on the adapter selected by AdapterNext()
+// (adapter ordinal is _adapterIdx - 1) and clears its render target once.
+//
+// Returns non-zero (true) on success and zero (false) on failure. The caller,
+// AdapterNext(), tests the result with `!Init()`, so failure must be reported
+// as zero; returning DEVICE_FAIL (== 2) here would be read as success.
+// On a window or device-creation failure _status is also set to DEVICE_FAIL.
+cl_int CD3D9Wrapper::Init()
+{
+  if (!WindowHandle())
+  {
+    log_error("D3D9: Window is not created\n");
+    _status = DEVICE_FAIL;
+    return false;
+  }
+
+  if(!_d3d9 || DEVICE_PASS  != _status || !_adapterFound)
+    return false;
+
+  // The current display mode supplies the back-buffer format below.
+  _d3d9->GetAdapterDisplayMode(_adapterIdx - 1, &_d3ddm);
+
+  D3DPRESENT_PARAMETERS d3dParams;
+  ZeroMemory(&d3dParams, sizeof(d3dParams));
+
+  d3dParams.Windowed = TRUE;
+  d3dParams.BackBufferCount = 1;
+  d3dParams.SwapEffect = D3DSWAPEFFECT_DISCARD;
+  d3dParams.hDeviceWindow = WindowHandle();
+  d3dParams.BackBufferWidth = WindowWidth();
+  d3dParams.BackBufferHeight = WindowHeight();
+  d3dParams.BackBufferFormat = _d3ddm.Format;
+
+  DWORD processingType = (AccelerationType() == ACCELERATION_HW)? D3DCREATE_HARDWARE_VERTEXPROCESSING:
+    D3DCREATE_SOFTWARE_VERTEXPROCESSING;
+
+  if ( FAILED( _d3d9->CreateDevice( _adapterIdx - 1, D3DDEVTYPE_HAL, WindowHandle(),
+    processingType, &d3dParams, &_d3dDevice) ) )
+  {
+    log_error("CreateDevice failed\n");
+    _status = DEVICE_FAIL;
+    return false;
+  }
+
+  _d3dDevice->BeginScene();
+  _d3dDevice->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
+  _d3dDevice->EndScene();
+
+  return true;
+}
+
+// Returns the IDirect3D9 interface as an untyped pointer (NULL if
+// Direct3DCreate9 failed).
+void * CD3D9Wrapper::D3D() const
+{
+  return _d3d9;
+}
+
+// Returns the current IDirect3DDevice9 as an untyped pointer (NULL when no
+// device has been created).
+void *CD3D9Wrapper::Device() const
+{
+  return _d3dDevice;
+}
+
+// Returns the display-mode format captured by Init().
+D3DFORMAT CD3D9Wrapper::Format()
+{
+  return _d3ddm.Format;
+}
+
+// Returns a copy of the adapter identifier filled in by AdapterNext().
+D3DADAPTER_IDENTIFIER9 CD3D9Wrapper::Adapter()
+{
+  return _adapter;
+}
+
+// Returns the wrapper's current status.
+TDeviceStatus CD3D9Wrapper::Status() const
+{
+  return _status;
+}
+
+// Advances to the next adapter that exposes a HAL device and (re)creates the
+// Direct3D device on it. Adapters whose caps query fails are skipped.
+// Returns true when a device is ready; returns false when the adapters are
+// exhausted or an error occurred (in the latter case _status becomes
+// DEVICE_FAIL).
+bool CD3D9Wrapper::AdapterNext()
+{
+  if (_status != DEVICE_PASS)
+    return false;
+
+  _adapterFound = false;
+  while (_adapterIdx < _d3d9->GetAdapterCount())
+  {
+    // _adapterIdx is kept one past the adapter being examined; Init() and
+    // AdapterIdx() rely on that by using _adapterIdx - 1.
+    const unsigned int current = _adapterIdx++;
+
+    D3DCAPS9 caps;
+    if (FAILED(_d3d9->GetDeviceCaps(current, D3DDEVTYPE_HAL, &caps)))
+      continue;
+
+    if (FAILED(_d3d9->GetAdapterIdentifier(current, 0, &_adapter)))
+    {
+      log_error("D3D9: GetAdapterIdentifier failed\n");
+      _status = DEVICE_FAIL;
+      return false;
+    }
+
+    // Init() refuses to run unless an adapter is marked as found.
+    _adapterFound = true;
+
+    Destroy();
+    if (!Init())
+    {
+      _adapterFound = false;
+      _status = DEVICE_FAIL;
+    }
+    break;
+  }
+
+  return _adapterFound;
+}
+
+// Returns the ordinal of the adapter selected by the last AdapterNext() call.
+// _adapterIdx is unsigned and starts at 0, so calling this before the first
+// AdapterNext() wraps around to the largest unsigned value.
+unsigned int CD3D9Wrapper::AdapterIdx() const
+{
+  return _adapterIdx - 1;
+}
+
+
+// Creates the window and the IDirect3D9Ex interface. No device exists until
+// AdapterNext() selects an adapter. If the interface cannot be created the
+// wrapper is put into the DEVICE_FAIL state.
+CD3D9ExWrapper::CD3D9ExWrapper():
+_d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(DEVICE_PASS), _adapterIdx(0), _adapterFound(false)
+{
+  WindowInit();
+
+  const HRESULT hr = Direct3DCreate9Ex(D3D_SDK_VERSION, &_d3d9Ex);
+  if (SUCCEEDED(hr) && NULL != _d3d9Ex)
+    return;
+
+  log_error("Direct3DCreate9Ex failed\n");
+  _status = DEVICE_FAIL;
+}
+
+// Releases the device first, then the IDirect3D9Ex interface.
+CD3D9ExWrapper::~CD3D9ExWrapper()
+{
+  Destroy();
+
+  if (NULL != _d3d9Ex)
+  {
+    _d3d9Ex->Release();
+    _d3d9Ex = 0;
+  }
+}
+
+// Returns the IDirect3D9Ex interface as an untyped pointer (NULL if
+// Direct3DCreate9Ex failed).
+void * CD3D9ExWrapper::D3D() const
+{
+  return _d3d9Ex;
+}
+
+// Returns the current IDirect3DDevice9Ex as an untyped pointer (NULL when no
+// device has been created).
+void *CD3D9ExWrapper::Device() const
+{
+  return _d3dDeviceEx;
+}
+
+// Returns the format stored in _d3ddmEx.
+// NOTE(review): nothing in this file writes _d3ddmEx, so the value looks
+// uninitialised — confirm before relying on it.
+D3DFORMAT CD3D9ExWrapper::Format()
+{
+  return _d3ddmEx.Format;
+}
+
+// Returns a copy of the adapter identifier filled in by AdapterNext().
+D3DADAPTER_IDENTIFIER9 CD3D9ExWrapper::Adapter()
+{
+  return _adapter;
+}
+
+// Creates a windowed HAL device (flip swap effect, X8R8G8B8 back buffer) on
+// the adapter selected by AdapterNext() (ordinal _adapterIdx - 1) and clears
+// its render target once.
+//
+// Returns a TDeviceStatus value: DEVICE_PASS on success, DEVICE_FAIL
+// otherwise. Both values are non-zero, so the result must be compared
+// explicitly rather than tested as a boolean.
+cl_int CD3D9ExWrapper::Init()
+{
+  const HWND window = WindowHandle();
+  if (NULL == window)
+  {
+    log_error("D3D9EX: Window is not created\n");
+    _status = DEVICE_FAIL;
+    return DEVICE_FAIL;
+  }
+
+  if (NULL == _d3d9Ex || _status == DEVICE_FAIL || !_adapterFound)
+    return DEVICE_FAIL;
+
+  // The client rectangle is queried but not used afterwards.
+  RECT rect;
+  GetClientRect(window, &rect);
+
+  D3DPRESENT_PARAMETERS d3dParams;
+  ZeroMemory(&d3dParams, sizeof(d3dParams));
+
+  d3dParams.hDeviceWindow = window;
+  d3dParams.Windowed = TRUE;
+  d3dParams.SwapEffect = D3DSWAPEFFECT_FLIP;
+  d3dParams.BackBufferCount = 1;
+  d3dParams.BackBufferFormat = D3DFMT_X8R8G8B8;
+  d3dParams.BackBufferWidth = WindowWidth();
+  d3dParams.BackBufferHeight = WindowHeight();
+
+  DWORD processingType = D3DCREATE_SOFTWARE_VERTEXPROCESSING;
+  if (ACCELERATION_HW == AccelerationType())
+    processingType = D3DCREATE_HARDWARE_VERTEXPROCESSING;
+
+  const HRESULT hr = _d3d9Ex->CreateDeviceEx(_adapterIdx - 1, D3DDEVTYPE_HAL, window,
+    processingType, &d3dParams, NULL, &_d3dDeviceEx);
+  if (FAILED(hr))
+  {
+    log_error("CreateDeviceEx failed\n");
+    _status = DEVICE_FAIL;
+    return DEVICE_FAIL;
+  }
+
+  _d3dDeviceEx->BeginScene();
+  _d3dDeviceEx->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
+  _d3dDeviceEx->EndScene();
+
+  return DEVICE_PASS;
+}
+
+// Releases the current Direct3D 9Ex device, if any, and clears the pointer.
+void CD3D9ExWrapper::Destroy()
+{
+  if (NULL != _d3dDeviceEx)
+  {
+    _d3dDeviceEx->Release();
+    _d3dDeviceEx = 0;
+  }
+}
+
+// Returns the wrapper's current status.
+TDeviceStatus CD3D9ExWrapper::Status() const
+{
+  return _status;
+}
+
+// Advances to the next adapter that exposes a HAL device and (re)creates the
+// Direct3D 9Ex device on it. Adapters whose caps query fails are skipped.
+// Returns true when a device is ready; returns false when the adapters are
+// exhausted or an error occurred (in the latter case _status becomes
+// DEVICE_FAIL).
+bool CD3D9ExWrapper::AdapterNext()
+{
+  if (DEVICE_FAIL == _status)
+    return false;
+
+  _adapterFound = false;
+  for(; _adapterIdx < _d3d9Ex->GetAdapterCount();)
+  {
+    // _adapterIdx is kept one past the adapter being examined; Init() and
+    // AdapterIdx() rely on that by using _adapterIdx - 1.
+    ++_adapterIdx;
+    D3DCAPS9 caps;
+    if (FAILED(_d3d9Ex->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps)))
+      continue;
+
+    if(FAILED(_d3d9Ex->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter)))
+    {
+      log_error("D3D9EX: GetAdapterIdentifier failed\n");
+      _status = DEVICE_FAIL;
+      return false;
+    }
+
+    // Init() refuses to run unless an adapter is marked as found.
+    _adapterFound = true;
+    Destroy();
+    // Init() returns DEVICE_PASS or DEVICE_FAIL, both non-zero, so it has to
+    // be compared against DEVICE_PASS; `!Init()` would never detect a failure.
+    if (DEVICE_PASS != Init())
+    {
+      _status = DEVICE_FAIL;
+      // Must be a real `false`: assigning DEVICE_FAIL to a bool yields true.
+      _adapterFound = false;
+    }
+
+    break;
+  }
+
+  return _adapterFound;
+}
+
+// Returns the ordinal of the adapter selected by the last AdapterNext() call.
+// _adapterIdx is unsigned and starts at 0, so calling this before the first
+// AdapterNext() wraps around to the largest unsigned value.
+unsigned int CD3D9ExWrapper::AdapterIdx() const
+{
+  return _adapterIdx - 1;
+}
+
+// The embedded CD3D9ExWrapper (_d3d9) is constructed first; its status then
+// replaces the DEVICE_PASS placeholder from the initialiser list.
+CDXVAWrapper::CDXVAWrapper():
+_dxvaDevice(NULL), _status(DEVICE_PASS), _adapterFound(false)
+{
+  _status = _d3d9.Status();
+}
+
+// Releases the DXVA-HD device, if any.
+CDXVAWrapper::~CDXVAWrapper()
+{
+  DXVAHDDestroy();
+}
+
+// Returns the IDXVAHD_Device as an untyped pointer (NULL when none exists).
+void * CDXVAWrapper::Device() const
+{
+  return _dxvaDevice;
+}
+
+// Combines this wrapper's status with that of the embedded D3D9Ex wrapper.
+// DEVICE_FAIL on either side wins, then DEVICE_NOTSUPPORTED, otherwise
+// DEVICE_PASS.
+TDeviceStatus CDXVAWrapper::Status() const
+{
+    const TDeviceStatus inner = _d3d9.Status();
+
+    if (DEVICE_FAIL == _status || DEVICE_FAIL == inner)
+        return DEVICE_FAIL;
+
+    if (DEVICE_NOTSUPPORTED == _status || DEVICE_NOTSUPPORTED == inner)
+        return DEVICE_NOTSUPPORTED;
+
+    return DEVICE_PASS;
+}
+
+// Moves the embedded D3D9Ex wrapper to its next adapter and, when that
+// succeeds, recreates the DXVA-HD device on top of it.
+// Returns true only if both steps leave the wrapper in DEVICE_PASS state;
+// _status then holds the outcome of the last step performed.
+bool CDXVAWrapper::AdapterNext()
+{
+  if (_status != DEVICE_PASS)
+    return false;
+
+  const bool innerFound = _d3d9.AdapterNext();
+  _status = _d3d9.Status();
+  _adapterFound = innerFound && (DEVICE_PASS == _status);
+  if (!_adapterFound)
+    return false;
+
+  // DXVAHDInit() requires _adapterFound to be true, which holds here.
+  DXVAHDDestroy();
+  _status = DXVAHDInit();
+  _adapterFound = (DEVICE_PASS == _status);
+
+  return _adapterFound;
+}
+
+// Creates the DXVA-HD device on the current D3D9Ex device, describing
+// progressive content at VIDEO_FPS with the window's dimensions for both
+// input and output.
+// Returns DEVICE_FAIL if the wrapper is already failed or no adapter is
+// selected; otherwise returns _status, which is set to DEVICE_NOTSUPPORTED
+// when creation fails with E_NOINTERFACE and to DEVICE_FAIL on any other
+// creation failure.
+TDeviceStatus CDXVAWrapper::DXVAHDInit()
+{
+  const bool alreadyFailed = (DEVICE_FAIL == _status) || (DEVICE_FAIL == _d3d9.Status());
+  if (alreadyFailed || !_adapterFound)
+    return DEVICE_FAIL;
+
+  DXVAHD_RATIONAL fps = { VIDEO_FPS, 1 };
+
+  DXVAHD_CONTENT_DESC desc;
+  desc.InputFrameFormat= DXVAHD_FRAME_FORMAT_PROGRESSIVE;
+  desc.InputFrameRate = fps;
+  desc.OutputFrameRate = fps;
+  desc.InputWidth = WindowWidth();
+  desc.OutputWidth = WindowWidth();
+  desc.InputHeight = WindowHeight();
+  desc.OutputHeight = WindowHeight();
+
+  // Builds configured for the software plugin always report failure here.
+#ifdef USE_SOFTWARE_PLUGIN
+  _status = DEVICE_FAIL;
+  return DEVICE_FAIL;
+#endif
+
+  HRESULT hr = DXVAHD_CreateDevice(static_cast<IDirect3DDevice9Ex *>(_d3d9.Device()),
+    &desc, DXVAHD_DEVICE_USAGE_PLAYBACK_NORMAL, NULL, &_dxvaDevice);
+  if (SUCCEEDED(hr))
+    return _status;
+
+  if (E_NOINTERFACE == hr)
+  {
+    log_error("DXVAHD_CreateDevice skipped due to no supported devices!\n");
+    _status = DEVICE_NOTSUPPORTED;
+  }
+  else
+  {
+    log_error("DXVAHD_CreateDevice failed\n");
+    _status = DEVICE_FAIL;
+  }
+
+  return _status;
+}
+
+// Releases the DXVA-HD device, if any, and clears the pointer.
+void CDXVAWrapper::DXVAHDDestroy()
+{
+  if (NULL != _dxvaDevice)
+  {
+    _dxvaDevice->Release();
+    _dxvaDevice = 0;
+  }
+}
+
+// Forwards to the embedded D3D9Ex wrapper's D3D().
+void * CDXVAWrapper::D3D() const
+{
+  return _d3d9.D3D();
+}
+
+// Forwards to the embedded D3D9Ex wrapper's AdapterIdx().
+unsigned int CDXVAWrapper::AdapterIdx() const
+{
+  return _d3d9.AdapterIdx();
+}
+
+// Gives read-only access to the embedded D3D9Ex wrapper.
+const CD3D9ExWrapper & CDXVAWrapper::D3D9() const
+{
+  return _d3d9;
+}
+
+// Creates an empty wrapper that holds no surface.
+CD3D9SurfaceWrapper::CD3D9SurfaceWrapper():
+mMem(NULL)
+{
+
+}
+
+// Wraps an existing surface. No AddRef is performed here, while the
+// destructor calls Release(), so the wrapper takes over the caller's
+// reference.
+CD3D9SurfaceWrapper::CD3D9SurfaceWrapper( IDirect3DSurface9* mem ):
+mMem(mem)
+{
+
+}
+
+// Releases the wrapped surface, if any.
+CD3D9SurfaceWrapper::~CD3D9SurfaceWrapper()
+{
+  if(mMem != NULL)
+    mMem->Release();
+  mMem = NULL;
+}
+
+#endif
+
+// Base class for surface wrappers; it holds no state of its own.
+CSurfaceWrapper::CSurfaceWrapper()
+{
+
+}
+
+// Empty; declared virtual in wrappers.h so derived wrappers can be destroyed
+// through a CSurfaceWrapper pointer.
+CSurfaceWrapper::~CSurfaceWrapper()
+{
+
+}

diff --git a/test_extensions/media_sharing/wrappers.h b/test_extensions/media_sharing/wrappers.h
new file mode 100644
index 0000000..45b7032
--- /dev/null
+++ b/test_extensions/media_sharing/wrappers.h

@@ -0,0 +1,197 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef __WRAPPERS_H
+#define __WRAPPERS_H
+
+#if defined(_WIN32)
+#include <d3d9.h>
+#if defined (__MINGW32__)
+#include <rpcsal.h>
+typedef unsigned char UINT8;
+#define __out
+#define __in
+#define __inout
+#define __out_bcount(size)
+#define __out_bcount_opt(size)
+#define __in_opt
+#define __in_ecount(size)
+#define __in_ecount_opt(size)
+#define __out_opt
+#define __out_ecount(size)
+#define __out_ecount_opt(size)
+#define __in_bcount_opt(size)
+#define __inout_opt
+#define __inout_bcount(size)
+#define __in_bcount(size)
+#define __deref_out
+#endif
+#include <dxvahd.h>
+#include <tchar.h>
+#endif
+
+// Outcome of adapter/device initialisation.
+// The enumerators are unscoped and convert to int as
+// DEVICE_NOTSUPPORTED == 0, DEVICE_PASS == 1, DEVICE_FAIL == 2, so
+// DEVICE_FAIL is non-zero ("true") if it is ever tested as a boolean.
+enum TDeviceStatus
+{
+  DEVICE_NOTSUPPORTED,
+  DEVICE_PASS,
+  DEVICE_FAIL,
+};
+
+// Abstract base for the Direct3D/DXVA device wrappers. It owns the window the
+// devices render into and the process-wide acceleration setting; adapter
+// enumeration and device access are left to the derived classes.
+class CDeviceWrapper {
+public:
+  // Vertex-processing mode requested when a device is created.
+  enum TAccelerationType
+  {
+    ACCELERATION_HW,
+    ACCELERATION_SW,
+  };
+
+  CDeviceWrapper();
+  virtual ~CDeviceWrapper();
+
+  // Moves to the next usable adapter; returns false when there is none.
+  virtual bool AdapterNext() = 0;
+  // Ordinal of the adapter selected by the last AdapterNext().
+  virtual unsigned int AdapterIdx() const = 0;
+  // Underlying device interface as an untyped pointer.
+  virtual void *Device() const = 0;
+  virtual TDeviceStatus Status() const = 0;
+  // Underlying Direct3D interface as an untyped pointer.
+  virtual void *D3D() const = 0;
+
+#if defined(_WIN32)
+  HWND WindowHandle() const;
+#endif
+  int WindowWidth() const;
+  int WindowHeight() const;
+  // Creates and shows the window; called from the derived constructors.
+  void WindowInit();
+
+
+  // Getter/setter for the acceleration mode shared by all wrappers.
+  static TAccelerationType AccelerationType();
+  static void AccelerationType(TAccelerationType accelerationTypeNew);
+
+private:
+  // NOTE(review): LPCTSTR is a Windows type but this member is declared
+  // outside the `#if defined(_WIN32)` guard.
+  static LPCTSTR WINDOW_TITLE;
+  static const int WINDOW_WIDTH;
+  static const int WINDOW_HEIGHT;
+  static TAccelerationType accelerationType;
+
+#if defined(_WIN32)
+  HMODULE _hInstance;
+  HWND _hWnd;
+#endif
+
+  void WindowDestroy();
+};
+
+// Polymorphic base for surface wrappers. It has no members; the virtual
+// destructor lets derived wrappers be destroyed through a base pointer.
+class CSurfaceWrapper
+{
+public:
+  CSurfaceWrapper();
+  virtual ~CSurfaceWrapper();
+};
+
+#if defined(_WIN32)
+//windows specific wrappers
+// Direct3D 9 implementation of CDeviceWrapper: owns an IDirect3D9 interface
+// and one device created on the adapter chosen by AdapterNext().
+class CD3D9Wrapper: public CDeviceWrapper {
+public:
+  CD3D9Wrapper();
+  ~CD3D9Wrapper();
+
+  virtual bool AdapterNext();
+  virtual unsigned int AdapterIdx() const;
+  virtual void *Device() const;
+  virtual TDeviceStatus Status() const;
+  virtual void *D3D() const;
+
+private:
+  LPDIRECT3D9 _d3d9;
+  LPDIRECT3DDEVICE9 _d3dDevice;
+  D3DDISPLAYMODE _d3ddm;            // filled by Init()
+  D3DADAPTER_IDENTIFIER9 _adapter;  // filled by AdapterNext()
+  TDeviceStatus _status;
+  unsigned int _adapterIdx;         // one past the selected adapter ordinal
+  bool _adapterFound;
+
+  D3DFORMAT Format();
+  D3DADAPTER_IDENTIFIER9 Adapter();
+  // Defined in wrappers.cpp with return type cl_int; AdapterNext() tests the
+  // result with `!Init()`.
+  int Init();
+  // Releases the device only; the IDirect3D9 interface is kept.
+  void Destroy();
+};
+
+// Direct3D 9Ex implementation of CDeviceWrapper: owns an IDirect3D9Ex
+// interface and one device created on the adapter chosen by AdapterNext().
+class CD3D9ExWrapper: public CDeviceWrapper {
+public:
+  CD3D9ExWrapper();
+  ~CD3D9ExWrapper();
+
+  virtual bool AdapterNext();
+  virtual unsigned int AdapterIdx() const;
+  virtual void *Device() const;
+  virtual TDeviceStatus Status() const;
+  virtual void *D3D() const;
+
+private:
+  LPDIRECT3D9EX _d3d9Ex;
+  LPDIRECT3DDEVICE9EX _d3dDeviceEx;
+  D3DDISPLAYMODEEX _d3ddmEx;        // NOTE(review): never written in wrappers.cpp
+  D3DADAPTER_IDENTIFIER9 _adapter;  // filled by AdapterNext()
+  TDeviceStatus _status;
+  unsigned int _adapterIdx;         // one past the selected adapter ordinal
+  bool _adapterFound;
+
+  D3DFORMAT Format();
+  D3DADAPTER_IDENTIFIER9 Adapter();
+  // Defined in wrappers.cpp with return type cl_int; returns DEVICE_PASS or
+  // DEVICE_FAIL.
+  int Init();
+  // Releases the device only; the IDirect3D9Ex interface is kept.
+  void Destroy();
+};
+
+// DXVA-HD implementation of CDeviceWrapper, layered on an embedded
+// CD3D9ExWrapper that supplies the Direct3D interface and device.
+class CDXVAWrapper: public CDeviceWrapper {
+public:
+  CDXVAWrapper();
+  ~CDXVAWrapper();
+
+  virtual bool AdapterNext();
+  virtual unsigned int AdapterIdx() const;
+  // Returns the IDXVAHD_Device, not the Direct3D device.
+  virtual void *Device() const;
+  // Combined status of this wrapper and the embedded D3D9Ex wrapper.
+  virtual TDeviceStatus Status() const;
+  virtual void *D3D() const;
+  const CD3D9ExWrapper &D3D9() const;
+
+private:
+  CD3D9ExWrapper _d3d9;
+  IDXVAHD_Device *_dxvaDevice;
+  TDeviceStatus _status;
+  bool _adapterFound;
+
+  static const D3DFORMAT RENDER_TARGET_FORMAT;
+  static const D3DFORMAT VIDEO_FORMAT;
+  static const unsigned int VIDEO_FPS;
+
+  TDeviceStatus DXVAHDInit();
+  void DXVAHDDestroy();
+};
+
+// Owns one IDirect3DSurface9 reference and releases it on destruction.
+// NOTE(review): copy construction and assignment are not disabled, so a copy
+// would release the same surface twice — confirm that callers never copy.
+class CD3D9SurfaceWrapper: public CSurfaceWrapper
+{
+public:
+  CD3D9SurfaceWrapper();
+  CD3D9SurfaceWrapper( IDirect3DSurface9* mem );
+  ~CD3D9SurfaceWrapper();
+
+  // Implicit conversion to the raw surface pointer.
+  operator IDirect3DSurface9*() { return mMem; }
+  // Address of the internal pointer, so the wrapper can be passed where an
+  // IDirect3DSurface9** is expected.
+  IDirect3DSurface9* * operator&() { return &mMem; }
+  IDirect3DSurface9* operator->() const { return mMem; }
+
+private:
+  IDirect3DSurface9* mMem;
+};
+#endif
+
+#endif  // __WRAPPERS_H

diff --git a/travis.sh b/travis.sh
new file mode 100755
index 0000000..c7ad707
--- /dev/null
+++ b/travis.sh

@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+#
+# CI build script for the OpenCL conformance tests.
+#
+# Environment:
+#   JOB_CHECK_FORMAT   if 1, only run ./check-format.sh and exit with its status
+#   JOB_ARCHITECTURE   empty for a native build, or "arm" / "aarch64" to
+#                      download a Linaro cross toolchain and cross-compile
+#
+# Steps: optional toolchain setup, fetch OpenCL-Headers, fetch and build the
+# OpenCL-ICD-Loader, fetch libclcxx, then configure and build the CTS.
+
+set -e
+
+# Assign first, export second, so a failing `pwd` is not masked by `export`.
+TOP="$(pwd)"
+export TOP
+
+# JOB_CHECK_FORMAT may be unset; default it to 0 so the numeric comparison
+# does not fail with "integer expression expected".
+if [ "${JOB_CHECK_FORMAT:-0}" -eq 1 ]; then
+    ./check-format.sh
+    exit $?
+fi
+
+TOOLCHAIN_URL_arm="https://releases.linaro.org/components/toolchain/binaries/7.5-2019.12/arm-linux-gnueabihf/gcc-linaro-7.5.0-2019.12-x86_64_arm-linux-gnueabihf.tar.xz"
+TOOLCHAIN_URL_aarch64="https://releases.linaro.org/components/toolchain/binaries/7.5-2019.12/aarch64-linux-gnu/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu.tar.xz"
+
+TOOLCHAIN_PREFIX_arm=arm-linux-gnueabihf
+TOOLCHAIN_PREFIX_aarch64=aarch64-linux-gnu
+
+# An empty toolchain file is used for native builds.
+TOOLCHAIN_FILE="${TOP}/toolchain.cmake"
+touch "${TOOLCHAIN_FILE}"
+
+# Prepare toolchain if needed
+if [[ -n "${JOB_ARCHITECTURE}" ]]; then
+    # Indirect expansion: look up TOOLCHAIN_URL_<arch> / TOOLCHAIN_PREFIX_<arch>.
+    TOOLCHAIN_URL_VAR="TOOLCHAIN_URL_${JOB_ARCHITECTURE}"
+    TOOLCHAIN_URL="${!TOOLCHAIN_URL_VAR}"
+    TOOLCHAIN_PREFIX_VAR="TOOLCHAIN_PREFIX_${JOB_ARCHITECTURE}"
+    TOOLCHAIN_PREFIX="${!TOOLCHAIN_PREFIX_VAR}"
+
+    # Fail early with a clear message instead of running wget without a URL.
+    if [[ -z "${TOOLCHAIN_URL}" || -z "${TOOLCHAIN_PREFIX}" ]]; then
+        echo "Unsupported JOB_ARCHITECTURE: ${JOB_ARCHITECTURE}" >&2
+        exit 1
+    fi
+
+    wget "${TOOLCHAIN_URL}"
+    TOOLCHAIN_ARCHIVE="${TOOLCHAIN_URL##*/}"
+    tar xf "${TOOLCHAIN_ARCHIVE}"
+    TOOLCHAIN_DIR="${TOP}/${TOOLCHAIN_ARCHIVE%.tar.xz}"
+    export PATH="${TOOLCHAIN_DIR}/bin:${PATH}"
+
+    {
+        echo "SET(CMAKE_SYSTEM_NAME Linux)"
+        echo "SET(CMAKE_SYSTEM_PROCESSOR ${JOB_ARCHITECTURE})"
+        echo "SET(CMAKE_C_COMPILER   ${TOOLCHAIN_PREFIX}-gcc)"
+        echo "SET(CMAKE_CXX_COMPILER ${TOOLCHAIN_PREFIX}-g++)"
+        echo "SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)"
+        echo "SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)"
+        echo "SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)"
+    } >> "${TOOLCHAIN_FILE}"
+fi
+
+# Prepare headers
+git clone https://github.com/KhronosGroup/OpenCL-Headers.git
+cd OpenCL-Headers
+ln -s CL OpenCL # For OSX builds
+cd ..
+
+# Get and build loader
+git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git
+cd "${TOP}/OpenCL-ICD-Loader"
+mkdir build
+cd build
+cmake -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" -DOPENCL_ICD_LOADER_HEADERS_DIR="${TOP}/OpenCL-Headers/" ..
+make
+
+# Get libclcxx
+cd "${TOP}"
+git clone https://github.com/KhronosGroup/libclcxx.git
+
+# Build CTS
+ls -l
+mkdir build
+cd build
+cmake -DCL_INCLUDE_DIR="${TOP}/OpenCL-Headers" \
+      -DCL_LIB_DIR="${TOP}/OpenCL-ICD-Loader/build" \
+      -DCL_LIBCLCXX_DIR="${TOP}/libclcxx" \
+      -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
+      -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \
+      -DOPENCL_LIBRARIES="-lOpenCL -lpthread" \
+      -DUSE_CL_EXPERIMENTAL=ON \
+      ..
+make -j2
+
commit	18ba13d4cd30ecc17137028005700cc22e797dbd	[log] [tgz]
author	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	Fri Apr 08 16:01:30 2022 +0000
committer	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	Fri Apr 08 16:01:30 2022 +0000
tree	e82e1600981b5545830c12f105b4e415ab7287af
parent	27a1013439ea58ea4aaf49fc8601ec0b1773e839 [diff]
parent	5cfdcf077be7b3727160275e83ae6b754f01dd07 [diff]