Upgrade libjpeg-turbo to 9b894306ec3b28cea46e84c32b56773a98c483da am: eefbdb76d4 am: 1fc3e1d440

Original change: https://android-review.googlesource.com/c/platform/external/libjpeg-turbo/+/2901153

Change-Id: I6aab1be757022d1bfd58df1b193064cc0dbf79ca
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/BUILD.gn b/BUILD.gn
index d566340..b39d278 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -5,12 +5,15 @@
 # Do not use the targets in this file unless you need a certain libjpeg
 # implementation. Use the meta target //third_party:jpeg instead.
 
+import("//build/config/features.gni")
 import("//build/config/sanitizers/sanitizers.gni")
 if (current_cpu == "arm" || current_cpu == "arm64") {
   import("//build/config/arm.gni")
 }
 
-assert(!is_ios, "This is not used on iOS, don't drag it in unintentionally")
+assert(
+    use_blink,
+    "This is not used if blink is not enabled, don't drag it in unintentionally")
 
 source_set("libjpeg_headers") {
   sources = [
@@ -21,7 +24,6 @@
     "jpeglib.h",
     "jpeglibmangler.h",
   ]
-  defines = [ "MANGLE_JPEG_NAMES" ]
 }
 
 if (current_cpu == "x86" || current_cpu == "x64") {
@@ -31,6 +33,11 @@
     defines = []
     include_dirs = [ "simd/nasm/" ]
 
+    inputs = [
+      "simd/nasm/jdct.inc",
+      "simd/nasm/jsimdext.inc",
+    ]
+
     if (current_cpu == "x86") {
       include_dirs += [ "simd/i386/" ]
       sources = [
@@ -138,6 +145,7 @@
 static_library("simd") {
   include_dirs = [ "." ]
   deps = [ ":libjpeg_headers" ]
+  defines = [ "MANGLE_JPEG_NAMES" ]
 
   if (current_cpu == "x86") {
     deps += [ ":simd_asm" ]
@@ -175,7 +183,7 @@
       ]
     }
 
-    defines = [ "NEON_INTRINSICS" ]
+    defines += [ "NEON_INTRINSICS" ]
 
     configs -= [ "//build/config/compiler:default_optimization" ]
     configs += [ "//build/config/compiler:optimize_speed" ]
@@ -190,6 +198,7 @@
 
 config("libjpeg_config") {
   include_dirs = [ "." ]
+  defines = [ "MANGLE_JPEG_NAMES" ]
 }
 
 static_library("libjpeg") {
@@ -248,6 +257,7 @@
   defines = [
     "WITH_SIMD",
     "NO_GETENV",
+    "NO_PUTENV",
   ]
 
   configs += [ ":libjpeg_config" ]
@@ -342,5 +352,9 @@
 
     configs -= [ "//build/config/compiler:chromium_code" ]
     configs += [ "//build/config/compiler:no_chromium_code" ]
+
+    if (is_win) {
+      cflags = [ "-U_CRT_SECURE_NO_DEPRECATE" ]
+    }
   }
 }
diff --git a/BUILDING.md b/BUILDING.md
index f91abcd..b965b5e 100644
--- a/BUILDING.md
+++ b/BUILDING.md
@@ -10,35 +10,24 @@
 
 - [CMake](http://www.cmake.org) v2.8.12 or later
 
-- [NASM](http://www.nasm.us) or [YASM](http://yasm.tortall.net)
+- [NASM](http://www.nasm.us) or [Yasm](http://yasm.tortall.net)
   (if building x86 or x86-64 SIMD extensions)
   * If using NASM, 2.13 or later is required.
-  * If using YASM, 1.2.0 or later is required.
-  * If building on macOS, NASM or YASM can be obtained from
+  * If using Yasm, 1.2.0 or later is required.
+  * If building on macOS, NASM or Yasm can be obtained from
     [MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/).
      - NOTE: Currently, if it is desirable to hide the SIMD function symbols in
        Mac executables or shared libraries that statically link with
-       libjpeg-turbo, then NASM 2.14 or later or YASM must be used when
+       libjpeg-turbo, then NASM 2.14 or later or Yasm must be used when
        building libjpeg-turbo.
-  * If building on Windows, **nasm.exe**/**yasm.exe** should be in your `PATH`.
-  * NASM and YASM are located in the CRB (Code Ready Builder) repository on
-    Red Hat Enterprise Linux 8 and in the PowerTools repository on CentOS 8,
-    which is not enabled by default.
-
-  The binary RPMs released by the NASM project do not work on older Linux
-  systems, such as Red Hat Enterprise Linux 5.  On such systems, you can easily
-  build and install NASM from a source RPM by downloading one of the SRPMs from
-
-  <http://www.nasm.us/pub/nasm/releasebuilds>
-
-  and executing the following as root:
-
-        ARCH=`uname -m`
-        rpmbuild --rebuild nasm-{version}.src.rpm
-        rpm -Uvh /usr/src/redhat/RPMS/$ARCH/nasm-{version}.$ARCH.rpm
-
-  NOTE: the NASM build will fail if texinfo is not installed.
-
+  * If NASM or Yasm is not in your `PATH`, then you can specify the full path
+    to the assembler by using either the `CMAKE_ASM_NASM_COMPILER` CMake
+    variable or the `ASM_NASM` environment variable.  On Windows, use forward
+    slashes rather than backslashes in the path (for example,
+    **c:/nasm/nasm.exe**).
+  * NASM and Yasm are located in the CRB (Code Ready Builder) or PowerTools
+    repository on Red Hat Enterprise Linux 8+ and derivatives, which is not
+    enabled by default.
 
 ### Un*x Platforms (including Linux, Mac, FreeBSD, Solaris, and Cygwin)
 
@@ -90,6 +79,14 @@
   * If using JDK 11 or later, CMake 3.10.x or later must also be used.
 
 
+Sub-Project Builds
+------------------
+
+The libjpeg-turbo build system does not support being included as a sub-project
+using the CMake `add_subdirectory()` function.  Use the CMake
+`ExternalProject_Add()` function instead.
+
+
 Out-of-Tree Builds
 ------------------
 
@@ -106,8 +103,9 @@
 Ninja
 -----
 
-In all of the procedures and recipes below, replace `make` with `ninja` and
-`Unix Makefiles` with `Ninja` if using Ninja.
+If using Ninja, then replace `make` or `nmake` with `ninja`, and replace the
+CMake generator (specified with the `-G` option) with `Ninja`, in all of the
+procedures and recipes below.
 
 
 Build Procedure
@@ -374,9 +372,13 @@
 
     cd {build_directory}
     cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \
+      -DCMAKE_INSTALL_PREFIX={install_path} \
       [additional CMake flags] {source_directory}
     make
 
+*{install\_path}* is the path under which the libjpeg-turbo binaries should be
+installed.
+
 
 ### 64-bit MinGW Build on Un*x (including Mac and Cygwin)
 
@@ -393,9 +395,13 @@
 
     cd {build_directory}
     cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \
+      -DCMAKE_INSTALL_PREFIX={install_path} \
       [additional CMake flags] {source_directory}
     make
 
+*{install\_path}* is the path under which the libjpeg-turbo binaries should be
+installed.
+
 
 Building libjpeg-turbo for iOS
 ------------------------------
@@ -431,6 +437,10 @@
       [additional CMake flags] {source_directory}
     make
 
+Replace `iPhoneOS` with `iPhoneSimulator` and `-miphoneos-version-min` with
+`-miphonesimulator-version-min` to build libjpeg-turbo for the iOS simulator on
+Macs with Apple silicon CPUs.
+
 
 Building libjpeg-turbo for Android
 ----------------------------------
diff --git a/ChangeLog.md b/ChangeLog.md
index ca5208b..1c1e653 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -1,7 +1,174 @@
+2.1.5.1
+=======
+
+### Significant changes relative to 2.1.5:
+
+1. The SIMD dispatchers in libjpeg-turbo 2.1.4 and prior stored the list of
+supported SIMD instruction sets in a global variable, which caused an innocuous
+race condition whereby the variable could have been initialized multiple times
+if `jpeg_start_*compress()` was called simultaneously in multiple threads.
+libjpeg-turbo 2.1.5 included an undocumented attempt to fix this race condition
+by making the SIMD support variable thread-local.  However, that caused another
+issue whereby, if `jpeg_start_*compress()` was called in one thread and
+`jpeg_read_*()` or `jpeg_write_*()` was called in a second thread, the SIMD
+support variable was never initialized in the second thread.  On x86 systems,
+this led the second thread to incorrectly assume that AVX2 instructions were
+always available, and when it attempted to use those instructions on older x86
+CPUs that do not support them, an illegal instruction error occurred.  The SIMD
+dispatchers now ensure that the SIMD support variable is initialized before
+dispatching based on its value.
+
+
+2.1.5
+=====
+
+### Significant changes relative to 2.1.4:
+
+1. Fixed issues in the build system whereby, when using the Ninja Multi-Config
+CMake generator, a static build of libjpeg-turbo (a build in which
+`ENABLE_SHARED` is `0`) could not be installed, a Windows installer could not
+be built, and the Java regression tests failed.
+
+2. Fixed a regression introduced by 2.0 beta1[15] that caused a buffer overrun
+in the progressive Huffman encoder when attempting to transform a
+specially-crafted malformed 12-bit-per-component JPEG image into a progressive
+12-bit-per-component JPEG image using a 12-bit-per-component build of
+libjpeg-turbo (`-DWITH_12BIT=1`.)  Given that the buffer overrun was fully
+contained within the progressive Huffman encoder structure and did not cause a
+segfault or other user-visible errant behavior, given that the lossless
+transformer (unlike the decompressor) is not generally exposed to arbitrary
+data exploits, and given that 12-bit-per-component builds of libjpeg-turbo are
+uncommon, this issue did not likely pose a security risk.
+
+3. Fixed an issue whereby, when using a 12-bit-per-component build of
+libjpeg-turbo (`-DWITH_12BIT=1`), passing samples with values greater than 4095
+or less than 0 to `jpeg_write_scanlines()` caused a buffer overrun or underrun
+in the RGB-to-YCbCr color converter.
+
+4. Fixed a floating point exception that occurred when attempting to use the
+jpegtran `-drop` and `-trim` options to losslessly transform a
+specially-crafted malformed JPEG image.
+
+5. Fixed an issue in `tjBufSizeYUV2()` whereby it returned a bogus result,
+rather than throwing an error, if the `align` parameter was not a power of 2.
+Fixed a similar issue in `tjCompressFromYUV()` whereby it generated a corrupt
+JPEG image in certain cases, rather than throwing an error, if the `align`
+parameter was not a power of 2.
+
+6. Fixed an issue whereby `tjDecompressToYUV2()`, which is a wrapper for
+`tjDecompressToYUVPlanes()`, used the desired YUV image dimensions rather than
+the actual scaled image dimensions when computing the plane pointers and
+strides to pass to `tjDecompressToYUVPlanes()`.  This caused a buffer overrun
+and subsequent segfault if the desired image dimensions exceeded the scaled
+image dimensions.
+
+7. Fixed an issue whereby, when decompressing a 12-bit-per-component JPEG image
+(`-DWITH_12BIT=1`) using an alpha-enabled output color space such as
+`JCS_EXT_RGBA`, the alpha channel was set to 255 rather than 4095.
+
+8. Fixed an issue whereby the Java version of TJBench did not accept a range of
+quality values.
+
+9. Fixed an issue whereby, when `-progressive` was passed to TJBench, the JPEG
+input image was not transformed into a progressive JPEG image prior to
+decompression.
+
+
+2.1.4
+=====
+
+### Significant changes relative to 2.1.3:
+
+1. Fixed a regression introduced in 2.1.3 that caused build failures with
+Visual Studio 2010.
+
+2. The `tjDecompressHeader3()` function in the TurboJPEG C API and the
+`TJDecompressor.setSourceImage()` method in the TurboJPEG Java API now accept
+"abbreviated table specification" (AKA "tables-only") datastreams, which can be
+used to prime the decompressor with quantization and Huffman tables that can be
+used when decompressing subsequent "abbreviated image" datastreams.
+
+3. libjpeg-turbo now performs run-time detection of AltiVec instructions on
+OS X/PowerPC systems if AltiVec instructions are not enabled at compile time.
+This allows both AltiVec-equipped (PowerPC G4 and G5) and non-AltiVec-equipped
+(PowerPC G3) CPUs to be supported using the same build of libjpeg-turbo.
+
+4. Fixed an error ("Bogus virtual array access") that occurred when attempting
+to decompress a progressive JPEG image with a height less than or equal to one
+iMCU (8 * the vertical sampling factor) using buffered-image mode with
+interblock smoothing enabled.  This was a regression introduced by
+2.1 beta1[6(b)].
+
+5. Fixed two issues that prevented partial image decompression from working
+properly with buffered-image mode:
+
+     - Attempting to call `jpeg_crop_scanline()` after
+`jpeg_start_decompress()` but before `jpeg_start_output()` resulted in an error
+("Improper call to JPEG library in state 207".)
+     - Attempting to use `jpeg_skip_scanlines()` resulted in an error ("Bogus
+virtual array access") under certain circumstances.
+
+
+2.1.3
+=====
+
+### Significant changes relative to 2.1.2:
+
+1. Fixed a regression introduced by 2.0 beta1[7] whereby cjpeg compressed PGM
+input files into full-color JPEG images unless the `-grayscale` option was
+used.
+
+2. cjpeg now automatically compresses GIF and 8-bit BMP input files into
+grayscale JPEG images if the input files contain only shades of gray.
+
+3. The build system now enables the intrinsics implementation of the AArch64
+(Arm 64-bit) Neon SIMD extensions by default when using GCC 12 or later.
+
+4. Fixed a segfault that occurred while decompressing a 4:2:0 JPEG image using
+the merged (non-fancy) upsampling algorithms (that is, with
+`cinfo.do_fancy_upsampling` set to `FALSE`) along with `jpeg_crop_scanline()`.
+Specifically, the segfault occurred if the number of bytes remaining in the
+output buffer was less than the number of bytes required to represent one
+uncropped scanline of the output image.  For that reason, the issue could only
+be reproduced using the libjpeg API, not using djpeg.
+
+
+2.1.2
+=====
+
+### Significant changes relative to 2.1.1:
+
+1. Fixed a regression introduced by 2.1 beta1[13] that caused the remaining
+GAS implementations of AArch64 (Arm 64-bit) Neon SIMD functions (which are used
+by default with GCC for performance reasons) to be placed in the `.rodata`
+section rather than in the `.text` section.  This caused the GNU linker to
+automatically place the `.rodata` section in an executable segment, which
+prevented libjpeg-turbo from working properly with other linkers and also
+represented a potential security risk.
+
+2. Fixed an issue whereby the `tjTransform()` function incorrectly computed the
+MCU block size for 4:4:4 JPEG images with non-unary sampling factors and thus
+unduly rejected some cropping regions, even though those regions aligned with
+8x8 MCU block boundaries.
+
+3. Fixed a regression introduced by 2.1 beta1[13] that caused the build system
+to enable the Arm Neon SIMD extensions when targeting Armv6 and other legacy
+architectures that do not support Neon instructions.
+
+4. libjpeg-turbo now performs run-time detection of AltiVec instructions on
+FreeBSD/PowerPC systems if AltiVec instructions are not enabled at compile
+time.  This allows both AltiVec-equipped and non-AltiVec-equipped CPUs to be
+supported using the same build of libjpeg-turbo.
+
+5. cjpeg now accepts a `-strict` argument similar to that of djpeg and
+jpegtran, which causes the compressor to abort if an LZW-compressed GIF input
+image contains incomplete or corrupt image data.
+
+
 2.1.1
 =====
 
-### Significant changes relative to 2.1.0
+### Significant changes relative to 2.1.0:
 
 1. Fixed a regression introduced in 2.1.0 that caused build failures with
 non-GCC-compatible compilers for Un*x/Arm platforms.
@@ -15,11 +182,22 @@
 Android systems when running AArch32/Thumb builds of libjpeg-turbo built with
 recent versions of Clang.
 
+4. Added a command-line argument (`-copy icc`) to jpegtran that causes it to
+copy only the ICC profile markers from the source file and discard any other
+metadata.
+
+5. libjpeg-turbo should now build and run on CHERI-enabled architectures, which
+use capability pointers that are larger than the size of `size_t`.
+
+6. Fixed a regression (CVE-2021-37972) introduced by 2.1 beta1[5] that caused a
+segfault in the 64-bit SSE2 Huffman encoder when attempting to losslessly
+transform a specially-crafted malformed JPEG image.
+
 
 2.1.0
 =====
 
-### Significant changes relative to 2.1 beta1
+### Significant changes relative to 2.1 beta1:
 
 1. Fixed a regression introduced by 2.1 beta1[6(b)] whereby attempting to
 decompress certain progressive JPEG images with one or more component planes of
@@ -54,10 +232,10 @@
 ["Two Issues with the JPEG Standard"](https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf).
 
 7. The PPM reader now throws an error, rather than segfaulting (due to a buffer
-overrun) or generating incorrect pixels, if an application attempts to use the
-`tjLoadImage()` function to load a 16-bit binary PPM file (a binary PPM file
-with a maximum value greater than 255) into a grayscale image buffer or to load
-a 16-bit binary PGM file into an RGB image buffer.
+overrun, CVE-2021-46822) or generating incorrect pixels, if an application
+attempts to use the `tjLoadImage()` function to load a 16-bit binary PPM file
+(a binary PPM file with a maximum value greater than 255) into a grayscale
+image buffer or to load a 16-bit binary PGM file into an RGB image buffer.
 
 8. Fixed an issue in the PPM reader that caused incorrect pixels to be
 generated when using the `tjLoadImage()` function to load a 16-bit binary PPM
@@ -223,11 +401,11 @@
 
 2. Fixed or worked around multiple issues with `jpeg_skip_scanlines()`:
 
-     - Fixed segfaults or "Corrupt JPEG data: premature end of data segment"
-errors in `jpeg_skip_scanlines()` that occurred when decompressing 4:2:2 or
-4:2:0 JPEG images using merged (non-fancy) upsampling/color conversion (that
-is, when setting `cinfo.do_fancy_upsampling` to `FALSE`.)  2.0.0[6] was a
-similar fix, but it did not cover all cases.
+     - Fixed segfaults (CVE-2020-35538) or "Corrupt JPEG data: premature end of
+data segment" errors in `jpeg_skip_scanlines()` that occurred when
+decompressing 4:2:2 or 4:2:0 JPEG images using merged (non-fancy)
+upsampling/color conversion (that is, when setting `cinfo.do_fancy_upsampling`
+to `FALSE`.)  2.0.0[6] was a similar fix, but it did not cover all cases.
      - `jpeg_skip_scanlines()` now throws an error if two-pass color
 quantization is enabled.  Two-pass color quantization never worked properly
 with `jpeg_skip_scanlines()`, and the issues could not readily be fixed.
@@ -598,7 +776,7 @@
 now produces bitwise-identical results to the unmerged algorithms.
 
 12. The SIMD function symbols for x86[-64]/ELF, MIPS/ELF, macOS/x86[-64] (if
-libjpeg-turbo is built with YASM), and iOS/Arm[64] builds are now private.
+libjpeg-turbo is built with Yasm), and iOS/Arm[64] builds are now private.
 This prevents those symbols from being exposed in applications or shared
 libraries that link statically with libjpeg-turbo.
 
@@ -1483,8 +1661,8 @@
 
 ### Significant changes relative to 1.2 beta1:
 
-1. Fixed build issue with YASM on Unix systems (the libjpeg-turbo build system
-was not adding the current directory to the assembler include path, so YASM
+1. Fixed build issue with Yasm on Unix systems (the libjpeg-turbo build system
+was not adding the current directory to the assembler include path, so Yasm
 was not able to find jsimdcfg.inc.)
 
 2. Fixed out-of-bounds read in SSE2 SIMD code that occurred when decompressing
@@ -1552,7 +1730,7 @@
 8. All legacy VirtualGL code has been re-factored, and this has allowed
 libjpeg-turbo, in its entirety, to be re-licensed under a BSD-style license.
 
-9. libjpeg-turbo can now be built with YASM.
+9. libjpeg-turbo can now be built with Yasm.
 
 10. Added SIMD acceleration for ARM Linux and iOS platforms that support
 NEON instructions.
diff --git a/LICENSE.md b/LICENSE.md
index a1cdad5..bf8a7fd 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -91,7 +91,7 @@
 The Modified (3-clause) BSD License
 ===================================
 
-Copyright (C)2009-2021 D. R. Commander.  All Rights Reserved.<br>
+Copyright (C)2009-2023 D. R. Commander.  All Rights Reserved.<br>
 Copyright (C)2015 Viktor Szathmáry.  All Rights Reserved.
 
 Redistribution and use in source and binary forms, with or without
diff --git a/METADATA b/METADATA
index afa6cd7..c37f6c5 100644
--- a/METADATA
+++ b/METADATA
@@ -1,18 +1,22 @@
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update libjpeg-turbo
+# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md
+
 name: "libjpeg-turbo"
 description: "libjpeg-turbo is a JPEG image codec."
 third_party {
-  url {
-    type: GIT
-    value: "https://chromium.googlesource.com/chromium/deps/libjpeg_turbo"
-  }
-  version: "ad8b3b0f84baf155f3bde5626c3bf9d20535bcae"
   license_type: NOTICE
-  last_upgrade_date {
-    year: 2021
-    month: 7
-    day: 15
-  }
   security {
-      tag: "NVD-CPE2.3:cpe:/a:libjpeg-turbo:libjpeg-turbo:2.1.0"
+    tag: "NVD-CPE2.3:cpe:/a:libjpeg-turbo:libjpeg-turbo:2.1.0"
+  }
+  last_upgrade_date {
+    year: 2024
+    month: 1
+    day: 8
+  }
+  identifier {
+    type: "Git"
+    value: "https://chromium.googlesource.com/chromium/deps/libjpeg_turbo"
+    version: "9b894306ec3b28cea46e84c32b56773a98c483da"
   }
 }
diff --git a/README.chromium b/README.chromium
index de1fe85..78e33e2 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,25 +1,28 @@
 Name: libjpeg-turbo
 URL: https://github.com/libjpeg-turbo/libjpeg-turbo/
-Version: b201838d8b5f2f80c9f86ec8405a62a002232b2c (post 2.1.0)
-License: Custom license
+Version: 2.1.5.1
+License: IJG, BSD-3, Zlib
 License File: LICENSE.md
 Security Critical: yes
+Shipped: yes
 License Android Compatible: yes
 
 Description:
 This consists of the components:
-* libjpeg-turbo b201838d8b5f2f80c9f86ec8405a62a002232b2c (post 2.1.0)
+* libjpeg-turbo 2.1.5.1
 * This file (README.chromium)
 * A build file (BUILD.gn)
 * An OWNERS file
 * A codereview.settings file
+* A DIR_METADATA file
 * Patched header files used by Chromium
-* Deleted unused directories: cmakescripts, doc, fuzz, java, release,
-  sharedlib, simd/loongson, simd/mips, simd/powerpc, and win
-* Deleted unused files: appveyor.yml, CMakeLists.txt, doxygen.config,
-  doxygen-extra.css, .gitattributes, md5/CMakeLists.txt, md5/md5cmp.c,
-  simd/CMakeLists.txt, tjexample.c, tjexampletest.in, tjexampletest.java.in and
-  .travis.yml
+* Deleted unused directories: .github, cmakescripts, doc, fuzz, java, release,
+  sharedlib, simd/mips, simd/mips64, simd/powerpc, and win
+* Deleted unused files: appveyor.yml, CMakeLists.txt, cjpeg.1, croptest.in,
+  djpeg.1, doxygen.config, doxygen-extra.css, example.txt, .gitattributes,
+  jpegtran.1, md5/CMakeLists.txt, md5/md5cmp.c, rdjpgcom.1, simd/CMakeLists.txt,
+  strtest.c, tjbenchtest.in, tjbenchtest.java.in, tjexample.c, tjexampletest.in,
+  tjexampletest.java.in, and wrjpgcom.1
 * Deleted legacy Arm Neon assembly files (supporting old compiler versions that
   do not generate performant code from intrinsics):
   simd/arm/aarch32/jsimd_neon.S, simd/arm/aarch64/jsimd_neon.S.
diff --git a/cdjpeg.c b/cdjpeg.c
index 5278c1d..304a665 100644
--- a/cdjpeg.c
+++ b/cdjpeg.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2019, D. R. Commander.
+ * Copyright (C) 2019, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -34,7 +34,7 @@
     int scan_no = ((j_decompress_ptr)cinfo)->input_scan_number;
 
     if (scan_no > (int)prog->max_scans) {
-      fprintf(stderr, "Scan number %d exceeds maximum scans (%d)\n", scan_no,
+      fprintf(stderr, "Scan number %d exceeds maximum scans (%u)\n", scan_no,
               prog->max_scans);
       exit(EXIT_FAILURE);
     }
diff --git a/cjpeg.1 b/cjpeg.1
deleted file mode 100644
index 569dc3f..0000000
--- a/cjpeg.1
+++ /dev/null
@@ -1,360 +0,0 @@
-.TH CJPEG 1 "4 November 2020"
-.SH NAME
-cjpeg \- compress an image file to a JPEG file
-.SH SYNOPSIS
-.B cjpeg
-[
-.I options
-]
-[
-.I filename
-]
-.LP
-.SH DESCRIPTION
-.LP
-.B cjpeg
-compresses the named image file, or the standard input if no file is
-named, and produces a JPEG/JFIF file on the standard output.
-The currently supported input file formats are: PPM (PBMPLUS color
-format), PGM (PBMPLUS grayscale format), BMP, GIF, and Targa.
-.SH OPTIONS
-All switch names may be abbreviated; for example,
-.B \-grayscale
-may be written
-.B \-gray
-or
-.BR \-gr .
-Most of the "basic" switches can be abbreviated to as little as one letter.
-Upper and lower case are equivalent (thus
-.B \-BMP
-is the same as
-.BR \-bmp ).
-British spellings are also accepted (e.g.,
-.BR \-greyscale ),
-though for brevity these are not mentioned below.
-.PP
-The basic switches are:
-.TP
-.BI \-quality " N[,...]"
-Scale quantization tables to adjust image quality.  Quality is 0 (worst) to
-100 (best); default is 75.  (See below for more info.)
-.TP
-.B \-grayscale
-Create monochrome JPEG file from color input.  Be sure to use this switch when
-compressing a grayscale BMP or GIF file, because
-.B cjpeg
-isn't bright enough to notice whether a BMP or GIF file uses only shades of
-gray.  By saying
-.BR \-grayscale,
-you'll get a smaller JPEG file that takes less time to process.
-.TP
-.B \-rgb
-Create RGB JPEG file.
-Using this switch suppresses the conversion from RGB
-colorspace input to the default YCbCr JPEG colorspace.
-.TP
-.B \-optimize
-Perform optimization of entropy encoding parameters.  Without this, default
-encoding parameters are used.
-.B \-optimize
-usually makes the JPEG file a little smaller, but
-.B cjpeg
-runs somewhat slower and needs much more memory.  Image quality and speed of
-decompression are unaffected by
-.BR \-optimize .
-.TP
-.B \-progressive
-Create progressive JPEG file (see below).
-.TP
-.B \-targa
-Input file is Targa format.  Targa files that contain an "identification"
-field will not be automatically recognized by
-.BR cjpeg ;
-for such files you must specify
-.B \-targa
-to make
-.B cjpeg
-treat the input as Targa format.
-For most Targa files, you won't need this switch.
-.PP
-The
-.B \-quality
-switch lets you trade off compressed file size against quality of the
-reconstructed image: the higher the quality setting, the larger the JPEG file,
-and the closer the output image will be to the original input.  Normally you
-want to use the lowest quality setting (smallest file) that decompresses into
-something visually indistinguishable from the original image.  For this
-purpose the quality setting should generally be between 50 and 95 (the default
-is 75) for photographic images.  If you see defects at
-.B \-quality
-75, then go up 5 or 10 counts at a time until you are happy with the output
-image.  (The optimal setting will vary from one image to another.)
-.PP
-.B \-quality
-100 will generate a quantization table of all 1's, minimizing loss in the
-quantization step (but there is still information loss in subsampling, as well
-as roundoff error.)  For most images, specifying a quality value above
-about 95 will increase the size of the compressed file dramatically, and while
-the quality gain from these higher quality values is measurable (using metrics
-such as PSNR or SSIM), it is rarely perceivable by human vision.
-.PP
-In the other direction, quality values below 50 will produce very small files
-of low image quality.  Settings around 5 to 10 might be useful in preparing an
-index of a large image library, for example.  Try
-.B \-quality
-2 (or so) for some amusing Cubist effects.  (Note: quality
-values below about 25 generate 2-byte quantization tables, which are
-considered optional in the JPEG standard.
-.B cjpeg
-emits a warning message when you give such a quality value, because some
-other JPEG programs may be unable to decode the resulting file.  Use
-.B \-baseline
-if you need to ensure compatibility at low quality values.)
-.PP
-The \fB-quality\fR option has been extended in this version of \fBcjpeg\fR to
-support separate quality settings for luminance and chrominance (or, in
-general, separate settings for every quantization table slot.)  The principle
-is the same as chrominance subsampling:  since the human eye is more sensitive
-to spatial changes in brightness than spatial changes in color, the chrominance
-components can be quantized more than the luminance components without
-incurring any visible image quality loss.  However, unlike subsampling, this
-feature reduces data in the frequency domain instead of the spatial domain,
-which allows for more fine-grained control.  This option is useful in
-quality-sensitive applications, for which the artifacts generated by
-subsampling may be unacceptable.
-.PP
-The \fB-quality\fR option accepts a comma-separated list of parameters, which
-respectively refer to the quality levels that should be assigned to the
-quantization table slots.  If there are more q-table slots than parameters,
-then the last parameter is replicated.  Thus, if only one quality parameter is
-given, this is used for both luminance and chrominance (slots 0 and 1,
-respectively), preserving the legacy behavior of cjpeg v6b and prior.
-More (or customized) quantization tables can be set with the \fB-qtables\fR
-option and assigned to components with the \fB-qslots\fR option (see the
-"wizard" switches below.)
-.PP
-JPEG files generated with separate luminance and chrominance quality are fully
-compliant with standard JPEG decoders.
-.PP
-.BR CAUTION:
-For this setting to be useful, be sure to pass an argument of \fB-sample 1x1\fR
-to \fBcjpeg\fR to disable chrominance subsampling.  Otherwise, the default
-subsampling level (2x2, AKA "4:2:0") will be used.
-.PP
-The
-.B \-progressive
-switch creates a "progressive JPEG" file.  In this type of JPEG file, the data
-is stored in multiple scans of increasing quality.  If the file is being
-transmitted over a slow communications link, the decoder can use the first
-scan to display a low-quality image very quickly, and can then improve the
-display with each subsequent scan.  The final image is exactly equivalent to a
-standard JPEG file of the same quality setting, and the total file size is
-about the same --- often a little smaller.
-.PP
-Switches for advanced users:
-.TP
-.B \-arithmetic
-Use arithmetic coding.
-.B Caution:
-arithmetic coded JPEG is not yet widely implemented, so many decoders will be
-unable to view an arithmetic coded JPEG file at all.
-.TP
-.B \-dct int
-Use accurate integer DCT method (default).
-.TP
-.B \-dct fast
-Use less accurate integer DCT method [legacy feature].
-When the Independent JPEG Group's software was first released in 1991, the
-compression time for a 1-megapixel JPEG image on a mainstream PC was measured
-in minutes.  Thus, the \fBfast\fR integer DCT algorithm provided noticeable
-performance benefits.  On modern CPUs running libjpeg-turbo, however, the
-compression time for a 1-megapixel JPEG image is measured in milliseconds, and
-thus the performance benefits of the \fBfast\fR algorithm are much less
-noticeable.  On modern x86/x86-64 CPUs that support AVX2 instructions, the
-\fBfast\fR and \fBint\fR methods have similar performance.  On other types of
-CPUs, the \fBfast\fR method is generally about 5-15% faster than the \fBint\fR
-method.
-
-For quality levels of 90 and below, there should be little or no perceptible
-quality difference between the two algorithms.  For quality levels above 90,
-however, the difference between the \fBfast\fR and \fBint\fR methods becomes
-more pronounced.  With quality=97, for instance, the \fBfast\fR method incurs
-generally about a 1-3 dB loss in PSNR relative to the \fBint\fR method, but
-this can be larger for some images.  Do not use the \fBfast\fR method with
-quality levels above 97.  The algorithm often degenerates at quality=98 and
-above and can actually produce a more lossy image than if lower quality levels
-had been used.  Also, in libjpeg-turbo, the \fBfast\fR method is not fully
-accelerated for quality levels above 97, so it will be slower than the
-\fBint\fR method.
-.TP
-.B \-dct float
-Use floating-point DCT method [legacy feature].
-The \fBfloat\fR method does not produce significantly more accurate results
-than the \fBint\fR method, and it is much slower.  The \fBfloat\fR method may
-also give different results on different machines due to varying roundoff
-behavior, whereas the integer methods should give the same results on all
-machines.
-.TP
-.BI \-icc " file"
-Embed ICC color management profile contained in the specified file.
-.TP
-.BI \-restart " N"
-Emit a JPEG restart marker every N MCU rows, or every N MCU blocks if "B" is
-attached to the number.
-.B \-restart 0
-(the default) means no restart markers.
-.TP
-.BI \-smooth " N"
-Smooth the input image to eliminate dithering noise.  N, ranging from 1 to
-100, indicates the strength of smoothing.  0 (the default) means no smoothing.
-.TP
-.BI \-maxmemory " N"
-Set limit for amount of memory to use in processing large images.  Value is
-in thousands of bytes, or millions of bytes if "M" is attached to the
-number.  For example,
-.B \-max 4m
-selects 4000000 bytes.  If more space is needed, an error will occur.
-.TP
-.BI \-outfile " name"
-Send output image to the named file, not to standard output.
-.TP
-.BI \-memdst
-Compress to memory instead of a file.  This feature was implemented mainly as a
-way of testing the in-memory destination manager (jpeg_mem_dest()), but it is
-also useful for benchmarking, since it reduces the I/O overhead.
-.TP
-.BI \-report
-Report compression progress.
-.TP
-.B \-verbose
-Enable debug printout.  More
-.BR \-v 's
-give more output.  Also, version information is printed at startup.
-.TP
-.B \-debug
-Same as
-.BR \-verbose .
-.TP
-.B \-version
-Print version information and exit.
-.PP
-The
-.B \-restart
-option inserts extra markers that allow a JPEG decoder to resynchronize after
-a transmission error.  Without restart markers, any damage to a compressed
-file will usually ruin the image from the point of the error to the end of the
-image; with restart markers, the damage is usually confined to the portion of
-the image up to the next restart marker.  Of course, the restart markers
-occupy extra space.  We recommend
-.B \-restart 1
-for images that will be transmitted across unreliable networks such as Usenet.
-.PP
-The
-.B \-smooth
-option filters the input to eliminate fine-scale noise.  This is often useful
-when converting dithered images to JPEG: a moderate smoothing factor of 10 to
-50 gets rid of dithering patterns in the input file, resulting in a smaller
-JPEG file and a better-looking image.  Too large a smoothing factor will
-visibly blur the image, however.
-.PP
-Switches for wizards:
-.TP
-.B \-baseline
-Force baseline-compatible quantization tables to be generated.  This clamps
-quantization values to 8 bits even at low quality settings.  (This switch is
-poorly named, since it does not ensure that the output is actually baseline
-JPEG.  For example, you can use
-.B \-baseline
-and
-.B \-progressive
-together.)
-.TP
-.BI \-qtables " file"
-Use the quantization tables given in the specified text file.
-.TP
-.BI \-qslots " N[,...]"
-Select which quantization table to use for each color component.
-.TP
-.BI \-sample " HxV[,...]"
-Set JPEG sampling factors for each color component.
-.TP
-.BI \-scans " file"
-Use the scan script given in the specified text file.
-.PP
-The "wizard" switches are intended for experimentation with JPEG.  If you
-don't know what you are doing, \fBdon't use them\fR.  These switches are
-documented further in the file wizard.txt.
-.SH EXAMPLES
-.LP
-This example compresses the PPM file foo.ppm with a quality factor of
-60 and saves the output as foo.jpg:
-.IP
-.B cjpeg \-quality
-.I 60 foo.ppm
-.B >
-.I foo.jpg
-.SH HINTS
-Color GIF files are not the ideal input for JPEG; JPEG is really intended for
-compressing full-color (24-bit) images.  In particular, don't try to convert
-cartoons, line drawings, and other images that have only a few distinct
-colors.  GIF works great on these, JPEG does not.  If you want to convert a
-GIF to JPEG, you should experiment with
-.BR cjpeg 's
-.B \-quality
-and
-.B \-smooth
-options to get a satisfactory conversion.
-.B \-smooth 10
-or so is often helpful.
-.PP
-Avoid running an image through a series of JPEG compression/decompression
-cycles.  Image quality loss will accumulate; after ten or so cycles the image
-may be noticeably worse than it was after one cycle.  It's best to use a
-lossless format while manipulating an image, then convert to JPEG format when
-you are ready to file the image away.
-.PP
-The
-.B \-optimize
-option to
-.B cjpeg
-is worth using when you are making a "final" version for posting or archiving.
-It's also a win when you are using low quality settings to make very small
-JPEG files; the percentage improvement is often a lot more than it is on
-larger files.  (At present,
-.B \-optimize
-mode is always selected when generating progressive JPEG files.)
-.SH ENVIRONMENT
-.TP
-.B JPEGMEM
-If this environment variable is set, its value is the default memory limit.
-The value is specified as described for the
-.B \-maxmemory
-switch.
-.B JPEGMEM
-overrides the default value specified when the program was compiled, and
-itself is overridden by an explicit
-.BR \-maxmemory .
-.SH SEE ALSO
-.BR djpeg (1),
-.BR jpegtran (1),
-.BR rdjpgcom (1),
-.BR wrjpgcom (1)
-.br
-.BR ppm (5),
-.BR pgm (5)
-.br
-Wallace, Gregory K.  "The JPEG Still Picture Compression Standard",
-Communications of the ACM, April 1991 (vol. 34, no. 4), pp. 30-44.
-.SH AUTHOR
-Independent JPEG Group
-.PP
-This file was modified by The libjpeg-turbo Project to include only information
-relevant to libjpeg-turbo, to wordsmith certain sections, and to describe
-features not present in libjpeg.
-.SH ISSUES
-Not all variants of BMP and Targa file formats are supported.
-.PP
-The
-.B \-targa
-switch is not a bug, it's a feature.  (It would be a bug if the Targa format
-designers had not been clueless.)
diff --git a/cjpeg.c b/cjpeg.c
index 66ac28f..12eb4ab 100644
--- a/cjpeg.c
+++ b/cjpeg.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * Modified 2003-2011 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2013-2014, 2017, 2019-2021, D. R. Commander.
+ * Copyright (C) 2010, 2013-2014, 2017, 2019-2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -27,6 +27,10 @@
  * works regardless of which command line style is used.
  */
 
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
 #ifdef CJPEG_FUZZER
 #define JPEG_INTERNALS
 #endif
@@ -34,21 +38,6 @@
 #include "jversion.h"           /* for version message */
 #include "jconfigint.h"
 
-#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc(),free() */
-extern void *malloc(size_t size);
-extern void free(void *ptr);
-#endif
-
-#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
-#ifdef __MWERKS__
-#include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>            /* ... and this */
-#endif
-#ifdef THINK_C
-#include <console.h>            /* Think declares it here */
-#endif
-#endif
-
 
 /* Create the add-on message string table. */
 
@@ -147,6 +136,7 @@
 static char *outfilename;       /* for -outfile switch */
 boolean memdst;                 /* for -memdst switch */
 boolean report;                 /* for -report switch */
+boolean strict;                 /* for -strict switch */
 
 
 #ifdef CJPEG_FUZZER
@@ -165,7 +155,7 @@
   longjmp(myerr->setjmp_buffer, 1);
 }
 
-static void my_emit_message(j_common_ptr cinfo, int msg_level)
+static void my_emit_message_fuzzer(j_common_ptr cinfo, int msg_level)
 {
   if (msg_level < 0)
     cinfo->err->num_warnings++;
@@ -240,6 +230,7 @@
   fprintf(stderr, "  -memdst        Compress to memory instead of file (useful for benchmarking)\n");
 #endif
   fprintf(stderr, "  -report        Report compression progress\n");
+  fprintf(stderr, "  -strict        Treat all warnings as fatal\n");
   fprintf(stderr, "  -verbose  or  -debug   Emit debug output\n");
   fprintf(stderr, "  -version       Print version information and exit\n");
   fprintf(stderr, "Switches for wizards:\n");
@@ -285,6 +276,7 @@
   outfilename = NULL;
   memdst = FALSE;
   report = FALSE;
+  strict = FALSE;
   cinfo->err->trace_level = 0;
 
   /* Scan command line options, adjust parameters */
@@ -493,6 +485,9 @@
         usage();
       cinfo->smoothing_factor = val;
 
+    } else if (keymatch(arg, "strict", 2)) {
+      strict = TRUE;
+
     } else if (keymatch(arg, "targa", 1)) {
       /* Input file is Targa format. */
       is_targa = TRUE;
@@ -540,6 +535,19 @@
 }
 
 
+METHODDEF(void)
+my_emit_message(j_common_ptr cinfo, int msg_level)
+{
+  if (msg_level < 0) {
+    /* Treat warning as fatal */
+    cinfo->err->error_exit(cinfo);
+  } else {
+    if (cinfo->err->trace_level >= msg_level)
+      cinfo->err->output_message(cinfo);
+  }
+}
+
+
 /*
  * The main program.
  */
@@ -570,11 +578,6 @@
   unsigned long outsize = 0;
   JDIMENSION num_scanlines;
 
-  /* On Mac, fetch a command line. */
-#ifdef USE_CCOMMAND
-  argc = ccommand(&argv);
-#endif
-
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
     progname = "cjpeg";         /* in case C library doesn't provide it */
@@ -604,6 +607,9 @@
 
   file_index = parse_switches(&cinfo, argc, argv, 0, FALSE);
 
+  if (strict)
+    jerr.emit_message = my_emit_message;
+
 #ifdef TWO_FILE_COMMANDLINE
   if (!memdst) {
     /* Must have either -outfile switch or explicit output file name */
@@ -681,7 +687,7 @@
 
 #ifdef CJPEG_FUZZER
   jerr.error_exit = my_error_exit;
-  jerr.emit_message = my_emit_message;
+  jerr.emit_message = my_emit_message_fuzzer;
   if (setjmp(myerr.setjmp_buffer))
     HANDLE_ERROR()
 #endif
diff --git a/cmyk.h b/cmyk.h
index 48187a8..b6ca20f 100644
--- a/cmyk.h
+++ b/cmyk.h
@@ -17,7 +17,6 @@
 #include <jinclude.h>
 #define JPEG_INTERNALS
 #include <jpeglib.h>
-#include "jconfigint.h"
 
 
 /* Fully reversible */
diff --git a/croptest.in b/croptest.in
deleted file mode 100644
index 7e3c293..0000000
--- a/croptest.in
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/bin/bash
-
-set -u
-set -e
-trap onexit INT
-trap onexit TERM
-trap onexit EXIT
-
-onexit()
-{
-	if [ -d $OUTDIR ]; then
-		rm -rf $OUTDIR
-	fi
-}
-
-runme()
-{
-	echo \*\*\* $*
-	$*
-}
-
-IMAGE=vgl_6548_0026a.bmp
-WIDTH=128
-HEIGHT=95
-IMGDIR=@CMAKE_CURRENT_SOURCE_DIR@/testimages
-OUTDIR=`mktemp -d /tmp/__croptest_output.XXXXXX`
-EXEDIR=@CMAKE_CURRENT_BINARY_DIR@
-
-if [ -d $OUTDIR ]; then
-	rm -rf $OUTDIR
-fi
-mkdir -p $OUTDIR
-
-exec >$EXEDIR/croptest.log
-
-echo "============================================================"
-echo "$IMAGE ($WIDTH x $HEIGHT)"
-echo "============================================================"
-echo
-
-for PROGARG in "" -progressive; do
-
-	cp $IMGDIR/$IMAGE $OUTDIR
-	basename=`basename $IMAGE .bmp`
-	echo "------------------------------------------------------------"
-	echo "Generating test images"
-	echo "------------------------------------------------------------"
-	echo
-	runme $EXEDIR/cjpeg $PROGARG -grayscale -outfile $OUTDIR/${basename}_GRAY.jpg $IMGDIR/${basename}.bmp
-	runme $EXEDIR/cjpeg $PROGARG -sample 2x2 -outfile $OUTDIR/${basename}_420.jpg $IMGDIR/${basename}.bmp
-	runme $EXEDIR/cjpeg $PROGARG -sample 2x1 -outfile $OUTDIR/${basename}_422.jpg $IMGDIR/${basename}.bmp
-	runme $EXEDIR/cjpeg $PROGARG -sample 1x2 -outfile $OUTDIR/${basename}_440.jpg $IMGDIR/${basename}.bmp
-	runme $EXEDIR/cjpeg $PROGARG -sample 1x1 -outfile $OUTDIR/${basename}_444.jpg $IMGDIR/${basename}.bmp
-	echo
-
-	for NSARG in "" -nosmooth; do
-
-		for COLORSARG in "" "-colors 256 -dither none -onepass"; do
-
-			for Y in {0..16}; do
-
-				for H in {1..16}; do
-
-					X=$(( (Y*16)%128 ))
-					W=$(( WIDTH-X-7 ))
-					if [ $Y -le 15 ]; then
-						CROPSPEC="${W}x${H}+${X}+${Y}"
-					else
-						Y2=$(( HEIGHT-H ));
-						CROPSPEC="${W}x${H}+${X}+${Y2}"
-					fi
-
-					echo "------------------------------------------------------------"
-					echo $PROGARG $NSARG $COLORSARG -crop $CROPSPEC
-					echo "------------------------------------------------------------"
-					echo
-					for samp in GRAY 420 422 440 444; do
-						$EXEDIR/djpeg $NSARG $COLORSARG -rgb -outfile $OUTDIR/${basename}_${samp}_full.ppm $OUTDIR/${basename}_${samp}.jpg
-						convert -crop $CROPSPEC $OUTDIR/${basename}_${samp}_full.ppm $OUTDIR/${basename}_${samp}_ref.ppm
-						runme $EXEDIR/djpeg $NSARG $COLORSARG -crop $CROPSPEC -rgb -outfile $OUTDIR/${basename}_${samp}.ppm $OUTDIR/${basename}_${samp}.jpg
-						runme cmp $OUTDIR/${basename}_${samp}.ppm $OUTDIR/${basename}_${samp}_ref.ppm
-					done
-					echo
-
-				done
-
-			done
-
-		done
-
-	done
-
-done
-
-echo SUCCESS!
diff --git a/djpeg.1 b/djpeg.1
deleted file mode 100644
index 31431b9..0000000
--- a/djpeg.1
+++ /dev/null
@@ -1,320 +0,0 @@
-.TH DJPEG 1 "4 November 2020"
-.SH NAME
-djpeg \- decompress a JPEG file to an image file
-.SH SYNOPSIS
-.B djpeg
-[
-.I options
-]
-[
-.I filename
-]
-.LP
-.SH DESCRIPTION
-.LP
-.B djpeg
-decompresses the named JPEG file, or the standard input if no file is named,
-and produces an image file on the standard output.  PBMPLUS (PPM/PGM), BMP,
-GIF, or Targa output format can be selected.
-.SH OPTIONS
-All switch names may be abbreviated; for example,
-.B \-grayscale
-may be written
-.B \-gray
-or
-.BR \-gr .
-Most of the "basic" switches can be abbreviated to as little as one letter.
-Upper and lower case are equivalent (thus
-.B \-BMP
-is the same as
-.BR \-bmp ).
-British spellings are also accepted (e.g.,
-.BR \-greyscale ),
-though for brevity these are not mentioned below.
-.PP
-The basic switches are:
-.TP
-.BI \-colors " N"
-Reduce image to at most N colors.  This reduces the number of colors used in
-the output image, so that it can be displayed on a colormapped display or
-stored in a colormapped file format.  For example, if you have an 8-bit
-display, you'd need to reduce to 256 or fewer colors.
-.TP
-.BI \-quantize " N"
-Same as
-.BR \-colors .
-.B \-colors
-is the recommended name,
-.B \-quantize
-is provided only for backwards compatibility.
-.TP
-.B \-fast
-Select recommended processing options for fast, low quality output.  (The
-default options are chosen for highest quality output.)  Currently, this is
-equivalent to \fB\-dct fast \-nosmooth \-onepass \-dither ordered\fR.
-.TP
-.B \-grayscale
-Force grayscale output even if JPEG file is color.  Useful for viewing on
-monochrome displays; also,
-.B djpeg
-runs noticeably faster in this mode.
-.TP
-.B \-rgb
-Force RGB output even if JPEG file is grayscale.
-.TP
-.BI \-scale " M/N"
-Scale the output image by a factor M/N.  Currently the scale factor must be
-M/8, where M is an integer between 1 and 16 inclusive, or any reduced fraction
-thereof (such as 1/2, 3/4, etc.)  Scaling is handy if the image is larger than
-your screen; also,
-.B djpeg
-runs much faster when scaling down the output.
-.TP
-.B \-bmp
-Select BMP output format (Windows flavor).  8-bit colormapped format is
-emitted if
-.B \-colors
-or
-.B \-grayscale
-is specified, or if the JPEG file is grayscale; otherwise, 24-bit full-color
-format is emitted.
-.TP
-.B \-gif
-Select GIF output format (LZW-compressed).  Since GIF does not support more
-than 256 colors,
-.B \-colors 256
-is assumed (unless you specify a smaller number of colors).  If you specify
-.BR \-fast,
-the default number of colors is 216.
-.TP
-.B \-gif0
-Select GIF output format (uncompressed).  Since GIF does not support more than
-256 colors,
-.B \-colors 256
-is assumed (unless you specify a smaller number of colors).  If you specify
-.BR \-fast,
-the default number of colors is 216.
-.TP
-.B \-os2
-Select BMP output format (OS/2 1.x flavor).  8-bit colormapped format is
-emitted if
-.B \-colors
-or
-.B \-grayscale
-is specified, or if the JPEG file is grayscale; otherwise, 24-bit full-color
-format is emitted.
-.TP
-.B \-pnm
-Select PBMPLUS (PPM/PGM) output format (this is the default format).
-PGM is emitted if the JPEG file is grayscale or if
-.B \-grayscale
-is specified; otherwise PPM is emitted.
-.TP
-.B \-targa
-Select Targa output format.  Grayscale format is emitted if the JPEG file is
-grayscale or if
-.B \-grayscale
-is specified; otherwise, colormapped format is emitted if
-.B \-colors
-is specified; otherwise, 24-bit full-color format is emitted.
-.PP
-Switches for advanced users:
-.TP
-.B \-dct int
-Use accurate integer DCT method (default).
-.TP
-.B \-dct fast
-Use less accurate integer DCT method [legacy feature].
-When the Independent JPEG Group's software was first released in 1991, the
-decompression time for a 1-megapixel JPEG image on a mainstream PC was measured
-in minutes.  Thus, the \fBfast\fR integer DCT algorithm provided noticeable
-performance benefits.  On modern CPUs running libjpeg-turbo, however, the
-decompression time for a 1-megapixel JPEG image is measured in milliseconds,
-and thus the performance benefits of the \fBfast\fR algorithm are much less
-noticeable.  On modern x86/x86-64 CPUs that support AVX2 instructions, the
-\fBfast\fR and \fBint\fR methods have similar performance.  On other types of
-CPUs, the \fBfast\fR method is generally about 5-15% faster than the \fBint\fR
-method.
-
-If the JPEG image was compressed using a quality level of 85 or below, then
-there should be little or no perceptible quality difference between the two
-algorithms.  When decompressing images that were compressed using quality
-levels above 85, however, the difference between the \fBfast\fR and \fBint\fR
-methods becomes more pronounced.  With images compressed using quality=97, for
-instance, the \fBfast\fR method incurs generally about a 4-6 dB loss in PSNR
-relative to the \fBint\fR method, but this can be larger for some images.  If
-you can avoid it, do not use the \fBfast\fR method when decompressing images
-that were compressed using quality levels above 97.  The algorithm often
-degenerates for such images and can actually produce a more lossy output image
-than if the JPEG image had been compressed using lower quality levels.
-.TP
-.B \-dct float
-Use floating-point DCT method [legacy feature].
-The \fBfloat\fR method does not produce significantly more accurate results
-than the \fBint\fR method, and it is much slower.  The \fBfloat\fR method may
-also give different results on different machines due to varying roundoff
-behavior, whereas the integer methods should give the same results on all
-machines.
-.TP
-.B \-dither fs
-Use Floyd-Steinberg dithering in color quantization.
-.TP
-.B \-dither ordered
-Use ordered dithering in color quantization.
-.TP
-.B \-dither none
-Do not use dithering in color quantization.
-By default, Floyd-Steinberg dithering is applied when quantizing colors; this
-is slow but usually produces the best results.  Ordered dither is a compromise
-between speed and quality; no dithering is fast but usually looks awful.  Note
-that these switches have no effect unless color quantization is being done.
-Ordered dither is only available in
-.B \-onepass
-mode.
-.TP
-.BI \-icc " file"
-Extract ICC color management profile to the specified file.
-.TP
-.BI \-map " file"
-Quantize to the colors used in the specified image file.  This is useful for
-producing multiple files with identical color maps, or for forcing a
-predefined set of colors to be used.  The
-.I file
-must be a GIF or PPM file. This option overrides
-.B \-colors
-and
-.BR \-onepass .
-.TP
-.B \-nosmooth
-Use a faster, lower-quality upsampling routine.
-.TP
-.B \-onepass
-Use one-pass instead of two-pass color quantization.  The one-pass method is
-faster and needs less memory, but it produces a lower-quality image.
-.B \-onepass
-is ignored unless you also say
-.B \-colors
-.IR N .
-Also, the one-pass method is always used for grayscale output (the two-pass
-method is no improvement then).
-.TP
-.BI \-maxmemory " N"
-Set limit for amount of memory to use in processing large images.  Value is
-in thousands of bytes, or millions of bytes if "M" is attached to the
-number.  For example,
-.B \-max 4m
-selects 4000000 bytes.  If more space is needed, an error will occur.
-.TP
-.BI \-maxscans " N"
-Abort if the JPEG image contains more than
-.I N
-scans.  This feature demonstrates a method by which applications can guard
-against denial-of-service attacks instigated by specially-crafted malformed
-JPEG images containing numerous scans with missing image data or image data
-consisting only of "EOB runs" (a feature of progressive JPEG images that allows
-potentially hundreds of thousands of adjoining zero-value pixels to be
-represented using only a few bytes.)  Attempting to decompress such malformed
-JPEG images can cause excessive CPU activity, since the decompressor must fully
-process each scan (even if the scan is corrupt) before it can proceed to the
-next scan.
-.TP
-.BI \-outfile " name"
-Send output image to the named file, not to standard output.
-.TP
-.BI \-memsrc
-Load input file into memory before decompressing.  This feature was implemented
-mainly as a way of testing the in-memory source manager (jpeg_mem_src().)
-.TP
-.BI \-report
-Report decompression progress.
-.TP
-.BI \-skip " Y0,Y1"
-Decompress all rows of the JPEG image except those between Y0 and Y1
-(inclusive.)  Note that if decompression scaling is being used, then Y0 and Y1
-are relative to the scaled image dimensions.
-.TP
-.BI \-crop " WxH+X+Y"
-Decompress only a rectangular subregion of the image, starting at point X,Y
-with width W and height H.  If necessary, X will be shifted left to the nearest
-iMCU boundary, and the width will be increased accordingly.  Note that if
-decompression scaling is being used, then X, Y, W, and H are relative to the
-scaled image dimensions.  Currently this option only works with the
-PBMPLUS (PPM/PGM), GIF, and Targa output formats.
-.TP
-.BI \-strict
-Treat all warnings as fatal.  This feature also demonstrates a method by which
-applications can guard against attacks instigated by specially-crafted
-malformed JPEG images.  Enabling this option will cause the decompressor to
-abort if the JPEG image contains incomplete or corrupt image data.
-.TP
-.B \-verbose
-Enable debug printout.  More
-.BR \-v 's
-give more output.  Also, version information is printed at startup.
-.TP
-.B \-debug
-Same as
-.BR \-verbose .
-.TP
-.B \-version
-Print version information and exit.
-.SH EXAMPLES
-.LP
-This example decompresses the JPEG file foo.jpg, quantizes it to
-256 colors, and saves the output in 8-bit BMP format in foo.bmp:
-.IP
-.B djpeg \-colors 256 \-bmp
-.I foo.jpg
-.B >
-.I foo.bmp
-.SH HINTS
-To get a quick preview of an image, use the
-.B \-grayscale
-and/or
-.B \-scale
-switches.
-.B \-grayscale \-scale 1/8
-is the fastest case.
-.PP
-Several options are available that trade off image quality to gain speed.
-.B \-fast
-turns on the recommended settings.
-.PP
-.B \-dct fast
-and/or
-.B \-nosmooth
-gain speed at a small sacrifice in quality.
-When producing a color-quantized image,
-.B \-onepass \-dither ordered
-is fast but much lower quality than the default behavior.
-.B \-dither none
-may give acceptable results in two-pass mode, but is seldom tolerable in
-one-pass mode.
-.SH ENVIRONMENT
-.TP
-.B JPEGMEM
-If this environment variable is set, its value is the default memory limit.
-The value is specified as described for the
-.B \-maxmemory
-switch.
-.B JPEGMEM
-overrides the default value specified when the program was compiled, and
-itself is overridden by an explicit
-.BR \-maxmemory .
-.SH SEE ALSO
-.BR cjpeg (1),
-.BR jpegtran (1),
-.BR rdjpgcom (1),
-.BR wrjpgcom (1)
-.br
-.BR ppm (5),
-.BR pgm (5)
-.br
-Wallace, Gregory K.  "The JPEG Still Picture Compression Standard",
-Communications of the ACM, April 1991 (vol. 34, no. 4), pp. 30-44.
-.SH AUTHOR
-Independent JPEG Group
-.PP
-This file was modified by The libjpeg-turbo Project to include only information
-relevant to libjpeg-turbo, to wordsmith certain sections, and to describe
-features not present in libjpeg.
diff --git a/djpeg.c b/djpeg.c
index cc2eb9d..7be6756 100644
--- a/djpeg.c
+++ b/djpeg.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 2013-2019 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010-2011, 2013-2017, 2019-2020, D. R. Commander.
+ * Copyright (C) 2010-2011, 2013-2017, 2019-2020, 2022, D. R. Commander.
  * Copyright (C) 2015, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -28,26 +28,16 @@
  * works regardless of which command line style is used.
  */
 
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
 #include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 #include "jversion.h"           /* for version message */
 #include "jconfigint.h"
 
-#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare free() */
-extern void free(void *ptr);
-#endif
-
 #include <ctype.h>              /* to declare isprint() */
 
-#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
-#ifdef __MWERKS__
-#include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>            /* ... and this */
-#endif
-#ifdef THINK_C
-#include <console.h>            /* Think declares it here */
-#endif
-#endif
-
 
 /* Create the add-on message string table. */
 
@@ -326,7 +316,9 @@
       if (++argn >= argc)       /* advance to next argument */
         usage();
       icc_filename = argv[argn];
+#ifdef SAVE_MARKERS_SUPPORTED
       jpeg_save_markers(cinfo, JPEG_APP0 + 2, 0xFFFF);
+#endif
 
     } else if (keymatch(arg, "map", 3)) {
       /* Quantize to a color map taken from an input file. */
@@ -554,11 +546,6 @@
 #endif
   JDIMENSION num_scanlines;
 
-  /* On Mac, fetch a command line. */
-#ifdef USE_CCOMMAND
-  argc = ccommand(&argv);
-#endif
-
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
     progname = "djpeg";         /* in case C library doesn't provide it */
@@ -655,7 +642,7 @@
         fprintf(stderr, "%s: memory allocation failure\n", progname);
         return EXIT_FAILURE;
       }
-      nbytes = JFREAD(input_file, &inbuffer[insize], INPUT_BUF_SIZE);
+      nbytes = fread(&inbuffer[insize], 1, INPUT_BUF_SIZE, input_file);
       if (nbytes < INPUT_BUF_SIZE && ferror(input_file)) {
         if (file_index < argc)
           fprintf(stderr, "%s: can't read from %s\n", progname,
@@ -725,7 +712,7 @@
      * that skip_start <= skip_end.
      */
     if (skip_end > cinfo.output_height - 1) {
-      fprintf(stderr, "%s: skip region exceeds image height %d\n", progname,
+      fprintf(stderr, "%s: skip region exceeds image height %u\n", progname,
               cinfo.output_height);
       return EXIT_FAILURE;
     }
@@ -746,7 +733,7 @@
     }
     if ((tmp = jpeg_skip_scanlines(&cinfo, skip_end - skip_start + 1)) !=
         skip_end - skip_start + 1) {
-      fprintf(stderr, "%s: jpeg_skip_scanlines() returned %d rather than %d\n",
+      fprintf(stderr, "%s: jpeg_skip_scanlines() returned %u rather than %u\n",
               progname, tmp, skip_end - skip_start + 1);
       return EXIT_FAILURE;
     }
@@ -765,7 +752,7 @@
      */
     if (crop_x + crop_width > cinfo.output_width ||
         crop_y + crop_height > cinfo.output_height) {
-      fprintf(stderr, "%s: crop dimensions exceed image dimensions %d x %d\n",
+      fprintf(stderr, "%s: crop dimensions exceed image dimensions %u x %u\n",
               progname, cinfo.output_width, cinfo.output_height);
       return EXIT_FAILURE;
     }
@@ -786,7 +773,7 @@
 
     /* Process data */
     if ((tmp = jpeg_skip_scanlines(&cinfo, crop_y)) != crop_y) {
-      fprintf(stderr, "%s: jpeg_skip_scanlines() returned %d rather than %d\n",
+      fprintf(stderr, "%s: jpeg_skip_scanlines() returned %u rather than %u\n",
               progname, tmp, crop_y);
       return EXIT_FAILURE;
     }
@@ -799,7 +786,7 @@
          jpeg_skip_scanlines(&cinfo,
                              cinfo.output_height - crop_y - crop_height)) !=
         cinfo.output_height - crop_y - crop_height) {
-      fprintf(stderr, "%s: jpeg_skip_scanlines() returned %d rather than %d\n",
+      fprintf(stderr, "%s: jpeg_skip_scanlines() returned %u rather than %u\n",
               progname, tmp, cinfo.output_height - crop_y - crop_height);
       return EXIT_FAILURE;
     }
diff --git a/gtest/gtest-utils.cpp b/gtest/gtest-utils.cpp
index b6df7ab..b64fb88 100644
--- a/gtest/gtest-utils.cpp
+++ b/gtest/gtest-utils.cpp
@@ -40,7 +40,7 @@
 #endif
 
 void GetTestFilePath(base::FilePath* path, const std::string filename) {
-  ASSERT_TRUE(base::PathService::Get(base::DIR_SOURCE_ROOT, path));
+  ASSERT_TRUE(base::PathService::Get(base::DIR_SRC_TEST_DATA_ROOT, path));
   *path = path->AppendASCII("third_party");
   *path = path->AppendASCII("libjpeg_turbo");
   *path = path->AppendASCII("testimages");
diff --git a/gtest/tjbench-gtest-wrapper.cpp b/gtest/tjbench-gtest-wrapper.cpp
index 700b199..cdb671a 100644
--- a/gtest/tjbench-gtest-wrapper.cpp
+++ b/gtest/tjbench-gtest-wrapper.cpp
@@ -60,7 +60,7 @@
 
   static void SetUpTestSuite() {
     base::FilePath resource_path;
-    ASSERT_TRUE(base::PathService::Get(base::DIR_SOURCE_ROOT, &resource_path));
+    ASSERT_TRUE(base::PathService::Get(base::DIR_SRC_TEST_DATA_ROOT, &resource_path));
     resource_path = resource_path.AppendASCII("third_party");
     resource_path = resource_path.AppendASCII("libjpeg_turbo");
     resource_path = resource_path.AppendASCII("testimages");
@@ -130,7 +130,7 @@
 
   static void SetUpTestSuite() {
     base::FilePath resource_path;
-    ASSERT_TRUE(base::PathService::Get(base::DIR_SOURCE_ROOT, &resource_path));
+    ASSERT_TRUE(base::PathService::Get(base::DIR_SRC_TEST_DATA_ROOT, &resource_path));
     resource_path = resource_path.AppendASCII("third_party");
     resource_path = resource_path.AppendASCII("libjpeg_turbo");
     resource_path = resource_path.AppendASCII("testimages");
diff --git a/jcapimin.c b/jcapimin.c
index 178c55b..84e7ecc 100644
--- a/jcapimin.c
+++ b/jcapimin.c
@@ -4,8 +4,8 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1998, Thomas G. Lane.
  * Modified 2003-2010 by Guido Vollbeding.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -52,7 +52,7 @@
   {
     struct jpeg_error_mgr *err = cinfo->err;
     void *client_data = cinfo->client_data; /* ignore Purify complaint here */
-    MEMZERO(cinfo, sizeof(struct jpeg_compress_struct));
+    memset(cinfo, 0, sizeof(struct jpeg_compress_struct));
     cinfo->err = err;
     cinfo->client_data = client_data;
   }
diff --git a/jcarith.c b/jcarith.c
index b6d093f..b172052 100644
--- a/jcarith.c
+++ b/jcarith.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Developed 1997-2009 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015, 2018, D. R. Commander.
+ * Copyright (C) 2015, 2018, 2021-2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -338,14 +338,14 @@
     compptr = cinfo->cur_comp_info[ci];
     /* DC needs no table for refinement scan */
     if (cinfo->progressive_mode == 0 || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
-      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
+      memset(entropy->dc_stats[compptr->dc_tbl_no], 0, DC_STAT_BINS);
       /* Reset DC predictions to 0 */
       entropy->last_dc_val[ci] = 0;
       entropy->dc_context[ci] = 0;
     }
     /* AC needs no table when not present */
     if (cinfo->progressive_mode == 0 || cinfo->Se) {
-      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
+      memset(entropy->ac_stats[compptr->ac_tbl_no], 0, AC_STAT_BINS);
     }
   }
 
@@ -836,7 +836,7 @@
      * We are fully adaptive here and need no extra
      * statistics gathering pass!
      */
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    ERREXIT(cinfo, JERR_NOTIMPL);
 
   /* We assume jcmaster.c already validated the progressive scan parameters. */
 
@@ -867,7 +867,7 @@
       if (entropy->dc_stats[tbl] == NULL)
         entropy->dc_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small)
           ((j_common_ptr)cinfo, JPOOL_IMAGE, DC_STAT_BINS);
-      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
+      memset(entropy->dc_stats[tbl], 0, DC_STAT_BINS);
       /* Initialize DC predictions to 0 */
       entropy->last_dc_val[ci] = 0;
       entropy->dc_context[ci] = 0;
@@ -880,7 +880,7 @@
       if (entropy->ac_stats[tbl] == NULL)
         entropy->ac_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small)
           ((j_common_ptr)cinfo, JPOOL_IMAGE, AC_STAT_BINS);
-      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
+      memset(entropy->ac_stats[tbl], 0, AC_STAT_BINS);
 #ifdef CALCULATE_SPECTRAL_CONDITIONING
       if (cinfo->progressive_mode)
         /* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */
diff --git a/jccolext.c b/jccolext.c
index 303b322..20f891a 100644
--- a/jccolext.c
+++ b/jccolext.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2012, 2015, D. R. Commander.
+ * Copyright (C) 2009-2012, 2015, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -48,9 +48,9 @@
     outptr2 = output_buf[2][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      r = inptr[RGB_RED];
-      g = inptr[RGB_GREEN];
-      b = inptr[RGB_BLUE];
+      r = RANGE_LIMIT(inptr[RGB_RED]);
+      g = RANGE_LIMIT(inptr[RGB_GREEN]);
+      b = RANGE_LIMIT(inptr[RGB_BLUE]);
       inptr += RGB_PIXELSIZE;
       /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
        * must be too; we do not need an explicit range-limiting operation.
@@ -100,9 +100,9 @@
     outptr = output_buf[0][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      r = inptr[RGB_RED];
-      g = inptr[RGB_GREEN];
-      b = inptr[RGB_BLUE];
+      r = RANGE_LIMIT(inptr[RGB_RED]);
+      g = RANGE_LIMIT(inptr[RGB_GREEN]);
+      b = RANGE_LIMIT(inptr[RGB_BLUE]);
       inptr += RGB_PIXELSIZE;
       /* Y */
       outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
diff --git a/jccolor.c b/jccolor.c
index bdc563c..fb9f1cc 100644
--- a/jccolor.c
+++ b/jccolor.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2012, 2015, D. R. Commander.
+ * Copyright (C) 2009-2012, 2015, 2022, D. R. Commander.
  * Copyright (C) 2014, MIPS Technologies, Inc., California.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -17,7 +17,6 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
-#include "jconfigint.h"
 
 
 /* Private subobject */
@@ -84,6 +83,18 @@
 #define B_CR_OFF        (7 * (MAXJSAMPLE + 1))
 #define TABLE_SIZE      (8 * (MAXJSAMPLE + 1))
 
+/* 12-bit samples use a 16-bit data type, so it is possible to pass
+ * out-of-range sample values (< 0 or > 4095) to jpeg_write_scanlines().
+ * Thus, we mask the incoming 12-bit samples to guard against overrunning
+ * or underrunning the conversion tables.
+ */
+
+#if BITS_IN_JSAMPLE == 12
+#define RANGE_LIMIT(value)  ((value) & 0xFFF)
+#else
+#define RANGE_LIMIT(value)  (value)
+#endif
+
 
 /* Include inline routines for colorspace extensions */
 
@@ -392,9 +403,9 @@
     outptr3 = output_buf[3][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      r = MAXJSAMPLE - inptr[0];
-      g = MAXJSAMPLE - inptr[1];
-      b = MAXJSAMPLE - inptr[2];
+      r = MAXJSAMPLE - RANGE_LIMIT(inptr[0]);
+      g = MAXJSAMPLE - RANGE_LIMIT(inptr[1]);
+      b = MAXJSAMPLE - RANGE_LIMIT(inptr[2]);
       /* K passes through as-is */
       outptr3[col] = inptr[3];
       inptr += 4;
diff --git a/jchuff.c b/jchuff.c
index 8ff817b..5d0276a 100644
--- a/jchuff.c
+++ b/jchuff.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2014-2016, 2018-2021, D. R. Commander.
+ * Copyright (C) 2009-2011, 2014-2016, 2018-2022, D. R. Commander.
  * Copyright (C) 2015, Matthieu Darbois.
  * Copyright (C) 2018, Matthias Räncker.
  * Copyright (C) 2020, Arm Limited.
@@ -27,7 +27,6 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
-#include "jconfigint.h"
 #include <limits.h>
 
 /*
@@ -200,12 +199,12 @@
         entropy->dc_count_ptrs[dctbl] = (long *)
           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                       257 * sizeof(long));
-      MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * sizeof(long));
+      memset(entropy->dc_count_ptrs[dctbl], 0, 257 * sizeof(long));
       if (entropy->ac_count_ptrs[actbl] == NULL)
         entropy->ac_count_ptrs[actbl] = (long *)
           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                       257 * sizeof(long));
-      MEMZERO(entropy->ac_count_ptrs[actbl], 257 * sizeof(long));
+      memset(entropy->ac_count_ptrs[actbl], 0, 257 * sizeof(long));
 #endif
     } else {
       /* Compute derived values for Huffman tables */
@@ -315,8 +314,8 @@
    * this lets us detect duplicate VAL entries here, and later
    * allows emit_bits to detect any attempt to emit such symbols.
    */
-  MEMZERO(dtbl->ehufco, sizeof(dtbl->ehufco));
-  MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi));
+  memset(dtbl->ehufco, 0, sizeof(dtbl->ehufco));
+  memset(dtbl->ehufsi, 0, sizeof(dtbl->ehufsi));
 
   /* This is also a convenient place to check for out-of-range
    * and duplicated VAL entries.  We allow 0..255 for AC symbols
@@ -478,7 +477,7 @@
     buffer = _buffer; \
     while (bytes > 0) { \
       bytestocopy = MIN(bytes, state->free_in_buffer); \
-      MEMCOPY(state->next_output_byte, buffer, bytestocopy); \
+      memcpy(state->next_output_byte, buffer, bytestocopy); \
       state->next_output_byte += bytestocopy; \
       buffer += bytestocopy; \
       state->free_in_buffer -= bytestocopy; \
@@ -941,8 +940,8 @@
 
   /* This algorithm is explained in section K.2 of the JPEG standard */
 
-  MEMZERO(bits, sizeof(bits));
-  MEMZERO(codesize, sizeof(codesize));
+  memset(bits, 0, sizeof(bits));
+  memset(codesize, 0, sizeof(codesize));
   for (i = 0; i < 257; i++)
     others[i] = -1;             /* init links to empty */
 
@@ -1044,7 +1043,7 @@
   bits[i]--;
 
   /* Return final symbol counts (only for lengths 0..16) */
-  MEMCOPY(htbl->bits, bits, sizeof(htbl->bits));
+  memcpy(htbl->bits, bits, sizeof(htbl->bits));
 
   /* Return a list of the symbols sorted by code length */
   /* It's not real clear to me why we don't need to consider the codelength
@@ -1083,8 +1082,8 @@
   /* It's important not to apply jpeg_gen_optimal_table more than once
    * per table, because it clobbers the input frequency counts!
    */
-  MEMZERO(did_dc, sizeof(did_dc));
-  MEMZERO(did_ac, sizeof(did_ac));
+  memset(did_dc, 0, sizeof(did_dc));
+  memset(did_ac, 0, sizeof(did_ac));
 
   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
     compptr = cinfo->cur_comp_info[ci];
diff --git a/jchuff.h b/jchuff.h
index 314a232..da7809a 100644
--- a/jchuff.h
+++ b/jchuff.h
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -25,6 +25,14 @@
 #define MAX_COEF_BITS  14
 #endif
 
+/* The progressive Huffman encoder uses an unsigned 16-bit data type to store
+ * absolute values of coefficients, because it is possible to inject a
+ * coefficient value of -32768 into the encoder by attempting to transform a
+ * malformed 12-bit JPEG image, and the absolute value of -32768 would overflow
+ * a signed 16-bit integer.
+ */
+typedef unsigned short UJCOEF;
+
 /* Derived data constructed for each Huffman table */
 
 typedef struct {
diff --git a/jcmaster.c b/jcmaster.c
index c2b2600..b821710 100644
--- a/jcmaster.c
+++ b/jcmaster.c
@@ -19,7 +19,6 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jpegcomp.h"
-#include "jconfigint.h"
 
 
 /* Private state */
diff --git a/jconfig.h b/jconfig.h
index d66ccf8..d347c33 100644
--- a/jconfig.h
+++ b/jconfig.h
@@ -4,10 +4,10 @@
 #define JPEG_LIB_VERSION  62
 
 /* libjpeg-turbo version */
-#define LIBJPEG_TURBO_VERSION  2.1.0
+#define LIBJPEG_TURBO_VERSION  2.1.5.1
 
 /* libjpeg-turbo version in integer form */
-#define LIBJPEG_TURBO_VERSION_NUMBER  2001000
+#define LIBJPEG_TURBO_VERSION_NUMBER  2001005
 
 /* Support arithmetic encoding */
 #define C_ARITH_CODING_SUPPORTED 1
@@ -32,37 +32,6 @@
 
 #define BITS_IN_JSAMPLE  8      /* use 8 or 12 */
 
-/* Define to 1 if you have the <locale.h> header file. */
-#define HAVE_LOCALE_H 1
-
-/* Define to 1 if you have the <stddef.h> header file. */
-#define HAVE_STDDEF_H 1
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#define HAVE_STDLIB_H 1
-
-/* Define if you need to include <sys/types.h> to get size_t. */
-/* #undef NEED_SYS_TYPES_H 1 */
-
-/* Define if you have BSD-like bzero and bcopy in <strings.h> rather than
-   memset/memcpy in <string.h>. */
-/* #undef NEED_BSD_STRINGS */
-
-/* Define to 1 if the system has the type `unsigned char'. */
-#define HAVE_UNSIGNED_CHAR 1
-
-/* Define to 1 if the system has the type `unsigned short'. */
-#define HAVE_UNSIGNED_SHORT 1
-
-/* Compiler does not support pointers to undefined structures. */
-/* #undef INCOMPLETE_TYPES_BROKEN */
-
 /* Define if your (broken) compiler shifts signed values as if they were
    unsigned. */
 /* #undef RIGHT_SHIFT_IS_UNSIGNED */
-
-/* Define to empty if `const' does not conform to ANSI C. */
-/* #undef const */
-
-/* Define to `unsigned int' if <sys/types.h> does not define. */
-/* #undef size_t */
diff --git a/jconfig.h.in b/jconfig.h.in
index d4284d9..e018012 100644
--- a/jconfig.h.in
+++ b/jconfig.h.in
@@ -32,37 +32,6 @@
 
 #define BITS_IN_JSAMPLE  @BITS_IN_JSAMPLE@      /* use 8 or 12 */
 
-/* Define to 1 if you have the <locale.h> header file. */
-#cmakedefine HAVE_LOCALE_H 1
-
-/* Define to 1 if you have the <stddef.h> header file. */
-#cmakedefine HAVE_STDDEF_H 1
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#cmakedefine HAVE_STDLIB_H 1
-
-/* Define if you need to include <sys/types.h> to get size_t. */
-#cmakedefine NEED_SYS_TYPES_H 1
-
-/* Define if you have BSD-like bzero and bcopy in <strings.h> rather than
-   memset/memcpy in <string.h>. */
-#cmakedefine NEED_BSD_STRINGS 1
-
-/* Define to 1 if the system has the type `unsigned char'. */
-#cmakedefine HAVE_UNSIGNED_CHAR 1
-
-/* Define to 1 if the system has the type `unsigned short'. */
-#cmakedefine HAVE_UNSIGNED_SHORT 1
-
-/* Compiler does not support pointers to undefined structures. */
-#cmakedefine INCOMPLETE_TYPES_BROKEN 1
-
 /* Define if your (broken) compiler shifts signed values as if they were
    unsigned. */
 #cmakedefine RIGHT_SHIFT_IS_UNSIGNED 1
-
-/* Define to empty if `const' does not conform to ANSI C. */
-/* #undef const */
-
-/* Define to `unsigned int' if <sys/types.h> does not define. */
-/* #undef size_t */
diff --git a/jconfig.txt b/jconfig.txt
index 21f35c1..d593da9 100644
--- a/jconfig.txt
+++ b/jconfig.txt
@@ -26,50 +26,6 @@
  * #define the symbol if yes, #undef it if no.
  */
 
-/* Does your compiler support the declaration "unsigned char" ?
- * How about "unsigned short" ?
- */
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-
-/* Define "void" as "char" if your compiler doesn't know about type void.
- * NOTE: be sure to define void such that "void *" represents the most general
- * pointer type, e.g., that returned by malloc().
- */
-/* #define void char */
-
-/* Define "const" as empty if your compiler doesn't know the "const" keyword.
- */
-/* #define const */
-
-/* Define this if your system has an ANSI-conforming <stddef.h> file.
- */
-#define HAVE_STDDEF_H
-
-/* Define this if your system has an ANSI-conforming <stdlib.h> file.
- */
-#define HAVE_STDLIB_H
-
-/* Define this if your system does not have an ANSI/SysV <string.h>,
- * but does have a BSD-style <strings.h>.
- */
-#undef NEED_BSD_STRINGS
-
-/* Define this if your system does not provide typedef size_t in any of the
- * ANSI-standard places (stddef.h, stdlib.h, or stdio.h), but places it in
- * <sys/types.h> instead.
- */
-#undef NEED_SYS_TYPES_H
-
-/* Although a real ANSI C compiler can deal perfectly well with pointers to
- * unspecified structures (see "incomplete types" in the spec), a few pre-ANSI
- * and pseudo-ANSI compilers get confused.  To keep one of these bozos happy,
- * define INCOMPLETE_TYPES_BROKEN.  This is not recommended unless you
- * actually get "missing structure definition" warnings or errors while
- * compiling the JPEG code.
- */
-#undef INCOMPLETE_TYPES_BROKEN
-
 /* Define "boolean" as unsigned char, not int, on Windows systems.
  */
 #ifdef _WIN32
diff --git a/jconfigint.h b/jconfigint.h
index cb9915c..3af3ade 100644
--- a/jconfigint.h
+++ b/jconfigint.h
@@ -26,9 +26,10 @@
 #define PACKAGE_NAME  "libjpeg-turbo"
 
 /* Version number of package */
-#define VERSION  "2.1.0"
+#define VERSION  "2.1.5.1"
 
 /* The size of `size_t', as computed by sizeof. */
+#include <stdint.h>
 #if __WORDSIZE==64 || defined(_WIN64)
 #define SIZEOF_SIZE_T  8
 #else
diff --git a/jcphuff.c b/jcphuff.c
index 9bf9612..5006b67 100644
--- a/jcphuff.c
+++ b/jcphuff.c
@@ -4,9 +4,10 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2011, 2015, 2018, 2021, D. R. Commander.
- * Copyright (C) 2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander.
+ * Copyright (C) 2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2020, Arm Limited.
+ * Copyright (C) 2021, Alex Richardson.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -21,7 +22,6 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
-#include "jconfigint.h"
 #include <limits.h>
 
 #ifdef HAVE_INTRIN_H
@@ -82,11 +82,11 @@
   /* Pointer to routine to prepare data for encode_mcu_AC_first() */
   void (*AC_first_prepare) (const JCOEF *block,
                             const int *jpeg_natural_order_start, int Sl,
-                            int Al, JCOEF *values, size_t *zerobits);
+                            int Al, UJCOEF *values, size_t *zerobits);
   /* Pointer to routine to prepare data for encode_mcu_AC_refine() */
   int (*AC_refine_prepare) (const JCOEF *block,
                             const int *jpeg_natural_order_start, int Sl,
-                            int Al, JCOEF *absvalues, size_t *bits);
+                            int Al, UJCOEF *absvalues, size_t *bits);
 
   /* Mode flag: TRUE for optimization, FALSE for actual data output */
   boolean gather_statistics;
@@ -156,14 +156,14 @@
                                        JBLOCKROW *MCU_data);
 METHODDEF(void) encode_mcu_AC_first_prepare
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits);
+   UJCOEF *values, size_t *zerobits);
 METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
                                        JBLOCKROW *MCU_data);
 METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,
                                         JBLOCKROW *MCU_data);
 METHODDEF(int) encode_mcu_AC_refine_prepare
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits);
+   UJCOEF *absvalues, size_t *bits);
 METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
                                         JBLOCKROW *MCU_data);
 METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);
@@ -274,7 +274,7 @@
         entropy->count_ptrs[tbl] = (long *)
           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                       257 * sizeof(long));
-      MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long));
+      memset(entropy->count_ptrs[tbl], 0, 257 * sizeof(long));
     } else {
       /* Compute derived values for Huffman table */
       /* We may do this more than once for a table, but it's not expensive */
@@ -583,8 +583,8 @@
       continue; \
     /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
     temp2 ^= temp; \
-    values[k] = temp; \
-    values[k + DCTSIZE2] = temp2; \
+    values[k] = (UJCOEF)temp; \
+    values[k + DCTSIZE2] = (UJCOEF)temp2; \
     zerobits |= ((size_t)1U) << k; \
   } \
 }
@@ -592,7 +592,7 @@
 METHODDEF(void)
 encode_mcu_AC_first_prepare(const JCOEF *block,
                             const int *jpeg_natural_order_start, int Sl,
-                            int Al, JCOEF *values, size_t *bits)
+                            int Al, UJCOEF *values, size_t *bits)
 {
   register int k, temp, temp2;
   size_t zerobits = 0U;
@@ -665,9 +665,9 @@
   register int nbits, r;
   int Sl = cinfo->Se - cinfo->Ss + 1;
   int Al = cinfo->Al;
-  JCOEF values_unaligned[2 * DCTSIZE2 + 15];
-  JCOEF *values;
-  const JCOEF *cvalue;
+  UJCOEF values_unaligned[2 * DCTSIZE2 + 15];
+  UJCOEF *values;
+  const UJCOEF *cvalue;
   size_t zerobits;
   size_t bits[8 / SIZEOF_SIZE_T];
 
@@ -680,7 +680,7 @@
       emit_restart(entropy, entropy->next_restart_num);
 
 #ifdef WITH_SIMD
-  cvalue = values = (JCOEF *)PAD((size_t)values_unaligned, 16);
+  cvalue = values = (UJCOEF *)PAD((JUINTPTR)values_unaligned, 16);
 #else
   /* Not using SIMD, so alignment is not needed */
   cvalue = values = values_unaligned;
@@ -814,7 +814,7 @@
       zerobits |= ((size_t)1U) << k; \
       signbits |= ((size_t)(temp2 + 1)) << k; \
     } \
-    absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \
+    absvalues[k] = (UJCOEF)temp; /* save abs value for main pass */ \
     if (temp == 1) \
       EOB = k + koffset;        /* EOB = index of last newly-nonzero coef */ \
   } \
@@ -823,7 +823,7 @@
 METHODDEF(int)
 encode_mcu_AC_refine_prepare(const JCOEF *block,
                              const int *jpeg_natural_order_start, int Sl,
-                             int Al, JCOEF *absvalues, size_t *bits)
+                             int Al, UJCOEF *absvalues, size_t *bits)
 {
   register int k, temp, temp2;
   int EOB = 0;
@@ -930,9 +930,9 @@
   unsigned int BR;
   int Sl = cinfo->Se - cinfo->Ss + 1;
   int Al = cinfo->Al;
-  JCOEF absvalues_unaligned[DCTSIZE2 + 15];
-  JCOEF *absvalues;
-  const JCOEF *cabsvalue, *EOBPTR;
+  UJCOEF absvalues_unaligned[DCTSIZE2 + 15];
+  UJCOEF *absvalues;
+  const UJCOEF *cabsvalue, *EOBPTR;
   size_t zerobits, signbits;
   size_t bits[16 / SIZEOF_SIZE_T];
 
@@ -945,7 +945,7 @@
       emit_restart(entropy, entropy->next_restart_num);
 
 #ifdef WITH_SIMD
-  cabsvalue = absvalues = (JCOEF *)PAD((size_t)absvalues_unaligned, 16);
+  cabsvalue = absvalues = (UJCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16);
 #else
   /* Not using SIMD, so alignment is not needed */
   cabsvalue = absvalues = absvalues_unaligned;
@@ -1061,7 +1061,7 @@
   /* It's important not to apply jpeg_gen_optimal_table more than once
    * per table, because it clobbers the input frequency counts!
    */
-  MEMZERO(did, sizeof(did));
+  memset(did, 0, sizeof(did));
 
   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
     compptr = cinfo->cur_comp_info[ci];
diff --git a/jcprepct.c b/jcprepct.c
index d59713a..f27cc34 100644
--- a/jcprepct.c
+++ b/jcprepct.c
@@ -3,8 +3,8 @@
  *
  * This file is part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -289,8 +289,8 @@
                      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
        (JDIMENSION)(3 * rgroup_height));
     /* Copy true buffer row pointers into the middle of the fake row array */
-    MEMCOPY(fake_buffer + rgroup_height, true_buffer,
-            3 * rgroup_height * sizeof(JSAMPROW));
+    memcpy(fake_buffer + rgroup_height, true_buffer,
+           3 * rgroup_height * sizeof(JSAMPROW));
     /* Fill in the above and below wraparound pointers */
     for (i = 0; i < rgroup_height; i++) {
       fake_buffer[i] = true_buffer[2 * rgroup_height + i];
diff --git a/jctrans.c b/jctrans.c
index ab6a218..e121028 100644
--- a/jctrans.c
+++ b/jctrans.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1995-1998, Thomas G. Lane.
  * Modified 2000-2009 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2020, D. R. Commander.
+ * Copyright (C) 2020, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -100,8 +100,8 @@
       qtblptr = &dstinfo->quant_tbl_ptrs[tblno];
       if (*qtblptr == NULL)
         *qtblptr = jpeg_alloc_quant_table((j_common_ptr)dstinfo);
-      MEMCOPY((*qtblptr)->quantval, srcinfo->quant_tbl_ptrs[tblno]->quantval,
-              sizeof((*qtblptr)->quantval));
+      memcpy((*qtblptr)->quantval, srcinfo->quant_tbl_ptrs[tblno]->quantval,
+             sizeof((*qtblptr)->quantval));
       (*qtblptr)->sent_table = FALSE;
     }
   }
diff --git a/jdapimin.c b/jdapimin.c
index 4609b13..30126a0 100644
--- a/jdapimin.c
+++ b/jdapimin.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1998, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2016, D. R. Commander.
+ * Copyright (C) 2016, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -23,7 +23,6 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jdmaster.h"
-#include "jconfigint.h"
 
 
 /*
@@ -53,7 +52,7 @@
   {
     struct jpeg_error_mgr *err = cinfo->err;
     void *client_data = cinfo->client_data; /* ignore Purify complaint here */
-    MEMZERO(cinfo, sizeof(struct jpeg_decompress_struct));
+    memset(cinfo, 0, sizeof(struct jpeg_decompress_struct));
     cinfo->err = err;
     cinfo->client_data = client_data;
   }
@@ -92,7 +91,7 @@
   cinfo->master = (struct jpeg_decomp_master *)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
                                 sizeof(my_decomp_master));
-  MEMZERO(cinfo->master, sizeof(my_decomp_master));
+  memset(cinfo->master, 0, sizeof(my_decomp_master));
 }
 
 
diff --git a/jdapistd.c b/jdapistd.c
index 695a620..02cd0cb 100644
--- a/jdapistd.c
+++ b/jdapistd.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2015-2020, D. R. Commander.
+ * Copyright (C) 2010, 2015-2020, 2022, D. R. Commander.
  * Copyright (C) 2015, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -159,8 +159,12 @@
   JDIMENSION input_xoffset;
   boolean reinit_upsampler = FALSE;
   jpeg_component_info *compptr;
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+#endif
 
-  if (cinfo->global_state != DSTATE_SCANNING || cinfo->output_scanline != 0)
+  if ((cinfo->global_state != DSTATE_SCANNING &&
+       cinfo->global_state != DSTATE_BUFIMAGE) || cinfo->output_scanline != 0)
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
   if (!xoffset || !width)
@@ -208,6 +212,13 @@
    */
   *width = *width + input_xoffset - *xoffset;
   cinfo->output_width = *width;
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
+    my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+    upsample->out_row_width =
+      cinfo->output_width * cinfo->out_color_components;
+  }
+#endif
 
   /* Set the first and last iMCU columns that we must decompress.  These values
    * will be used in single-scan decompressions.
@@ -318,7 +329,9 @@
 read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
 {
   JDIMENSION n;
+#ifdef UPSAMPLE_MERGING_SUPPORTED
   my_master_ptr master = (my_master_ptr)cinfo->master;
+#endif
   JSAMPLE dummy_sample[1] = { 0 };
   JSAMPROW dummy_row = dummy_sample;
   JSAMPARRAY scanlines = NULL;
@@ -342,10 +355,12 @@
     cinfo->cquantize->color_quantize = noop_quantize;
   }
 
+#ifdef UPSAMPLE_MERGING_SUPPORTED
   if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
     my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
     scanlines = &upsample->spare_row;
   }
+#endif
 
   for (n = 0; n < num_lines; n++)
     jpeg_read_scanlines(cinfo, scanlines, 1);
@@ -511,7 +526,7 @@
    * all of the entropy decoding occurs in jpeg_start_decompress(), assuming
    * that the input data source is non-suspending.  This makes skipping easy.
    */
-  if (cinfo->inputctl->has_multiple_scans) {
+  if (cinfo->inputctl->has_multiple_scans || cinfo->buffered_image) {
     if (cinfo->upsample->need_context_rows) {
       cinfo->output_scanline += lines_to_skip;
       cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row;
diff --git a/jdarith.c b/jdarith.c
index 7f0d3a7..21575e8 100644
--- a/jdarith.c
+++ b/jdarith.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Developed 1997-2015 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2020, D. R. Commander.
+ * Copyright (C) 2015-2020, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -210,13 +210,13 @@
   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
     compptr = cinfo->cur_comp_info[ci];
     if (!cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
-      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
+      memset(entropy->dc_stats[compptr->dc_tbl_no], 0, DC_STAT_BINS);
       /* Reset DC predictions to 0 */
       entropy->last_dc_val[ci] = 0;
       entropy->dc_context[ci] = 0;
     }
     if (!cinfo->progressive_mode || cinfo->Ss) {
-      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
+      memset(entropy->ac_stats[compptr->ac_tbl_no], 0, AC_STAT_BINS);
     }
   }
 
@@ -471,17 +471,17 @@
       if (*thiscoef) {                          /* previously nonzero coef */
         if (arith_decode(cinfo, st + 2)) {
           if (*thiscoef < 0)
-            *thiscoef += m1;
+            *thiscoef += (JCOEF)m1;
           else
-            *thiscoef += p1;
+            *thiscoef += (JCOEF)p1;
         }
         break;
       }
       if (arith_decode(cinfo, st + 1)) {        /* newly nonzero coef */
         if (arith_decode(cinfo, entropy->fixed_bin))
-          *thiscoef = m1;
+          *thiscoef = (JCOEF)m1;
         else
-          *thiscoef = p1;
+          *thiscoef = (JCOEF)p1;
         break;
       }
       st += 3;  k++;
@@ -698,8 +698,8 @@
     /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
      * This ought to be an error condition, but we make it a warning.
      */
-    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
-        (cinfo->Se < DCTSIZE2 && cinfo->Se != DCTSIZE2 - 1))
+    if (cinfo->Ss != 0 || cinfo->Se != DCTSIZE2 - 1 ||
+        cinfo->Ah != 0 || cinfo->Al != 0)
       WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
     /* Select MCU decoding routine */
     entropy->pub.decode_mcu = decode_mcu;
@@ -715,7 +715,7 @@
       if (entropy->dc_stats[tbl] == NULL)
         entropy->dc_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small)
           ((j_common_ptr)cinfo, JPOOL_IMAGE, DC_STAT_BINS);
-      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
+      memset(entropy->dc_stats[tbl], 0, DC_STAT_BINS);
       /* Initialize DC predictions to 0 */
       entropy->last_dc_val[ci] = 0;
       entropy->dc_context[ci] = 0;
@@ -727,7 +727,7 @@
       if (entropy->ac_stats[tbl] == NULL)
         entropy->ac_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small)
           ((j_common_ptr)cinfo, JPOOL_IMAGE, AC_STAT_BINS);
-      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
+      memset(entropy->ac_stats[tbl], 0, AC_STAT_BINS);
     }
   }
 
diff --git a/jdatadst-tj.c b/jdatadst-tj.c
index fdaa2de..e10d981 100644
--- a/jdatadst-tj.c
+++ b/jdatadst-tj.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * Modified 2009-2012 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2011, 2014, 2016, 2019, D. R. Commander.
+ * Copyright (C) 2011, 2014, 2016, 2019, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -23,10 +23,6 @@
 #include "jpeglib.h"
 #include "jerror.h"
 
-#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc(),free() */
-extern void *malloc(size_t size);
-extern void free(void *ptr);
-#endif
 void jpeg_mem_dest_tj(j_compress_ptr cinfo, unsigned char **outbuffer,
                       unsigned long *outsize, boolean alloc);
 
@@ -101,7 +97,7 @@
   if (nextbuffer == NULL)
     ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
 
-  MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
+  memcpy(nextbuffer, dest->buffer, dest->bufsize);
 
   free(dest->newbuffer);
 
diff --git a/jdatadst.c b/jdatadst.c
index 246fffb..6b4fed2 100644
--- a/jdatadst.c
+++ b/jdatadst.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * Modified 2009-2012 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2013, 2016, D. R. Commander.
+ * Copyright (C) 2013, 2016, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -23,11 +23,6 @@
 #include "jpeglib.h"
 #include "jerror.h"
 
-#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc(),free() */
-extern void *malloc(size_t size);
-extern void free(void *ptr);
-#endif
-
 
 /* Expanded data destination object for stdio output */
 
@@ -116,7 +111,7 @@
 {
   my_dest_ptr dest = (my_dest_ptr)cinfo->dest;
 
-  if (JFWRITE(dest->outfile, dest->buffer, OUTPUT_BUF_SIZE) !=
+  if (fwrite(dest->buffer, 1, OUTPUT_BUF_SIZE, dest->outfile) !=
       (size_t)OUTPUT_BUF_SIZE)
     ERREXIT(cinfo, JERR_FILE_WRITE);
 
@@ -141,7 +136,7 @@
   if (nextbuffer == NULL)
     ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
 
-  MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
+  memcpy(nextbuffer, dest->buffer, dest->bufsize);
 
   free(dest->newbuffer);
 
@@ -175,7 +170,7 @@
 
   /* Write any data remaining in the buffer */
   if (datacount > 0) {
-    if (JFWRITE(dest->outfile, dest->buffer, datacount) != datacount)
+    if (fwrite(dest->buffer, 1, datacount, dest->outfile) != datacount)
       ERREXIT(cinfo, JERR_FILE_WRITE);
   }
   fflush(dest->outfile);
diff --git a/jdatasrc.c b/jdatasrc.c
index eadb4a2..e36a30d 100644
--- a/jdatasrc.c
+++ b/jdatasrc.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * Modified 2009-2011 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2013, 2016, D. R. Commander.
+ * Copyright (C) 2013, 2016, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -104,7 +104,7 @@
   my_src_ptr src = (my_src_ptr)cinfo->src;
   size_t nbytes;
 
-  nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE);
+  nbytes = fread(src->buffer, 1, INPUT_BUF_SIZE, src->infile);
 
   if (nbytes <= 0) {
     if (src->start_of_file)     /* Treat empty input file as fatal error */
diff --git a/jdcoefct.c b/jdcoefct.c
index 15e6cde..88e10c0 100644
--- a/jdcoefct.c
+++ b/jdcoefct.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2010, 2015-2016, 2019-2020, D. R. Commander.
+ * Copyright (C) 2010, 2015-2016, 2019-2020, 2022, D. R. Commander.
  * Copyright (C) 2015, 2020, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -475,7 +475,7 @@
     if (!compptr->component_needed)
       continue;
     /* Count non-dummy DCT block rows in this iMCU row. */
-    if (cinfo->output_iMCU_row < last_iMCU_row - 1) {
+    if (cinfo->output_iMCU_row + 1 < last_iMCU_row) {
       block_rows = compptr->v_samp_factor;
       access_rows = block_rows * 3; /* this and next two iMCU rows */
     } else if (cinfo->output_iMCU_row < last_iMCU_row) {
@@ -560,7 +560,7 @@
         next_block_row = buffer_ptr;
 
       if (block_row < block_rows - 2 ||
-          cinfo->output_iMCU_row < last_iMCU_row - 1)
+          cinfo->output_iMCU_row + 1 < last_iMCU_row)
         next_next_block_row =
           buffer[block_row + 2] + cinfo->master->first_MCU_col[ci];
       else
diff --git a/jdcolext.c b/jdcolext.c
index 863c7a2..fc7e7b8 100644
--- a/jdcolext.c
+++ b/jdcolext.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2011, 2015, D. R. Commander.
+ * Copyright (C) 2009, 2011, 2015, 2023, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -62,10 +62,10 @@
                               ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
                                                 SCALEBITS))];
       outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
-      /* Set unused byte to 0xFF so it can be interpreted as an opaque */
+      /* Set unused byte to MAXJSAMPLE so it can be interpreted as an opaque */
       /* alpha channel value */
 #ifdef RGB_ALPHA
-      outptr[RGB_ALPHA] = 0xFF;
+      outptr[RGB_ALPHA] = MAXJSAMPLE;
 #endif
       outptr += RGB_PIXELSIZE;
     }
@@ -94,10 +94,10 @@
     outptr = *output_buf++;
     for (col = 0; col < num_cols; col++) {
       outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
-      /* Set unused byte to 0xFF so it can be interpreted as an opaque */
+      /* Set unused byte to MAXJSAMPLE so it can be interpreted as an opaque */
       /* alpha channel value */
 #ifdef RGB_ALPHA
-      outptr[RGB_ALPHA] = 0xFF;
+      outptr[RGB_ALPHA] = MAXJSAMPLE;
 #endif
       outptr += RGB_PIXELSIZE;
     }
@@ -130,10 +130,10 @@
       outptr[RGB_RED] = inptr0[col];
       outptr[RGB_GREEN] = inptr1[col];
       outptr[RGB_BLUE] = inptr2[col];
-      /* Set unused byte to 0xFF so it can be interpreted as an opaque */
+      /* Set unused byte to MAXJSAMPLE so it can be interpreted as an opaque */
       /* alpha channel value */
 #ifdef RGB_ALPHA
-      outptr[RGB_ALPHA] = 0xFF;
+      outptr[RGB_ALPHA] = MAXJSAMPLE;
 #endif
       outptr += RGB_PIXELSIZE;
     }
diff --git a/jdcolor.c b/jdcolor.c
index 8da2b4e..735190b 100644
--- a/jdcolor.c
+++ b/jdcolor.c
@@ -18,7 +18,6 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
-#include "jconfigint.h"
 
 
 /* Private subobject */
diff --git a/jddctmgr.c b/jddctmgr.c
index 266f446..e78d7be 100644
--- a/jddctmgr.c
+++ b/jddctmgr.c
@@ -6,7 +6,7 @@
  * Modified 2002-2010 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2010, 2015, D. R. Commander.
+ * Copyright (C) 2010, 2015, 2022, D. R. Commander.
  * Copyright (C) 2013, MIPS Technologies, Inc., California.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -345,7 +345,7 @@
     compptr->dct_table =
       (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                   sizeof(multiplier_table));
-    MEMZERO(compptr->dct_table, sizeof(multiplier_table));
+    memset(compptr->dct_table, 0, sizeof(multiplier_table));
     /* Mark multiplier table not yet set up for any method */
     idct->cur_method[ci] = -1;
   }
diff --git a/jdhuff.c b/jdhuff.c
index f786c10..679d221 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -584,7 +584,7 @@
        * behavior is, to the best of our understanding, innocuous, and it is
        * unclear how to work around it without potentially affecting
        * performance.  Thus, we (hopefully temporarily) suppress UBSan integer
-       * overflow errors for this function.
+       * overflow errors for this function and decode_mcu_fast().
        */
       s += state.last_dc_val[ci];
       state.last_dc_val[ci] = s;
@@ -651,6 +651,12 @@
 }
 
 
+#if defined(__has_feature)
+#if __has_feature(undefined_behavior_sanitizer)
+__attribute__((no_sanitize("signed-integer-overflow"),
+               no_sanitize("unsigned-integer-overflow")))
+#endif
+#endif
 LOCAL(boolean)
 decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 {
@@ -681,6 +687,9 @@
 
     if (entropy->dc_needed[blkn]) {
       int ci = cinfo->MCU_membership[blkn];
+      /* Refer to the comment in decode_mcu_slow() regarding the suppression of
+       * a UBSan integer overflow error in this line of code.
+       */
       s += state.last_dc_val[ci];
       state.last_dc_val[ci] = s;
       if (block)
diff --git a/jdicc.c b/jdicc.c
index a1a5b86..50aa9a9 100644
--- a/jdicc.c
+++ b/jdicc.c
@@ -18,10 +18,6 @@
 #include "jpeglib.h"
 #include "jerror.h"
 
-#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc() */
-extern void *malloc(size_t size);
-#endif
-
 
 #define ICC_MARKER  (JPEG_APP0 + 2)     /* JPEG marker code for ICC */
 #define ICC_OVERHEAD_LEN  14            /* size of non-profile data in APP2 */
diff --git a/jdinput.c b/jdinput.c
index deec618..1bc5aff 100644
--- a/jdinput.c
+++ b/jdinput.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2016, 2018, D. R. Commander.
+ * Copyright (C) 2010, 2016, 2018, 2022, D. R. Commander.
  * Copyright (C) 2015, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -264,7 +264,7 @@
     qtbl = (JQUANT_TBL *)
       (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                   sizeof(JQUANT_TBL));
-    MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], sizeof(JQUANT_TBL));
+    memcpy(qtbl, cinfo->quant_tbl_ptrs[qtblno], sizeof(JQUANT_TBL));
     compptr->quant_table = qtbl;
   }
 }
diff --git a/jdmainct.c b/jdmainct.c
index f466b25..d332e6b 100644
--- a/jdmainct.c
+++ b/jdmainct.c
@@ -18,7 +18,6 @@
 
 #include "jinclude.h"
 #include "jdmainct.h"
-#include "jconfigint.h"
 
 
 /*
diff --git a/jdmarker.c b/jdmarker.c
index b964c3a..f7eba61 100644
--- a/jdmarker.c
+++ b/jdmarker.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2012, 2015, D. R. Commander.
+ * Copyright (C) 2012, 2015, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -473,7 +473,7 @@
     for (i = 0; i < count; i++)
       INPUT_BYTE(cinfo, huffval[i], return FALSE);
 
-    MEMZERO(&huffval[count], (256 - count) * sizeof(UINT8));
+    memset(&huffval[count], 0, (256 - count) * sizeof(UINT8));
 
     length -= count;
 
@@ -491,8 +491,8 @@
     if (*htblptr == NULL)
       *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
 
-    MEMCOPY((*htblptr)->bits, bits, sizeof((*htblptr)->bits));
-    MEMCOPY((*htblptr)->huffval, huffval, sizeof((*htblptr)->huffval));
+    memcpy((*htblptr)->bits, bits, sizeof((*htblptr)->bits));
+    memcpy((*htblptr)->huffval, huffval, sizeof((*htblptr)->huffval));
   }
 
   if (length != 0)
diff --git a/jdmaster.c b/jdmaster.c
index cbc8774..a3690bf 100644
--- a/jdmaster.c
+++ b/jdmaster.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 2002-2009 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2016, 2019, D. R. Commander.
+ * Copyright (C) 2009-2011, 2016, 2019, 2022, D. R. Commander.
  * Copyright (C) 2013, Linaro Limited.
  * Copyright (C) 2015, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
@@ -417,7 +417,7 @@
   table += (MAXJSAMPLE + 1);    /* allow negative subscripts of simple table */
   cinfo->sample_range_limit = table;
   /* First segment of "simple" table: limit[x] = 0 for x < 0 */
-  MEMZERO(table - (MAXJSAMPLE + 1), (MAXJSAMPLE + 1) * sizeof(JSAMPLE));
+  memset(table - (MAXJSAMPLE + 1), 0, (MAXJSAMPLE + 1) * sizeof(JSAMPLE));
   /* Main part of "simple" table: limit[x] = x */
   for (i = 0; i <= MAXJSAMPLE; i++)
     table[i] = (JSAMPLE)i;
@@ -426,10 +426,10 @@
   for (i = CENTERJSAMPLE; i < 2 * (MAXJSAMPLE + 1); i++)
     table[i] = MAXJSAMPLE;
   /* Second half of post-IDCT table */
-  MEMZERO(table + (2 * (MAXJSAMPLE + 1)),
-          (2 * (MAXJSAMPLE + 1) - CENTERJSAMPLE) * sizeof(JSAMPLE));
-  MEMCOPY(table + (4 * (MAXJSAMPLE + 1) - CENTERJSAMPLE),
-          cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE));
+  memset(table + (2 * (MAXJSAMPLE + 1)), 0,
+         (2 * (MAXJSAMPLE + 1) - CENTERJSAMPLE) * sizeof(JSAMPLE));
+  memcpy(table + (4 * (MAXJSAMPLE + 1) - CENTERJSAMPLE),
+         cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE));
 }
 
 
diff --git a/jdmerge.c b/jdmerge.c
index 3a456d6..38b0027 100644
--- a/jdmerge.c
+++ b/jdmerge.c
@@ -42,7 +42,6 @@
 #include "jpeglib.h"
 #include "jdmerge.h"
 #include "jsimd.h"
-#include "jconfigint.h"
 
 #ifdef UPSAMPLE_MERGING_SUPPORTED
 
diff --git a/jdmrgext.c b/jdmrgext.c
index 9bf4f1a..038abc7 100644
--- a/jdmrgext.c
+++ b/jdmrgext.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2011, 2015, 2020, D. R. Commander.
+ * Copyright (C) 2011, 2015, 2020, 2023, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -57,7 +57,7 @@
     outptr[RGB_GREEN] = range_limit[y + cgreen];
     outptr[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr[RGB_ALPHA] = 0xFF;
+    outptr[RGB_ALPHA] = MAXJSAMPLE;
 #endif
     outptr += RGB_PIXELSIZE;
     y  = *inptr0++;
@@ -65,7 +65,7 @@
     outptr[RGB_GREEN] = range_limit[y + cgreen];
     outptr[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr[RGB_ALPHA] = 0xFF;
+    outptr[RGB_ALPHA] = MAXJSAMPLE;
 #endif
     outptr += RGB_PIXELSIZE;
   }
@@ -81,7 +81,7 @@
     outptr[RGB_GREEN] = range_limit[y + cgreen];
     outptr[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr[RGB_ALPHA] = 0xFF;
+    outptr[RGB_ALPHA] = MAXJSAMPLE;
 #endif
   }
 }
@@ -131,7 +131,7 @@
     outptr0[RGB_GREEN] = range_limit[y + cgreen];
     outptr0[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr0[RGB_ALPHA] = 0xFF;
+    outptr0[RGB_ALPHA] = MAXJSAMPLE;
 #endif
     outptr0 += RGB_PIXELSIZE;
     y  = *inptr00++;
@@ -139,7 +139,7 @@
     outptr0[RGB_GREEN] = range_limit[y + cgreen];
     outptr0[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr0[RGB_ALPHA] = 0xFF;
+    outptr0[RGB_ALPHA] = MAXJSAMPLE;
 #endif
     outptr0 += RGB_PIXELSIZE;
     y  = *inptr01++;
@@ -147,7 +147,7 @@
     outptr1[RGB_GREEN] = range_limit[y + cgreen];
     outptr1[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr1[RGB_ALPHA] = 0xFF;
+    outptr1[RGB_ALPHA] = MAXJSAMPLE;
 #endif
     outptr1 += RGB_PIXELSIZE;
     y  = *inptr01++;
@@ -155,7 +155,7 @@
     outptr1[RGB_GREEN] = range_limit[y + cgreen];
     outptr1[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr1[RGB_ALPHA] = 0xFF;
+    outptr1[RGB_ALPHA] = MAXJSAMPLE;
 #endif
     outptr1 += RGB_PIXELSIZE;
   }
@@ -171,14 +171,14 @@
     outptr0[RGB_GREEN] = range_limit[y + cgreen];
     outptr0[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr0[RGB_ALPHA] = 0xFF;
+    outptr0[RGB_ALPHA] = MAXJSAMPLE;
 #endif
     y  = *inptr01;
     outptr1[RGB_RED] =   range_limit[y + cred];
     outptr1[RGB_GREEN] = range_limit[y + cgreen];
     outptr1[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr1[RGB_ALPHA] = 0xFF;
+    outptr1[RGB_ALPHA] = MAXJSAMPLE;
 #endif
   }
 }
diff --git a/jdphuff.c b/jdphuff.c
index c6d82ca..9680ebc 100644
--- a/jdphuff.c
+++ b/jdphuff.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2016, 2018-2021, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018-2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -578,9 +578,9 @@
             if (GET_BITS(1)) {
               if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
                 if (*thiscoef >= 0)
-                  *thiscoef += p1;
+                  *thiscoef += (JCOEF)p1;
                 else
-                  *thiscoef += m1;
+                  *thiscoef += (JCOEF)m1;
               }
             }
           } else {
@@ -612,9 +612,9 @@
           if (GET_BITS(1)) {
             if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
               if (*thiscoef >= 0)
-                *thiscoef += p1;
+                *thiscoef += (JCOEF)p1;
               else
-                *thiscoef += m1;
+                *thiscoef += (JCOEF)m1;
             }
           }
         }
diff --git a/jerror.c b/jerror.c
index 936c4f5..d0ab5b8 100644
--- a/jerror.c
+++ b/jerror.c
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -189,13 +189,13 @@
 
   /* Format the message into the passed buffer */
   if (isstring)
-    sprintf(buffer, msgtext, err->msg_parm.s);
+    SNPRINTF(buffer, JMSG_LENGTH_MAX, msgtext, err->msg_parm.s);
   else
-    sprintf(buffer, msgtext,
-            err->msg_parm.i[0], err->msg_parm.i[1],
-            err->msg_parm.i[2], err->msg_parm.i[3],
-            err->msg_parm.i[4], err->msg_parm.i[5],
-            err->msg_parm.i[6], err->msg_parm.i[7]);
+    SNPRINTF(buffer, JMSG_LENGTH_MAX, msgtext,
+             err->msg_parm.i[0], err->msg_parm.i[1],
+             err->msg_parm.i[2], err->msg_parm.i[3],
+             err->msg_parm.i[4], err->msg_parm.i[5],
+             err->msg_parm.i[6], err->msg_parm.i[7]);
 }
 
 
diff --git a/jerror.h b/jerror.h
index 4476df2..eb44a11 100644
--- a/jerror.h
+++ b/jerror.h
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1997, Thomas G. Lane.
  * Modified 1997-2009 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2014, 2017, D. R. Commander.
+ * Copyright (C) 2014, 2017, 2021-2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -103,7 +103,7 @@
          "Cannot transcode due to multiple use of quantization table %d")
 JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
 JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
-JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
+JMESSAGE(JERR_NOTIMPL, "Requested features are incompatible")
 JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
 #if JPEG_LIB_VERSION >= 70
 JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
@@ -268,6 +268,7 @@
 #define ERREXITS(cinfo, code, str) \
   ((cinfo)->err->msg_code = (code), \
    strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (cinfo)->err->msg_parm.s[JMSG_STR_PARM_MAX - 1] = '\0', \
    (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
 
 #define MAKESTMT(stuff)         do { stuff } while (0)
@@ -324,6 +325,7 @@
 #define TRACEMSS(cinfo, lvl, code, str) \
   ((cinfo)->err->msg_code = (code), \
    strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (cinfo)->err->msg_parm.s[JMSG_STR_PARM_MAX - 1] = '\0', \
    (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)))
 
 #endif /* JERROR_H */
diff --git a/jinclude.h b/jinclude.h
index c1bcf7d..e8d983a 100644
--- a/jinclude.h
+++ b/jinclude.h
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1994, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -17,72 +17,129 @@
  * JPEG library.  Most applications need only include jpeglib.h.
  */
 
+#ifndef __JINCLUDE_H__
+#define __JINCLUDE_H__
 
 /* Include auto-config file to find out which system include files we need. */
 
 #include "jconfig.h"            /* auto configuration options */
+#include "jconfigint.h"
 #define JCONFIG_INCLUDED        /* so that jpeglib.h doesn't do it again */
 
 /*
- * We need the NULL macro and size_t typedef.
- * On an ANSI-conforming system it is sufficient to include <stddef.h>.
- * Otherwise, we get them from <stdlib.h> or <stdio.h>; we may have to
- * pull in <sys/types.h> as well.
  * Note that the core JPEG library does not require <stdio.h>;
  * only the default error handler and data source/destination modules do.
  * But we must pull it in because of the references to FILE in jpeglib.h.
  * You can remove those references if you want to compile without <stdio.h>.
  */
 
-#ifdef HAVE_STDDEF_H
 #include <stddef.h>
-#endif
-
-#ifdef HAVE_STDLIB_H
 #include <stdlib.h>
-#endif
-
-#ifdef NEED_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
 #include <stdio.h>
+#include <string.h>
 
 /*
- * We need memory copying and zeroing functions, plus strncpy().
- * ANSI and System V implementations declare these in <string.h>.
- * BSD doesn't have the mem() functions, but it does have bcopy()/bzero().
- * Some systems may declare memset and memcpy in <memory.h>.
- *
- * NOTE: we assume the size parameters to these functions are of type size_t.
- * Change the casts in these macros if not!
+ * These macros/inline functions facilitate using Microsoft's "safe string"
+ * functions with Visual Studio builds without the need to scatter #ifdefs
+ * throughout the code base.
  */
 
-#ifdef NEED_BSD_STRINGS
 
-#include <strings.h>
-#define MEMZERO(target, size) \
-  bzero((void *)(target), (size_t)(size))
-#define MEMCOPY(dest, src, size) \
-  bcopy((const void *)(src), (void *)(dest), (size_t)(size))
+#ifdef _MSC_VER
 
-#else /* not BSD, assume ANSI/SysV string lib */
+#define SNPRINTF(str, n, format, ...) \
+  _snprintf_s(str, n, _TRUNCATE, format, ##__VA_ARGS__)
 
-#include <string.h>
-#define MEMZERO(target, size) \
-  memset((void *)(target), 0, (size_t)(size))
-#define MEMCOPY(dest, src, size) \
-  memcpy((void *)(dest), (const void *)(src), (size_t)(size))
+#else
+
+#define SNPRINTF  snprintf
 
 #endif
 
-/*
- * The modules that use fread() and fwrite() always invoke them through
- * these macros.  On some systems you may need to twiddle the argument casts.
- * CAUTION: argument order is different from underlying functions!
+
+#ifndef NO_GETENV
+
+#ifdef _MSC_VER
+
+static INLINE int GETENV_S(char *buffer, size_t buffer_size, const char *name)
+{
+  size_t required_size;
+
+  return (int)getenv_s(&required_size, buffer, buffer_size, name);
+}
+
+#else /* _MSC_VER */
+
+#include <errno.h>
+
+/* This provides a similar interface to the Microsoft/C11 getenv_s() function,
+ * but other than parameter validation, it has no advantages over getenv().
  */
 
-#define JFREAD(file, buf, sizeofbuf) \
-  ((size_t)fread((void *)(buf), (size_t)1, (size_t)(sizeofbuf), (file)))
-#define JFWRITE(file, buf, sizeofbuf) \
-  ((size_t)fwrite((const void *)(buf), (size_t)1, (size_t)(sizeofbuf), (file)))
+static INLINE int GETENV_S(char *buffer, size_t buffer_size, const char *name)
+{
+  char *env;
+
+  if (!buffer) {
+    if (buffer_size == 0)
+      return 0;
+    else
+      return (errno = EINVAL);
+  }
+  if (buffer_size == 0)
+    return (errno = EINVAL);
+  if (!name) {
+    *buffer = 0;
+    return 0;
+  }
+
+  env = getenv(name);
+  if (!env)
+  {
+    *buffer = 0;
+    return 0;
+  }
+
+  if (strlen(env) + 1 > buffer_size) {
+    *buffer = 0;
+    return ERANGE;
+  }
+
+  strncpy(buffer, env, buffer_size);
+
+  return 0;
+}
+
+#endif /* _MSC_VER */
+
+#endif /* NO_GETENV */
+
+
+#ifndef NO_PUTENV
+
+#ifdef _WIN32
+
+#define PUTENV_S(name, value)  _putenv_s(name, value)
+
+#else
+
+/* This provides a similar interface to the Microsoft _putenv_s() function, but
+ * other than parameter validation, it has no advantages over setenv().
+ * Returns 0 on success, EINVAL for a NULL argument, else setenv()'s errno.
+ */
+
+static INLINE int PUTENV_S(const char *name, const char *value)
+{
+  if (!name || !value)
+    return (errno = EINVAL);
+
+  /* errno is meaningful only if setenv() fails, so never return it stale. */
+  return setenv(name, value, 1) ? errno : 0;
+}
+
+#endif /* _WIN32 */
+
+#endif /* NO_PUTENV */
+
+
+#endif /* __JINCLUDE_H__ */
diff --git a/jmemmgr.c b/jmemmgr.c
index 508ca74..a40446f 100644
--- a/jmemmgr.c
+++ b/jmemmgr.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2016, D. R. Commander.
+ * Copyright (C) 2016, 2021-2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -37,12 +37,6 @@
 #endif
 #include <limits.h>
 
-#ifndef NO_GETENV
-#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare getenv() */
-extern char *getenv(const char *name);
-#endif
-#endif
-
 
 LOCAL(size_t)
 round_up_pow2(size_t a, size_t b)
@@ -74,10 +68,13 @@
  * There isn't any really portable way to determine the worst-case alignment
  * requirement.  This module assumes that the alignment requirement is
  * multiples of ALIGN_SIZE.
- * By default, we define ALIGN_SIZE as sizeof(double).  This is necessary on
- * some workstations (where doubles really do need 8-byte alignment) and will
- * work fine on nearly everything.  If your machine has lesser alignment needs,
- * you can save a few bytes by making ALIGN_SIZE smaller.
+ * By default, we define ALIGN_SIZE as the maximum of sizeof(double) and
+ * sizeof(void *).  This is necessary on some workstations (where doubles
+ * really do need 8-byte alignment) and will work fine on nearly everything.
+ * We use the maximum of sizeof(double) and sizeof(void *) since sizeof(double)
+ * may be insufficient, for example, on CHERI-enabled platforms with 16-byte
+ * pointers and a 16-byte alignment requirement.  If your machine has lesser
+ * alignment needs, you can save a few bytes by making ALIGN_SIZE smaller.
  * The only place I know of where this will NOT work is certain Macintosh
  * 680x0 compilers that define double as a 10-byte IEEE extended float.
  * Doing 10-byte alignment is counterproductive because longwords won't be
@@ -87,7 +84,7 @@
 
 #ifndef ALIGN_SIZE              /* so can override from jconfig.h */
 #ifndef WITH_SIMD
-#define ALIGN_SIZE  sizeof(double)
+#define ALIGN_SIZE  MAX(sizeof(void *), sizeof(double))
 #else
 #define ALIGN_SIZE  32 /* Most of the SIMD instructions we support require
                           16-byte (128-bit) alignment, but AVX2 requires
@@ -1032,7 +1029,7 @@
     large_pool_ptr next_lhdr_ptr = lhdr_ptr->next;
     space_freed = lhdr_ptr->bytes_used +
                   lhdr_ptr->bytes_left +
-                  sizeof(large_pool_hdr);
+                  sizeof(large_pool_hdr) + ALIGN_SIZE - 1;
     jpeg_free_large(cinfo, (void *)lhdr_ptr, space_freed);
     mem->total_space_allocated -= space_freed;
     lhdr_ptr = next_lhdr_ptr;
@@ -1045,7 +1042,7 @@
   while (shdr_ptr != NULL) {
     small_pool_ptr next_shdr_ptr = shdr_ptr->next;
     space_freed = shdr_ptr->bytes_used + shdr_ptr->bytes_left +
-                  sizeof(small_pool_hdr);
+                  sizeof(small_pool_hdr) + ALIGN_SIZE - 1;
     jpeg_free_small(cinfo, (void *)shdr_ptr, space_freed);
     mem->total_space_allocated -= space_freed;
     shdr_ptr = next_shdr_ptr;
@@ -1162,12 +1159,16 @@
    */
 #ifndef NO_GETENV
   {
-    char *memenv;
+    char memenv[30] = { 0 };
 
-    if ((memenv = getenv("JPEGMEM")) != NULL) {
+    if (!GETENV_S(memenv, 30, "JPEGMEM") && strlen(memenv) > 0) {
       char ch = 'x';
 
+#ifdef _MSC_VER
+      if (sscanf_s(memenv, "%ld%c", &max_to_use, &ch, 1) > 0) {
+#else
       if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
+#endif
         if (ch == 'm' || ch == 'M')
           max_to_use *= 1000L;
         mem->pub.max_memory_to_use = max_to_use * 1000L;
diff --git a/jmemnobs.c b/jmemnobs.c
index 089be8f..cd6571b 100644
--- a/jmemnobs.c
+++ b/jmemnobs.c
@@ -22,11 +22,6 @@
 #include "jpeglib.h"
 #include "jmemsys.h"            /* import the system-dependent declarations */
 
-#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc(),free() */
-extern void *malloc(size_t size);
-extern void free(void *ptr);
-#endif
-
 
 /*
  * Memory allocation and freeing are controlled by the regular library
diff --git a/jmorecfg.h b/jmorecfg.h
index fb3a9cf..b33a991 100644
--- a/jmorecfg.h
+++ b/jmorecfg.h
@@ -100,11 +100,7 @@
 
 /* UINT16 must hold at least the values 0..65535. */
 
-#ifdef HAVE_UNSIGNED_SHORT
 typedef unsigned short UINT16;
-#else /* not HAVE_UNSIGNED_SHORT */
-typedef unsigned int UINT16;
-#endif /* HAVE_UNSIGNED_SHORT */
 
 /* INT16 must hold at least the values -32768..32767. */
 
diff --git a/jpegint.h b/jpegint.h
index 195fbcb..6af9e2a 100644
--- a/jpegint.h
+++ b/jpegint.h
@@ -5,8 +5,9 @@
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 1997-2009 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2016, 2019, D. R. Commander.
+ * Copyright (C) 2015-2016, 2019, 2021, D. R. Commander.
  * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2021, Alex Richardson.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -47,6 +48,18 @@
 /* JLONG must hold at least signed 32-bit values. */
 typedef long JLONG;
 
+/* JUINTPTR must hold pointer values. */
+#ifdef __UINTPTR_TYPE__
+/*
+ * __UINTPTR_TYPE__ is GNU-specific and available in GCC 4.6+ and Clang 3.0+.
+ * Fortunately, that is sufficient to support the few architectures for which
+ * sizeof(void *) != sizeof(size_t).  The only other options would require C99
+ * or Clang-specific builtins.
+ */
+typedef __UINTPTR_TYPE__ JUINTPTR;
+#else
+typedef size_t JUINTPTR;
+#endif
 
 /*
  * Left shift macro that handles a negative operand without causing any
@@ -360,12 +373,3 @@
 
 /* Arithmetic coding probability estimation tables in jaricom.c */
 extern const JLONG jpeg_aritab[];
-
-/* Suppress undefined-structure complaints if necessary. */
-
-#ifdef INCOMPLETE_TYPES_BROKEN
-#ifndef AM_MEMORY_MANAGER       /* only jmemmgr.c defines these */
-struct jvirt_sarray_control { long dummy; };
-struct jvirt_barray_control { long dummy; };
-#endif
-#endif /* INCOMPLETE_TYPES_BROKEN */
diff --git a/jpegtran.1 b/jpegtran.1
deleted file mode 100644
index da7a266..0000000
--- a/jpegtran.1
+++ /dev/null
@@ -1,358 +0,0 @@
-.TH JPEGTRAN 1 "26 October 2020"
-.SH NAME
-jpegtran \- lossless transformation of JPEG files
-.SH SYNOPSIS
-.B jpegtran
-[
-.I options
-]
-[
-.I filename
-]
-.LP
-.SH DESCRIPTION
-.LP
-.B jpegtran
-performs various useful transformations of JPEG files.
-It can translate the coded representation from one variant of JPEG to another,
-for example from baseline JPEG to progressive JPEG or vice versa.  It can also
-perform some rearrangements of the image data, for example turning an image
-from landscape to portrait format by rotation.
-.PP
-For EXIF files and JPEG files containing Exif data, you may prefer to use
-.B exiftran
-instead.
-.PP
-.B jpegtran
-works by rearranging the compressed data (DCT coefficients), without
-ever fully decoding the image.  Therefore, its transformations are lossless:
-there is no image degradation at all, which would not be true if you used
-.B djpeg
-followed by
-.B cjpeg
-to accomplish the same conversion.  But by the same token,
-.B jpegtran
-cannot perform lossy operations such as changing the image quality.  However,
-while the image data is losslessly transformed, metadata can be removed.  See
-the
-.B \-copy
-option for specifics.
-.PP
-.B jpegtran
-reads the named JPEG/JFIF file, or the standard input if no file is
-named, and produces a JPEG/JFIF file on the standard output.
-.SH OPTIONS
-All switch names may be abbreviated; for example,
-.B \-optimize
-may be written
-.B \-opt
-or
-.BR \-o .
-Upper and lower case are equivalent.
-British spellings are also accepted (e.g.,
-.BR \-optimise ),
-though for brevity these are not mentioned below.
-.PP
-To specify the coded JPEG representation used in the output file,
-.B jpegtran
-accepts a subset of the switches recognized by
-.BR cjpeg :
-.TP
-.B \-optimize
-Perform optimization of entropy encoding parameters.
-.TP
-.B \-progressive
-Create progressive JPEG file.
-.TP
-.BI \-restart " N"
-Emit a JPEG restart marker every N MCU rows, or every N MCU blocks if "B" is
-attached to the number.
-.TP
-.B \-arithmetic
-Use arithmetic coding.
-.TP
-.BI \-scans " file"
-Use the scan script given in the specified text file.
-.PP
-See
-.BR cjpeg (1)
-for more details about these switches.
-If you specify none of these switches, you get a plain baseline-JPEG output
-file.  The quality setting and so forth are determined by the input file.
-.PP
-The image can be losslessly transformed by giving one of these switches:
-.TP
-.B \-flip horizontal
-Mirror image horizontally (left-right).
-.TP
-.B \-flip vertical
-Mirror image vertically (top-bottom).
-.TP
-.B \-rotate 90
-Rotate image 90 degrees clockwise.
-.TP
-.B \-rotate 180
-Rotate image 180 degrees.
-.TP
-.B \-rotate 270
-Rotate image 270 degrees clockwise (or 90 ccw).
-.TP
-.B \-transpose
-Transpose image (across UL-to-LR axis).
-.TP
-.B \-transverse
-Transverse transpose (across UR-to-LL axis).
-.PP
-The transpose transformation has no restrictions regarding image dimensions.
-The other transformations operate rather oddly if the image dimensions are not
-a multiple of the iMCU size (usually 8 or 16 pixels), because they can only
-transform complete blocks of DCT coefficient data in the desired way.
-.PP
-.BR jpegtran 's
-default behavior when transforming an odd-size image is designed
-to preserve exact reversibility and mathematical consistency of the
-transformation set.  As stated, transpose is able to flip the entire image
-area.  Horizontal mirroring leaves any partial iMCU column at the right edge
-untouched, but is able to flip all rows of the image.  Similarly, vertical
-mirroring leaves any partial iMCU row at the bottom edge untouched, but is
-able to flip all columns.  The other transforms can be built up as sequences
-of transpose and flip operations; for consistency, their actions on edge
-pixels are defined to be the same as the end result of the corresponding
-transpose-and-flip sequence.
-.PP
-For practical use, you may prefer to discard any untransformable edge pixels
-rather than having a strange-looking strip along the right and/or bottom edges
-of a transformed image.  To do this, add the
-.B \-trim
-switch:
-.TP
-.B \-trim
-Drop non-transformable edge blocks.
-.IP
-Obviously, a transformation with
-.B \-trim
-is not reversible, so strictly speaking
-.B jpegtran
-with this switch is not lossless.  Also, the expected mathematical
-equivalences between the transformations no longer hold.  For example,
-.B \-rot 270 -trim
-trims only the bottom edge, but
-.B \-rot 90 -trim
-followed by
-.B \-rot 180 -trim
-trims both edges.
-.TP
-.B \-perfect
-If you are only interested in perfect transformations, add the
-.B \-perfect
-switch.  This causes
-.B jpegtran
-to fail with an error if the transformation is not perfect.
-.IP
-For example, you may want to do
-.IP
-.B (jpegtran \-rot 90 -perfect
-.I foo.jpg
-.B || djpeg
-.I foo.jpg
-.B | pnmflip \-r90 | cjpeg)
-.IP
-to do a perfect rotation, if available, or an approximated one if not.
-.PP
-This version of \fBjpegtran\fR also offers a lossless crop option, which
-discards data outside of a given image region but losslessly preserves what is
-inside.  Like the rotate and flip transforms, lossless crop is restricted by
-the current JPEG format; the upper left corner of the selected region must fall
-on an iMCU boundary.  If it doesn't, then it is silently moved up and/or left
-to the nearest iMCU boundary (the lower right corner is unchanged.)  Thus, the
-output image covers at least the requested region, but it may cover more.  The
-adjustment of the region dimensions may be optionally disabled by attaching an
-'f' character ("force") to the width or height number.
-
-The image can be losslessly cropped by giving the switch:
-.TP
-.B \-crop WxH+X+Y
-Crop the image to a rectangular region of width W and height H, starting at
-point X,Y.  The lossless crop feature discards data outside of a given image
-region but losslessly preserves what is inside.  Like the rotate and flip
-transforms, lossless crop is restricted by the current JPEG format; the upper
-left corner of the selected region must fall on an iMCU boundary.  If it
-doesn't, then it is silently moved up and/or left to the nearest iMCU boundary
-(the lower right corner is unchanged.)
-.PP
-If W or H is larger than the width/height of the input image, then the output
-image is expanded in size, and the expanded region is filled in with zeros
-(neutral gray).  Attaching an 'f' character ("flatten") to the width number
-will cause each block in the expanded region to be filled in with the DC
-coefficient of the nearest block in the input image rather than grayed out.
-Attaching an 'r' character ("reflect") to the width number will cause the
-expanded region to be filled in with repeated reflections of the input image
-rather than grayed out.
-.PP
-A complementary lossless wipe option is provided to discard (gray out) data
-inside a given image region while losslessly preserving what is outside:
-.TP
-.B \-wipe WxH+X+Y
-Wipe (gray out) a rectangular region of width W and height H from the input
-image, starting at point X,Y.
-.PP
-Attaching an 'f' character ("flatten") to the width number will cause the
-region to be filled with the average of adjacent blocks rather than grayed out.
-If the wipe region and the region outside the wipe region, when adjusted to the
-nearest iMCU boundary, form two horizontally adjacent rectangles, then
-attaching an 'r' character ("reflect") to the width number will cause the wipe
-region to be filled with repeated reflections of the outside region rather than
-grayed out.
-.PP
-A lossless drop option is also provided, which allows another JPEG image to be
-inserted ("dropped") into the input image data at a given position, replacing
-the existing image data at that position:
-.TP
-.B \-drop +X+Y filename
-Drop (insert) another image at point X,Y
-.PP
-Both the input image and the drop image must have the same subsampling level.
-It is best if they also have the same quantization (quality.)  Otherwise, the
-quantization of the output image will be adapted to accommodate the higher of
-the input image quality and the drop image quality.  The trim option can be
-used with the drop option to requantize the drop image to match the input
-image.  Note that a grayscale image can be dropped into a full-color image or
-vice versa, as long as the full-color image has no vertical subsampling.  If
-the input image is grayscale and the drop image is full-color, then the
-chrominance channels from the drop image will be discarded.
-.PP
-Other not-strictly-lossless transformation switches are:
-.TP
-.B \-grayscale
-Force grayscale output.
-.IP
-This option discards the chrominance channels if the input image is YCbCr
-(ie, a standard color JPEG), resulting in a grayscale JPEG file.  The
-luminance channel is preserved exactly, so this is a better method of reducing
-to grayscale than decompression, conversion, and recompression.  This switch
-is particularly handy for fixing a monochrome picture that was mistakenly
-encoded as a color JPEG.  (In such a case, the space savings from getting rid
-of the near-empty chroma channels won't be large; but the decoding time for
-a grayscale JPEG is substantially less than that for a color JPEG.)
-.PP
-.B jpegtran
-also recognizes these switches that control what to do with "extra" markers,
-such as comment blocks:
-.TP
-.B \-copy none
-Copy no extra markers from source file.  This setting suppresses all
-comments and other metadata in the source file.
-.TP
-.B \-copy comments
-Copy only comment markers.  This setting copies comments from the source file
-but discards any other metadata.
-.TP
-.B \-copy all
-Copy all extra markers.  This setting preserves miscellaneous markers
-found in the source file, such as JFIF thumbnails, Exif data, and Photoshop
-settings.  In some files, these extra markers can be sizable.  Note that this
-option will copy thumbnails as-is; they will not be transformed.
-.PP
-The default behavior is \fB-copy comments\fR.  (Note: in IJG releases v6 and
-v6a, \fBjpegtran\fR always did the equivalent of \fB-copy none\fR.)
-.PP
-Additional switches recognized by jpegtran are:
-.TP
-.BI \-icc " file"
-Embed ICC color management profile contained in the specified file.  Note that
-this will cause \fBjpegtran\fR to ignore any APP2 markers in the input file,
-even if \fB-copy all\fR is specified.
-.TP
-.BI \-maxmemory " N"
-Set limit for amount of memory to use in processing large images.  Value is
-in thousands of bytes, or millions of bytes if "M" is attached to the
-number.  For example,
-.B \-max 4m
-selects 4000000 bytes.  If more space is needed, an error will occur.
-.TP
-.BI \-maxscans " N"
-Abort if the input image contains more than
-.I N
-scans.  This feature demonstrates a method by which applications can guard
-against denial-of-service attacks instigated by specially-crafted malformed
-JPEG images containing numerous scans with missing image data or image data
-consisting only of "EOB runs" (a feature of progressive JPEG images that allows
-potentially hundreds of thousands of adjoining zero-value pixels to be
-represented using only a few bytes.)  Attempting to transform such malformed
-JPEG images can cause excessive CPU activity, since the decompressor must fully
-process each scan (even if the scan is corrupt) before it can proceed to the
-next scan.
-.TP
-.BI \-outfile " name"
-Send output image to the named file, not to standard output.
-.TP
-.BI \-report
-Report transformation progress.
-.TP
-.BI \-strict
-Treat all warnings as fatal.  This feature also demonstrates a method by which
-applications can guard against attacks instigated by specially-crafted
-malformed JPEG images.  Enabling this option will cause the decompressor to
-abort if the input image contains incomplete or corrupt image data.
-.TP
-.B \-verbose
-Enable debug printout.  More
-.BR \-v 's
-give more output.  Also, version information is printed at startup.
-.TP
-.B \-debug
-Same as
-.BR \-verbose .
-.TP
-.B \-version
-Print version information and exit.
-.SH EXAMPLES
-.LP
-This example converts a baseline JPEG file to progressive form:
-.IP
-.B jpegtran \-progressive
-.I foo.jpg
-.B >
-.I fooprog.jpg
-.PP
-This example rotates an image 90 degrees clockwise, discarding any
-unrotatable edge pixels:
-.IP
-.B jpegtran \-rot 90 -trim
-.I foo.jpg
-.B >
-.I foo90.jpg
-.SH ENVIRONMENT
-.TP
-.B JPEGMEM
-If this environment variable is set, its value is the default memory limit.
-The value is specified as described for the
-.B \-maxmemory
-switch.
-.B JPEGMEM
-overrides the default value specified when the program was compiled, and
-itself is overridden by an explicit
-.BR \-maxmemory .
-.SH SEE ALSO
-.BR cjpeg (1),
-.BR djpeg (1),
-.BR rdjpgcom (1),
-.BR wrjpgcom (1)
-.br
-Wallace, Gregory K.  "The JPEG Still Picture Compression Standard",
-Communications of the ACM, April 1991 (vol. 34, no. 4), pp. 30-44.
-.SH AUTHOR
-Independent JPEG Group
-.PP
-This file was modified by The libjpeg-turbo Project to include only information
-relevant to libjpeg-turbo and to wordsmith certain sections.
-.SH BUGS
-The transform options can't transform odd-size images perfectly.  Use
-.B \-trim
-or
-.B \-perfect
-if you don't like the results.
-.PP
-The entire image is read into memory and then written out again, even in
-cases where this isn't really necessary.  Expect swapping on large images,
-especially when using the more complex transform options.
diff --git a/jpegtran.c b/jpegtran.c
index 90fda7d..c7bee83 100644
--- a/jpegtran.c
+++ b/jpegtran.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-2019, Thomas G. Lane, Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2014, 2017, 2019-2020, D. R. Commander.
+ * Copyright (C) 2010, 2014, 2017, 2019-2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -14,21 +14,15 @@
  * provides some lossless and sort-of-lossless transformations of JPEG data.
  */
 
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
 #include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 #include "transupp.h"           /* Support routines for jpegtran */
 #include "jversion.h"           /* for version message */
 #include "jconfigint.h"
 
-#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
-#ifdef __MWERKS__
-#include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>            /* ... and this */
-#endif
-#ifdef THINK_C
-#include <console.h>            /* Think declares it here */
-#endif
-#endif
-
 
 /*
  * Argument-parsing code.
@@ -64,6 +58,7 @@
   fprintf(stderr, "Switches (names may be abbreviated):\n");
   fprintf(stderr, "  -copy none     Copy no extra markers from source file\n");
   fprintf(stderr, "  -copy comments Copy only comment markers (default)\n");
+  fprintf(stderr, "  -copy icc      Copy only ICC profile markers\n");
   fprintf(stderr, "  -copy all      Copy all extra markers\n");
 #ifdef ENTROPY_OPT_SUPPORTED
   fprintf(stderr, "  -optimize      Optimize Huffman table (smaller file, but slow compression)\n");
@@ -196,6 +191,8 @@
         copyoption = JCOPYOPT_NONE;
       } else if (keymatch(argv[argn], "comments", 1)) {
         copyoption = JCOPYOPT_COMMENTS;
+      } else if (keymatch(argv[argn], "icc", 1)) {
+        copyoption = JCOPYOPT_ICC;
       } else if (keymatch(argv[argn], "all", 1)) {
         copyoption = JCOPYOPT_ALL;
       } else
@@ -480,11 +477,6 @@
   JOCTET *icc_profile = NULL;
   long icc_len = 0;
 
-  /* On Mac, fetch a command line. */
-#ifdef USE_CCOMMAND
-  argc = ccommand(&argv);
-#endif
-
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
     progname = "jpegtran";      /* in case C library doesn't provide it */
@@ -574,6 +566,8 @@
     fclose(icc_file);
     if (copyoption == JCOPYOPT_ALL)
       copyoption = JCOPYOPT_ALL_EXCEPT_ICC;
+    if (copyoption == JCOPYOPT_ICC)
+      copyoption = JCOPYOPT_NONE;
   }
 
   if (report) {
diff --git a/jsimd.h b/jsimd.h
index 6c20365..74d480a 100644
--- a/jsimd.h
+++ b/jsimd.h
@@ -2,8 +2,8 @@
  * jsimd.h
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2011, 2014, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2011, 2014, 2022, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2020, Arm Limited.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
@@ -114,10 +114,10 @@
 
 EXTERN(void) jsimd_encode_mcu_AC_first_prepare
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits);
+   UJCOEF *values, size_t *zerobits);
 
 EXTERN(int) jsimd_can_encode_mcu_AC_refine_prepare(void);
 
 EXTERN(int) jsimd_encode_mcu_AC_refine_prepare
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits);
+   UJCOEF *absvalues, size_t *bits);
diff --git a/jsimd_none.c b/jsimd_none.c
index 5b38a9f..a25db73 100644
--- a/jsimd_none.c
+++ b/jsimd_none.c
@@ -2,8 +2,8 @@
  * jsimd_none.c
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2011, 2014, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2014, 2022, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2020, Arm Limited.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
@@ -412,7 +412,7 @@
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
 }
 
@@ -425,7 +425,7 @@
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return 0;
 }
diff --git a/jstdhuff.c b/jstdhuff.c
index 036d649..345b513 100644
--- a/jstdhuff.c
+++ b/jstdhuff.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2013, D. R. Commander.
+ * Copyright (C) 2013, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -29,7 +29,7 @@
     return;
 
   /* Copy the number-of-symbols-of-each-code-length counts */
-  MEMCOPY((*htblptr)->bits, bits, sizeof((*htblptr)->bits));
+  memcpy((*htblptr)->bits, bits, sizeof((*htblptr)->bits));
 
   /* Validate the counts.  We do this here mainly so we can copy the right
    * number of symbols from the val[] array, without risking marching off
@@ -41,8 +41,9 @@
   if (nsymbols < 1 || nsymbols > 256)
     ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
 
-  MEMCOPY((*htblptr)->huffval, val, nsymbols * sizeof(UINT8));
-  MEMZERO(&((*htblptr)->huffval[nsymbols]), (256 - nsymbols) * sizeof(UINT8));
+  memcpy((*htblptr)->huffval, val, nsymbols * sizeof(UINT8));
+  memset(&((*htblptr)->huffval[nsymbols]), 0,
+         (256 - nsymbols) * sizeof(UINT8));
 
   /* Initialize sent_table FALSE so table will be written to JPEG file. */
   (*htblptr)->sent_table = FALSE;
diff --git a/jutils.c b/jutils.c
index 5c5bb17..d862716 100644
--- a/jutils.c
+++ b/jutils.c
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code
- * relevant to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -110,7 +110,7 @@
   for (row = num_rows; row > 0; row--) {
     inptr = *input_array++;
     outptr = *output_array++;
-    MEMCOPY(outptr, inptr, count);
+    memcpy(outptr, inptr, count);
   }
 }
 
@@ -120,7 +120,7 @@
                 JDIMENSION num_blocks)
 /* Copy a row of coefficient blocks from one place to another. */
 {
-  MEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * sizeof(JCOEF)));
+  memcpy(output_row, input_row, num_blocks * (DCTSIZE2 * sizeof(JCOEF)));
 }
 
 
@@ -129,5 +129,5 @@
 /* Zero out a chunk of memory. */
 /* This might be sample-array data, block-array data, or alloc_large data. */
 {
-  MEMZERO(target, bytestozero);
+  memset(target, 0, bytestozero);
 }
diff --git a/jversion.h b/jversion.h
index 2ab534a..3d1c61a 100644
--- a/jversion.h
+++ b/jversion.h
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2012-2021, D. R. Commander.
+ * Copyright (C) 2010, 2012-2023, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -37,7 +37,7 @@
  */
 
 #define JCOPYRIGHT \
-  "Copyright (C) 2009-2021 D. R. Commander\n" \
+  "Copyright (C) 2009-2023 D. R. Commander\n" \
   "Copyright (C) 2015, 2020 Google, Inc.\n" \
   "Copyright (C) 2019-2020 Arm Limited\n" \
   "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
@@ -51,4 +51,4 @@
   "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding"
 
 #define JCOPYRIGHT_SHORT \
-  "Copyright (C) 1991-2021 The libjpeg-turbo Project and many others"
+  "Copyright (C) 1991-2022 The libjpeg-turbo Project and many others"
diff --git a/libjpeg.txt b/libjpeg.txt
index 3c680b5..309f9d3 100644
--- a/libjpeg.txt
+++ b/libjpeg.txt
@@ -3,7 +3,7 @@
 This file was part of the Independent JPEG Group's software:
 Copyright (C) 1994-2013, Thomas G. Lane, Guido Vollbeding.
 libjpeg-turbo Modifications:
-Copyright (C) 2010, 2014-2018, 2020, D. R. Commander.
+Copyright (C) 2010, 2014-2018, 2020, 2022, D. R. Commander.
 Copyright (C) 2015, Google, Inc.
 For conditions of distribution and use, see the accompanying README.ijg file.
 
@@ -840,18 +840,7 @@
 machines) and reference it at your link step.  If you use only half of the
 library (only compression or only decompression), only that much code will be
 included from the library, unless your linker is hopelessly brain-damaged.
-The supplied makefiles build libjpeg.a automatically (see install.txt).
-
-While you can build the JPEG library as a shared library if the whim strikes
-you, we don't really recommend it.  The trouble with shared libraries is that
-at some point you'll probably try to substitute a new version of the library
-without recompiling the calling applications.  That generally doesn't work
-because the parameter struct declarations usually change with each new
-version.  In other words, the library's API is *not* guaranteed binary
-compatible across versions; we only try to ensure source-code compatibility.
-(In hindsight, it might have been smarter to hide the parameter structs from
-applications and introduce a ton of access functions instead.  Too late now,
-however.)
+The supplied build system builds libjpeg.a automatically.
 
 It may be worth pointing out that the core JPEG library does not actually
 require the stdio library: only the default source/destination managers and
@@ -3075,9 +3064,8 @@
 larger than a char, so it affects the surrounding application's image data.
 The sample applications cjpeg and djpeg can support 12-bit mode only for PPM
 and GIF file formats; you must disable the other file formats to compile a
-12-bit cjpeg or djpeg.  (install.txt has more information about that.)
-At present, a 12-bit library can handle *only* 12-bit images, not both
-precisions.
+12-bit cjpeg or djpeg.  At present, a 12-bit library can handle *only* 12-bit
+images, not both precisions.
 
 Note that a 12-bit library always compresses in Huffman optimization mode,
 in order to generate valid Huffman tables.  This is necessary because our
diff --git a/md5/md5hl.c b/md5/md5hl.c
index 8a4a762..849a136 100644
--- a/md5/md5hl.c
+++ b/md5/md5hl.c
@@ -6,7 +6,7 @@
  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
  * ----------------------------------------------------------------------------
  * libjpeg-turbo Modifications:
- * Copyright (C)2016, 2018-2019 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2016, 2018-2019, 2022 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -34,6 +34,10 @@
  * ----------------------------------------------------------------------------
  */
 
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
diff --git a/rdbmp.c b/rdbmp.c
index 358a026..433ebe2 100644
--- a/rdbmp.c
+++ b/rdbmp.c
@@ -6,7 +6,7 @@
  * Modified 2009-2017 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
  * Modified 2011 by Siarhei Siamashka.
- * Copyright (C) 2015, 2017-2018, 2021, D. R. Commander.
+ * Copyright (C) 2015, 2017-2018, 2021-2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -39,7 +39,7 @@
 
 
 #define ReadOK(file, buffer, len) \
-  (JFREAD(file, buffer, len) == ((size_t)(len)))
+  (fread(buffer, 1, len, file) == ((size_t)(len)))
 
 static int alpha_index[JPEG_NUMCS] = {
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1
@@ -125,7 +125,8 @@
     break;
   }
 
-  if (sinfo->cinfo->in_color_space == JCS_UNKNOWN && gray)
+  if ((sinfo->cinfo->in_color_space == JCS_UNKNOWN ||
+       sinfo->cinfo->in_color_space == JCS_RGB) && gray)
     sinfo->cinfo->in_color_space = JCS_GRAYSCALE;
 
   if (sinfo->cinfo->in_color_space == JCS_GRAYSCALE && !gray)
@@ -245,7 +246,7 @@
    */
   outptr = source->pub.buffer[0];
   if (cinfo->in_color_space == JCS_EXT_BGR) {
-    MEMCOPY(outptr, inptr, source->row_width);
+    memcpy(outptr, inptr, source->row_width);
   } else if (cinfo->in_color_space == JCS_CMYK) {
     for (col = cinfo->image_width; col > 0; col--) {
       JSAMPLE b = *inptr++, g = *inptr++, r = *inptr++;
@@ -309,7 +310,7 @@
   outptr = source->pub.buffer[0];
   if (cinfo->in_color_space == JCS_EXT_BGRX ||
       cinfo->in_color_space == JCS_EXT_BGRA) {
-    MEMCOPY(outptr, inptr, source->row_width);
+    memcpy(outptr, inptr, source->row_width);
   } else if (cinfo->in_color_space == JCS_CMYK) {
     for (col = cinfo->image_width; col > 0; col--) {
       JSAMPLE b = *inptr++, g = *inptr++, r = *inptr++;
diff --git a/rdgif.c b/rdgif.c
index c814c6b..bdf7401 100644
--- a/rdgif.c
+++ b/rdgif.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 2019 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2021, D. R. Commander.
+ * Copyright (C) 2021-2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -45,7 +45,7 @@
 
 
 #define ReadOK(file, buffer, len) \
-  (JFREAD(file, buffer, len) == ((size_t)(len)))
+  (fread(buffer, 1, len, file) == ((size_t)(len)))
 
 
 #define MAXCOLORMAPSIZE  256    /* max # of colors in a GIF colormap */
@@ -345,7 +345,7 @@
 ReadColorMap(gif_source_ptr sinfo, int cmaplen, JSAMPARRAY cmap)
 /* Read a GIF colormap */
 {
-  int i;
+  int i, gray = 1;
 
   for (i = 0; i < cmaplen; i++) {
 #if BITS_IN_JSAMPLE == 8
@@ -356,6 +356,14 @@
     cmap[CM_RED][i]   = (JSAMPLE)UPSCALE(ReadByte(sinfo));
     cmap[CM_GREEN][i] = (JSAMPLE)UPSCALE(ReadByte(sinfo));
     cmap[CM_BLUE][i]  = (JSAMPLE)UPSCALE(ReadByte(sinfo));
+    if (cmap[CM_RED][i] != cmap[CM_GREEN][i] ||
+        cmap[CM_GREEN][i] != cmap[CM_BLUE][i])
+      gray = 0;
+  }
+
+  if (sinfo->cinfo->in_color_space == JCS_RGB && gray) {
+    sinfo->cinfo->in_color_space = JCS_GRAYSCALE;
+    sinfo->cinfo->input_components = 1;
   }
 }
 
@@ -516,10 +524,15 @@
     source->pub.get_pixel_rows = get_pixel_rows;
   }
 
+  if (cinfo->in_color_space != JCS_GRAYSCALE) {
+    cinfo->in_color_space = JCS_RGB;
+    cinfo->input_components = NUMCOLORS;
+  }
+
   /* Create compressor input buffer. */
   source->pub.buffer = (*cinfo->mem->alloc_sarray)
-    ((j_common_ptr)cinfo, JPOOL_IMAGE, (JDIMENSION)width * NUMCOLORS,
-     (JDIMENSION)1);
+    ((j_common_ptr)cinfo, JPOOL_IMAGE,
+     (JDIMENSION)width * cinfo->input_components, (JDIMENSION)1);
   source->pub.buffer_height = 1;
 
   /* Pad colormap for safety. */
@@ -530,8 +543,6 @@
   }
 
   /* Return info about the image. */
-  cinfo->in_color_space = JCS_RGB;
-  cinfo->input_components = NUMCOLORS;
   cinfo->data_precision = BITS_IN_JSAMPLE; /* we always rescale data to this */
   cinfo->image_width = width;
   cinfo->image_height = height;
@@ -556,11 +567,18 @@
   register JSAMPARRAY colormap = source->colormap;
 
   ptr = source->pub.buffer[0];
-  for (col = cinfo->image_width; col > 0; col--) {
-    c = LZWReadByte(source);
-    *ptr++ = colormap[CM_RED][c];
-    *ptr++ = colormap[CM_GREEN][c];
-    *ptr++ = colormap[CM_BLUE][c];
+  if (cinfo->in_color_space == JCS_GRAYSCALE) {
+    for (col = cinfo->image_width; col > 0; col--) {
+      c = LZWReadByte(source);
+      *ptr++ = colormap[CM_RED][c];
+    }
+  } else {
+    for (col = cinfo->image_width; col > 0; col--) {
+      c = LZWReadByte(source);
+      *ptr++ = colormap[CM_RED][c];
+      *ptr++ = colormap[CM_GREEN][c];
+      *ptr++ = colormap[CM_BLUE][c];
+    }
   }
   return 1;
 }
@@ -646,11 +664,18 @@
      FALSE);
   /* Scan the row, expand colormap, and output */
   ptr = source->pub.buffer[0];
-  for (col = cinfo->image_width; col > 0; col--) {
-    c = *sptr++;
-    *ptr++ = colormap[CM_RED][c];
-    *ptr++ = colormap[CM_GREEN][c];
-    *ptr++ = colormap[CM_BLUE][c];
+  if (cinfo->in_color_space == JCS_GRAYSCALE) {
+    for (col = cinfo->image_width; col > 0; col--) {
+      c = *sptr++;
+      *ptr++ = colormap[CM_RED][c];
+    }
+  } else {
+    for (col = cinfo->image_width; col > 0; col--) {
+      c = *sptr++;
+      *ptr++ = colormap[CM_RED][c];
+      *ptr++ = colormap[CM_GREEN][c];
+      *ptr++ = colormap[CM_BLUE][c];
+    }
   }
   source->cur_row_number++;     /* for next time */
   return 1;
diff --git a/rdjpgcom.1 b/rdjpgcom.1
deleted file mode 100644
index 97611df..0000000
--- a/rdjpgcom.1
+++ /dev/null
@@ -1,63 +0,0 @@
-.TH RDJPGCOM 1 "02 April 2009"
-.SH NAME
-rdjpgcom \- display text comments from a JPEG file
-.SH SYNOPSIS
-.B rdjpgcom
-[
-.B \-raw
-]
-[
-.B \-verbose
-]
-[
-.I filename
-]
-.LP
-.SH DESCRIPTION
-.LP
-.B rdjpgcom
-reads the named JPEG/JFIF file, or the standard input if no file is named,
-and prints any text comments found in the file on the standard output.
-.PP
-The JPEG standard allows "comment" (COM) blocks to occur within a JPEG file.
-Although the standard doesn't actually define what COM blocks are for, they
-are widely used to hold user-supplied text strings.  This lets you add
-annotations, titles, index terms, etc to your JPEG files, and later retrieve
-them as text.  COM blocks do not interfere with the image stored in the JPEG
-file.  The maximum size of a COM block is 64K, but you can have as many of
-them as you like in one JPEG file.
-.SH OPTIONS
-.TP
-.B \-raw
-Normally
-.B rdjpgcom
-escapes non-printable characters in comments, for security reasons.
-This option avoids that.
-.PP
-.B \-verbose
-Causes
-.B rdjpgcom
-to also display the JPEG image dimensions.
-.PP
-Switch names may be abbreviated, and are not case sensitive.
-.SH HINTS
-.B rdjpgcom
-does not depend on the IJG JPEG library.  Its source code is intended as an
-illustration of the minimum amount of code required to parse a JPEG file
-header correctly.
-.PP
-In
-.B \-verbose
-mode,
-.B rdjpgcom
-will also attempt to print the contents of any "APP12" markers as text.
-Some digital cameras produce APP12 markers containing useful textual
-information.  If you like, you can modify the source code to print
-other APPn marker types as well.
-.SH SEE ALSO
-.BR cjpeg (1),
-.BR djpeg (1),
-.BR jpegtran (1),
-.BR wrjpgcom (1)
-.SH AUTHOR
-Independent JPEG Group
diff --git a/rdjpgcom.c b/rdjpgcom.c
index 620270e..d9a6f85 100644
--- a/rdjpgcom.c
+++ b/rdjpgcom.c
@@ -4,8 +4,8 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1997, Thomas G. Lane.
  * Modified 2009 by Bill Allombert, Guido Vollbeding.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -15,12 +15,14 @@
  * JPEG markers.
  */
 
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
 #define JPEG_CJPEG_DJPEG        /* to get the command-line config symbols */
 #include "jinclude.h"           /* get auto-config symbols, <stdio.h> */
 
-#ifdef HAVE_LOCALE_H
 #include <locale.h>             /* Bill Allombert: use locale for isprint */
-#endif
 #include <ctype.h>              /* to declare isupper(), tolower() */
 #ifdef USE_SETMODE
 #include <fcntl.h>              /* to declare setmode()'s parameter macros */
@@ -28,16 +30,6 @@
 #include <io.h>                 /* to declare setmode() */
 #endif
 
-#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
-#ifdef __MWERKS__
-#include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>            /* ... and this */
-#endif
-#ifdef THINK_C
-#include <console.h>            /* Think declares it here */
-#endif
-#endif
-
 #ifdef DONT_USE_B_MODE          /* define mode parameters for fopen() */
 #define READ_BINARY     "r"
 #else
@@ -223,9 +215,7 @@
   int lastch = 0;
 
   /* Bill Allombert: set locale properly for isprint */
-#ifdef HAVE_LOCALE_H
   setlocale(LC_CTYPE, "");
-#endif
 
   /* Get the marker parameter length count */
   length = read_2_bytes();
@@ -253,7 +243,7 @@
     } else if (isprint(ch)) {
       putc(ch, stdout);
     } else {
-      printf("\\%03o", ch);
+      printf("\\%03o", (unsigned int)ch);
     }
     lastch = ch;
     length--;
@@ -261,9 +251,7 @@
   printf("\n");
 
   /* Bill Allombert: revert to C locale */
-#ifdef HAVE_LOCALE_H
   setlocale(LC_CTYPE, "C");
-#endif
 }
 
 
@@ -452,11 +440,6 @@
   char *arg;
   int verbose = 0, raw = 0;
 
-  /* On Mac, fetch a command line. */
-#ifdef USE_CCOMMAND
-  argc = ccommand(&argv);
-#endif
-
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
     progname = "rdjpgcom";      /* in case C library doesn't provide it */
diff --git a/rdppm.c b/rdppm.c
index 9699ca5..883641d 100644
--- a/rdppm.c
+++ b/rdppm.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 2009 by Bill Allombert, Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2017, 2020-2021, D. R. Commander.
+ * Copyright (C) 2015-2017, 2020-2023, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -48,7 +48,7 @@
 
 
 #define ReadOK(file, buffer, len) \
-  (JFREAD(file, buffer, len) == ((size_t)(len)))
+  (fread(buffer, 1, len, file) == ((size_t)(len)))
 
 static int alpha_index[JPEG_NUMCS] = {
   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1
@@ -178,16 +178,16 @@
   ptr = source->pub.buffer[0];
   if (maxval == MAXJSAMPLE) {
     if (aindex >= 0)
-      GRAY_RGB_READ_LOOP(read_pbm_integer(cinfo, infile, maxval),
-                         ptr[aindex] = 0xFF;)
+      GRAY_RGB_READ_LOOP((JSAMPLE)read_pbm_integer(cinfo, infile, maxval),
+                         ptr[aindex] = MAXJSAMPLE;)
     else
-      GRAY_RGB_READ_LOOP(read_pbm_integer(cinfo, infile, maxval),)
+      GRAY_RGB_READ_LOOP((JSAMPLE)read_pbm_integer(cinfo, infile, maxval), {})
   } else {
     if (aindex >= 0)
       GRAY_RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)],
-                         ptr[aindex] = 0xFF;)
+                         ptr[aindex] = MAXJSAMPLE;)
     else
-      GRAY_RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)],)
+      GRAY_RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)], {})
   }
   return 1;
 }
@@ -208,7 +208,7 @@
   ptr = source->pub.buffer[0];
   if (maxval == MAXJSAMPLE) {
     for (col = cinfo->image_width; col > 0; col--) {
-      JSAMPLE gray = read_pbm_integer(cinfo, infile, maxval);
+      JSAMPLE gray = (JSAMPLE)read_pbm_integer(cinfo, infile, maxval);
       rgb_to_cmyk(gray, gray, gray, ptr, ptr + 1, ptr + 2, ptr + 3);
       ptr += 4;
     }
@@ -252,16 +252,16 @@
   ptr = source->pub.buffer[0];
   if (maxval == MAXJSAMPLE) {
     if (aindex >= 0)
-      RGB_READ_LOOP(read_pbm_integer(cinfo, infile, maxval),
-                    ptr[aindex] = 0xFF;)
+      RGB_READ_LOOP((JSAMPLE)read_pbm_integer(cinfo, infile, maxval),
+                    ptr[aindex] = MAXJSAMPLE;)
     else
-      RGB_READ_LOOP(read_pbm_integer(cinfo, infile, maxval),)
+      RGB_READ_LOOP((JSAMPLE)read_pbm_integer(cinfo, infile, maxval), {})
   } else {
     if (aindex >= 0)
       RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)],
-                    ptr[aindex] = 0xFF;)
+                    ptr[aindex] = MAXJSAMPLE;)
     else
-      RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)],)
+      RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)], {})
   }
   return 1;
 }
@@ -282,9 +282,9 @@
   ptr = source->pub.buffer[0];
   if (maxval == MAXJSAMPLE) {
     for (col = cinfo->image_width; col > 0; col--) {
-      JSAMPLE r = read_pbm_integer(cinfo, infile, maxval);
-      JSAMPLE g = read_pbm_integer(cinfo, infile, maxval);
-      JSAMPLE b = read_pbm_integer(cinfo, infile, maxval);
+      JSAMPLE r = (JSAMPLE)read_pbm_integer(cinfo, infile, maxval);
+      JSAMPLE g = (JSAMPLE)read_pbm_integer(cinfo, infile, maxval);
+      JSAMPLE b = (JSAMPLE)read_pbm_integer(cinfo, infile, maxval);
       rgb_to_cmyk(r, g, b, ptr, ptr + 1, ptr + 2, ptr + 3);
       ptr += 4;
     }
@@ -345,14 +345,14 @@
   bufferptr = source->iobuffer;
   if (maxval == MAXJSAMPLE) {
     if (aindex >= 0)
-      GRAY_RGB_READ_LOOP(*bufferptr++, ptr[aindex] = 0xFF;)
+      GRAY_RGB_READ_LOOP(*bufferptr++, ptr[aindex] = MAXJSAMPLE;)
     else
-      GRAY_RGB_READ_LOOP(*bufferptr++,)
+      GRAY_RGB_READ_LOOP(*bufferptr++, {})
   } else {
     if (aindex >= 0)
-      GRAY_RGB_READ_LOOP(rescale[UCH(*bufferptr++)], ptr[aindex] = 0xFF;)
+      GRAY_RGB_READ_LOOP(rescale[UCH(*bufferptr++)], ptr[aindex] = MAXJSAMPLE;)
     else
-      GRAY_RGB_READ_LOOP(rescale[UCH(*bufferptr++)],)
+      GRAY_RGB_READ_LOOP(rescale[UCH(*bufferptr++)], {})
   }
   return 1;
 }
@@ -413,14 +413,14 @@
   bufferptr = source->iobuffer;
   if (maxval == MAXJSAMPLE) {
     if (aindex >= 0)
-      RGB_READ_LOOP(*bufferptr++, ptr[aindex] = 0xFF;)
+      RGB_READ_LOOP(*bufferptr++, ptr[aindex] = MAXJSAMPLE;)
     else
-      RGB_READ_LOOP(*bufferptr++,)
+      RGB_READ_LOOP(*bufferptr++, {})
   } else {
     if (aindex >= 0)
-      RGB_READ_LOOP(rescale[UCH(*bufferptr++)], ptr[aindex] = 0xFF;)
+      RGB_READ_LOOP(rescale[UCH(*bufferptr++)], ptr[aindex] = MAXJSAMPLE;)
     else
-      RGB_READ_LOOP(rescale[UCH(*bufferptr++)],)
+      RGB_READ_LOOP(rescale[UCH(*bufferptr++)], {})
   }
   return 1;
 }
@@ -543,7 +543,7 @@
       ERREXIT(cinfo, JERR_PPM_OUTOFRANGE);
     ptr[bindex] = rescale[temp];
     if (aindex >= 0)
-      ptr[aindex] = 0xFF;
+      ptr[aindex] = MAXJSAMPLE;
     ptr += ps;
   }
   return 1;
@@ -603,7 +603,8 @@
 
   switch (c) {
   case '2':                     /* it's a text-format PGM file */
-    if (cinfo->in_color_space == JCS_UNKNOWN)
+    if (cinfo->in_color_space == JCS_UNKNOWN ||
+        cinfo->in_color_space == JCS_RGB)
       cinfo->in_color_space = JCS_GRAYSCALE;
     TRACEMS2(cinfo, 1, JTRC_PGM_TEXT, w, h);
     if (cinfo->in_color_space == JCS_GRAYSCALE)
@@ -631,7 +632,8 @@
     break;
 
   case '5':                     /* it's a raw-format PGM file */
-    if (cinfo->in_color_space == JCS_UNKNOWN)
+    if (cinfo->in_color_space == JCS_UNKNOWN ||
+        cinfo->in_color_space == JCS_RGB)
       cinfo->in_color_space = JCS_GRAYSCALE;
     TRACEMS2(cinfo, 1, JTRC_PGM, w, h);
     if (maxval > 255) {
@@ -730,8 +732,8 @@
       (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                   (size_t)(((long)MAX(maxval, 255) + 1L) *
                                            sizeof(JSAMPLE)));
-    MEMZERO(source->rescale, (size_t)(((long)MAX(maxval, 255) + 1L) *
-                                      sizeof(JSAMPLE)));
+    memset(source->rescale, 0, (size_t)(((long)MAX(maxval, 255) + 1L) *
+                                        sizeof(JSAMPLE)));
     half_maxval = maxval / 2;
     for (val = 0; val <= (long)maxval; val++) {
       /* The multiplication here must be done in 32 bits to avoid overflow */
diff --git a/rdswitch.c b/rdswitch.c
index 886fec3..33449c8 100644
--- a/rdswitch.c
+++ b/rdswitch.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2018, D. R. Commander.
+ * Copyright (C) 2010, 2018, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -17,6 +17,10 @@
  *      -sample HxV[,HxV,...]   Set component sampling factors
  */
 
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
 #include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 #include <ctype.h>              /* to declare isdigit(), isspace() */
 
@@ -263,7 +267,7 @@
     scanptr = (jpeg_scan_info *)
       (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                   scanno * sizeof(jpeg_scan_info));
-    MEMCOPY(scanptr, scans, scanno * sizeof(jpeg_scan_info));
+    memcpy(scanptr, scans, scanno * sizeof(jpeg_scan_info));
     cinfo->scan_info = scanptr;
     cinfo->num_scans = scanno;
   }
diff --git a/rdtarga.c b/rdtarga.c
index 8f2d031..3ed7eb3 100644
--- a/rdtarga.c
+++ b/rdtarga.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * Modified 2017 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2018, 2021, D. R. Commander.
+ * Copyright (C) 2018, 2021-2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -33,7 +33,7 @@
 
 
 #define ReadOK(file, buffer, len) \
-  (JFREAD(file, buffer, len) == ((size_t)(len)))
+  (fread(buffer, 1, len, file) == ((size_t)(len)))
 
 
 /* Private version of data source object */
diff --git a/simd/arm/aarch32/jsimd.c b/simd/arm/aarch32/jsimd.c
index fac55df..04d6452 100644
--- a/simd/arm/aarch32/jsimd.c
+++ b/simd/arm/aarch32/jsimd.c
@@ -3,8 +3,8 @@
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2019, Google LLC.
  * Copyright (C) 2020, Arm Limited.
  *
@@ -25,12 +25,10 @@
 #include "../../../jsimddct.h"
 #include "../../jsimd.h"
 
-#include <stdio.h>
-#include <string.h>
 #include <ctype.h>
 
-static unsigned int simd_support = ~0;
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = ~0;
+static THREAD_LOCAL unsigned int simd_huffman = 1;
 
 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
 
@@ -98,14 +96,12 @@
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 LOCAL(void)
 init_simd(void)
 {
 #ifndef NO_GETENV
-  char *env = NULL;
+  char env[2] = { 0 };
 #endif
 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
   int bufsize = 1024; /* an initial guess for the line buffer size limit */
@@ -131,14 +127,11 @@
 
 #ifndef NO_GETENV
   /* Force different settings through environment variables */
-  env = getenv("JSIMD_FORCENEON");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCENEON") && !strcmp(env, "1"))
     simd_support = JSIMD_NEON;
-  env = getenv("JSIMD_FORCENONE");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
     simd_support = 0;
-  env = getenv("JSIMD_NOHUFFENC");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
     simd_huffman = 0;
 #endif
 }
@@ -950,7 +943,7 @@
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
   jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
                                          Sl, Al, values, zerobits);
@@ -975,7 +968,7 @@
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return jsimd_encode_mcu_AC_refine_prepare_neon(block,
                                                  jpeg_natural_order_start, Sl,
diff --git a/simd/arm/aarch64/jchuff-neon.c b/simd/arm/aarch64/jchuff-neon.c
index f13fd1b..607a116 100644
--- a/simd/arm/aarch64/jchuff-neon.c
+++ b/simd/arm/aarch64/jchuff-neon.c
@@ -2,7 +2,7 @@
  * jchuff-neon.c - Huffman entropy encoding (64-bit Arm Neon)
  *
  * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2022, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -59,6 +59,17 @@
    14,  15,  30,  31,  44,  45,  46,  47
 };
 
+/* The AArch64 implementation of the FLUSH() macro triggers a UBSan misaligned
+ * address warning because the macro sometimes writes a 64-bit value to a
+ * non-64-bit-aligned address.  That behavior is technically undefined per
+ * the C specification, but it is supported by the AArch64 architecture and
+ * compilers.
+ */
+#if defined(__has_feature)
+#if __has_feature(undefined_behavior_sanitizer)
+__attribute__((no_sanitize("alignment")))
+#endif
+#endif
 JOCTET *jsimd_huff_encode_one_block_neon(void *state, JOCTET *buffer,
                                          JCOEFPTR block, int last_dc_val,
                                          c_derived_tbl *dctbl,
@@ -158,73 +169,43 @@
                                   7), row6, 5);
 
   /* DCT block is now in zig-zag order; start Huffman encoding process. */
-  int16x8_t abs_row0 = vabsq_s16(row0);
-  int16x8_t abs_row1 = vabsq_s16(row1);
-  int16x8_t abs_row2 = vabsq_s16(row2);
-  int16x8_t abs_row3 = vabsq_s16(row3);
-  int16x8_t abs_row4 = vabsq_s16(row4);
-  int16x8_t abs_row5 = vabsq_s16(row5);
-  int16x8_t abs_row6 = vabsq_s16(row6);
-  int16x8_t abs_row7 = vabsq_s16(row7);
-
-  /* For negative coeffs: diff = abs(coeff) -1 = ~abs(coeff) */
-  uint16x8_t row0_diff =
-    vreinterpretq_u16_s16(veorq_s16(abs_row0, vshrq_n_s16(row0, 15)));
-  uint16x8_t row1_diff =
-    vreinterpretq_u16_s16(veorq_s16(abs_row1, vshrq_n_s16(row1, 15)));
-  uint16x8_t row2_diff =
-    vreinterpretq_u16_s16(veorq_s16(abs_row2, vshrq_n_s16(row2, 15)));
-  uint16x8_t row3_diff =
-    vreinterpretq_u16_s16(veorq_s16(abs_row3, vshrq_n_s16(row3, 15)));
-  uint16x8_t row4_diff =
-    vreinterpretq_u16_s16(veorq_s16(abs_row4, vshrq_n_s16(row4, 15)));
-  uint16x8_t row5_diff =
-    vreinterpretq_u16_s16(veorq_s16(abs_row5, vshrq_n_s16(row5, 15)));
-  uint16x8_t row6_diff =
-    vreinterpretq_u16_s16(veorq_s16(abs_row6, vshrq_n_s16(row6, 15)));
-  uint16x8_t row7_diff =
-    vreinterpretq_u16_s16(veorq_s16(abs_row7, vshrq_n_s16(row7, 15)));
 
   /* Construct bitmap to accelerate encoding of AC coefficients.  A set bit
    * means that the corresponding coefficient != 0.
    */
-  uint8x8_t abs_row0_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row0),
-                                               vdupq_n_u16(0)));
-  uint8x8_t abs_row1_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row1),
-                                               vdupq_n_u16(0)));
-  uint8x8_t abs_row2_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row2),
-                                               vdupq_n_u16(0)));
-  uint8x8_t abs_row3_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row3),
-                                               vdupq_n_u16(0)));
-  uint8x8_t abs_row4_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row4),
-                                               vdupq_n_u16(0)));
-  uint8x8_t abs_row5_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row5),
-                                               vdupq_n_u16(0)));
-  uint8x8_t abs_row6_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row6),
-                                               vdupq_n_u16(0)));
-  uint8x8_t abs_row7_gt0 = vmovn_u16(vcgtq_u16(vreinterpretq_u16_s16(abs_row7),
-                                               vdupq_n_u16(0)));
+  uint16x8_t row0_ne_0 = vtstq_s16(row0, row0);
+  uint16x8_t row1_ne_0 = vtstq_s16(row1, row1);
+  uint16x8_t row2_ne_0 = vtstq_s16(row2, row2);
+  uint16x8_t row3_ne_0 = vtstq_s16(row3, row3);
+  uint16x8_t row4_ne_0 = vtstq_s16(row4, row4);
+  uint16x8_t row5_ne_0 = vtstq_s16(row5, row5);
+  uint16x8_t row6_ne_0 = vtstq_s16(row6, row6);
+  uint16x8_t row7_ne_0 = vtstq_s16(row7, row7);
+
+  uint8x16_t row10_ne_0 = vuzp1q_u8(vreinterpretq_u8_u16(row1_ne_0),
+                                    vreinterpretq_u8_u16(row0_ne_0));
+  uint8x16_t row32_ne_0 = vuzp1q_u8(vreinterpretq_u8_u16(row3_ne_0),
+                                    vreinterpretq_u8_u16(row2_ne_0));
+  uint8x16_t row54_ne_0 = vuzp1q_u8(vreinterpretq_u8_u16(row5_ne_0),
+                                    vreinterpretq_u8_u16(row4_ne_0));
+  uint8x16_t row76_ne_0 = vuzp1q_u8(vreinterpretq_u8_u16(row7_ne_0),
+                                    vreinterpretq_u8_u16(row6_ne_0));
 
   /* { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 } */
-  const uint8x8_t bitmap_mask =
-    vreinterpret_u8_u64(vmov_n_u64(0x0102040810204080));
+  const uint8x16_t bitmap_mask =
+    vreinterpretq_u8_u64(vdupq_n_u64(0x0102040810204080));
 
-  abs_row0_gt0 = vand_u8(abs_row0_gt0, bitmap_mask);
-  abs_row1_gt0 = vand_u8(abs_row1_gt0, bitmap_mask);
-  abs_row2_gt0 = vand_u8(abs_row2_gt0, bitmap_mask);
-  abs_row3_gt0 = vand_u8(abs_row3_gt0, bitmap_mask);
-  abs_row4_gt0 = vand_u8(abs_row4_gt0, bitmap_mask);
-  abs_row5_gt0 = vand_u8(abs_row5_gt0, bitmap_mask);
-  abs_row6_gt0 = vand_u8(abs_row6_gt0, bitmap_mask);
-  abs_row7_gt0 = vand_u8(abs_row7_gt0, bitmap_mask);
+  uint8x16_t bitmap_rows_10 = vandq_u8(row10_ne_0, bitmap_mask);
+  uint8x16_t bitmap_rows_32 = vandq_u8(row32_ne_0, bitmap_mask);
+  uint8x16_t bitmap_rows_54 = vandq_u8(row54_ne_0, bitmap_mask);
+  uint8x16_t bitmap_rows_76 = vandq_u8(row76_ne_0, bitmap_mask);
 
-  uint8x8_t bitmap_rows_10 = vpadd_u8(abs_row1_gt0, abs_row0_gt0);
-  uint8x8_t bitmap_rows_32 = vpadd_u8(abs_row3_gt0, abs_row2_gt0);
-  uint8x8_t bitmap_rows_54 = vpadd_u8(abs_row5_gt0, abs_row4_gt0);
-  uint8x8_t bitmap_rows_76 = vpadd_u8(abs_row7_gt0, abs_row6_gt0);
-  uint8x8_t bitmap_rows_3210 = vpadd_u8(bitmap_rows_32, bitmap_rows_10);
-  uint8x8_t bitmap_rows_7654 = vpadd_u8(bitmap_rows_76, bitmap_rows_54);
-  uint8x8_t bitmap_all = vpadd_u8(bitmap_rows_7654, bitmap_rows_3210);
+  uint8x16_t bitmap_rows_3210 = vpaddq_u8(bitmap_rows_32, bitmap_rows_10);
+  uint8x16_t bitmap_rows_7654 = vpaddq_u8(bitmap_rows_76, bitmap_rows_54);
+  uint8x16_t bitmap_rows_76543210 = vpaddq_u8(bitmap_rows_7654,
+                                              bitmap_rows_3210);
+  uint8x8_t bitmap_all = vpadd_u8(vget_low_u8(bitmap_rows_76543210),
+                                  vget_high_u8(bitmap_rows_76543210));
 
   /* Shift left to remove DC bit. */
   bitmap_all =
@@ -241,16 +222,16 @@
 
   /* Encode DC coefficient. */
 
+  /* For negative coeffs: diff = coeff - 1 = -abs(coeff) - 1 = ~abs(coeff) */
+  int16x8_t abs_row0 = vabsq_s16(row0);
+  int16x8_t row0_lz = vclzq_s16(abs_row0);
+  uint16x8_t row0_mask = vshlq_u16(vcltzq_s16(row0), vnegq_s16(row0_lz));
+  uint16x8_t row0_diff = veorq_u16(vreinterpretq_u16_s16(abs_row0), row0_mask);
   /* Find nbits required to specify sign and amplitude of coefficient. */
-#if defined(_MSC_VER) && !defined(__clang__)
-  unsigned int lz = BUILTIN_CLZ(vgetq_lane_s16(abs_row0, 0));
-#else
-  unsigned int lz;
-  __asm__("clz %w0, %w1" : "=r"(lz) : "r"(vgetq_lane_s16(abs_row0, 0)));
-#endif
-  unsigned int nbits = 32 - lz;
+  unsigned int lz = vgetq_lane_u16(vreinterpretq_u16_s16(row0_lz), 0);
+  unsigned int nbits = 16 - lz;
   /* Emit Huffman-coded symbol and additional diff bits. */
-  unsigned int diff = (unsigned int)(vgetq_lane_u16(row0_diff, 0) << lz) >> lz;
+  unsigned int diff = vgetq_lane_u16(row0_diff, 0);
   PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits], diff)
 
   /* Encode AC coefficients. */
@@ -263,13 +244,20 @@
 
   /* The most efficient method of computing nbits and diff depends on the
    * number of non-zero coefficients.  If the bitmap is not too sparse (> 8
-   * non-zero AC coefficients), it is beneficial to use Neon; else we compute
-   * nbits and diff on demand using scalar code.
+   * non-zero AC coefficients), it is beneficial to do all of the work using
+   * Neon; else we do some of the work using Neon and the rest on demand using
+   * scalar code.
    */
   if (non_zero_coefficients > 8) {
     uint8_t block_nbits[DCTSIZE2];
 
-    int16x8_t row0_lz = vclzq_s16(abs_row0);
+    int16x8_t abs_row1 = vabsq_s16(row1);
+    int16x8_t abs_row2 = vabsq_s16(row2);
+    int16x8_t abs_row3 = vabsq_s16(row3);
+    int16x8_t abs_row4 = vabsq_s16(row4);
+    int16x8_t abs_row5 = vabsq_s16(row5);
+    int16x8_t abs_row6 = vabsq_s16(row6);
+    int16x8_t abs_row7 = vabsq_s16(row7);
     int16x8_t row1_lz = vclzq_s16(abs_row1);
     int16x8_t row2_lz = vclzq_s16(abs_row2);
     int16x8_t row3_lz = vclzq_s16(abs_row3);
@@ -277,49 +265,48 @@
     int16x8_t row5_lz = vclzq_s16(abs_row5);
     int16x8_t row6_lz = vclzq_s16(abs_row6);
     int16x8_t row7_lz = vclzq_s16(abs_row7);
+    /* Narrow leading zero count to 8 bits. */
+    uint8x16_t row01_lz = vuzp1q_u8(vreinterpretq_u8_s16(row0_lz),
+                                    vreinterpretq_u8_s16(row1_lz));
+    uint8x16_t row23_lz = vuzp1q_u8(vreinterpretq_u8_s16(row2_lz),
+                                    vreinterpretq_u8_s16(row3_lz));
+    uint8x16_t row45_lz = vuzp1q_u8(vreinterpretq_u8_s16(row4_lz),
+                                    vreinterpretq_u8_s16(row5_lz));
+    uint8x16_t row67_lz = vuzp1q_u8(vreinterpretq_u8_s16(row6_lz),
+                                    vreinterpretq_u8_s16(row7_lz));
     /* Compute nbits needed to specify magnitude of each coefficient. */
-    uint8x8_t row0_nbits = vsub_u8(vdup_n_u8(16),
-                                   vmovn_u16(vreinterpretq_u16_s16(row0_lz)));
-    uint8x8_t row1_nbits = vsub_u8(vdup_n_u8(16),
-                                   vmovn_u16(vreinterpretq_u16_s16(row1_lz)));
-    uint8x8_t row2_nbits = vsub_u8(vdup_n_u8(16),
-                                   vmovn_u16(vreinterpretq_u16_s16(row2_lz)));
-    uint8x8_t row3_nbits = vsub_u8(vdup_n_u8(16),
-                                   vmovn_u16(vreinterpretq_u16_s16(row3_lz)));
-    uint8x8_t row4_nbits = vsub_u8(vdup_n_u8(16),
-                                   vmovn_u16(vreinterpretq_u16_s16(row4_lz)));
-    uint8x8_t row5_nbits = vsub_u8(vdup_n_u8(16),
-                                   vmovn_u16(vreinterpretq_u16_s16(row5_lz)));
-    uint8x8_t row6_nbits = vsub_u8(vdup_n_u8(16),
-                                   vmovn_u16(vreinterpretq_u16_s16(row6_lz)));
-    uint8x8_t row7_nbits = vsub_u8(vdup_n_u8(16),
-                                   vmovn_u16(vreinterpretq_u16_s16(row7_lz)));
+    uint8x16_t row01_nbits = vsubq_u8(vdupq_n_u8(16), row01_lz);
+    uint8x16_t row23_nbits = vsubq_u8(vdupq_n_u8(16), row23_lz);
+    uint8x16_t row45_nbits = vsubq_u8(vdupq_n_u8(16), row45_lz);
+    uint8x16_t row67_nbits = vsubq_u8(vdupq_n_u8(16), row67_lz);
     /* Store nbits. */
-    vst1_u8(block_nbits + 0 * DCTSIZE, row0_nbits);
-    vst1_u8(block_nbits + 1 * DCTSIZE, row1_nbits);
-    vst1_u8(block_nbits + 2 * DCTSIZE, row2_nbits);
-    vst1_u8(block_nbits + 3 * DCTSIZE, row3_nbits);
-    vst1_u8(block_nbits + 4 * DCTSIZE, row4_nbits);
-    vst1_u8(block_nbits + 5 * DCTSIZE, row5_nbits);
-    vst1_u8(block_nbits + 6 * DCTSIZE, row6_nbits);
-    vst1_u8(block_nbits + 7 * DCTSIZE, row7_nbits);
+    vst1q_u8(block_nbits + 0 * DCTSIZE, row01_nbits);
+    vst1q_u8(block_nbits + 2 * DCTSIZE, row23_nbits);
+    vst1q_u8(block_nbits + 4 * DCTSIZE, row45_nbits);
+    vst1q_u8(block_nbits + 6 * DCTSIZE, row67_nbits);
     /* Mask bits not required to specify sign and amplitude of diff. */
-    row0_diff = vshlq_u16(row0_diff, row0_lz);
-    row1_diff = vshlq_u16(row1_diff, row1_lz);
-    row2_diff = vshlq_u16(row2_diff, row2_lz);
-    row3_diff = vshlq_u16(row3_diff, row3_lz);
-    row4_diff = vshlq_u16(row4_diff, row4_lz);
-    row5_diff = vshlq_u16(row5_diff, row5_lz);
-    row6_diff = vshlq_u16(row6_diff, row6_lz);
-    row7_diff = vshlq_u16(row7_diff, row7_lz);
-    row0_diff = vshlq_u16(row0_diff, vnegq_s16(row0_lz));
-    row1_diff = vshlq_u16(row1_diff, vnegq_s16(row1_lz));
-    row2_diff = vshlq_u16(row2_diff, vnegq_s16(row2_lz));
-    row3_diff = vshlq_u16(row3_diff, vnegq_s16(row3_lz));
-    row4_diff = vshlq_u16(row4_diff, vnegq_s16(row4_lz));
-    row5_diff = vshlq_u16(row5_diff, vnegq_s16(row5_lz));
-    row6_diff = vshlq_u16(row6_diff, vnegq_s16(row6_lz));
-    row7_diff = vshlq_u16(row7_diff, vnegq_s16(row7_lz));
+    uint16x8_t row1_mask = vshlq_u16(vcltzq_s16(row1), vnegq_s16(row1_lz));
+    uint16x8_t row2_mask = vshlq_u16(vcltzq_s16(row2), vnegq_s16(row2_lz));
+    uint16x8_t row3_mask = vshlq_u16(vcltzq_s16(row3), vnegq_s16(row3_lz));
+    uint16x8_t row4_mask = vshlq_u16(vcltzq_s16(row4), vnegq_s16(row4_lz));
+    uint16x8_t row5_mask = vshlq_u16(vcltzq_s16(row5), vnegq_s16(row5_lz));
+    uint16x8_t row6_mask = vshlq_u16(vcltzq_s16(row6), vnegq_s16(row6_lz));
+    uint16x8_t row7_mask = vshlq_u16(vcltzq_s16(row7), vnegq_s16(row7_lz));
+    /* diff = abs(coeff) ^ sign(coeff) [no-op for positive coefficients] */
+    uint16x8_t row1_diff = veorq_u16(vreinterpretq_u16_s16(abs_row1),
+                                     row1_mask);
+    uint16x8_t row2_diff = veorq_u16(vreinterpretq_u16_s16(abs_row2),
+                                     row2_mask);
+    uint16x8_t row3_diff = veorq_u16(vreinterpretq_u16_s16(abs_row3),
+                                     row3_mask);
+    uint16x8_t row4_diff = veorq_u16(vreinterpretq_u16_s16(abs_row4),
+                                     row4_mask);
+    uint16x8_t row5_diff = veorq_u16(vreinterpretq_u16_s16(abs_row5),
+                                     row5_mask);
+    uint16x8_t row6_diff = veorq_u16(vreinterpretq_u16_s16(abs_row6),
+                                     row6_mask);
+    uint16x8_t row7_diff = veorq_u16(vreinterpretq_u16_s16(abs_row7),
+                                     row7_mask);
     /* Store diff bits. */
     vst1q_u16(block_diff + 0 * DCTSIZE, row0_diff);
     vst1q_u16(block_diff + 1 * DCTSIZE, row1_diff);
@@ -349,7 +336,14 @@
     }
   } else if (bitmap != 0) {
     uint16_t block_abs[DCTSIZE2];
-    /* Store absolute value of coefficients. */
+    /* Compute and store absolute value of coefficients. */
+    int16x8_t abs_row1 = vabsq_s16(row1);
+    int16x8_t abs_row2 = vabsq_s16(row2);
+    int16x8_t abs_row3 = vabsq_s16(row3);
+    int16x8_t abs_row4 = vabsq_s16(row4);
+    int16x8_t abs_row5 = vabsq_s16(row5);
+    int16x8_t abs_row6 = vabsq_s16(row6);
+    int16x8_t abs_row7 = vabsq_s16(row7);
     vst1q_u16(block_abs + 0 * DCTSIZE, vreinterpretq_u16_s16(abs_row0));
     vst1q_u16(block_abs + 1 * DCTSIZE, vreinterpretq_u16_s16(abs_row1));
     vst1q_u16(block_abs + 2 * DCTSIZE, vreinterpretq_u16_s16(abs_row2));
@@ -358,7 +352,21 @@
     vst1q_u16(block_abs + 5 * DCTSIZE, vreinterpretq_u16_s16(abs_row5));
     vst1q_u16(block_abs + 6 * DCTSIZE, vreinterpretq_u16_s16(abs_row6));
     vst1q_u16(block_abs + 7 * DCTSIZE, vreinterpretq_u16_s16(abs_row7));
-    /* Store diff bits. */
+    /* Compute diff bits (without nbits mask) and store. */
+    uint16x8_t row1_diff = veorq_u16(vreinterpretq_u16_s16(abs_row1),
+                                     vcltzq_s16(row1));
+    uint16x8_t row2_diff = veorq_u16(vreinterpretq_u16_s16(abs_row2),
+                                     vcltzq_s16(row2));
+    uint16x8_t row3_diff = veorq_u16(vreinterpretq_u16_s16(abs_row3),
+                                     vcltzq_s16(row3));
+    uint16x8_t row4_diff = veorq_u16(vreinterpretq_u16_s16(abs_row4),
+                                     vcltzq_s16(row4));
+    uint16x8_t row5_diff = veorq_u16(vreinterpretq_u16_s16(abs_row5),
+                                     vcltzq_s16(row5));
+    uint16x8_t row6_diff = veorq_u16(vreinterpretq_u16_s16(abs_row6),
+                                     vcltzq_s16(row6));
+    uint16x8_t row7_diff = veorq_u16(vreinterpretq_u16_s16(abs_row7),
+                                     vcltzq_s16(row7));
     vst1q_u16(block_diff + 0 * DCTSIZE, row0_diff);
     vst1q_u16(block_diff + 1 * DCTSIZE, row1_diff);
     vst1q_u16(block_diff + 2 * DCTSIZE, row2_diff);
@@ -375,7 +383,7 @@
       bitmap <<= r;
       lz = BUILTIN_CLZ(block_abs[i]);
       nbits = 32 - lz;
-      diff = (unsigned int)(block_diff[i] << lz) >> lz;
+      diff = ((unsigned int)block_diff[i] << lz) >> lz;
       while (r > 15) {
         /* If run length > 15, emit special run-length-16 codes. */
         PUT_BITS(code_0xf0, size_0xf0)
diff --git a/simd/arm/aarch64/jsimd.c b/simd/arm/aarch64/jsimd.c
index 8570b82..358e159 100644
--- a/simd/arm/aarch64/jsimd.c
+++ b/simd/arm/aarch64/jsimd.c
@@ -3,8 +3,8 @@
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2020, Arm Limited.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
@@ -23,20 +23,17 @@
 #include "../../../jdct.h"
 #include "../../../jsimddct.h"
 #include "../../jsimd.h"
-#include "jconfigint.h"
 
-#include <stdio.h>
-#include <string.h>
 #include <ctype.h>
 
 #define JSIMD_FASTLD3  1
 #define JSIMD_FASTST3  2
 #define JSIMD_FASTTBL  4
 
-static unsigned int simd_support = ~0;
-static unsigned int simd_huffman = 1;
-static unsigned int simd_features = JSIMD_FASTLD3 | JSIMD_FASTST3 |
-                                    JSIMD_FASTTBL;
+static THREAD_LOCAL unsigned int simd_support = ~0;
+static THREAD_LOCAL unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_features = JSIMD_FASTLD3 |
+                                                 JSIMD_FASTST3 | JSIMD_FASTTBL;
 
 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
 
@@ -111,8 +108,6 @@
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 
 /*
@@ -125,7 +120,7 @@
 init_simd(void)
 {
 #ifndef NO_GETENV
-  char *env = NULL;
+  char env[2] = { 0 };
 #endif
 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
   int bufsize = 1024; /* an initial guess for the line buffer size limit */
@@ -147,24 +142,19 @@
 
 #ifndef NO_GETENV
   /* Force different settings through environment variables */
-  env = getenv("JSIMD_FORCENEON");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCENEON") && !strcmp(env, "1"))
     simd_support = JSIMD_NEON;
-  env = getenv("JSIMD_FORCENONE");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
     simd_support = 0;
-  env = getenv("JSIMD_NOHUFFENC");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
     simd_huffman = 0;
-  env = getenv("JSIMD_FASTLD3");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "1"))
     simd_features |= JSIMD_FASTLD3;
-  if ((env != NULL) && (strcmp(env, "0") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "0"))
     simd_features &= ~JSIMD_FASTLD3;
-  env = getenv("JSIMD_FASTST3");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "1"))
     simd_features |= JSIMD_FASTST3;
-  if ((env != NULL) && (strcmp(env, "0") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "0"))
     simd_features &= ~JSIMD_FASTST3;
 #endif
 }
@@ -1028,7 +1018,7 @@
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
   jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
                                          Sl, Al, values, zerobits);
@@ -1055,7 +1045,7 @@
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return jsimd_encode_mcu_AC_refine_prepare_neon(block,
                                                  jpeg_natural_order_start,
diff --git a/simd/arm/jcphuff-neon.c b/simd/arm/jcphuff-neon.c
index b91c5db..51db3c5 100644
--- a/simd/arm/jcphuff-neon.c
+++ b/simd/arm/jcphuff-neon.c
@@ -2,6 +2,8 @@
  * jcphuff-neon.c - prepare data for progressive Huffman encoding (Arm Neon)
  *
  * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2022, Matthieu Darbois.  All Rights Reserved.
+ * Copyright (C) 2022, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -21,7 +23,6 @@
  */
 
 #define JPEG_INTERNALS
-#include "jconfigint.h"
 #include "../../jinclude.h"
 #include "../../jpeglib.h"
 #include "../../jsimd.h"
@@ -41,10 +42,10 @@
 
 void jsimd_encode_mcu_AC_first_prepare_neon
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits)
+   UJCOEF *values, size_t *zerobits)
 {
-  JCOEF *values_ptr = values;
-  JCOEF *diff_values_ptr = values + DCTSIZE2;
+  UJCOEF *values_ptr = values;
+  UJCOEF *diff_values_ptr = values + DCTSIZE2;
 
   /* Rows of coefficients to zero (since they haven't been processed) */
   int i, rows_to_zero = 8;
@@ -68,23 +69,23 @@
     coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[15], coefs2, 7);
 
     /* Isolate sign of coefficients. */
-    int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15);
-    int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15);
+    uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15));
+    uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15));
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs1 = vabsq_s16(coefs1);
-    int16x8_t abs_coefs2 = vabsq_s16(coefs2);
-    coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
-    coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
+    uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+    uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+    abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+    abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
 
     /* Compute diff values. */
-    int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1);
-    int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2);
+    uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1);
+    uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2);
 
     /* Store transformed coefficients and diff values. */
-    vst1q_s16(values_ptr, coefs1);
-    vst1q_s16(values_ptr + DCTSIZE, coefs2);
-    vst1q_s16(diff_values_ptr, diff1);
-    vst1q_s16(diff_values_ptr + DCTSIZE, diff2);
+    vst1q_u16(values_ptr, abs_coefs1);
+    vst1q_u16(values_ptr + DCTSIZE, abs_coefs2);
+    vst1q_u16(diff_values_ptr, diff1);
+    vst1q_u16(diff_values_ptr + DCTSIZE, diff2);
     values_ptr += 16;
     diff_values_ptr += 16;
     jpeg_natural_order_start += 16;
@@ -130,23 +131,23 @@
     }
 
     /* Isolate sign of coefficients. */
-    int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15);
-    int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15);
+    uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15));
+    uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15));
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs1 = vabsq_s16(coefs1);
-    int16x8_t abs_coefs2 = vabsq_s16(coefs2);
-    coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
-    coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
+    uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+    uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+    abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+    abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
 
     /* Compute diff values. */
-    int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1);
-    int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2);
+    uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1);
+    uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2);
 
     /* Store transformed coefficients and diff values. */
-    vst1q_s16(values_ptr, coefs1);
-    vst1q_s16(values_ptr + DCTSIZE, coefs2);
-    vst1q_s16(diff_values_ptr, diff1);
-    vst1q_s16(diff_values_ptr + DCTSIZE, diff2);
+    vst1q_u16(values_ptr, abs_coefs1);
+    vst1q_u16(values_ptr + DCTSIZE, abs_coefs2);
+    vst1q_u16(diff_values_ptr, diff1);
+    vst1q_u16(diff_values_ptr + DCTSIZE, diff2);
     values_ptr += 16;
     diff_values_ptr += 16;
     rows_to_zero -= 2;
@@ -184,17 +185,17 @@
     }
 
     /* Isolate sign of coefficients. */
-    int16x8_t sign_coefs = vshrq_n_s16(coefs, 15);
+    uint16x8_t sign_coefs = vreinterpretq_u16_s16(vshrq_n_s16(coefs, 15));
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs = vabsq_s16(coefs);
-    coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al));
+    uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs));
+    abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al));
 
     /* Compute diff values. */
-    int16x8_t diff = veorq_s16(coefs, sign_coefs);
+    uint16x8_t diff = veorq_u16(abs_coefs, sign_coefs);
 
     /* Store transformed coefficients and diff values. */
-    vst1q_s16(values_ptr, coefs);
-    vst1q_s16(diff_values_ptr, diff);
+    vst1q_u16(values_ptr, abs_coefs);
+    vst1q_u16(diff_values_ptr, diff);
     values_ptr += 8;
     diff_values_ptr += 8;
     rows_to_zero--;
@@ -202,8 +203,8 @@
 
   /* Zero remaining memory in the values and diff_values blocks. */
   for (i = 0; i < rows_to_zero; i++) {
-    vst1q_s16(values_ptr, vdupq_n_s16(0));
-    vst1q_s16(diff_values_ptr, vdupq_n_s16(0));
+    vst1q_u16(values_ptr, vdupq_n_u16(0));
+    vst1q_u16(diff_values_ptr, vdupq_n_u16(0));
     values_ptr += 8;
     diff_values_ptr += 8;
   }
@@ -211,23 +212,23 @@
   /* Construct zerobits bitmap.  A set bit means that the corresponding
    * coefficient != 0.
    */
-  int16x8_t row0 = vld1q_s16(values + 0 * DCTSIZE);
-  int16x8_t row1 = vld1q_s16(values + 1 * DCTSIZE);
-  int16x8_t row2 = vld1q_s16(values + 2 * DCTSIZE);
-  int16x8_t row3 = vld1q_s16(values + 3 * DCTSIZE);
-  int16x8_t row4 = vld1q_s16(values + 4 * DCTSIZE);
-  int16x8_t row5 = vld1q_s16(values + 5 * DCTSIZE);
-  int16x8_t row6 = vld1q_s16(values + 6 * DCTSIZE);
-  int16x8_t row7 = vld1q_s16(values + 7 * DCTSIZE);
+  uint16x8_t row0 = vld1q_u16(values + 0 * DCTSIZE);
+  uint16x8_t row1 = vld1q_u16(values + 1 * DCTSIZE);
+  uint16x8_t row2 = vld1q_u16(values + 2 * DCTSIZE);
+  uint16x8_t row3 = vld1q_u16(values + 3 * DCTSIZE);
+  uint16x8_t row4 = vld1q_u16(values + 4 * DCTSIZE);
+  uint16x8_t row5 = vld1q_u16(values + 5 * DCTSIZE);
+  uint16x8_t row6 = vld1q_u16(values + 6 * DCTSIZE);
+  uint16x8_t row7 = vld1q_u16(values + 7 * DCTSIZE);
 
-  uint8x8_t row0_eq0 = vmovn_u16(vceqq_s16(row0, vdupq_n_s16(0)));
-  uint8x8_t row1_eq0 = vmovn_u16(vceqq_s16(row1, vdupq_n_s16(0)));
-  uint8x8_t row2_eq0 = vmovn_u16(vceqq_s16(row2, vdupq_n_s16(0)));
-  uint8x8_t row3_eq0 = vmovn_u16(vceqq_s16(row3, vdupq_n_s16(0)));
-  uint8x8_t row4_eq0 = vmovn_u16(vceqq_s16(row4, vdupq_n_s16(0)));
-  uint8x8_t row5_eq0 = vmovn_u16(vceqq_s16(row5, vdupq_n_s16(0)));
-  uint8x8_t row6_eq0 = vmovn_u16(vceqq_s16(row6, vdupq_n_s16(0)));
-  uint8x8_t row7_eq0 = vmovn_u16(vceqq_s16(row7, vdupq_n_s16(0)));
+  uint8x8_t row0_eq0 = vmovn_u16(vceqq_u16(row0, vdupq_n_u16(0)));
+  uint8x8_t row1_eq0 = vmovn_u16(vceqq_u16(row1, vdupq_n_u16(0)));
+  uint8x8_t row2_eq0 = vmovn_u16(vceqq_u16(row2, vdupq_n_u16(0)));
+  uint8x8_t row3_eq0 = vmovn_u16(vceqq_u16(row3, vdupq_n_u16(0)));
+  uint8x8_t row4_eq0 = vmovn_u16(vceqq_u16(row4, vdupq_n_u16(0)));
+  uint8x8_t row5_eq0 = vmovn_u16(vceqq_u16(row5, vdupq_n_u16(0)));
+  uint8x8_t row6_eq0 = vmovn_u16(vceqq_u16(row6, vdupq_n_u16(0)));
+  uint8x8_t row7_eq0 = vmovn_u16(vceqq_u16(row7, vdupq_n_u16(0)));
 
   /* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */
   const uint8x8_t bitmap_mask =
@@ -274,7 +275,7 @@
 
 int jsimd_encode_mcu_AC_refine_prepare_neon
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits)
+   UJCOEF *absvalues, size_t *bits)
 {
   /* Temporary storage buffers for data used to compute the signbits bitmap and
    * the end-of-block (EOB) position
@@ -282,7 +283,7 @@
   uint8_t coef_sign_bits[64];
   uint8_t coef_eq1_bits[64];
 
-  JCOEF *absvalues_ptr = absvalues;
+  UJCOEF *absvalues_ptr = absvalues;
   uint8_t *coef_sign_bits_ptr = coef_sign_bits;
   uint8_t *eq1_bits_ptr = coef_eq1_bits;
 
@@ -316,18 +317,18 @@
     vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2);
 
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs1 = vabsq_s16(coefs1);
-    int16x8_t abs_coefs2 = vabsq_s16(coefs2);
-    coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
-    coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
-    vst1q_s16(absvalues_ptr, coefs1);
-    vst1q_s16(absvalues_ptr + DCTSIZE, coefs2);
+    uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+    uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+    abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+    abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
+    vst1q_u16(absvalues_ptr, abs_coefs1);
+    vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2);
 
     /* Test whether transformed coefficient values == 1 (used to find EOB
      * position.)
      */
-    uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1)));
-    uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1)));
+    uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1)));
+    uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1)));
     vst1_u8(eq1_bits_ptr, coefs_eq11);
     vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12);
 
@@ -385,18 +386,18 @@
     vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2);
 
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs1 = vabsq_s16(coefs1);
-    int16x8_t abs_coefs2 = vabsq_s16(coefs2);
-    coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
-    coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
-    vst1q_s16(absvalues_ptr, coefs1);
-    vst1q_s16(absvalues_ptr + DCTSIZE, coefs2);
+    uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+    uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+    abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+    abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
+    vst1q_u16(absvalues_ptr, abs_coefs1);
+    vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2);
 
     /* Test whether transformed coefficient values == 1 (used to find EOB
      * position.)
      */
-    uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1)));
-    uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1)));
+    uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1)));
+    uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1)));
     vst1_u8(eq1_bits_ptr, coefs_eq11);
     vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12);
 
@@ -444,14 +445,14 @@
     vst1_u8(coef_sign_bits_ptr, sign_coefs);
 
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs = vabsq_s16(coefs);
-    coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al));
-    vst1q_s16(absvalues_ptr, coefs);
+    uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs));
+    abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al));
+    vst1q_u16(absvalues_ptr, abs_coefs);
 
     /* Test whether transformed coefficient values == 1 (used to find EOB
      * position.)
      */
-    uint8x8_t coefs_eq1 = vmovn_u16(vceqq_s16(coefs, vdupq_n_s16(1)));
+    uint8x8_t coefs_eq1 = vmovn_u16(vceqq_u16(abs_coefs, vdupq_n_u16(1)));
     vst1_u8(eq1_bits_ptr, coefs_eq1);
 
     absvalues_ptr += 8;
@@ -462,7 +463,7 @@
 
   /* Zero remaining memory in blocks. */
   for (i = 0; i < rows_to_zero; i++) {
-    vst1q_s16(absvalues_ptr, vdupq_n_s16(0));
+    vst1q_u16(absvalues_ptr, vdupq_n_u16(0));
     vst1_u8(coef_sign_bits_ptr, vdup_n_u8(0));
     vst1_u8(eq1_bits_ptr, vdup_n_u8(0));
     absvalues_ptr += 8;
@@ -471,23 +472,23 @@
   }
 
   /* Construct zerobits bitmap. */
-  int16x8_t abs_row0 = vld1q_s16(absvalues + 0 * DCTSIZE);
-  int16x8_t abs_row1 = vld1q_s16(absvalues + 1 * DCTSIZE);
-  int16x8_t abs_row2 = vld1q_s16(absvalues + 2 * DCTSIZE);
-  int16x8_t abs_row3 = vld1q_s16(absvalues + 3 * DCTSIZE);
-  int16x8_t abs_row4 = vld1q_s16(absvalues + 4 * DCTSIZE);
-  int16x8_t abs_row5 = vld1q_s16(absvalues + 5 * DCTSIZE);
-  int16x8_t abs_row6 = vld1q_s16(absvalues + 6 * DCTSIZE);
-  int16x8_t abs_row7 = vld1q_s16(absvalues + 7 * DCTSIZE);
+  uint16x8_t abs_row0 = vld1q_u16(absvalues + 0 * DCTSIZE);
+  uint16x8_t abs_row1 = vld1q_u16(absvalues + 1 * DCTSIZE);
+  uint16x8_t abs_row2 = vld1q_u16(absvalues + 2 * DCTSIZE);
+  uint16x8_t abs_row3 = vld1q_u16(absvalues + 3 * DCTSIZE);
+  uint16x8_t abs_row4 = vld1q_u16(absvalues + 4 * DCTSIZE);
+  uint16x8_t abs_row5 = vld1q_u16(absvalues + 5 * DCTSIZE);
+  uint16x8_t abs_row6 = vld1q_u16(absvalues + 6 * DCTSIZE);
+  uint16x8_t abs_row7 = vld1q_u16(absvalues + 7 * DCTSIZE);
 
-  uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_s16(abs_row0, vdupq_n_s16(0)));
-  uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_s16(abs_row1, vdupq_n_s16(0)));
-  uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_s16(abs_row2, vdupq_n_s16(0)));
-  uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_s16(abs_row3, vdupq_n_s16(0)));
-  uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_s16(abs_row4, vdupq_n_s16(0)));
-  uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_s16(abs_row5, vdupq_n_s16(0)));
-  uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_s16(abs_row6, vdupq_n_s16(0)));
-  uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_s16(abs_row7, vdupq_n_s16(0)));
+  uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_u16(abs_row0, vdupq_n_u16(0)));
+  uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_u16(abs_row1, vdupq_n_u16(0)));
+  uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_u16(abs_row2, vdupq_n_u16(0)));
+  uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_u16(abs_row3, vdupq_n_u16(0)));
+  uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_u16(abs_row4, vdupq_n_u16(0)));
+  uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_u16(abs_row5, vdupq_n_u16(0)));
+  uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_u16(abs_row6, vdupq_n_u16(0)));
+  uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_u16(abs_row7, vdupq_n_u16(0)));
 
   /* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */
   const uint8x8_t bitmap_mask =
diff --git a/simd/arm/jdcolor-neon.c b/simd/arm/jdcolor-neon.c
index ea4668f..28dbc57 100644
--- a/simd/arm/jdcolor-neon.c
+++ b/simd/arm/jdcolor-neon.c
@@ -21,7 +21,6 @@
  */
 
 #define JPEG_INTERNALS
-#include "jconfigint.h"
 #include "../../jinclude.h"
 #include "../../jpeglib.h"
 #include "../../jsimd.h"
diff --git a/simd/arm/jdmerge-neon.c b/simd/arm/jdmerge-neon.c
index e4f91fd..18fb9d8 100644
--- a/simd/arm/jdmerge-neon.c
+++ b/simd/arm/jdmerge-neon.c
@@ -21,7 +21,6 @@
  */
 
 #define JPEG_INTERNALS
-#include "jconfigint.h"
 #include "../../jinclude.h"
 #include "../../jpeglib.h"
 #include "../../jsimd.h"
diff --git a/simd/arm/jidctint-neon.c b/simd/arm/jidctint-neon.c
index 043b652..d25112e 100644
--- a/simd/arm/jidctint-neon.c
+++ b/simd/arm/jidctint-neon.c
@@ -22,7 +22,6 @@
  */
 
 #define JPEG_INTERNALS
-#include "jconfigint.h"
 #include "../../jinclude.h"
 #include "../../jpeglib.h"
 #include "../../jsimd.h"
diff --git a/simd/arm/jquanti-neon.c b/simd/arm/jquanti-neon.c
index a7eb6f1..d5d95d8 100644
--- a/simd/arm/jquanti-neon.c
+++ b/simd/arm/jquanti-neon.c
@@ -1,7 +1,7 @@
 /*
  * jquanti-neon.c - sample data conversion and quantization (Arm Neon)
  *
- * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -100,6 +100,9 @@
   DCTELEM *shift_ptr = divisors + 3 * DCTSIZE2;
   int i;
 
+#if defined(__clang__) && (defined(__aarch64__) || defined(_M_ARM64))
+#pragma unroll
+#endif
   for (i = 0; i < DCTSIZE; i += DCTSIZE / 2) {
     /* Load DCT coefficients. */
     int16x8_t row0 = vld1q_s16(workspace + (i + 0) * DCTSIZE);
diff --git a/simd/i386/jsimd.c b/simd/i386/jsimd.c
index 563949a..b429b0a 100644
--- a/simd/i386/jsimd.c
+++ b/simd/i386/jsimd.c
@@ -2,8 +2,8 @@
  * jsimd_i386.c
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2023, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -21,7 +21,6 @@
 #include "../../jdct.h"
 #include "../../jsimddct.h"
 #include "../jsimd.h"
-#include "jconfigint.h"
 
 /*
  * In the PIC cases, we have no guarantee that constants will keep
@@ -32,19 +31,17 @@
 #define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
 #define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
 
-static unsigned int simd_support = (unsigned int)(~0);
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
+static THREAD_LOCAL unsigned int simd_huffman = 1;
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 LOCAL(void)
 init_simd(void)
 {
 #ifndef NO_GETENV
-  char *env = NULL;
+  char env[2] = { 0 };
 #endif
 
   if (simd_support != ~0U)
@@ -54,26 +51,19 @@
 
 #ifndef NO_GETENV
   /* Force different settings through environment variables */
-  env = getenv("JSIMD_FORCEMMX");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCEMMX") && !strcmp(env, "1"))
     simd_support &= JSIMD_MMX;
-  env = getenv("JSIMD_FORCE3DNOW");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCE3DNOW") && !strcmp(env, "1"))
     simd_support &= JSIMD_3DNOW | JSIMD_MMX;
-  env = getenv("JSIMD_FORCESSE");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCESSE") && !strcmp(env, "1"))
     simd_support &= JSIMD_SSE | JSIMD_MMX;
-  env = getenv("JSIMD_FORCESSE2");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
     simd_support &= JSIMD_SSE2;
-  env = getenv("JSIMD_FORCEAVX2");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
     simd_support &= JSIMD_AVX2;
-  env = getenv("JSIMD_FORCENONE");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
     simd_support = 0;
-  env = getenv("JSIMD_NOHUFFENC");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
     simd_huffman = 0;
 #endif
 }
@@ -168,6 +158,9 @@
   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_extrgb_ycc_convert_avx2;
@@ -227,6 +220,9 @@
   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_extrgb_gray_convert_avx2;
@@ -286,6 +282,9 @@
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
   void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_ycc_extrgb_convert_avx2;
@@ -389,6 +388,9 @@
 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
                                compptr->v_samp_factor,
@@ -409,6 +411,9 @@
 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
                                compptr->v_samp_factor,
@@ -471,6 +476,9 @@
 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
                              input_data, output_data_ptr);
@@ -486,6 +494,9 @@
 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
                              input_data, output_data_ptr);
@@ -547,6 +558,9 @@
 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
                                    compptr->downsampled_width, input_data,
@@ -565,6 +579,9 @@
 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
                                    compptr->downsampled_width, input_data,
@@ -633,6 +650,9 @@
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
@@ -691,6 +711,9 @@
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
@@ -795,6 +818,9 @@
 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
                DCTELEM *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_convsamp_avx2(sample_data, start_col, workspace);
   else if (simd_support & JSIMD_SSE2)
@@ -807,6 +833,9 @@
 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
                      FAST_FLOAT *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_SSE2)
     jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
   else if (simd_support & JSIMD_SSE)
@@ -877,6 +906,9 @@
 GLOBAL(void)
 jsimd_fdct_islow(DCTELEM *data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_fdct_islow_avx2(data);
   else if (simd_support & JSIMD_SSE2)
@@ -888,6 +920,9 @@
 GLOBAL(void)
 jsimd_fdct_ifast(DCTELEM *data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
     jsimd_fdct_ifast_sse2(data);
   else
@@ -897,6 +932,9 @@
 GLOBAL(void)
 jsimd_fdct_float(FAST_FLOAT *data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
     jsimd_fdct_float_sse(data);
   else if (simd_support & JSIMD_3DNOW)
@@ -952,6 +990,9 @@
 GLOBAL(void)
 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_quantize_avx2(coef_block, divisors, workspace);
   else if (simd_support & JSIMD_SSE2)
@@ -964,6 +1005,9 @@
 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
                      FAST_FLOAT *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_SSE2)
     jsimd_quantize_float_sse2(coef_block, divisors, workspace);
   else if (simd_support & JSIMD_SSE)
@@ -1027,6 +1071,9 @@
                JCOEFPTR coef_block, JSAMPARRAY output_buf,
                JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
     jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
                         output_col);
@@ -1039,6 +1086,9 @@
                JCOEFPTR coef_block, JSAMPARRAY output_buf,
                JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
     jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
                         output_col);
@@ -1133,6 +1183,9 @@
                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
                  JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
                           output_col);
@@ -1149,6 +1202,9 @@
                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
                  JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
     jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
                           output_col);
@@ -1162,6 +1218,9 @@
                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
                  JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
     jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
                           output_col);
@@ -1219,7 +1278,7 @@
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
   jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
                                          Sl, Al, values, zerobits);
@@ -1245,7 +1304,7 @@
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
                                                  jpeg_natural_order_start,
diff --git a/simd/jsimd.h b/simd/jsimd.h
index 64747c6..a28754a 100644
--- a/simd/jsimd.h
+++ b/simd/jsimd.h
@@ -2,10 +2,10 @@
  * simd/jsimd.h
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2011, 2014-2016, 2018, 2020, D. R. Commander.
+ * Copyright (C) 2011, 2014-2016, 2018, 2020, 2022, D. R. Commander.
  * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
  * Copyright (C) 2014, Linaro Limited.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
  * Copyright (C) 2020, Arm Limited.
  *
@@ -1243,16 +1243,16 @@
 /* Progressive Huffman encoding */
 EXTERN(void) jsimd_encode_mcu_AC_first_prepare_sse2
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits);
+   UJCOEF *values, size_t *zerobits);
 
 EXTERN(void) jsimd_encode_mcu_AC_first_prepare_neon
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits);
+   UJCOEF *values, size_t *zerobits);
 
 EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_sse2
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits);
+   UJCOEF *absvalues, size_t *bits);
 
 EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_neon
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits);
+   UJCOEF *absvalues, size_t *bits);
diff --git a/simd/x86_64/jchuff-sse2.asm b/simd/x86_64/jchuff-sse2.asm
index 0072028..9ea6df9 100644
--- a/simd/x86_64/jchuff-sse2.asm
+++ b/simd/x86_64/jchuff-sse2.asm
@@ -1,7 +1,7 @@
 ;
 ; jchuff-sse2.asm - Huffman entropy encoding (64-bit SSE2)
 ;
-; Copyright (C) 2009-2011, 2014-2016, 2019, D. R. Commander.
+; Copyright (C) 2009-2011, 2014-2016, 2019, 2021, D. R. Commander.
 ; Copyright (C) 2015, Matthieu Darbois.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
@@ -83,6 +83,7 @@
 times 1 << 12 db 13
 times 1 << 13 db 14
 times 1 << 14 db 15
+times 1 << 15 db 16
 
     alignz      32
 
diff --git a/simd/x86_64/jsimd.c b/simd/x86_64/jsimd.c
index eb76679..3f5ee77 100644
--- a/simd/x86_64/jsimd.c
+++ b/simd/x86_64/jsimd.c
@@ -2,8 +2,8 @@
  * jsimd_x86_64.c
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -21,7 +21,6 @@
 #include "../../jdct.h"
 #include "../../jsimddct.h"
 #include "../jsimd.h"
-#include "jconfigint.h"
 
 /*
  * In the PIC cases, we have no guarantee that constants will keep
@@ -32,19 +31,17 @@
 #define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
 #define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
 
-static unsigned int simd_support = (unsigned int)(~0);
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
+static THREAD_LOCAL unsigned int simd_huffman = 1;
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 LOCAL(void)
 init_simd(void)
 {
 #ifndef NO_GETENV
-  char *env = NULL;
+  char env[2] = { 0 };
 #endif
 
   if (simd_support != ~0U)
@@ -54,17 +51,13 @@
 
 #ifndef NO_GETENV
   /* Force different settings through environment variables */
-  env = getenv("JSIMD_FORCESSE2");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
     simd_support &= JSIMD_SSE2;
-  env = getenv("JSIMD_FORCEAVX2");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
     simd_support &= JSIMD_AVX2;
-  env = getenv("JSIMD_FORCENONE");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
     simd_support = 0;
-  env = getenv("JSIMD_NOHUFFENC");
-  if ((env != NULL) && (strcmp(env, "1") == 0))
+  if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
     simd_huffman = 0;
 #endif
 }
@@ -152,6 +145,9 @@
   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_extrgb_ycc_convert_avx2;
@@ -201,6 +197,9 @@
   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_extrgb_gray_convert_avx2;
@@ -250,6 +249,9 @@
   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_ycc_extrgb_convert_avx2;
@@ -340,6 +342,9 @@
 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
                                compptr->v_samp_factor,
@@ -356,6 +361,9 @@
 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
                                compptr->v_samp_factor,
@@ -410,6 +418,9 @@
 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
                              input_data, output_data_ptr);
@@ -422,6 +433,9 @@
 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
                              input_data, output_data_ptr);
@@ -476,6 +490,9 @@
 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
                                    compptr->downsampled_width, input_data,
@@ -490,6 +507,9 @@
 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
                                    compptr->downsampled_width, input_data,
@@ -549,6 +569,9 @@
   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
@@ -597,6 +620,9 @@
   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
@@ -686,6 +712,9 @@
 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
                DCTELEM *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_convsamp_avx2(sample_data, start_col, workspace);
   else
@@ -755,6 +784,9 @@
 GLOBAL(void)
 jsimd_fdct_islow(DCTELEM *data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_fdct_islow_avx2(data);
   else
@@ -816,6 +848,9 @@
 GLOBAL(void)
 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_quantize_avx2(coef_block, divisors, workspace);
   else
@@ -970,6 +1005,9 @@
                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
                  JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
                           output_col);
@@ -1040,7 +1078,7 @@
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
   jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
                                          Sl, Al, values, zerobits);
@@ -1064,7 +1102,7 @@
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
                                                  jpeg_natural_order_start,
diff --git a/tjbench.c b/tjbench.c
index 97475ec..7291723 100644
--- a/tjbench.c
+++ b/tjbench.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2009-2019, 2021 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2009-2019, 2021-2023 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -26,6 +26,10 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -62,8 +66,10 @@
     if (strncmp(tjErrorStr, _tjErrorStr, JMSG_LENGTH_MAX) || \
         strncmp(tjErrorMsg, m, JMSG_LENGTH_MAX) || \
         tjErrorCode != _tjErrorCode || tjErrorLine != __LINE__) { \
-      strncpy(tjErrorStr, _tjErrorStr, JMSG_LENGTH_MAX - 1); \
-      strncpy(tjErrorMsg, m, JMSG_LENGTH_MAX - 1); \
+      strncpy(tjErrorStr, _tjErrorStr, JMSG_LENGTH_MAX); \
+      tjErrorStr[JMSG_LENGTH_MAX - 1] = '\0'; \
+      strncpy(tjErrorMsg, m, JMSG_LENGTH_MAX); \
+      tjErrorMsg[JMSG_LENGTH_MAX - 1] = '\0'; \
       tjErrorCode = _tjErrorCode; \
       tjErrorLine = __LINE__; \
       fprintf(stderr, "WARNING in line %d while %s:\n%s\n", __LINE__, m, \
@@ -78,7 +84,7 @@
 }
 
 static int flags = TJFLAG_NOREALLOC, compOnly = 0, decompOnly = 0, doYUV = 0,
-  quiet = 0, doTile = 0, pf = TJPF_BGR, yuvPad = 1, doWrite = 1;
+  quiet = 0, doTile = 0, pf = TJPF_BGR, yuvAlign = 1, doWrite = 1;
 static char *ext = "ppm";
 static const char *pixFormatStr[TJ_NUMPF] = {
   "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY", "", "", "", "", "CMYK"
@@ -104,7 +110,7 @@
   if (cs == TJCS_YCbCr)
     return (char *)subNameLong[subsamp];
   else if (cs == TJCS_YCCK || cs == TJCS_CMYK) {
-    snprintf(buf, 80, "%s %s", csName[cs], subNameLong[subsamp]);
+    SNPRINTF(buf, 80, "%s %s", csName[cs], subNameLong[subsamp]);
     return buf;
   } else
     return (char *)csName[cs];
@@ -117,10 +123,10 @@
   int digitsAfterDecimal = figs - (int)ceil(log10(fabs(val)));
 
   if (digitsAfterDecimal < 1)
-    snprintf(format, 80, "%%.0f");
+    SNPRINTF(format, 80, "%%.0f");
   else
-    snprintf(format, 80, "%%.%df", digitsAfterDecimal);
-  snprintf(buf, len, format, val);
+    SNPRINTF(format, 80, "%%.%df", digitsAfterDecimal);
+  SNPRINTF(buf, len, format, val);
   return buf;
 }
 
@@ -157,7 +163,7 @@
   unsigned char *dstPtr, *dstPtr2, *yuvBuf = NULL;
 
   if (jpegQual > 0) {
-    snprintf(qualStr, 13, "_Q%d", jpegQual);
+    SNPRINTF(qualStr, 13, "_Q%d", jpegQual);
     qualStr[12] = 0;
   }
 
@@ -179,7 +185,7 @@
   if (doYUV) {
     int width = doTile ? tilew : scaledw;
     int height = doTile ? tileh : scaledh;
-    unsigned long yuvSize = tjBufSizeYUV2(width, yuvPad, height, subsamp);
+    unsigned long yuvSize = tjBufSizeYUV2(width, yuvAlign, height, subsamp);
 
     if (yuvSize == (unsigned long)-1)
       THROW_TJ("allocating YUV buffer");
@@ -206,10 +212,10 @@
           double startDecode;
 
           if (tjDecompressToYUV2(handle, jpegBuf[tile], jpegSize[tile], yuvBuf,
-                                 width, yuvPad, height, flags) == -1)
+                                 width, yuvAlign, height, flags) == -1)
             THROW_TJ("executing tjDecompressToYUV2()");
           startDecode = getTime();
-          if (tjDecodeYUV(handle, yuvBuf, yuvPad, subsamp, dstPtr2, width,
+          if (tjDecodeYUV(handle, yuvBuf, yuvAlign, subsamp, dstPtr2, width,
                           pitch, height, pf, flags) == -1)
             THROW_TJ("executing tjDecodeYUV()");
           if (iter >= 0) elapsedDecode += getTime() - startDecode;
@@ -261,23 +267,23 @@
   if (!doWrite) goto bailout;
 
   if (sf.num != 1 || sf.denom != 1)
-    snprintf(sizeStr, 24, "%d_%d", sf.num, sf.denom);
+    SNPRINTF(sizeStr, 24, "%d_%d", sf.num, sf.denom);
   else if (tilew != w || tileh != h)
-    snprintf(sizeStr, 24, "%dx%d", tilew, tileh);
-  else snprintf(sizeStr, 24, "full");
+    SNPRINTF(sizeStr, 24, "%dx%d", tilew, tileh);
+  else SNPRINTF(sizeStr, 24, "full");
   if (decompOnly)
-    snprintf(tempStr, 1024, "%s_%s.%s", fileName, sizeStr, ext);
+    SNPRINTF(tempStr, 1024, "%s_%s.%s", fileName, sizeStr, ext);
   else
-    snprintf(tempStr, 1024, "%s_%s%s_%s.%s", fileName, subName[subsamp],
+    SNPRINTF(tempStr, 1024, "%s_%s%s_%s.%s", fileName, subName[subsamp],
              qualStr, sizeStr, ext);
 
   if (tjSaveImage(tempStr, dstBuf, scaledw, 0, scaledh, pf, flags) == -1)
-    THROW_TJG("saving bitmap");
+    THROW_TJG("saving output image");
   ptr = strrchr(tempStr, '.');
-  snprintf(ptr, 1024 - (ptr - tempStr), "-err.%s", ext);
+  SNPRINTF(ptr, 1024 - (ptr - tempStr), "-err.%s", ext);
   if (srcBuf && sf.num == 1 && sf.denom == 1) {
     if (!quiet) fprintf(stderr, "Compression error written to %s.\n", tempStr);
-    if (subsamp == TJ_GRAYSCALE) {
+    if (subsamp == TJSAMP_GRAY) {
       unsigned long index, index2;
 
       for (row = 0, index = 0; row < h; row++, index += pitch) {
@@ -291,19 +297,20 @@
 
           if (y > 255) y = 255;
           if (y < 0) y = 0;
-          dstBuf[rindex] = abs(dstBuf[rindex] - y);
-          dstBuf[gindex] = abs(dstBuf[gindex] - y);
-          dstBuf[bindex] = abs(dstBuf[bindex] - y);
+          dstBuf[rindex] = (unsigned char)abs(dstBuf[rindex] - y);
+          dstBuf[gindex] = (unsigned char)abs(dstBuf[gindex] - y);
+          dstBuf[bindex] = (unsigned char)abs(dstBuf[bindex] - y);
         }
       }
     } else {
       for (row = 0; row < h; row++)
         for (col = 0; col < w * ps; col++)
           dstBuf[pitch * row + col] =
-            abs(dstBuf[pitch * row + col] - srcBuf[pitch * row + col]);
+            (unsigned char)abs(dstBuf[pitch * row + col] -
+                               srcBuf[pitch * row + col]);
     }
     if (tjSaveImage(tempStr, dstBuf, w, 0, h, pf, flags) == -1)
-      THROW_TJG("saving bitmap");
+      THROW_TJG("saving output image");
   }
 
 bailout:
@@ -378,7 +385,7 @@
       THROW_TJ("executing tjInitCompress()");
 
     if (doYUV) {
-      yuvSize = tjBufSizeYUV2(tilew, yuvPad, tileh, subsamp);
+      yuvSize = tjBufSizeYUV2(tilew, yuvAlign, tileh, subsamp);
       if (yuvSize == (unsigned long)-1)
         THROW_TJ("allocating YUV buffer");
       if ((yuvBuf = (unsigned char *)malloc(yuvSize)) == NULL)
@@ -405,10 +412,10 @@
             double startEncode = getTime();
 
             if (tjEncodeYUV3(handle, srcPtr2, width, pitch, height, pf, yuvBuf,
-                             yuvPad, subsamp, flags) == -1)
+                             yuvAlign, subsamp, flags) == -1)
               THROW_TJ("executing tjEncodeYUV3()");
             if (iter >= 0) elapsedEncode += getTime() - startEncode;
-            if (tjCompressFromYUV(handle, yuvBuf, width, yuvPad, height,
+            if (tjCompressFromYUV(handle, yuvBuf, width, yuvAlign, height,
                                   subsamp, &jpegBuf[tile], &jpegSize[tile],
                                   jpegQual, flags) == -1)
               THROW_TJ("executing tjCompressFromYUV()");
@@ -482,7 +489,7 @@
               (double)totalJpegSize * 8. / 1000000. * (double)iter / elapsed);
     }
     if (tilew == w && tileh == h && doWrite) {
-      snprintf(tempStr, 1024, "%s_%s_Q%d.jpg", fileName, subName[subsamp],
+      SNPRINTF(tempStr, 1024, "%s_%s_Q%d.jpg", fileName, subName[subsamp],
                jpegQual);
       if ((file = fopen(tempStr, "wb")) == NULL)
         THROW_UNIX("opening reference image");
@@ -573,7 +580,7 @@
   if (quiet == 1) {
     fprintf(stderr, "All performance values in Mpixels/sec\n\n");
     fprintf(stderr,
-            "Bitmap     JPEG   JPEG     %s  %s   Xform   Comp    Decomp  ",
+            "Pixel     JPEG   JPEG     %s  %s   Xform   Comp    Decomp  ",
             doTile ? "Tile " : "Image", doTile ? "Tile " : "Image");
     if (doYUV) fprintf(stderr, "Decode");
     fprintf(stderr, "\n");
@@ -637,7 +644,7 @@
         tw = h;  th = w;  ttilew = tileh;  ttileh = tilew;
       }
 
-      if (xformOpt & TJXOPT_GRAY) tsubsamp = TJ_GRAYSCALE;
+      if (xformOpt & TJXOPT_GRAY) tsubsamp = TJSAMP_GRAY;
       if (xformOp == TJXOP_HFLIP || xformOp == TJXOP_ROT180)
         tw = tw - (tw % tjMCUWidth[tsubsamp]);
       if (xformOp == TJXOP_VFLIP || xformOp == TJXOP_ROT180)
@@ -699,7 +706,7 @@
                 sigfig((double)(w * h * ps) / (double)totalJpegSize, 4,
                        tempStr2, 80),
                 quiet == 2 ? "\n" : "  ");
-      } else if (!quiet) {
+      } else {
         fprintf(stderr, "Transform     --> Frame rate:         %f fps\n",
                 1.0 / elapsed);
         fprintf(stderr, "                  Output image size:  %lu bytes\n",
@@ -759,38 +766,34 @@
   int i;
 
   printf("USAGE: %s\n", progName);
-  printf("       <Inputfile (BMP|PPM)> <Quality> [options]\n\n");
+  printf("       <Inputimage (BMP|PPM)> <Quality> [options]\n\n");
   printf("       %s\n", progName);
-  printf("       <Inputfile (JPG)> [options]\n\n");
+  printf("       <Inputimage (JPG)> [options]\n\n");
   printf("Options:\n\n");
-  printf("-alloc = Dynamically allocate JPEG image buffers\n");
-  printf("-bmp = Generate output images in Windows Bitmap format (default = PPM)\n");
-  printf("-bottomup = Test bottom-up compression/decompression\n");
-  printf("-tile = Test performance of the codec when the image is encoded as separate\n");
-  printf("     tiles of varying sizes.\n");
+  printf("-alloc = Dynamically allocate JPEG buffers\n");
+  printf("-bmp = Use Windows Bitmap format for output images [default = PPM]\n");
+  printf("-bottomup = Use bottom-up row order for packed-pixel source/destination buffers\n");
+  printf("-tile = Compress/transform the input image into separate JPEG tiles of varying\n");
+  printf("     sizes (useful for measuring JPEG overhead)\n");
   printf("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =\n");
-  printf("     Test the specified color conversion path in the codec (default = BGR)\n");
-  printf("-cmyk = Indirectly test YCCK JPEG compression/decompression (the source\n");
-  printf("     and destination bitmaps are still RGB.  The conversion is done\n");
-  printf("     internally prior to compression or after decompression.)\n");
-  printf("-fastupsample = Use the fastest chrominance upsampling algorithm available in\n");
-  printf("     the underlying codec\n");
-  printf("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying\n");
-  printf("     codec\n");
-  printf("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the\n");
-  printf("     underlying codec\n");
+  printf("     Use the specified pixel format for packed-pixel source/destination buffers\n");
+  printf("     [default = BGR]\n");
+  printf("-cmyk = Indirectly test YCCK JPEG compression/decompression\n");
+  printf("     (use the CMYK pixel format for packed-pixel source/destination buffers)\n");
+  printf("-fastupsample = Use the fastest chrominance upsampling algorithm available\n");
+  printf("-fastdct = Use the fastest DCT/IDCT algorithm available\n");
+  printf("-accuratedct = Use the most accurate DCT/IDCT algorithm available\n");
   printf("-progressive = Use progressive entropy coding in JPEG images generated by\n");
-  printf("     compression and transform operations.\n");
-  printf("-subsamp <s> = When testing JPEG compression, this option specifies the level\n");
-  printf("     of chrominance subsampling to use (<s> = 444, 422, 440, 420, 411, or\n");
-  printf("     GRAY).  The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in\n");
-  printf("     sequence.\n");
+  printf("     compression and transform operations\n");
+  printf("-subsamp <s> = When compressing, use the specified level of chrominance\n");
+  printf("     subsampling (<s> = 444, 422, 440, 420, 411, or GRAY) [default = test\n");
+  printf("     Grayscale, 4:2:0, 4:2:2, and 4:4:4 in sequence]\n");
   printf("-quiet = Output results in tabular rather than verbose format\n");
-  printf("-yuv = Test YUV encoding/decoding functions\n");
-  printf("-yuvpad <p> = If testing YUV encoding/decoding, this specifies the number of\n");
-  printf("     bytes to which each row of each plane in the intermediate YUV image is\n");
-  printf("     padded (default = 1)\n");
-  printf("-scale M/N = Scale down the width/height of the decompressed JPEG image by a\n");
+  printf("-yuv = Compress from/decompress to intermediate planar YUV images\n");
+  printf("-yuvpad <p> = The number of bytes by which each row in each plane of an\n");
+  printf("     intermediate YUV image is evenly divisible (must be a power of 2)\n");
+  printf("     [default = 1]\n");
+  printf("-scale M/N = When decompressing, scale the width/height of the JPEG image by a\n");
   printf("     factor of M/N (M/N = ");
   for (i = 0; i < nsf; i++) {
     printf("%d/%d", scalingFactors[i].num, scalingFactors[i].denom);
@@ -803,24 +806,24 @@
   }
   printf(")\n");
   printf("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =\n");
-  printf("     Perform the corresponding lossless transform prior to\n");
-  printf("     decompression (these options are mutually exclusive)\n");
-  printf("-grayscale = Perform lossless grayscale conversion prior to decompression\n");
-  printf("     test (can be combined with the other transforms above)\n");
+  printf("     Perform the specified lossless transform operation on the input image\n");
+  printf("     prior to decompression (these operations are mutually exclusive)\n");
+  printf("-grayscale = Transform the input image into a grayscale JPEG image prior to\n");
+  printf("     decompression (can be combined with the other transform operations above)\n");
   printf("-copynone = Do not copy any extra markers (including EXIF and ICC profile data)\n");
-  printf("     when transforming the image.\n");
-  printf("-benchtime <t> = Run each benchmark for at least <t> seconds (default = 5.0)\n");
-  printf("-warmup <t> = Run each benchmark for <t> seconds (default = 1.0) prior to\n");
+  printf("     when transforming the input image\n");
+  printf("-benchtime <t> = Run each benchmark for at least <t> seconds [default = 5.0]\n");
+  printf("-warmup <t> = Run each benchmark for <t> seconds [default = 1.0] prior to\n");
   printf("     starting the timer, in order to prime the caches and thus improve the\n");
-  printf("     consistency of the results.\n");
+  printf("     consistency of the benchmark results\n");
   printf("-componly = Stop after running compression tests.  Do not test decompression.\n");
   printf("-nowrite = Do not write reference or output images (improves consistency of\n");
-  printf("     performance measurements.)\n");
+  printf("     benchmark results)\n");
   printf("-limitscans = Refuse to decompress or transform progressive JPEG images that\n");
   printf("     have an unreasonably large number of scans\n");
   printf("-stoponwarning = Immediately discontinue the current\n");
-  printf("     compression/decompression/transform operation if the underlying codec\n");
-  printf("     throws a warning (non-fatal error)\n\n");
+  printf("     compression/decompression/transform operation if a warning (non-fatal\n");
+  printf("     error) occurs\n\n");
   printf("NOTE:  If the quality is specified as a range (e.g. 90-100), a separate\n");
   printf("test will be performed for all quality values in the range.\n\n");
   exit(1);
@@ -869,7 +872,7 @@
       if (!strcasecmp(argv[i], "-tile")) {
         doTile = 1;  xformOpt |= TJXOPT_CROP;
       } else if (!strcasecmp(argv[i], "-fastupsample")) {
-        fprintf(stderr, "Using fast upsampling code\n\n");
+        fprintf(stderr, "Using fastest upsampling algorithm\n\n");
         flags |= TJFLAG_FASTUPSAMPLE;
       } else if (!strcasecmp(argv[i], "-fastdct")) {
         fprintf(stderr, "Using fastest DCT/IDCT algorithm\n\n");
@@ -880,6 +883,7 @@
       } else if (!strcasecmp(argv[i], "-progressive")) {
         fprintf(stderr, "Using progressive entropy coding\n\n");
         flags |= TJFLAG_PROGRESSIVE;
+        xformOpt |= TJXOPT_PROGRESSIVE;
       } else if (!strcasecmp(argv[i], "-rgb"))
         pf = TJPF_RGB;
       else if (!strcasecmp(argv[i], "-rgbx"))
@@ -952,12 +956,13 @@
       else if (!strcasecmp(argv[i], "-bmp"))
         ext = "bmp";
       else if (!strcasecmp(argv[i], "-yuv")) {
-        fprintf(stderr, "Testing YUV planar encoding/decoding\n\n");
+        fprintf(stderr, "Testing planar YUV encoding/decoding\n\n");
         doYUV = 1;
       } else if (!strcasecmp(argv[i], "-yuvpad") && i < argc - 1) {
         int tempi = atoi(argv[++i]);
 
-        if (tempi >= 1) yuvPad = tempi;
+        if (tempi >= 1 && (tempi & (tempi - 1)) == 0) yuvAlign = tempi;
+        else usage(argv[0]);
       } else if (!strcasecmp(argv[i], "-subsamp") && i < argc - 1) {
         i++;
         if (toupper(argv[i][0]) == 'G') subsamp = TJSAMP_GRAY;
@@ -970,6 +975,7 @@
           case 440:  subsamp = TJSAMP_440;  break;
           case 420:  subsamp = TJSAMP_420;  break;
           case 411:  subsamp = TJSAMP_411;  break;
+          default:  usage(argv[0]);
           }
         }
       } else if (!strcasecmp(argv[i], "-componly"))
@@ -986,26 +992,26 @@
 
   if ((sf.num != 1 || sf.denom != 1) && doTile) {
     fprintf(stderr, "Disabling tiled compression/decompression tests, because those tests do not\n");
-    fprintf(stderr, "work when scaled decompression is enabled.\n");
-    doTile = 0;
+    fprintf(stderr, "work when scaled decompression is enabled.\n\n");
+    doTile = 0;  xformOpt &= (~TJXOPT_CROP);
   }
 
   if ((flags & TJFLAG_NOREALLOC) == 0 && doTile) {
     fprintf(stderr, "Disabling tiled compression/decompression tests, because those tests do not\n");
     fprintf(stderr, "work when dynamic JPEG buffer allocation is enabled.\n\n");
-    doTile = 0;
+    doTile = 0;  xformOpt &= (~TJXOPT_CROP);
   }
 
   if (!decompOnly) {
     if ((srcBuf = tjLoadImage(argv[1], &w, 1, &h, &pf, flags)) == NULL)
-      THROW_TJG("loading bitmap");
+      THROW_TJG("loading input image");
     temp = strrchr(argv[1], '.');
     if (temp != NULL) *temp = '\0';
   }
 
   if (quiet == 1 && !decompOnly) {
     fprintf(stderr, "All performance values in Mpixels/sec\n\n");
-    fprintf(stderr, "Bitmap     JPEG     JPEG  %s  %s   ",
+    fprintf(stderr, "Pixel      JPEG     JPEG  %s  %s   ",
             doTile ? "Tile " : "Image", doTile ? "Tile " : "Image");
     if (doYUV) fprintf(stderr, "Encode  ");
     fprintf(stderr, "Comp    Comp    Decomp  ");
diff --git a/tjunittest.c b/tjunittest.c
index af409a5..d5211f1 100644
--- a/tjunittest.c
+++ b/tjunittest.c
@@ -1,5 +1,6 @@
 /*
- * Copyright (C)2009-2014, 2017-2019 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2009-2014, 2017-2019, 2022-2023 D. R. Commander.
+ *                                              All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -30,9 +31,14 @@
  * This program tests the various code paths in the TurboJPEG C Wrapper
  */
 
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>
 #include <errno.h>
 #include "tjutil.h"
 #include "turbojpeg.h"
@@ -51,11 +57,11 @@
 {
   printf("\nUSAGE: %s [options]\n\n", progName);
   printf("Options:\n");
-  printf("-yuv = test YUV encoding/decoding support\n");
-  printf("-noyuvpad = do not pad each line of each Y, U, and V plane to the nearest\n");
-  printf("            4-byte boundary\n");
-  printf("-alloc = test automatic buffer allocation\n");
-  printf("-bmp = tjLoadImage()/tjSaveImage() unit test\n\n");
+  printf("-yuv = test YUV encoding/compression/decompression/decoding\n");
+  printf("-noyuvpad = do not pad each row in each Y, U, and V plane to the nearest\n");
+  printf("            multiple of 4 bytes\n");
+  printf("-alloc = test automatic JPEG buffer allocation\n");
+  printf("-bmp = test packed-pixel image I/O\n");
   exit(1);
 }
 #endif
@@ -92,7 +98,7 @@
 static const int _onlyGray[] = { TJPF_GRAY };
 static const int _onlyRGB[] = { TJPF_RGB };
 
-static int doYUV = 0, alloc = 0, pad = 4;
+static int doYUV = 0, alloc = 0, yuvAlign = 4;
 
 static int exitStatus = 0;
 #define BAILOUT() { exitStatus = -1;  goto bailout; }
@@ -284,7 +290,7 @@
   int hsf = tjMCUWidth[subsamp] / 8, vsf = tjMCUHeight[subsamp] / 8;
   int pw = PAD(w, hsf), ph = PAD(h, vsf);
   int cw = pw / hsf, ch = ph / vsf;
-  int ypitch = PAD(pw, pad), uvpitch = PAD(cw, pad);
+  int ypitch = PAD(pw, yuvAlign), uvpitch = PAD(cw, yuvAlign);
   int retval = 1;
   int halfway = 16 * sf.num / sf.denom;
   int blocksize = 8 * sf.num / sf.denom;
@@ -354,7 +360,7 @@
 {
 #if defined(ANDROID) && defined(GTEST)
   char path[filePathSize];
-  snprintf(path, filePathSize, "/sdcard/%s", filename);
+  SNPRINTF(path, filePathSize, "/sdcard/%s", filename);
   FILE *file = fopen(path, "wb");
 #else
   FILE *file = fopen(filename, "wb");
@@ -389,7 +395,7 @@
 
   if (!alloc) flags |= TJFLAG_NOREALLOC;
   if (doYUV) {
-    unsigned long yuvSize = tjBufSizeYUV2(w, pad, h, subsamp);
+    unsigned long yuvSize = tjBufSizeYUV2(w, yuvAlign, h, subsamp);
     tjscalingfactor sf = { 1, 1 };
     tjhandle handle2 = tjInitCompress();
 
@@ -401,15 +407,15 @@
 
     fprintf(stderr, "%s %s -> YUV %s ... ", pfStr, buStrLong,
             subNameLong[subsamp]);
-    TRY_TJ(tjEncodeYUV3(handle2, srcBuf, w, 0, h, pf, yuvBuf, pad, subsamp,
-                        flags));
+    TRY_TJ(tjEncodeYUV3(handle2, srcBuf, w, 0, h, pf, yuvBuf, yuvAlign,
+                        subsamp, flags));
     tjDestroy(handle2);
     if (checkBufYUV(yuvBuf, w, h, subsamp, sf)) fprintf(stderr, "Passed.\n");
     else fprintf(stderr, "FAILED!\n");
 
     fprintf(stderr, "YUV %s %s -> JPEG Q%d ... ", subNameLong[subsamp],
             buStrLong, jpegQual);
-    TRY_TJ(tjCompressFromYUV(handle, yuvBuf, w, pad, h, subsamp, dstBuf,
+    TRY_TJ(tjCompressFromYUV(handle, yuvBuf, w, yuvAlign, h, subsamp, dstBuf,
                              dstSize, jpegQual, flags));
   } else {
     fprintf(stderr, "%s %s -> %s Q%d ... ", pfStr, buStrLong,
@@ -418,7 +424,7 @@
                        jpegQual, flags));
   }
 
-  snprintf(tempStr, filePathSize, "%s_enc_%s_%s_%s_Q%d.jpg", basename, pfStr,
+  SNPRINTF(tempStr, filePathSize, "%s_enc_%s_%s_%s_Q%d.jpg", basename, pfStr,
            buStr, subName[subsamp], jpegQual);
   writeJPEG(*dstBuf, *dstSize, tempStr);
   fprintf(stderr, "Done.\n  Result in %s\n", tempStr);
@@ -451,7 +457,7 @@
   memset(dstBuf, 0, dstSize);
 
   if (doYUV) {
-    unsigned long yuvSize = tjBufSizeYUV2(scaledWidth, pad, scaledHeight,
+    unsigned long yuvSize = tjBufSizeYUV2(scaledWidth, yuvAlign, scaledHeight,
                                           subsamp);
     tjhandle handle2 = tjInitDecompress();
 
@@ -465,8 +471,12 @@
     if (sf.num != 1 || sf.denom != 1)
       fprintf(stderr, "%d/%d ... ", sf.num, sf.denom);
     else fprintf(stderr, "... ");
-    TRY_TJ(tjDecompressToYUV2(handle, jpegBuf, jpegSize, yuvBuf, scaledWidth,
-                              pad, scaledHeight, flags));
+    /* We pass scaledWidth + 1 and scaledHeight + 1 to validate that
+       tjDecompressToYUV2() generates the largest possible scaled image that
+       fits within the desired dimensions, as documented. */
+    TRY_TJ(tjDecompressToYUV2(handle, jpegBuf, jpegSize, yuvBuf,
+                              scaledWidth + 1, yuvAlign, scaledHeight + 1,
+                              flags));
     if (checkBufYUV(yuvBuf, scaledWidth, scaledHeight, subsamp, sf))
       fprintf(stderr, "Passed.\n");
     else fprintf(stderr, "FAILED!\n");
@@ -474,7 +484,7 @@
     fprintf(stderr, "YUV %s -> %s %s ... ", subNameLong[subsamp],
             pixFormatStr[pf],
             (flags & TJFLAG_BOTTOMUP) ? "Bottom-Up" : "Top-Down ");
-    TRY_TJ(tjDecodeYUV(handle2, yuvBuf, pad, subsamp, dstBuf, scaledWidth, 0,
+    TRY_TJ(tjDecodeYUV(handle2, yuvBuf, yuvAlign, subsamp, dstBuf, scaledWidth, 0,
                        scaledHeight, pf, flags));
     tjDestroy(handle2);
   } else {
@@ -483,8 +493,11 @@
     if (sf.num != 1 || sf.denom != 1)
       fprintf(stderr, "%d/%d ... ", sf.num, sf.denom);
     else fprintf(stderr, "... ");
-    TRY_TJ(tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, scaledWidth, 0,
-                         scaledHeight, pf, flags));
+    /* We pass scaledWidth + 1 and scaledHeight + 1 to validate that
+       tjDecompress2() generates the largest possible scaled image that fits
+       within the desired dimensions, as documented. */
+    TRY_TJ(tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, scaledWidth + 1, 0,
+                         scaledHeight + 1, pf, flags));
   }
 
   if (checkBuf(dstBuf, scaledWidth, scaledHeight, pf, subsamp, sf, flags))
@@ -581,12 +594,17 @@
     THROW(#function " overflow"); \
 }
 #endif
+#define CHECKSIZEINT(function) { \
+  if (intsize != -1 || !strcmp(tjGetErrorStr2(NULL), "No error")) \
+    THROW(#function " overflow"); \
+}
 
 #ifndef GTEST
 static void overflowTest(void)
 {
   /* Ensure that the various buffer size functions don't overflow */
   unsigned long size;
+  int intsize;
 
   size = tjBufSize(26755, 26755, TJSAMP_444);
   CHECKSIZE(tjBufSize());
@@ -594,12 +612,20 @@
   CHECKSIZE(TJBUFSIZE());
   size = tjBufSizeYUV2(37838, 1, 37838, TJSAMP_444);
   CHECKSIZE(tjBufSizeYUV2());
+  size = tjBufSizeYUV2(37837, 3, 37837, TJSAMP_444);
+  CHECKSIZE(tjBufSizeYUV2());
+  size = tjBufSizeYUV2(37837, -1, 37837, TJSAMP_444);
+  CHECKSIZE(tjBufSizeYUV2());
   size = TJBUFSIZEYUV(37838, 37838, TJSAMP_444);
   CHECKSIZE(TJBUFSIZEYUV());
   size = tjBufSizeYUV(37838, 37838, TJSAMP_444);
   CHECKSIZE(tjBufSizeYUV());
   size = tjPlaneSizeYUV(0, 65536, 0, 65536, TJSAMP_444);
   CHECKSIZE(tjPlaneSizeYUV());
+  intsize = tjPlaneWidth(0, INT_MAX, TJSAMP_420);
+  CHECKSIZEINT(tjPlaneWidth());
+  intsize = tjPlaneHeight(0, INT_MAX, TJSAMP_420);
+  CHECKSIZEINT(tjPlaneHeight());
 
 bailout:
   return;
@@ -627,7 +653,7 @@
         if ((srcBuf = (unsigned char *)malloc(w * h * 4)) == NULL)
           THROW("Memory allocation failure");
         if (!alloc || doYUV) {
-          if (doYUV) dstSize = tjBufSizeYUV2(w, pad, h, subsamp);
+          if (doYUV) dstSize = tjBufSizeYUV2(w, yuvAlign, h, subsamp);
           else dstSize = tjBufSize(w, h, subsamp);
           if ((dstBuf = (unsigned char *)tjAlloc(dstSize)) == NULL)
             THROW("Memory allocation failure");
@@ -639,8 +665,8 @@
         }
 
         if (doYUV) {
-          TRY_TJ(tjEncodeYUV3(handle, srcBuf, w, 0, h, TJPF_BGRX, dstBuf, pad,
-                              subsamp, 0));
+          TRY_TJ(tjEncodeYUV3(handle, srcBuf, w, 0, h, TJPF_BGRX, dstBuf,
+                              yuvAlign, subsamp, 0));
         } else {
           TRY_TJ(tjCompress2(handle, srcBuf, w, 0, h, TJPF_BGRX, &dstBuf,
                              &dstSize, subsamp, 100,
@@ -654,7 +680,7 @@
         if ((srcBuf = (unsigned char *)malloc(h * w * 4)) == NULL)
           THROW("Memory allocation failure");
         if (!alloc || doYUV) {
-          if (doYUV) dstSize = tjBufSizeYUV2(h, pad, w, subsamp);
+          if (doYUV) dstSize = tjBufSizeYUV2(h, yuvAlign, w, subsamp);
           else dstSize = tjBufSize(h, w, subsamp);
           if ((dstBuf = (unsigned char *)tjAlloc(dstSize)) == NULL)
             THROW("Memory allocation failure");
@@ -666,8 +692,8 @@
         }
 
         if (doYUV) {
-          TRY_TJ(tjEncodeYUV3(handle, srcBuf, h, 0, w, TJPF_BGRX, dstBuf, pad,
-                              subsamp, 0));
+          TRY_TJ(tjEncodeYUV3(handle, srcBuf, h, 0, w, TJPF_BGRX, dstBuf,
+                              yuvAlign, subsamp, 0));
         } else {
           TRY_TJ(tjCompress2(handle, srcBuf, h, 0, w, TJPF_BGRX, &dstBuf,
                              &dstSize, subsamp, 100,
@@ -797,11 +823,11 @@
   initBitmap(buf, width, pitch, height, pf, flags);
 
 #if defined(ANDROID) && defined(GTEST)
-  snprintf(filename, filenameSize, "/sdcard/test_bmp_%s_%d_%s.%s",
+  SNPRINTF(filename, filenameSize, "/sdcard/test_bmp_%s_%d_%s.%s",
            pixFormatStr[pf], align, (flags & TJFLAG_BOTTOMUP) ? "bu" : "td",
            ext);
 #else
-  snprintf(filename, filenameSize, "test_bmp_%s_%d_%s.%s", pixFormatStr[pf],
+  SNPRINTF(filename, filenameSize, "test_bmp_%s_%d_%s.%s", pixFormatStr[pf],
            align, (flags & TJFLAG_BOTTOMUP) ? "bu" : "td", ext);
 #endif
   TRY_TJ(tjSaveImage(filename, buf, width, pitch, height, pf, flags));
@@ -913,7 +939,7 @@
 static void initTJUnitTest(int yuv, int noyuvpad, int autoalloc)
 {
   doYUV = yuv ? 1 : 0;
-  pad = noyuvpad ? 1 : 4;
+  yuvAlign = noyuvpad ? 1 : 4;
   alloc = autoalloc ? 1 : 0;
 
   exitStatus = 0;
@@ -1133,7 +1159,7 @@
   if (argc > 1) {
     for (i = 1; i < argc; i++) {
       if (!strcasecmp(argv[i], "-yuv")) doYUV = 1;
-      else if (!strcasecmp(argv[i], "-noyuvpad")) pad = 1;
+      else if (!strcasecmp(argv[i], "-noyuvpad")) yuvAlign = 1;
       else if (!strcasecmp(argv[i], "-alloc")) alloc = 1;
       else if (!strcasecmp(argv[i], "-bmp")) return bmpTest();
       else usage(argv[0]);
diff --git a/tjutil.h b/tjutil.h
index 8542bab..10272e9 100644
--- a/tjutil.h
+++ b/tjutil.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2011, 2022 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -27,14 +27,20 @@
  */
 
 #ifdef _WIN32
-#ifndef __MINGW32__
-#include <stdio.h>
-#define snprintf(str, n, format, ...) \
-  _snprintf_s(str, n, _TRUNCATE, format, ##__VA_ARGS__)
-#endif
+#ifndef strcasecmp
 #define strcasecmp  stricmp
+#endif
+#ifndef strncasecmp
 #define strncasecmp  strnicmp
 #endif
+#endif
+
+#ifdef _MSC_VER
+#define SNPRINTF(str, n, format, ...) \
+  _snprintf_s(str, n, _TRUNCATE, format, ##__VA_ARGS__)
+#else
+#define SNPRINTF  snprintf
+#endif
 
 #ifndef min
 #define min(a, b)  ((a) < (b) ? (a) : (b))
diff --git a/transupp.c b/transupp.c
index 6e86077..78dc91b 100644
--- a/transupp.c
+++ b/transupp.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1997-2019, Thomas G. Lane, Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2017, D. R. Commander.
+ * Copyright (C) 2010, 2017, 2021-2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -143,7 +143,7 @@
         for (k = 0; k < DCTSIZE2; k++) {
           temp = qtblptr->quantval[k];
           qval = qtblptr1->quantval[k];
-          if (temp != qval) {
+          if (temp != qval && qval != 0) {
             temp *= ptr[k];
             /* The following quantization code is copied from jcdctmgr.c */
 #ifdef FAST_DIVIDE
@@ -262,8 +262,8 @@
         }
       } else {
         for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-          MEMZERO(dst_buffer[offset_y] + x_drop_blocks,
-                  comp_width * sizeof(JBLOCK));
+          memset(dst_buffer[offset_y] + x_drop_blocks, 0,
+                 comp_width * sizeof(JBLOCK));
         }
       }
     }
@@ -345,8 +345,8 @@
         if (dst_blk_y < y_crop_blocks ||
             dst_blk_y >= y_crop_blocks + comp_height) {
           for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-            MEMZERO(dst_buffer[offset_y],
-                    compptr->width_in_blocks * sizeof(JBLOCK));
+            memset(dst_buffer[offset_y], 0,
+                   compptr->width_in_blocks * sizeof(JBLOCK));
           }
           continue;
         }
@@ -363,14 +363,14 @@
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
         if (dstinfo->_jpeg_width > srcinfo->output_width) {
           if (x_crop_blocks > 0) {
-            MEMZERO(dst_buffer[offset_y], x_crop_blocks * sizeof(JBLOCK));
+            memset(dst_buffer[offset_y], 0, x_crop_blocks * sizeof(JBLOCK));
           }
           jcopy_block_row(src_buffer[offset_y],
                           dst_buffer[offset_y] + x_crop_blocks, comp_width);
           if (compptr->width_in_blocks > x_crop_blocks + comp_width) {
-            MEMZERO(dst_buffer[offset_y] + x_crop_blocks + comp_width,
-                    (compptr->width_in_blocks - x_crop_blocks - comp_width) *
-                    sizeof(JBLOCK));
+            memset(dst_buffer[offset_y] + x_crop_blocks + comp_width, 0,
+                   (compptr->width_in_blocks - x_crop_blocks - comp_width) *
+                   sizeof(JBLOCK));
           }
         } else {
           jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
@@ -421,8 +421,8 @@
         if (dst_blk_y < y_crop_blocks ||
             dst_blk_y >= y_crop_blocks + comp_height) {
           for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-            MEMZERO(dst_buffer[offset_y],
-                    compptr->width_in_blocks * sizeof(JBLOCK));
+            memset(dst_buffer[offset_y], 0,
+                   compptr->width_in_blocks * sizeof(JBLOCK));
           }
           continue;
         }
@@ -438,7 +438,7 @@
       }
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
         if (x_crop_blocks > 0) {
-          MEMZERO(dst_buffer[offset_y], x_crop_blocks * sizeof(JBLOCK));
+          memset(dst_buffer[offset_y], 0, x_crop_blocks * sizeof(JBLOCK));
           dc = src_buffer[offset_y][0][0];
           for (dst_blk_x = 0; dst_blk_x < x_crop_blocks; dst_blk_x++) {
             dst_buffer[offset_y][dst_blk_x][0] = dc;
@@ -447,9 +447,9 @@
         jcopy_block_row(src_buffer[offset_y],
                         dst_buffer[offset_y] + x_crop_blocks, comp_width);
         if (compptr->width_in_blocks > x_crop_blocks + comp_width) {
-          MEMZERO(dst_buffer[offset_y] + x_crop_blocks + comp_width,
-                  (compptr->width_in_blocks - x_crop_blocks - comp_width) *
-                  sizeof(JBLOCK));
+          memset(dst_buffer[offset_y] + x_crop_blocks + comp_width, 0,
+                 (compptr->width_in_blocks - x_crop_blocks - comp_width) *
+                 sizeof(JBLOCK));
           dc = src_buffer[offset_y][comp_width - 1][0];
           for (dst_blk_x = x_crop_blocks + comp_width;
                dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
@@ -502,8 +502,8 @@
         if (dst_blk_y < y_crop_blocks ||
             dst_blk_y >= y_crop_blocks + comp_height) {
           for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-            MEMZERO(dst_buffer[offset_y],
-                    compptr->width_in_blocks * sizeof(JBLOCK));
+            memset(dst_buffer[offset_y], 0,
+                   compptr->width_in_blocks * sizeof(JBLOCK));
           }
           continue;
         }
@@ -591,7 +591,8 @@
         ((j_common_ptr)srcinfo, src_coef_arrays[ci], y_wipe_blocks,
          (JDIMENSION)compptr->v_samp_factor, TRUE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-        MEMZERO(buffer[offset_y] + x_wipe_blocks, wipe_width * sizeof(JBLOCK));
+        memset(buffer[offset_y] + x_wipe_blocks, 0,
+               wipe_width * sizeof(JBLOCK));
       }
     }
   }
@@ -626,7 +627,8 @@
         ((j_common_ptr)srcinfo, src_coef_arrays[ci], y_wipe_blocks,
          (JDIMENSION)compptr->v_samp_factor, TRUE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-        MEMZERO(buffer[offset_y] + x_wipe_blocks, wipe_width * sizeof(JBLOCK));
+        memset(buffer[offset_y] + x_wipe_blocks, 0,
+               wipe_width * sizeof(JBLOCK));
         if (x_wipe_blocks > 0) {
           dc_left_value = buffer[offset_y][x_wipe_blocks - 1][0];
           if (wipe_right < compptr->width_in_blocks) {
@@ -709,8 +711,8 @@
             }
           }
         } else {
-          MEMZERO(buffer[offset_y] + x_wipe_blocks,
-                  wipe_width * sizeof(JBLOCK));
+          memset(buffer[offset_y] + x_wipe_blocks, 0,
+                 wipe_width * sizeof(JBLOCK));
         }
       }
     }
@@ -2310,7 +2312,7 @@
   int m;
 
-  /* Save comments except under NONE option */
-  if (option != JCOPYOPT_NONE) {
+  /* Save comments except under NONE and ICC options */
+  if (option != JCOPYOPT_NONE && option != JCOPYOPT_ICC) {
     jpeg_save_markers(srcinfo, JPEG_COM, 0xFFFF);
   }
   /* Save all types of APPn markers iff ALL option */
@@ -2321,6 +2323,10 @@
       jpeg_save_markers(srcinfo, JPEG_APP0 + m, 0xFFFF);
     }
   }
+  /* Save only APP2 markers if ICC option selected */
+  if (option == JCOPYOPT_ICC) {
+    jpeg_save_markers(srcinfo, JPEG_APP0 + 2, 0xFFFF);
+  }
 #endif /* SAVE_MARKERS_SUPPORTED */
 }
 
diff --git a/transupp.h b/transupp.h
index ea6be1f..cea1f40 100644
--- a/transupp.h
+++ b/transupp.h
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1997-2019, Thomas G. Lane, Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2017, D. R. Commander.
+ * Copyright (C) 2017, 2021, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -213,10 +213,11 @@
  */
 
 typedef enum {
-  JCOPYOPT_NONE,          /* copy no optional markers */
-  JCOPYOPT_COMMENTS,      /* copy only comment (COM) markers */
-  JCOPYOPT_ALL,           /* copy all optional markers */
-  JCOPYOPT_ALL_EXCEPT_ICC /* copy all optional markers except APP2 */
+  JCOPYOPT_NONE,           /* copy no optional markers */
+  JCOPYOPT_COMMENTS,       /* copy only comment (COM) markers */
+  JCOPYOPT_ALL,            /* copy all optional markers */
+  JCOPYOPT_ALL_EXCEPT_ICC, /* copy all optional markers except APP2 */
+  JCOPYOPT_ICC             /* copy only ICC profile (APP2) markers */
 } JCOPY_OPTION;
 
 #define JCOPYOPT_DEFAULT  JCOPYOPT_COMMENTS     /* recommended default */
diff --git a/turbojpeg-jni.c b/turbojpeg-jni.c
index 1b728e3..446cbd2 100644
--- a/turbojpeg-jni.c
+++ b/turbojpeg-jni.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2011-2020 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2011-2023 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -26,12 +26,9 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <stdlib.h>
-#include <string.h>
+#include <limits.h>
 #include "turbojpeg.h"
-#ifdef WIN32
-#include "tjutil.h"
-#endif
+#include "jinclude.h"
 #include <jni.h>
 #include "java/org_libjpegturbo_turbojpeg_TJCompressor.h"
 #include "java/org_libjpegturbo_turbojpeg_TJDecompressor.h"
@@ -44,6 +41,12 @@
   } \
 }
 
+#define BAILIF0NOEC(f) { \
+  if (!(f)) { \
+    goto bailout; \
+  } \
+}
+
 #define THROW(msg, exceptionClass) { \
   jclass _exccls = (*env)->FindClass(env, exceptionClass); \
   \
@@ -82,20 +85,20 @@
   BAILIF0(_fid = (*env)->GetFieldID(env, _cls, "handle", "J")); \
   handle = (tjhandle)(size_t)(*env)->GetLongField(env, obj, _fid);
 
-#ifdef _WIN32
-#define setenv(envvar, value, dummy)  _putenv_s(envvar, value)
-#endif
-
+#ifndef NO_PUTENV
 #define PROP2ENV(property, envvar) { \
-  if ((jName = (*env)->NewStringUTF(env, property)) != NULL && \
-      (jValue = (*env)->CallStaticObjectMethod(env, cls, mid, \
-                                               jName)) != NULL) { \
-    if ((value = (*env)->GetStringUTFChars(env, jValue, 0)) != NULL) { \
-      setenv(envvar, value, 1); \
+  if ((jName = (*env)->NewStringUTF(env, property)) != NULL) { \
+    jboolean exception; \
+    jValue = (*env)->CallStaticObjectMethod(env, cls, mid, jName); \
+    exception = (*env)->ExceptionCheck(env); \
+    if (jValue && !exception && \
+        (value = (*env)->GetStringUTFChars(env, jValue, 0)) != NULL) { \
+      PUTENV_S(envvar, value); \
       (*env)->ReleaseStringUTFChars(env, jValue, value); \
     } \
   } \
 }
+#endif
 
 #define SAFE_RELEASE(javaArray, cArray) { \
   if (javaArray && cArray) \
@@ -114,10 +117,12 @@
   BAILIF0(mid = (*env)->GetStaticMethodID(env, cls, "getProperty",
     "(Ljava/lang/String;)Ljava/lang/String;"));
 
+#ifndef NO_PUTENV
   PROP2ENV("turbojpeg.optimize", "TJ_OPTIMIZE");
   PROP2ENV("turbojpeg.arithmetic", "TJ_ARITHMETIC");
   PROP2ENV("turbojpeg.restart", "TJ_RESTART");
   PROP2ENV("turbojpeg.progressive", "TJ_PROGRESSIVE");
+#endif
   return 0;
 
 bailout:
@@ -128,24 +133,28 @@
 JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSize
   (JNIEnv *env, jclass cls, jint width, jint height, jint jpegSubsamp)
 {
-  jint retval = (jint)tjBufSize(width, height, jpegSubsamp);
+  unsigned long retval = tjBufSize(width, height, jpegSubsamp);
 
-  if (retval == -1) THROW_ARG(tjGetErrorStr());
+  if (retval == (unsigned long)-1) THROW_ARG(tjGetErrorStr());
+  if (retval > (unsigned long)INT_MAX)
+    THROW_ARG("Image is too large");
 
 bailout:
-  return retval;
+  return (jint)retval;
 }
 
 /* TurboJPEG 1.4.x: TJ::bufSizeYUV() */
 JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII
-  (JNIEnv *env, jclass cls, jint width, jint pad, jint height, jint subsamp)
+  (JNIEnv *env, jclass cls, jint width, jint align, jint height, jint subsamp)
 {
-  jint retval = (jint)tjBufSizeYUV2(width, pad, height, subsamp);
+  unsigned long retval = tjBufSizeYUV2(width, align, height, subsamp);
 
-  if (retval == -1) THROW_ARG(tjGetErrorStr());
+  if (retval == (unsigned long)-1) THROW_ARG(tjGetErrorStr());
+  if (retval > (unsigned long)INT_MAX)
+    THROW_ARG("Image is too large");
 
 bailout:
-  return retval;
+  return (jint)retval;
 }
 
 /* TurboJPEG 1.2.x: TJ::bufSizeYUV() */
@@ -162,13 +171,15 @@
   (JNIEnv *env, jclass cls, jint componentID, jint width, jint stride,
    jint height, jint subsamp)
 {
-  jint retval = (jint)tjPlaneSizeYUV(componentID, width, stride, height,
-                                     subsamp);
+  unsigned long retval = tjPlaneSizeYUV(componentID, width, stride, height,
+                                        subsamp);
 
-  if (retval == -1) THROW_ARG(tjGetErrorStr());
+  if (retval == (unsigned long)-1) THROW_ARG(tjGetErrorStr());
+  if (retval > (unsigned long)INT_MAX)
+    THROW_ARG("Image is too large");
 
 bailout:
-  return retval;
+  return (jint)retval;
 }
 
 /* TurboJPEG 1.4.x: TJ::planeWidth() */
@@ -242,8 +253,8 @@
 
   if (ProcessSystemProperties(env) < 0) goto bailout;
 
-  BAILIF0(srcBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
-  BAILIF0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
+  BAILIF0NOEC(srcBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
 
   if (tjCompress2(handle, &srcBuf[y * actualPitch + x * tjPixelSize[pf]],
                   width, pitch, height, pf, &jpegBuf, &jpegSize, jpegSubsamp,
@@ -328,8 +339,8 @@
   jbyteArray jSrcPlanes[3] = { NULL, NULL, NULL };
   const unsigned char *srcPlanesTmp[3] = { NULL, NULL, NULL };
   const unsigned char *srcPlanes[3] = { NULL, NULL, NULL };
-  int *srcOffsetsTmp = NULL, srcOffsets[3] = { 0, 0, 0 };
-  int *srcStridesTmp = NULL, srcStrides[3] = { 0, 0, 0 };
+  jint srcOffsetsTmp[3] = { 0, 0, 0 }, srcStridesTmp[3] = { 0, 0, 0 };
+  int srcOffsets[3] = { 0, 0, 0 }, srcStrides[3] = { 0, 0, 0 };
   unsigned char *jpegBuf = NULL;
   int nc = (subsamp == org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY ? 1 : 3), i;
 
@@ -353,15 +364,15 @@
 
   if (ProcessSystemProperties(env) < 0) goto bailout;
 
-  BAILIF0(srcOffsetsTmp =
-          (*env)->GetPrimitiveArrayCritical(env, jSrcOffsets, 0));
-  for (i = 0; i < nc; i++) srcOffsets[i] = srcOffsetsTmp[i];
-  SAFE_RELEASE(jSrcOffsets, srcOffsetsTmp);
+  (*env)->GetIntArrayRegion(env, jSrcOffsets, 0, nc, srcOffsetsTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    srcOffsets[i] = srcOffsetsTmp[i];
 
-  BAILIF0(srcStridesTmp =
-          (*env)->GetPrimitiveArrayCritical(env, jSrcStrides, 0));
-  for (i = 0; i < nc; i++) srcStrides[i] = srcStridesTmp[i];
-  SAFE_RELEASE(jSrcStrides, srcStridesTmp);
+  (*env)->GetIntArrayRegion(env, jSrcStrides, 0, nc, srcStridesTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    srcStrides[i] = srcStridesTmp[i];
 
   for (i = 0; i < nc; i++) {
     int planeSize = tjPlaneSizeYUV(i, width, srcStrides[i], height, subsamp);
@@ -379,23 +390,27 @@
     if ((*env)->GetArrayLength(env, jSrcPlanes[i]) <
         srcOffsets[i] + planeSize)
       THROW_ARG("Source plane is not large enough");
-
-    BAILIF0(srcPlanesTmp[i] =
-            (*env)->GetPrimitiveArrayCritical(env, jSrcPlanes[i], 0));
-    srcPlanes[i] = &srcPlanesTmp[i][srcOffsets[i]];
-    SAFE_RELEASE(jSrcPlanes[i], srcPlanesTmp[i]);
   }
-  BAILIF0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
+  for (i = 0; i < nc; i++) {
+    BAILIF0NOEC(srcPlanesTmp[i] =
+                (*env)->GetPrimitiveArrayCritical(env, jSrcPlanes[i], 0));
+    srcPlanes[i] = &srcPlanesTmp[i][srcOffsets[i]];
+  }
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
 
   if (tjCompressFromYUVPlanes(handle, srcPlanes, width, srcStrides, height,
                               subsamp, &jpegBuf, &jpegSize, jpegQual,
                               flags | TJFLAG_NOREALLOC) == -1) {
     SAFE_RELEASE(dst, jpegBuf);
+    for (i = 0; i < nc; i++)
+      SAFE_RELEASE(jSrcPlanes[i], srcPlanesTmp[i]);
     THROW_TJ();
   }
 
 bailout:
   SAFE_RELEASE(dst, jpegBuf);
+  for (i = 0; i < nc; i++)
+    SAFE_RELEASE(jSrcPlanes[i], srcPlanesTmp[i]);
   return (jint)jpegSize;
 }
 
@@ -410,8 +425,8 @@
   jbyteArray jDstPlanes[3] = { NULL, NULL, NULL };
   unsigned char *dstPlanesTmp[3] = { NULL, NULL, NULL };
   unsigned char *dstPlanes[3] = { NULL, NULL, NULL };
-  int *dstOffsetsTmp = NULL, dstOffsets[3] = { 0, 0, 0 };
-  int *dstStridesTmp = NULL, dstStrides[3] = { 0, 0, 0 };
+  jint dstOffsetsTmp[3] = { 0, 0, 0 }, dstStridesTmp[3] = { 0, 0, 0 };
+  int dstOffsets[3] = { 0, 0, 0 }, dstStrides[3] = { 0, 0, 0 };
   int nc = (subsamp == org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY ? 1 : 3), i;
 
   GET_HANDLE();
@@ -436,15 +451,15 @@
   if ((*env)->GetArrayLength(env, src) * srcElementSize < arraySize)
     THROW_ARG("Source buffer is not large enough");
 
-  BAILIF0(dstOffsetsTmp =
-          (*env)->GetPrimitiveArrayCritical(env, jDstOffsets, 0));
-  for (i = 0; i < nc; i++) dstOffsets[i] = dstOffsetsTmp[i];
-  SAFE_RELEASE(jDstOffsets, dstOffsetsTmp);
+  (*env)->GetIntArrayRegion(env, jDstOffsets, 0, nc, dstOffsetsTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    dstOffsets[i] = dstOffsetsTmp[i];
 
-  BAILIF0(dstStridesTmp =
-          (*env)->GetPrimitiveArrayCritical(env, jDstStrides, 0));
-  for (i = 0; i < nc; i++) dstStrides[i] = dstStridesTmp[i];
-  SAFE_RELEASE(jDstStrides, dstStridesTmp);
+  (*env)->GetIntArrayRegion(env, jDstStrides, 0, nc, dstStridesTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    dstStrides[i] = dstStridesTmp[i];
 
   for (i = 0; i < nc; i++) {
     int planeSize = tjPlaneSizeYUV(i, width, dstStrides[i], height, subsamp);
@@ -462,23 +477,27 @@
     if ((*env)->GetArrayLength(env, jDstPlanes[i]) <
         dstOffsets[i] + planeSize)
       THROW_ARG("Destination plane is not large enough");
-
-    BAILIF0(dstPlanesTmp[i] =
-            (*env)->GetPrimitiveArrayCritical(env, jDstPlanes[i], 0));
-    dstPlanes[i] = &dstPlanesTmp[i][dstOffsets[i]];
-    SAFE_RELEASE(jDstPlanes[i], dstPlanesTmp[i]);
   }
-  BAILIF0(srcBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
+  for (i = 0; i < nc; i++) {
+    BAILIF0NOEC(dstPlanesTmp[i] =
+                (*env)->GetPrimitiveArrayCritical(env, jDstPlanes[i], 0));
+    dstPlanes[i] = &dstPlanesTmp[i][dstOffsets[i]];
+  }
+  BAILIF0NOEC(srcBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
 
   if (tjEncodeYUVPlanes(handle, &srcBuf[y * actualPitch + x * tjPixelSize[pf]],
                         width, pitch, height, pf, dstPlanes, dstStrides,
                         subsamp, flags) == -1) {
     SAFE_RELEASE(src, srcBuf);
+    for (i = 0; i < nc; i++)
+      SAFE_RELEASE(jDstPlanes[i], dstPlanesTmp[i]);
     THROW_TJ();
   }
 
 bailout:
   SAFE_RELEASE(src, srcBuf);
+  for (i = 0; i < nc; i++)
+    SAFE_RELEASE(jDstPlanes[i], dstPlanesTmp[i]);
 }
 
 /* TurboJPEG 1.4.x: TJCompressor::encodeYUV() byte source */
@@ -533,8 +552,8 @@
       (jsize)tjBufSizeYUV(width, height, subsamp))
     THROW_ARG("Destination buffer is not large enough");
 
-  BAILIF0(srcBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
-  BAILIF0(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
+  BAILIF0NOEC(srcBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
+  BAILIF0NOEC(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
 
   if (tjEncodeYUV2(handle, srcBuf, width, pitch, height, pf, dstBuf, subsamp,
                    flags) == -1) {
@@ -653,7 +672,7 @@
   if ((*env)->GetArrayLength(env, src) < jpegSize)
     THROW_ARG("Source buffer is not large enough");
 
-  BAILIF0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
 
   if (tjDecompressHeader3(handle, jpegBuf, (unsigned long)jpegSize, &width,
                           &height, &jpegSubsamp, &jpegColorspace) == -1) {
@@ -701,8 +720,8 @@
   if ((*env)->GetArrayLength(env, dst) * dstElementSize < arraySize)
     THROW_ARG("Destination buffer is not large enough");
 
-  BAILIF0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
-  BAILIF0(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
+  BAILIF0NOEC(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
 
   if (tjDecompress2(handle, jpegBuf, (unsigned long)jpegSize,
                     &dstBuf[y * actualPitch + x * tjPixelSize[pf]], width,
@@ -780,8 +799,8 @@
   jbyteArray jDstPlanes[3] = { NULL, NULL, NULL };
   unsigned char *dstPlanesTmp[3] = { NULL, NULL, NULL };
   unsigned char *dstPlanes[3] = { NULL, NULL, NULL };
-  int *dstOffsetsTmp = NULL, dstOffsets[3] = { 0, 0, 0 };
-  int *dstStridesTmp = NULL, dstStrides[3] = { 0, 0, 0 };
+  jint dstOffsetsTmp[3] = { 0, 0, 0 }, dstStridesTmp[3] = { 0, 0, 0 };
+  int dstOffsets[3] = { 0, 0, 0 }, dstStrides[3] = { 0, 0, 0 };
   int jpegSubsamp = -1, jpegWidth = 0, jpegHeight = 0;
   int nc = 0, i, width, height, scaledWidth, scaledHeight, nsf = 0;
   tjscalingfactor *sf;
@@ -815,15 +834,15 @@
   if (i >= nsf)
     THROW_ARG("Could not scale down to desired image dimensions");
 
-  BAILIF0(dstOffsetsTmp =
-          (*env)->GetPrimitiveArrayCritical(env, jDstOffsets, 0));
-  for (i = 0; i < nc; i++) dstOffsets[i] = dstOffsetsTmp[i];
-  SAFE_RELEASE(jDstOffsets, dstOffsetsTmp);
+  (*env)->GetIntArrayRegion(env, jDstOffsets, 0, nc, dstOffsetsTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    dstOffsets[i] = dstOffsetsTmp[i];
 
-  BAILIF0(dstStridesTmp =
-          (*env)->GetPrimitiveArrayCritical(env, jDstStrides, 0));
-  for (i = 0; i < nc; i++) dstStrides[i] = dstStridesTmp[i];
-  SAFE_RELEASE(jDstStrides, dstStridesTmp);
+  (*env)->GetIntArrayRegion(env, jDstStrides, 0, nc, dstStridesTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    dstStrides[i] = dstStridesTmp[i];
 
   for (i = 0; i < nc; i++) {
     int planeSize = tjPlaneSizeYUV(i, scaledWidth, dstStrides[i], scaledHeight,
@@ -842,23 +861,27 @@
     if ((*env)->GetArrayLength(env, jDstPlanes[i]) <
         dstOffsets[i] + planeSize)
       THROW_ARG("Destination plane is not large enough");
-
-    BAILIF0(dstPlanesTmp[i] =
-            (*env)->GetPrimitiveArrayCritical(env, jDstPlanes[i], 0));
-    dstPlanes[i] = &dstPlanesTmp[i][dstOffsets[i]];
-    SAFE_RELEASE(jDstPlanes[i], dstPlanesTmp[i]);
   }
-  BAILIF0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
+  for (i = 0; i < nc; i++) {
+    BAILIF0NOEC(dstPlanesTmp[i] =
+                (*env)->GetPrimitiveArrayCritical(env, jDstPlanes[i], 0));
+    dstPlanes[i] = &dstPlanesTmp[i][dstOffsets[i]];
+  }
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
 
   if (tjDecompressToYUVPlanes(handle, jpegBuf, (unsigned long)jpegSize,
                               dstPlanes, desiredWidth, dstStrides,
                               desiredHeight, flags) == -1) {
     SAFE_RELEASE(src, jpegBuf);
+    for (i = 0; i < nc; i++)
+      SAFE_RELEASE(jDstPlanes[i], dstPlanesTmp[i]);
     THROW_TJ();
   }
 
 bailout:
   SAFE_RELEASE(src, jpegBuf);
+  for (i = 0; i < nc; i++)
+    SAFE_RELEASE(jDstPlanes[i], dstPlanesTmp[i]);
 }
 
 /* TurboJPEG 1.2.x: TJDecompressor::decompressToYUV() */
@@ -884,8 +907,8 @@
       (jsize)tjBufSizeYUV(jpegWidth, jpegHeight, jpegSubsamp))
     THROW_ARG("Destination buffer is not large enough");
 
-  BAILIF0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
-  BAILIF0(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
+  BAILIF0NOEC(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
 
   if (tjDecompressToYUV(handle, jpegBuf, (unsigned long)jpegSize, dstBuf,
                         flags) == -1) {
@@ -909,8 +932,8 @@
   jbyteArray jSrcPlanes[3] = { NULL, NULL, NULL };
   const unsigned char *srcPlanesTmp[3] = { NULL, NULL, NULL };
   const unsigned char *srcPlanes[3] = { NULL, NULL, NULL };
-  int *srcOffsetsTmp = NULL, srcOffsets[3] = { 0, 0, 0 };
-  int *srcStridesTmp = NULL, srcStrides[3] = { 0, 0, 0 };
+  jint srcOffsetsTmp[3] = { 0, 0, 0 }, srcStridesTmp[3] = { 0, 0, 0 };
+  int srcOffsets[3] = { 0, 0, 0 }, srcStrides[3] = { 0, 0, 0 };
   unsigned char *dstBuf = NULL;
   int nc = (subsamp == org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY ? 1 : 3), i;
 
@@ -935,15 +958,15 @@
   if ((*env)->GetArrayLength(env, dst) * dstElementSize < arraySize)
     THROW_ARG("Destination buffer is not large enough");
 
-  BAILIF0(srcOffsetsTmp =
-          (*env)->GetPrimitiveArrayCritical(env, jSrcOffsets, 0));
-  for (i = 0; i < nc; i++) srcOffsets[i] = srcOffsetsTmp[i];
-  SAFE_RELEASE(jSrcOffsets, srcOffsetsTmp);
+  (*env)->GetIntArrayRegion(env, jSrcOffsets, 0, nc, srcOffsetsTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    srcOffsets[i] = srcOffsetsTmp[i];
 
-  BAILIF0(srcStridesTmp =
-          (*env)->GetPrimitiveArrayCritical(env, jSrcStrides, 0));
-  for (i = 0; i < nc; i++) srcStrides[i] = srcStridesTmp[i];
-  SAFE_RELEASE(jSrcStrides, srcStridesTmp);
+  (*env)->GetIntArrayRegion(env, jSrcStrides, 0, nc, srcStridesTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    srcStrides[i] = srcStridesTmp[i];
 
   for (i = 0; i < nc; i++) {
     int planeSize = tjPlaneSizeYUV(i, width, srcStrides[i], height, subsamp);
@@ -961,23 +984,27 @@
     if ((*env)->GetArrayLength(env, jSrcPlanes[i]) <
         srcOffsets[i] + planeSize)
       THROW_ARG("Source plane is not large enough");
-
-    BAILIF0(srcPlanesTmp[i] =
-            (*env)->GetPrimitiveArrayCritical(env, jSrcPlanes[i], 0));
-    srcPlanes[i] = &srcPlanesTmp[i][srcOffsets[i]];
-    SAFE_RELEASE(jSrcPlanes[i], srcPlanesTmp[i]);
   }
-  BAILIF0(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
+  for (i = 0; i < nc; i++) {
+    BAILIF0NOEC(srcPlanesTmp[i] =
+                (*env)->GetPrimitiveArrayCritical(env, jSrcPlanes[i], 0));
+    srcPlanes[i] = &srcPlanesTmp[i][srcOffsets[i]];
+  }
+  BAILIF0NOEC(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
 
   if (tjDecodeYUVPlanes(handle, srcPlanes, srcStrides, subsamp,
                         &dstBuf[y * actualPitch + x * tjPixelSize[pf]], width,
                         pitch, height, pf, flags) == -1) {
     SAFE_RELEASE(dst, dstBuf);
+    for (i = 0; i < nc; i++)
+      SAFE_RELEASE(jSrcPlanes[i], srcPlanesTmp[i]);
     THROW_TJ();
   }
 
 bailout:
   SAFE_RELEASE(dst, dstBuf);
+  for (i = 0; i < nc; i++)
+    SAFE_RELEASE(jSrcPlanes[i], srcPlanesTmp[i]);
 }
 
 /* TurboJPEG 1.4.x: TJDecompressor::decodeYUV() byte destination */
@@ -1183,10 +1210,10 @@
         tjBufSize(w, h, jpegSubsamp))
       THROW_ARG("Destination buffer is not large enough");
   }
-  BAILIF0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, jsrcBuf, 0));
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, jsrcBuf, 0));
   for (i = 0; i < n; i++)
-    BAILIF0(dstBufs[i] =
-            (*env)->GetPrimitiveArrayCritical(env, jdstBufs[i], 0));
+    BAILIF0NOEC(dstBufs[i] =
+                (*env)->GetPrimitiveArrayCritical(env, jdstBufs[i], 0));
 
   if (tjTransform(handle, jpegBuf, jpegSize, n, dstBufs, dstSizes, t,
                   flags | TJFLAG_NOREALLOC) == -1) {
diff --git a/turbojpeg-mapfile b/turbojpeg-mapfile
index 5477fed..07a429b 100755
--- a/turbojpeg-mapfile
+++ b/turbojpeg-mapfile
@@ -1,14 +1,14 @@
 TURBOJPEG_1.0
 {
   global:
-    tjInitCompress;
-    tjCompress;
     TJBUFSIZE;
-    tjInitDecompress;
-    tjDecompressHeader;
+    tjCompress;
     tjDecompress;
+    tjDecompressHeader;
     tjDestroy;
     tjGetErrorStr;
+    tjInitCompress;
+    tjInitDecompress;
   local:
     *;
 };
diff --git a/turbojpeg-mapfile.jni b/turbojpeg-mapfile.jni
index 4432791..4ae25aa 100755
--- a/turbojpeg-mapfile.jni
+++ b/turbojpeg-mapfile.jni
@@ -1,14 +1,14 @@
 TURBOJPEG_1.0
 {
   global:
-    tjInitCompress;
-    tjCompress;
     TJBUFSIZE;
-    tjInitDecompress;
-    tjDecompressHeader;
+    tjCompress;
     tjDecompress;
+    tjDecompressHeader;
     tjDestroy;
     tjGetErrorStr;
+    tjInitCompress;
+    tjInitDecompress;
   local:
     *;
 };
diff --git a/turbojpeg.c b/turbojpeg.c
index 793a3ee..b5498dc 100644
--- a/turbojpeg.c
+++ b/turbojpeg.c
@@ -1,5 +1,6 @@
 /*
- * Copyright (C)2009-2021 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2009-2023 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2021 Alex Richardson.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -29,9 +30,8 @@
 /* TurboJPEG/LJT:  this implements the TurboJPEG API using libjpeg or
    libjpeg-turbo */
 
-#include <stdio.h>
-#include <stdlib.h>
 #include <ctype.h>
+#include <limits.h>
 #include <jinclude.h>
 #define JPEG_INTERNALS
 #include <jpeglib.h>
@@ -43,7 +43,6 @@
 #include "transupp.h"
 #include "./jpegcomp.h"
 #include "./cdjpeg.h"
-#include "jconfigint.h"
 
 extern void jpeg_mem_dest_tj(j_compress_ptr, unsigned char **, unsigned long *,
                              boolean);
@@ -99,7 +98,7 @@
 }
 
 
-/* Global structures, macros, etc. */
+/********************** Global structures, macros, etc. **********************/
 
 enum { COMPRESS = 1, DECOMPRESS = 2 };
 
@@ -127,9 +126,9 @@
     int scan_no = ((j_decompress_ptr)dinfo)->input_scan_number;
 
     if (scan_no > 500) {
-      snprintf(myprog->this->errStr, JMSG_LENGTH_MAX,
+      SNPRINTF(myprog->this->errStr, JMSG_LENGTH_MAX,
                "Progressive JPEG image has more than 500 scans");
-      snprintf(errStr, JMSG_LENGTH_MAX,
+      SNPRINTF(errStr, JMSG_LENGTH_MAX,
                "Progressive JPEG image has more than 500 scans");
       myprog->this->isInstanceError = TRUE;
       myerr->warning = FALSE;
@@ -192,15 +191,24 @@
 };
 
 #define THROWG(m) { \
-  snprintf(errStr, JMSG_LENGTH_MAX, "%s", m); \
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s", m); \
   retval = -1;  goto bailout; \
 }
+#ifdef _MSC_VER
 #define THROW_UNIX(m) { \
-  snprintf(errStr, JMSG_LENGTH_MAX, "%s\n%s", m, strerror(errno)); \
+  char strerrorBuf[80] = { 0 }; \
+  strerror_s(strerrorBuf, 80, errno); \
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s\n%s", m, strerrorBuf); \
   retval = -1;  goto bailout; \
 }
+#else
+#define THROW_UNIX(m) { \
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s\n%s", m, strerror(errno)); \
+  retval = -1;  goto bailout; \
+}
+#endif
 #define THROW(m) { \
-  snprintf(this->errStr, JMSG_LENGTH_MAX, "%s", m); \
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "%s", m); \
   this->isInstanceError = TRUE;  THROWG(m) \
 }
 
@@ -215,7 +223,7 @@
   j_decompress_ptr dinfo = NULL; \
   \
   if (!this) { \
-    snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \
+    SNPRINTF(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \
     return -1; \
   } \
   cinfo = &this->cinfo;  dinfo = &this->dinfo; \
@@ -227,7 +235,7 @@
   j_compress_ptr cinfo = NULL; \
   \
   if (!this) { \
-    snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \
+    SNPRINTF(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \
     return -1; \
   } \
   cinfo = &this->cinfo; \
@@ -239,7 +247,7 @@
   j_decompress_ptr dinfo = NULL; \
   \
   if (!this) { \
-    snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \
+    SNPRINTF(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \
     return -1; \
   } \
   dinfo = &this->dinfo; \
@@ -270,7 +278,7 @@
                             int flags)
 {
 #ifndef NO_GETENV
-  char *env = NULL;
+  char env[7] = { 0 };
 #endif
 
   cinfo->in_color_space = pf2cs[pixelFormat];
@@ -278,18 +286,21 @@
   jpeg_set_defaults(cinfo);
 
 #ifndef NO_GETENV
-  if ((env = getenv("TJ_OPTIMIZE")) != NULL && strlen(env) > 0 &&
-      !strcmp(env, "1"))
+  if (!GETENV_S(env, 7, "TJ_OPTIMIZE") && !strcmp(env, "1"))
     cinfo->optimize_coding = TRUE;
-  if ((env = getenv("TJ_ARITHMETIC")) != NULL && strlen(env) > 0 &&
-      !strcmp(env, "1"))
+  if (!GETENV_S(env, 7, "TJ_ARITHMETIC") && !strcmp(env, "1"))
     cinfo->arith_code = TRUE;
-  if ((env = getenv("TJ_RESTART")) != NULL && strlen(env) > 0) {
+  if (!GETENV_S(env, 7, "TJ_RESTART") && strlen(env) > 0) {
     int temp = -1;
     char tempc = 0;
 
+#ifdef _MSC_VER
+    if (sscanf_s(env, "%d%c", &temp, &tempc, 1) >= 1 && temp >= 0 &&
+        temp <= 65535) {
+#else
     if (sscanf(env, "%d%c", &temp, &tempc) >= 1 && temp >= 0 &&
         temp <= 65535) {
+#endif
       if (toupper(tempc) == 'B') {
         cinfo->restart_interval = temp;
         cinfo->restart_in_rows = 0;
@@ -313,13 +324,14 @@
   else
     jpeg_set_colorspace(cinfo, JCS_YCbCr);
 
+#ifdef C_PROGRESSIVE_SUPPORTED
   if (flags & TJFLAG_PROGRESSIVE)
     jpeg_simple_progression(cinfo);
 #ifndef NO_GETENV
-  else if ((env = getenv("TJ_PROGRESSIVE")) != NULL && strlen(env) > 0 &&
-           !strcmp(env, "1"))
+  else if (!GETENV_S(env, 7, "TJ_PROGRESSIVE") && !strcmp(env, "1"))
     jpeg_simple_progression(cinfo);
 #endif
+#endif
 
   cinfo->comp_info[0].h_samp_factor = tjMCUWidth[subsamp] / 8;
   cinfo->comp_info[1].h_samp_factor = 1;
@@ -345,7 +357,7 @@
   if (dinfo->num_components == 1 && dinfo->jpeg_color_space == JCS_GRAYSCALE)
     return TJSAMP_GRAY;
 
-  for (i = 0; i < NUMSUBOPT; i++) {
+  for (i = 0; i < TJ_NUMSAMP; i++) {
     if (dinfo->num_components == pixelsize[i] ||
         ((dinfo->jpeg_color_space == JCS_YCCK ||
           dinfo->jpeg_color_space == JCS_CMYK) &&
@@ -414,8 +426,9 @@
 }
 
 
-/* General API functions */
+/*************************** General API functions ***************************/
 
+/* TurboJPEG 2.0+ */
 DLLEXPORT char *tjGetErrorStr2(tjhandle handle)
 {
   tjinstance *this = (tjinstance *)handle;
@@ -428,12 +441,14 @@
 }
 
 
+/* TurboJPEG 1.0+ */
 DLLEXPORT char *tjGetErrorStr(void)
 {
   return errStr;
 }
 
 
+/* TurboJPEG 2.0+ */
 DLLEXPORT int tjGetErrorCode(tjhandle handle)
 {
   tjinstance *this = (tjinstance *)handle;
@@ -443,6 +458,7 @@
 }
 
 
+/* TurboJPEG 1.0+ */
 DLLEXPORT int tjDestroy(tjhandle handle)
 {
   GET_INSTANCE(handle);
@@ -460,19 +476,21 @@
    with turbojpeg.dll for compatibility reasons.  However, these functions
    can potentially be used for other purposes by different implementations. */
 
+/* TurboJPEG 1.2+ */
 DLLEXPORT void tjFree(unsigned char *buf)
 {
   free(buf);
 }
 
 
+/* TurboJPEG 1.2+ */
 DLLEXPORT unsigned char *tjAlloc(int bytes)
 {
   return (unsigned char *)malloc(bytes);
 }
 
 
-/* Compressor  */
+/******************************** Compressor *********************************/
 
 static tjhandle _tjInitCompress(tjinstance *this)
 {
@@ -504,27 +522,29 @@
   return (tjhandle)this;
 }
 
+/* TurboJPEG 1.0+ */
 DLLEXPORT tjhandle tjInitCompress(void)
 {
   tjinstance *this = NULL;
 
   if ((this = (tjinstance *)malloc(sizeof(tjinstance))) == NULL) {
-    snprintf(errStr, JMSG_LENGTH_MAX,
+    SNPRINTF(errStr, JMSG_LENGTH_MAX,
              "tjInitCompress(): Memory allocation failure");
     return NULL;
   }
-  MEMZERO(this, sizeof(tjinstance));
-  snprintf(this->errStr, JMSG_LENGTH_MAX, "No error");
+  memset(this, 0, sizeof(tjinstance));
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "No error");
   return _tjInitCompress(this);
 }
 
 
+/* TurboJPEG 1.2+ */
 DLLEXPORT unsigned long tjBufSize(int width, int height, int jpegSubsamp)
 {
   unsigned long long retval = 0;
   int mcuw, mcuh, chromasf;
 
-  if (width < 1 || height < 1 || jpegSubsamp < 0 || jpegSubsamp >= NUMSUBOPT)
+  if (width < 1 || height < 1 || jpegSubsamp < 0 || jpegSubsamp >= TJ_NUMSAMP)
     THROWG("tjBufSize(): Invalid argument");
 
   /* This allows for rare corner cases in which a JPEG image can actually be
@@ -541,6 +561,7 @@
   return (unsigned long)retval;
 }
 
+/* TurboJPEG 1.0+ */
 DLLEXPORT unsigned long TJBUFSIZE(int width, int height)
 {
   unsigned long long retval = 0;
@@ -560,19 +581,20 @@
 }
 
 
-DLLEXPORT unsigned long tjBufSizeYUV2(int width, int pad, int height,
+/* TurboJPEG 1.4+ */
+DLLEXPORT unsigned long tjBufSizeYUV2(int width, int align, int height,
                                       int subsamp)
 {
   unsigned long long retval = 0;
   int nc, i;
 
-  if (subsamp < 0 || subsamp >= NUMSUBOPT)
+  if (align < 1 || !IS_POW2(align) || subsamp < 0 || subsamp >= TJ_NUMSAMP)
     THROWG("tjBufSizeYUV2(): Invalid argument");
 
   nc = (subsamp == TJSAMP_GRAY ? 1 : 3);
   for (i = 0; i < nc; i++) {
     int pw = tjPlaneWidth(i, width, subsamp);
-    int stride = PAD(pw, pad);
+    int stride = PAD(pw, align);
     int ph = tjPlaneHeight(i, height, subsamp);
 
     if (pw < 0 || ph < 0) return -1;
@@ -585,20 +607,24 @@
   return (unsigned long)retval;
 }
 
+/* TurboJPEG 1.2+ */
 DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp)
 {
   return tjBufSizeYUV2(width, 4, height, subsamp);
 }
 
+/* TurboJPEG 1.1+ */
 DLLEXPORT unsigned long TJBUFSIZEYUV(int width, int height, int subsamp)
 {
   return tjBufSizeYUV(width, height, subsamp);
 }
 
 
+/* TurboJPEG 1.4+ */
 DLLEXPORT int tjPlaneWidth(int componentID, int width, int subsamp)
 {
-  int pw, nc, retval = 0;
+  unsigned long long pw, retval = 0;
+  int nc;
 
   if (width < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP)
     THROWG("tjPlaneWidth(): Invalid argument");
@@ -606,20 +632,25 @@
   if (componentID < 0 || componentID >= nc)
     THROWG("tjPlaneWidth(): Invalid argument");
 
-  pw = PAD(width, tjMCUWidth[subsamp] / 8);
+  pw = PAD((unsigned long long)width, tjMCUWidth[subsamp] / 8);
   if (componentID == 0)
     retval = pw;
   else
     retval = pw * 8 / tjMCUWidth[subsamp];
 
+  if (retval > (unsigned long long)INT_MAX)
+    THROWG("tjPlaneWidth(): Width is too large");
+
 bailout:
-  return retval;
+  return (int)retval;
 }
 
 
+/* TurboJPEG 1.4+ */
 DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp)
 {
-  int ph, nc, retval = 0;
+  unsigned long long ph, retval = 0;
+  int nc;
 
   if (height < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP)
     THROWG("tjPlaneHeight(): Invalid argument");
@@ -627,24 +658,28 @@
   if (componentID < 0 || componentID >= nc)
     THROWG("tjPlaneHeight(): Invalid argument");
 
-  ph = PAD(height, tjMCUHeight[subsamp] / 8);
+  ph = PAD((unsigned long long)height, tjMCUHeight[subsamp] / 8);
   if (componentID == 0)
     retval = ph;
   else
     retval = ph * 8 / tjMCUHeight[subsamp];
 
+  if (retval > (unsigned long long)INT_MAX)
+    THROWG("tjPlaneHeight(): Height is too large");
+
 bailout:
-  return retval;
+  return (int)retval;
 }
 
 
+/* TurboJPEG 1.4+ */
 DLLEXPORT unsigned long tjPlaneSizeYUV(int componentID, int width, int stride,
                                        int height, int subsamp)
 {
   unsigned long long retval = 0;
   int pw, ph;
 
-  if (width < 1 || height < 1 || subsamp < 0 || subsamp >= NUMSUBOPT)
+  if (width < 1 || height < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP)
     THROWG("tjPlaneSizeYUV(): Invalid argument");
 
   pw = tjPlaneWidth(componentID, width, subsamp);
@@ -663,12 +698,14 @@
 }
 
 
+/* TurboJPEG 1.2+ */
 DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf,
                           int width, int pitch, int height, int pixelFormat,
                           unsigned char **jpegBuf, unsigned long *jpegSize,
                           int jpegSubsamp, int jpegQual, int flags)
 {
-  int i, retval = 0, alloc = 1;
+  int i, retval = 0;
+  boolean alloc = TRUE;
   JSAMPROW *row_pointer = NULL;
 
   GET_CINSTANCE(handle)
@@ -678,7 +715,7 @@
 
   if (srcBuf == NULL || width <= 0 || pitch < 0 || height <= 0 ||
       pixelFormat < 0 || pixelFormat >= TJ_NUMPF || jpegBuf == NULL ||
-      jpegSize == NULL || jpegSubsamp < 0 || jpegSubsamp >= NUMSUBOPT ||
+      jpegSize == NULL || jpegSubsamp < 0 || jpegSubsamp >= TJ_NUMSAMP ||
       jpegQual < 0 || jpegQual > 100)
     THROW("tjCompress2(): Invalid argument");
 
@@ -696,13 +733,13 @@
   cinfo->image_height = height;
 
 #ifndef NO_PUTENV
-  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
-  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
-  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+  if (flags & TJFLAG_FORCEMMX) PUTENV_S("JSIMD_FORCEMMX", "1");
+  else if (flags & TJFLAG_FORCESSE) PUTENV_S("JSIMD_FORCESSE", "1");
+  else if (flags & TJFLAG_FORCESSE2) PUTENV_S("JSIMD_FORCESSE2", "1");
 #endif
 
   if (flags & TJFLAG_NOREALLOC) {
-    alloc = 0;  *jpegSize = tjBufSize(width, height, jpegSubsamp);
+    alloc = FALSE;  *jpegSize = tjBufSize(width, height, jpegSubsamp);
   }
   jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
   setCompDefaults(cinfo, pixelFormat, jpegSubsamp, jpegQual, flags);
@@ -730,6 +767,7 @@
   return retval;
 }
 
+/* TurboJPEG 1.0+ */
 DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width,
                          int pitch, int height, int pixelSize,
                          unsigned char *jpegBuf, unsigned long *jpegSize,
@@ -753,6 +791,7 @@
 }
 
 
+/* TurboJPEG 1.4+ */
 DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf,
                                 int width, int pitch, int height,
                                 int pixelFormat, unsigned char **dstPlanes,
@@ -779,13 +818,13 @@
 
   if (srcBuf == NULL || width <= 0 || pitch < 0 || height <= 0 ||
       pixelFormat < 0 || pixelFormat >= TJ_NUMPF || !dstPlanes ||
-      !dstPlanes[0] || subsamp < 0 || subsamp >= NUMSUBOPT)
+      !dstPlanes[0] || subsamp < 0 || subsamp >= TJ_NUMSAMP)
     THROW("tjEncodeYUVPlanes(): Invalid argument");
   if (subsamp != TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2]))
     THROW("tjEncodeYUVPlanes(): Invalid argument");
 
   if (pixelFormat == TJPF_CMYK)
-    THROW("tjEncodeYUVPlanes(): Cannot generate YUV images from CMYK pixels");
+    THROW("tjEncodeYUVPlanes(): Cannot generate YUV images from packed-pixel CMYK images");
 
   if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
 
@@ -798,9 +837,9 @@
   cinfo->image_height = height;
 
 #ifndef NO_PUTENV
-  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
-  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
-  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+  if (flags & TJFLAG_FORCEMMX) PUTENV_S("JSIMD_FORCEMMX", "1");
+  else if (flags & TJFLAG_FORCESSE) PUTENV_S("JSIMD_FORCESSE", "1");
+  else if (flags & TJFLAG_FORCESSE2) PUTENV_S("JSIMD_FORCESSE2", "1");
 #endif
 
   setCompDefaults(cinfo, pixelFormat, subsamp, -1, flags);
@@ -845,7 +884,7 @@
       THROW("tjEncodeYUVPlanes(): Memory allocation failure");
     for (row = 0; row < cinfo->max_v_samp_factor; row++) {
       unsigned char *_tmpbuf_aligned =
-        (unsigned char *)PAD((size_t)_tmpbuf[i], 32);
+        (unsigned char *)PAD((JUINTPTR)_tmpbuf[i], 32);
 
       tmpbuf[i][row] = &_tmpbuf_aligned[
         PAD((compptr->width_in_blocks * cinfo->max_h_samp_factor * DCTSIZE) /
@@ -861,7 +900,7 @@
       THROW("tjEncodeYUVPlanes(): Memory allocation failure");
     for (row = 0; row < compptr->v_samp_factor; row++) {
       unsigned char *_tmpbuf2_aligned =
-        (unsigned char *)PAD((size_t)_tmpbuf2[i], 32);
+        (unsigned char *)PAD((JUINTPTR)_tmpbuf2[i], 32);
 
       tmpbuf2[i][row] =
         &_tmpbuf2_aligned[PAD(compptr->width_in_blocks * DCTSIZE, 32) * row];
@@ -911,9 +950,10 @@
   return retval;
 }
 
+/* TurboJPEG 1.4+ */
 DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf,
                            int width, int pitch, int height, int pixelFormat,
-                           unsigned char *dstBuf, int pad, int subsamp,
+                           unsigned char *dstBuf, int align, int subsamp,
                            int flags)
 {
   unsigned char *dstPlanes[3];
@@ -923,14 +963,14 @@
   if (!this) THROWG("tjEncodeYUV3(): Invalid handle");
   this->isInstanceError = FALSE;
 
-  if (width <= 0 || height <= 0 || dstBuf == NULL || pad < 0 ||
-      !IS_POW2(pad) || subsamp < 0 || subsamp >= NUMSUBOPT)
+  if (width <= 0 || height <= 0 || dstBuf == NULL || align < 1 ||
+      !IS_POW2(align) || subsamp < 0 || subsamp >= TJ_NUMSAMP)
     THROW("tjEncodeYUV3(): Invalid argument");
 
   pw0 = tjPlaneWidth(0, width, subsamp);
   ph0 = tjPlaneHeight(0, height, subsamp);
   dstPlanes[0] = dstBuf;
-  strides[0] = PAD(pw0, pad);
+  strides[0] = PAD(pw0, align);
   if (subsamp == TJSAMP_GRAY) {
     strides[1] = strides[2] = 0;
     dstPlanes[1] = dstPlanes[2] = NULL;
@@ -938,7 +978,7 @@
     int pw1 = tjPlaneWidth(1, width, subsamp);
     int ph1 = tjPlaneHeight(1, height, subsamp);
 
-    strides[1] = strides[2] = PAD(pw1, pad);
+    strides[1] = strides[2] = PAD(pw1, align);
     dstPlanes[1] = dstPlanes[0] + strides[0] * ph0;
     dstPlanes[2] = dstPlanes[1] + strides[1] * ph1;
   }
@@ -950,6 +990,7 @@
   return retval;
 }
 
+/* TurboJPEG 1.2+ */
 DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width,
                            int pitch, int height, int pixelFormat,
                            unsigned char *dstBuf, int subsamp, int flags)
@@ -958,6 +999,7 @@
                       dstBuf, 4, subsamp, flags);
 }
 
+/* TurboJPEG 1.1+ */
 DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width,
                           int pitch, int height, int pixelSize,
                           unsigned char *dstBuf, int subsamp, int flags)
@@ -968,6 +1010,7 @@
 }
 
 
+/* TurboJPEG 1.4+ */
 DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle,
                                       const unsigned char **srcPlanes,
                                       int width, const int *strides,
@@ -976,7 +1019,8 @@
                                       unsigned long *jpegSize, int jpegQual,
                                       int flags)
 {
-  int i, row, retval = 0, alloc = 1;
+  int i, row, retval = 0;
+  boolean alloc = TRUE;
   int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS],
     tmpbufsize = 0, usetmpbuf = 0, th[MAX_COMPONENTS];
   JSAMPLE *_tmpbuf = NULL, *ptr;
@@ -993,7 +1037,7 @@
     THROW("tjCompressFromYUVPlanes(): Instance has not been initialized for compression");
 
   if (!srcPlanes || !srcPlanes[0] || width <= 0 || height <= 0 ||
-      subsamp < 0 || subsamp >= NUMSUBOPT || jpegBuf == NULL ||
+      subsamp < 0 || subsamp >= TJ_NUMSAMP || jpegBuf == NULL ||
       jpegSize == NULL || jpegQual < 0 || jpegQual > 100)
     THROW("tjCompressFromYUVPlanes(): Invalid argument");
   if (subsamp != TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2]))
@@ -1008,13 +1052,13 @@
   cinfo->image_height = height;
 
 #ifndef NO_PUTENV
-  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
-  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
-  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+  if (flags & TJFLAG_FORCEMMX) PUTENV_S("JSIMD_FORCEMMX", "1");
+  else if (flags & TJFLAG_FORCESSE) PUTENV_S("JSIMD_FORCESSE", "1");
+  else if (flags & TJFLAG_FORCESSE2) PUTENV_S("JSIMD_FORCESSE2", "1");
 #endif
 
   if (flags & TJFLAG_NOREALLOC) {
-    alloc = 0;  *jpegSize = tjBufSize(width, height, subsamp);
+    alloc = FALSE;  *jpegSize = tjBufSize(width, height, subsamp);
   }
   jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
   setCompDefaults(cinfo, TJPF_RGB, subsamp, jpegQual, flags);
@@ -1105,8 +1149,9 @@
   return retval;
 }
 
+/* TurboJPEG 1.4+ */
 DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf,
-                                int width, int pad, int height, int subsamp,
+                                int width, int align, int height, int subsamp,
                                 unsigned char **jpegBuf,
                                 unsigned long *jpegSize, int jpegQual,
                                 int flags)
@@ -1118,14 +1163,14 @@
   if (!this) THROWG("tjCompressFromYUV(): Invalid handle");
   this->isInstanceError = FALSE;
 
-  if (srcBuf == NULL || width <= 0 || pad < 1 || height <= 0 || subsamp < 0 ||
-      subsamp >= NUMSUBOPT)
+  if (srcBuf == NULL || width <= 0 || align < 1 || !IS_POW2(align) ||
+      height <= 0 || subsamp < 0 || subsamp >= TJ_NUMSAMP)
     THROW("tjCompressFromYUV(): Invalid argument");
 
   pw0 = tjPlaneWidth(0, width, subsamp);
   ph0 = tjPlaneHeight(0, height, subsamp);
   srcPlanes[0] = srcBuf;
-  strides[0] = PAD(pw0, pad);
+  strides[0] = PAD(pw0, align);
   if (subsamp == TJSAMP_GRAY) {
     strides[1] = strides[2] = 0;
     srcPlanes[1] = srcPlanes[2] = NULL;
@@ -1133,7 +1178,7 @@
     int pw1 = tjPlaneWidth(1, width, subsamp);
     int ph1 = tjPlaneHeight(1, height, subsamp);
 
-    strides[1] = strides[2] = PAD(pw1, pad);
+    strides[1] = strides[2] = PAD(pw1, align);
     srcPlanes[1] = srcPlanes[0] + strides[0] * ph0;
     srcPlanes[2] = srcPlanes[1] + strides[1] * ph1;
   }
@@ -1146,7 +1191,7 @@
 }
 
 
-/* Decompressor */
+/******************************* Decompressor ********************************/
 
 static tjhandle _tjInitDecompress(tjinstance *this)
 {
@@ -1176,21 +1221,23 @@
   return (tjhandle)this;
 }
 
+/* TurboJPEG 1.0+ */
 DLLEXPORT tjhandle tjInitDecompress(void)
 {
   tjinstance *this;
 
   if ((this = (tjinstance *)malloc(sizeof(tjinstance))) == NULL) {
-    snprintf(errStr, JMSG_LENGTH_MAX,
+    SNPRINTF(errStr, JMSG_LENGTH_MAX,
              "tjInitDecompress(): Memory allocation failure");
     return NULL;
   }
-  MEMZERO(this, sizeof(tjinstance));
-  snprintf(this->errStr, JMSG_LENGTH_MAX, "No error");
+  memset(this, 0, sizeof(tjinstance));
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "No error");
   return _tjInitDecompress(this);
 }
 
 
+/* TurboJPEG 1.4+ */
 DLLEXPORT int tjDecompressHeader3(tjhandle handle,
                                   const unsigned char *jpegBuf,
                                   unsigned long jpegSize, int *width,
@@ -1213,7 +1260,13 @@
   }
 
   jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
-  jpeg_read_header(dinfo, TRUE);
+
+  /* jpeg_read_header() calls jpeg_abort() and returns JPEG_HEADER_TABLES_ONLY
+     if the datastream is a tables-only datastream.  Since we aren't using a
+     suspending data source, the only other value it can return is
+     JPEG_HEADER_OK. */
+  if (jpeg_read_header(dinfo, FALSE) == JPEG_HEADER_TABLES_ONLY)
+    return 0;
 
   *width = dinfo->image_width;
   *height = dinfo->image_height;
@@ -1241,6 +1294,7 @@
   return retval;
 }
 
+/* TurboJPEG 1.1+ */
 DLLEXPORT int tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf,
                                   unsigned long jpegSize, int *width,
                                   int *height, int *jpegSubsamp)
@@ -1251,6 +1305,7 @@
                              jpegSubsamp, &jpegColorspace);
 }
 
+/* TurboJPEG 1.0+ */
 DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf,
                                  unsigned long jpegSize, int *width,
                                  int *height)
@@ -1262,19 +1317,21 @@
 }
 
 
-DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numscalingfactors)
+/* TurboJPEG 1.2+ */
+DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numScalingFactors)
 {
-  if (numscalingfactors == NULL) {
-    snprintf(errStr, JMSG_LENGTH_MAX,
+  if (numScalingFactors == NULL) {
+    SNPRINTF(errStr, JMSG_LENGTH_MAX,
              "tjGetScalingFactors(): Invalid argument");
     return NULL;
   }
 
-  *numscalingfactors = NUMSF;
+  *numScalingFactors = NUMSF;
   return (tjscalingfactor *)sf;
 }
 
 
+/* TurboJPEG 1.2+ */
 DLLEXPORT int tjDecompress2(tjhandle handle, const unsigned char *jpegBuf,
                             unsigned long jpegSize, unsigned char *dstBuf,
                             int width, int pitch, int height, int pixelFormat,
@@ -1294,13 +1351,13 @@
     THROW("tjDecompress2(): Invalid argument");
 
 #ifndef NO_PUTENV
-  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
-  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
-  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+  if (flags & TJFLAG_FORCEMMX) PUTENV_S("JSIMD_FORCEMMX", "1");
+  else if (flags & TJFLAG_FORCESSE) PUTENV_S("JSIMD_FORCESSE", "1");
+  else if (flags & TJFLAG_FORCESSE2) PUTENV_S("JSIMD_FORCESSE2", "1");
 #endif
 
   if (flags & TJFLAG_LIMITSCANS) {
-    MEMZERO(&progress, sizeof(struct my_progress_mgr));
+    memset(&progress, 0, sizeof(struct my_progress_mgr));
     progress.pub.progress_monitor = my_progress_monitor;
     progress.this = this;
     dinfo->progress = &progress.pub;
@@ -1362,6 +1419,7 @@
   return retval;
 }
 
+/* TurboJPEG 1.0+ */
 DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf,
                            unsigned long jpegSize, unsigned char *dstBuf,
                            int width, int pitch, int height, int pixelSize,
@@ -1375,8 +1433,8 @@
 }
 
 
-static int setDecodeDefaults(struct jpeg_decompress_struct *dinfo,
-                             int pixelFormat, int subsamp, int flags)
+static void setDecodeDefaults(struct jpeg_decompress_struct *dinfo,
+                              int pixelFormat, int subsamp, int flags)
 {
   int i;
 
@@ -1411,8 +1469,6 @@
     if (dinfo->quant_tbl_ptrs[i] == NULL)
       dinfo->quant_tbl_ptrs[i] = jpeg_alloc_quant_table((j_common_ptr)dinfo);
   }
-
-  return 0;
 }
 
 
@@ -1425,6 +1481,7 @@
 {
 }
 
+/* TurboJPEG 1.4+ */
 DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle,
                                 const unsigned char **srcPlanes,
                                 const int *strides, int subsamp,
@@ -1450,7 +1507,7 @@
   if ((this->init & DECOMPRESS) == 0)
     THROW("tjDecodeYUVPlanes(): Instance has not been initialized for decompression");
 
-  if (!srcPlanes || !srcPlanes[0] || subsamp < 0 || subsamp >= NUMSUBOPT ||
+  if (!srcPlanes || !srcPlanes[0] || subsamp < 0 || subsamp >= TJ_NUMSAMP ||
       dstBuf == NULL || width <= 0 || pitch < 0 || height <= 0 ||
       pixelFormat < 0 || pixelFormat >= TJ_NUMPF)
     THROW("tjDecodeYUVPlanes(): Invalid argument");
@@ -1463,24 +1520,22 @@
   }
 
   if (pixelFormat == TJPF_CMYK)
-    THROW("tjDecodeYUVPlanes(): Cannot decode YUV images into CMYK pixels.");
+    THROW("tjDecodeYUVPlanes(): Cannot decode YUV images into packed-pixel CMYK images.");
 
   if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
   dinfo->image_width = width;
   dinfo->image_height = height;
 
 #ifndef NO_PUTENV
-  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
-  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
-  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+  if (flags & TJFLAG_FORCEMMX) PUTENV_S("JSIMD_FORCEMMX", "1");
+  else if (flags & TJFLAG_FORCESSE) PUTENV_S("JSIMD_FORCESSE", "1");
+  else if (flags & TJFLAG_FORCESSE2) PUTENV_S("JSIMD_FORCESSE2", "1");
 #endif
 
   dinfo->progressive_mode = dinfo->inputctl->has_multiple_scans = FALSE;
   dinfo->Ss = dinfo->Ah = dinfo->Al = 0;
   dinfo->Se = DCTSIZE2 - 1;
-  if (setDecodeDefaults(dinfo, pixelFormat, subsamp, flags) == -1) {
-    retval = -1;  goto bailout;
-  }
+  setDecodeDefaults(dinfo, pixelFormat, subsamp, flags);
   old_read_markers = dinfo->marker->read_markers;
   dinfo->marker->read_markers = my_read_markers;
   old_reset_marker_reader = dinfo->marker->reset_marker_reader;
@@ -1524,7 +1579,7 @@
       THROW("tjDecodeYUVPlanes(): Memory allocation failure");
     for (row = 0; row < compptr->v_samp_factor; row++) {
       unsigned char *_tmpbuf_aligned =
-        (unsigned char *)PAD((size_t)_tmpbuf[i], 32);
+        (unsigned char *)PAD((JUINTPTR)_tmpbuf[i], 32);
 
       tmpbuf[i][row] =
         &_tmpbuf_aligned[PAD(compptr->width_in_blocks * DCTSIZE, 32) * row];
@@ -1573,8 +1628,9 @@
   return retval;
 }
 
+/* TurboJPEG 1.4+ */
 DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf,
-                          int pad, int subsamp, unsigned char *dstBuf,
+                          int align, int subsamp, unsigned char *dstBuf,
                           int width, int pitch, int height, int pixelFormat,
                           int flags)
 {
@@ -1585,14 +1641,14 @@
   if (!this) THROWG("tjDecodeYUV(): Invalid handle");
   this->isInstanceError = FALSE;
 
-  if (srcBuf == NULL || pad < 0 || !IS_POW2(pad) || subsamp < 0 ||
-      subsamp >= NUMSUBOPT || width <= 0 || height <= 0)
+  if (srcBuf == NULL || align < 1 || !IS_POW2(align) || subsamp < 0 ||
+      subsamp >= TJ_NUMSAMP || width <= 0 || height <= 0)
     THROW("tjDecodeYUV(): Invalid argument");
 
   pw0 = tjPlaneWidth(0, width, subsamp);
   ph0 = tjPlaneHeight(0, height, subsamp);
   srcPlanes[0] = srcBuf;
-  strides[0] = PAD(pw0, pad);
+  strides[0] = PAD(pw0, align);
   if (subsamp == TJSAMP_GRAY) {
     strides[1] = strides[2] = 0;
     srcPlanes[1] = srcPlanes[2] = NULL;
@@ -1600,7 +1656,7 @@
     int pw1 = tjPlaneWidth(1, width, subsamp);
     int ph1 = tjPlaneHeight(1, height, subsamp);
 
-    strides[1] = strides[2] = PAD(pw1, pad);
+    strides[1] = strides[2] = PAD(pw1, align);
     srcPlanes[1] = srcPlanes[0] + strides[0] * ph0;
     srcPlanes[2] = srcPlanes[1] + strides[1] * ph1;
   }
@@ -1612,6 +1668,7 @@
   return retval;
 }
 
+/* TurboJPEG 1.4+ */
 DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle,
                                       const unsigned char *jpegBuf,
                                       unsigned long jpegSize,
@@ -1642,13 +1699,13 @@
     THROW("tjDecompressToYUVPlanes(): Invalid argument");
 
 #ifndef NO_PUTENV
-  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
-  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
-  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+  if (flags & TJFLAG_FORCEMMX) PUTENV_S("JSIMD_FORCEMMX", "1");
+  else if (flags & TJFLAG_FORCESSE) PUTENV_S("JSIMD_FORCESSE", "1");
+  else if (flags & TJFLAG_FORCESSE2) PUTENV_S("JSIMD_FORCESSE2", "1");
 #endif
 
   if (flags & TJFLAG_LIMITSCANS) {
-    MEMZERO(&progress, sizeof(struct my_progress_mgr));
+    memset(&progress, 0, sizeof(struct my_progress_mgr));
     progress.pub.progress_monitor = my_progress_monitor;
     progress.this = this;
     dinfo->progress = &progress.pub;
@@ -1745,7 +1802,7 @@
     for (i = 0; i < dinfo->num_components; i++) {
       jpeg_component_info *compptr = &dinfo->comp_info[i];
 
-      if (jpegSubsamp == TJ_420) {
+      if (jpegSubsamp == TJSAMP_420) {
         /* When 4:2:0 subsampling is used with IDCT scaling, libjpeg will try
            to be clever and use the IDCT to perform upsampling on the U and V
            planes.  For instance, if the output image is to be scaled by 1/2
@@ -1792,9 +1849,10 @@
   return retval;
 }
 
+/* TurboJPEG 1.4+ */
 DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf,
                                  unsigned long jpegSize, unsigned char *dstBuf,
-                                 int width, int pad, int height, int flags)
+                                 int width, int align, int height, int flags)
 {
   unsigned char *dstPlanes[3];
   int pw0, ph0, strides[3], retval = -1, jpegSubsamp = -1;
@@ -1804,7 +1862,7 @@
   this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE;
 
   if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || width < 0 ||
-      pad < 1 || !IS_POW2(pad) || height < 0)
+      align < 1 || !IS_POW2(align) || height < 0)
     THROW("tjDecompressToYUV2(): Invalid argument");
 
   if (setjmp(this->jerr.setjmp_buffer)) {
@@ -1821,7 +1879,6 @@
   jpegwidth = dinfo->image_width;  jpegheight = dinfo->image_height;
   if (width == 0) width = jpegwidth;
   if (height == 0) height = jpegheight;
-
   for (i = 0; i < NUMSF; i++) {
     scaledw = TJSCALED(jpegwidth, sf[i]);
     scaledh = TJSCALED(jpegheight, sf[i]);
@@ -1831,10 +1888,12 @@
   if (i >= NUMSF)
     THROW("tjDecompressToYUV2(): Could not scale down to desired image dimensions");
 
+  width = scaledw;  height = scaledh;
+
   pw0 = tjPlaneWidth(0, width, jpegSubsamp);
   ph0 = tjPlaneHeight(0, height, jpegSubsamp);
   dstPlanes[0] = dstBuf;
-  strides[0] = PAD(pw0, pad);
+  strides[0] = PAD(pw0, align);
   if (jpegSubsamp == TJSAMP_GRAY) {
     strides[1] = strides[2] = 0;
     dstPlanes[1] = dstPlanes[2] = NULL;
@@ -1842,7 +1901,7 @@
     int pw1 = tjPlaneWidth(1, width, jpegSubsamp);
     int ph1 = tjPlaneHeight(1, height, jpegSubsamp);
 
-    strides[1] = strides[2] = PAD(pw1, pad);
+    strides[1] = strides[2] = PAD(pw1, align);
     dstPlanes[1] = dstPlanes[0] + strides[0] * ph0;
     dstPlanes[2] = dstPlanes[1] + strides[1] * ph1;
   }
@@ -1856,6 +1915,7 @@
   return retval;
 }
 
+/* TurboJPEG 1.1+ */
 DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf,
                                 unsigned long jpegSize, unsigned char *dstBuf,
                                 int flags)
@@ -1864,20 +1924,21 @@
 }
 
 
-/* Transformer */
+/******************************** Transformer ********************************/
 
+/* TurboJPEG 1.2+ */
 DLLEXPORT tjhandle tjInitTransform(void)
 {
   tjinstance *this = NULL;
   tjhandle handle = NULL;
 
   if ((this = (tjinstance *)malloc(sizeof(tjinstance))) == NULL) {
-    snprintf(errStr, JMSG_LENGTH_MAX,
+    SNPRINTF(errStr, JMSG_LENGTH_MAX,
              "tjInitTransform(): Memory allocation failure");
     return NULL;
   }
-  MEMZERO(this, sizeof(tjinstance));
-  snprintf(this->errStr, JMSG_LENGTH_MAX, "No error");
+  memset(this, 0, sizeof(tjinstance));
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "No error");
   handle = _tjInitCompress(this);
   if (!handle) return NULL;
   handle = _tjInitDecompress(this);
@@ -1885,6 +1946,7 @@
 }
 
 
+/* TurboJPEG 1.2+ */
 DLLEXPORT int tjTransform(tjhandle handle, const unsigned char *jpegBuf,
                           unsigned long jpegSize, int n,
                           unsigned char **dstBufs, unsigned long *dstSizes,
@@ -1892,7 +1954,8 @@
 {
   jpeg_transform_info *xinfo = NULL;
   jvirt_barray_ptr *srccoefs, *dstcoefs;
-  int retval = 0, alloc = 1, i, jpegSubsamp, saveMarkers = 0;
+  int retval = 0, i, jpegSubsamp, saveMarkers = 0;
+  boolean alloc = TRUE;
   struct my_progress_mgr progress;
 
   GET_INSTANCE(handle);
@@ -1905,13 +1968,13 @@
     THROW("tjTransform(): Invalid argument");
 
 #ifndef NO_PUTENV
-  if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
-  else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
-  else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+  if (flags & TJFLAG_FORCEMMX) PUTENV_S("JSIMD_FORCEMMX", "1");
+  else if (flags & TJFLAG_FORCESSE) PUTENV_S("JSIMD_FORCESSE", "1");
+  else if (flags & TJFLAG_FORCESSE2) PUTENV_S("JSIMD_FORCESSE2", "1");
 #endif
 
   if (flags & TJFLAG_LIMITSCANS) {
-    MEMZERO(&progress, sizeof(struct my_progress_mgr));
+    memset(&progress, 0, sizeof(struct my_progress_mgr));
     progress.pub.progress_monitor = my_progress_monitor;
     progress.this = this;
     dinfo->progress = &progress.pub;
@@ -1921,7 +1984,7 @@
   if ((xinfo =
        (jpeg_transform_info *)malloc(sizeof(jpeg_transform_info) * n)) == NULL)
     THROW("tjTransform(): Memory allocation failure");
-  MEMZERO(xinfo, sizeof(jpeg_transform_info) * n);
+  memset(xinfo, 0, sizeof(jpeg_transform_info) * n);
 
   if (setjmp(this->jerr.setjmp_buffer)) {
     /* If we get here, the JPEG code has signaled an error. */
@@ -1965,12 +2028,12 @@
       THROW("tjTransform(): Transform is not perfect");
 
     if (xinfo[i].crop) {
-      if ((t[i].r.x % xinfo[i].iMCU_sample_width) != 0 ||
-          (t[i].r.y % xinfo[i].iMCU_sample_height) != 0) {
-        snprintf(this->errStr, JMSG_LENGTH_MAX,
+      if ((t[i].r.x % tjMCUWidth[jpegSubsamp]) != 0 ||
+          (t[i].r.y % tjMCUHeight[jpegSubsamp]) != 0) {
+        SNPRINTF(this->errStr, JMSG_LENGTH_MAX,
                  "To crop this JPEG image, x must be a multiple of %d\n"
                  "and y must be a multiple of %d.\n",
-                 xinfo[i].iMCU_sample_width, xinfo[i].iMCU_sample_height);
+                 tjMCUWidth[jpegSubsamp], tjMCUHeight[jpegSubsamp]);
         this->isInstanceError = TRUE;
         retval = -1;  goto bailout;
       }
@@ -1988,14 +2051,16 @@
       w = xinfo[i].crop_width;  h = xinfo[i].crop_height;
     }
     if (flags & TJFLAG_NOREALLOC) {
-      alloc = 0;  dstSizes[i] = tjBufSize(w, h, jpegSubsamp);
+      alloc = FALSE;  dstSizes[i] = tjBufSize(w, h, jpegSubsamp);
     }
     if (!(t[i].options & TJXOPT_NOOUTPUT))
       jpeg_mem_dest_tj(cinfo, &dstBufs[i], &dstSizes[i], alloc);
     jpeg_copy_critical_parameters(dinfo, cinfo);
     dstcoefs = jtransform_adjust_parameters(dinfo, cinfo, srccoefs, &xinfo[i]);
+#ifdef C_PROGRESSIVE_SUPPORTED
     if (flags & TJFLAG_PROGRESSIVE || t[i].options & TJXOPT_PROGRESSIVE)
       jpeg_simple_progression(cinfo);
+#endif
     if (!(t[i].options & TJXOPT_NOOUTPUT)) {
       jpeg_write_coefficients(cinfo, dstcoefs);
       jcopy_markers_execute(dinfo, cinfo, t[i].options & TJXOPT_COPYNONE ?
@@ -2009,13 +2074,13 @@
 
       for (ci = 0; ci < cinfo->num_components; ci++) {
         jpeg_component_info *compptr = &cinfo->comp_info[ci];
-        tjregion arrayRegion = {
-          0, 0, compptr->width_in_blocks * DCTSIZE, DCTSIZE
-        };
-        tjregion planeRegion = {
-          0, 0, compptr->width_in_blocks * DCTSIZE,
-          compptr->height_in_blocks * DCTSIZE
-        };
+        tjregion arrayRegion = { 0, 0, 0, 0 };
+        tjregion planeRegion = { 0, 0, 0, 0 };
+
+        arrayRegion.w = compptr->width_in_blocks * DCTSIZE;
+        arrayRegion.h = DCTSIZE;
+        planeRegion.w = compptr->width_in_blocks * DCTSIZE;
+        planeRegion.h = compptr->height_in_blocks * DCTSIZE;
 
         for (by = 0; by < compptr->height_in_blocks;
              by += compptr->v_samp_factor) {
@@ -2050,6 +2115,9 @@
 }
 
 
+/*************************** Packed-Pixel Image I/O **************************/
+
+/* TurboJPEG 2.0+ */
 DLLEXPORT unsigned char *tjLoadImage(const char *filename, int *width,
                                      int align, int *height, int *pixelFormat,
                                      int flags)
@@ -2074,7 +2142,11 @@
   this = (tjinstance *)handle;
   cinfo = &this->cinfo;
 
+#ifdef _MSC_VER
+  if (fopen_s(&file, filename, "rb") || file == NULL)
+#else
   if ((file = fopen(filename, "rb")) == NULL)
+#endif
     THROW_UNIX("tjLoadImage(): Cannot open input file");
 
   if ((tempc = getc(file)) < 0 || ungetc(tempc, file) == EOF)
@@ -2095,7 +2167,7 @@
     invert = (flags & TJFLAG_BOTTOMUP) == 0;
   } else if (tempc == 'P') {
     if ((src = jinit_read_ppm(cinfo)) == NULL)
-      THROWG("tjLoadImage(): Could not initialize bitmap loader");
+      THROWG("tjLoadImage(): Could not initialize PPM loader");
     invert = (flags & TJFLAG_BOTTOMUP) != 0;
   } else
     THROWG("tjLoadImage(): Unsupported file type");
@@ -2148,6 +2220,7 @@
 }
 
 
+/* TurboJPEG 2.0+ */
 DLLEXPORT int tjSaveImage(const char *filename, unsigned char *buffer,
                           int width, int pitch, int height, int pixelFormat,
                           int flags)
@@ -2170,7 +2243,11 @@
   this = (tjinstance *)handle;
   dinfo = &this->dinfo;
 
+#ifdef _MSC_VER
+  if (fopen_s(&file, filename, "wb") || file == NULL)
+#else
   if ((file = fopen(filename, "wb")) == NULL)
+#endif
     THROW_UNIX("tjSaveImage(): Cannot open output file");
 
   if (setjmp(this->jerr.setjmp_buffer)) {
diff --git a/turbojpeg.h b/turbojpeg.h
index c2f6b51..1f8756a 100644
--- a/turbojpeg.h
+++ b/turbojpeg.h
@@ -1,6 +1,6 @@
 /*
- * Copyright (C)2009-2015, 2017, 2020-2021 D. R. Commander.
- *                                         All Rights Reserved.
+ * Copyright (C)2009-2015, 2017, 2020-2021, 2023 D. R. Commander.
+ *                                               All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -54,23 +54,24 @@
  * Each plane is simply a 2D array of bytes, each byte representing the value
  * of one of the components (Y, Cb, or Cr) at a particular location in the
  * image.  The width and height of each plane are determined by the image
- * width, height, and level of chrominance subsampling.   The luminance plane
+ * width, height, and level of chrominance subsampling.  The luminance plane
  * width is the image width padded to the nearest multiple of the horizontal
- * subsampling factor (2 in the case of 4:2:0 and 4:2:2, 4 in the case of
- * 4:1:1, 1 in the case of 4:4:4 or grayscale.)  Similarly, the luminance plane
- * height is the image height padded to the nearest multiple of the vertical
- * subsampling factor (2 in the case of 4:2:0 or 4:4:0, 1 in the case of 4:4:4
- * or grayscale.)  This is irrespective of any additional padding that may be
- * specified as an argument to the various YUV functions.  The chrominance
- * plane width is equal to the luminance plane width divided by the horizontal
- * subsampling factor, and the chrominance plane height is equal to the
- * luminance plane height divided by the vertical subsampling factor.
+ * subsampling factor (1 in the case of 4:4:4, grayscale, or 4:4:0; 2 in the
+ * case of 4:2:2 or 4:2:0; 4 in the case of 4:1:1.)  Similarly, the luminance
+ * plane height is the image height padded to the nearest multiple of the
+ * vertical subsampling factor (1 in the case of 4:4:4, 4:2:2, grayscale, or
+ * 4:1:1; 2 in the case of 4:2:0 or 4:4:0.)  This is irrespective of any
+ * additional padding that may be specified as an argument to the various YUV
+ * functions.  The chrominance plane width is equal to the luminance plane
+ * width divided by the horizontal subsampling factor, and the chrominance
+ * plane height is equal to the luminance plane height divided by the vertical
+ * subsampling factor.
  *
  * For example, if the source image is 35 x 35 pixels and 4:2:2 subsampling is
  * used, then the luminance plane would be 36 x 35 bytes, and each of the
- * chrominance planes would be 18 x 35 bytes.  If you specify a line padding of
- * 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes, and
- * each of the chrominance planes would be 20 x 35 bytes.
+ * chrominance planes would be 18 x 35 bytes.  If you specify a row alignment
+ * of 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes,
+ * and each of the chrominance planes would be 20 x 35 bytes.
  *
  * @{
  */
@@ -86,8 +87,8 @@
  * When pixels are converted from RGB to YCbCr (see #TJCS_YCbCr) or from CMYK
  * to YCCK (see #TJCS_YCCK) as part of the JPEG compression process, some of
  * the Cb and Cr (chrominance) components can be discarded or averaged together
- * to produce a smaller image with little perceptible loss of image clarity
- * (the human eye is more sensitive to small changes in brightness than to
+ * to produce a smaller image with little perceptible loss of image clarity.
+ * (The human eye is more sensitive to small changes in brightness than to
  * small changes in color.)  This is called "chrominance subsampling".
  */
 enum TJSAMP {
@@ -245,8 +246,8 @@
    * vice versa, but the mapping is typically not 1:1 or reversible, nor can it
    * be defined with a simple formula.  Thus, such a conversion is out of scope
    * for a codec library.  However, the TurboJPEG API allows for compressing
-   * CMYK pixels into a YCCK JPEG image (see #TJCS_YCCK) and decompressing YCCK
-   * JPEG images into CMYK pixels.
+   * packed-pixel CMYK images into YCCK JPEG images (see #TJCS_YCCK) and
+   * decompressing YCCK JPEG images into packed-pixel CMYK images.
    */
   TJPF_CMYK,
   /**
@@ -258,9 +259,10 @@
 /**
  * Red offset (in bytes) for a given pixel format.  This specifies the number
  * of bytes that the red component is offset from the start of the pixel.  For
- * instance, if a pixel of format TJ_BGRX is stored in <tt>char pixel[]</tt>,
- * then the red component will be <tt>pixel[tjRedOffset[TJ_BGRX]]</tt>.  This
- * will be -1 if the pixel format does not have a red component.
+ * instance, if a pixel of format TJPF_BGRX is stored in
+ * `unsigned char pixel[]`, then the red component will be
+ * `pixel[tjRedOffset[TJPF_BGRX]]`.  This will be -1 if the pixel format does
+ * not have a red component.
  */
 static const int tjRedOffset[TJ_NUMPF] = {
   0, 2, 0, 2, 3, 1, -1, 0, 2, 3, 1, -1
@@ -268,31 +270,32 @@
 /**
  * Green offset (in bytes) for a given pixel format.  This specifies the number
  * of bytes that the green component is offset from the start of the pixel.
- * For instance, if a pixel of format TJ_BGRX is stored in
- * <tt>char pixel[]</tt>, then the green component will be
- * <tt>pixel[tjGreenOffset[TJ_BGRX]]</tt>.  This will be -1 if the pixel format
- * does not have a green component.
+ * For instance, if a pixel of format TJPF_BGRX is stored in
+ * `unsigned char pixel[]`, then the green component will be
+ * `pixel[tjGreenOffset[TJPF_BGRX]]`.  This will be -1 if the pixel format does
+ * not have a green component.
  */
 static const int tjGreenOffset[TJ_NUMPF] = {
   1, 1, 1, 1, 2, 2, -1, 1, 1, 2, 2, -1
 };
 /**
  * Blue offset (in bytes) for a given pixel format.  This specifies the number
- * of bytes that the Blue component is offset from the start of the pixel.  For
- * instance, if a pixel of format TJ_BGRX is stored in <tt>char pixel[]</tt>,
- * then the blue component will be <tt>pixel[tjBlueOffset[TJ_BGRX]]</tt>.  This
- * will be -1 if the pixel format does not have a blue component.
+ * of bytes that the blue component is offset from the start of the pixel.  For
+ * instance, if a pixel of format TJPF_BGRX is stored in
+ * `unsigned char pixel[]`, then the blue component will be
+ * `pixel[tjBlueOffset[TJPF_BGRX]]`.  This will be -1 if the pixel format does
+ * not have a blue component.
  */
 static const int tjBlueOffset[TJ_NUMPF] = {
   2, 0, 2, 0, 1, 3, -1, 2, 0, 1, 3, -1
 };
 /**
  * Alpha offset (in bytes) for a given pixel format.  This specifies the number
- * of bytes that the Alpha component is offset from the start of the pixel.
- * For instance, if a pixel of format TJ_BGRA is stored in
- * <tt>char pixel[]</tt>, then the alpha component will be
- * <tt>pixel[tjAlphaOffset[TJ_BGRA]]</tt>.  This will be -1 if the pixel format
- * does not have an alpha component.
+ * of bytes that the alpha component is offset from the start of the pixel.
+ * For instance, if a pixel of format TJPF_BGRA is stored in
+ * `unsigned char pixel[]`, then the alpha component will be
+ * `pixel[tjAlphaOffset[TJPF_BGRA]]`.  This will be -1 if the pixel format does
+ * not have an alpha component.
  */
 static const int tjAlphaOffset[TJ_NUMPF] = {
   -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1
@@ -318,8 +321,9 @@
    * RGB colorspace.  When compressing the JPEG image, the R, G, and B
    * components in the source image are reordered into image planes, but no
    * colorspace conversion or subsampling is performed.  RGB JPEG images can be
-   * decompressed to any of the extended RGB pixel formats or grayscale, but
-   * they cannot be decompressed to YUV images.
+   * decompressed to packed-pixel images with any of the extended RGB or
+   * grayscale pixel formats, but they cannot be decompressed to planar YUV
+   * images.
    */
   TJCS_RGB = 0,
   /**
@@ -332,25 +336,27 @@
    * original image.  Originally, the analog equivalent of this transformation
    * allowed the same signal to drive both black & white and color televisions,
    * but JPEG images use YCbCr primarily because it allows the color data to be
-   * optionally subsampled for the purposes of reducing bandwidth or disk
-   * space.  YCbCr is the most common JPEG colorspace, and YCbCr JPEG images
-   * can be compressed from and decompressed to any of the extended RGB pixel
-   * formats or grayscale, or they can be decompressed to YUV planar images.
+   * optionally subsampled for the purposes of reducing network or disk usage.
+   * YCbCr is the most common JPEG colorspace, and YCbCr JPEG images can be
+   * compressed from and decompressed to packed-pixel images with any of the
+   * extended RGB or grayscale pixel formats.  YCbCr JPEG images can also be
+   * compressed from and decompressed to planar YUV images.
    */
   TJCS_YCbCr,
   /**
    * Grayscale colorspace.  The JPEG image retains only the luminance data (Y
    * component), and any color data from the source image is discarded.
-   * Grayscale JPEG images can be compressed from and decompressed to any of
-   * the extended RGB pixel formats or grayscale, or they can be decompressed
-   * to YUV planar images.
+   * Grayscale JPEG images can be compressed from and decompressed to
+   * packed-pixel images with any of the extended RGB or grayscale pixel
+   * formats, or they can be compressed from and decompressed to planar YUV
+   * images.
    */
   TJCS_GRAY,
   /**
    * CMYK colorspace.  When compressing the JPEG image, the C, M, Y, and K
    * components in the source image are reordered into image planes, but no
    * colorspace conversion or subsampling is performed.  CMYK JPEG images can
-   * only be decompressed to CMYK pixels.
+   * only be decompressed to packed-pixel images with the CMYK pixel format.
    */
   TJCS_CMYK,
   /**
@@ -360,56 +366,54 @@
    * reversibly transformed into YCCK, and as with YCbCr, the chrominance
    * components in the YCCK pixels can be subsampled without incurring major
    * perceptual loss.  YCCK JPEG images can only be compressed from and
-   * decompressed to CMYK pixels.
+   * decompressed to packed-pixel images with the CMYK pixel format.
    */
   TJCS_YCCK
 };
 
 
 /**
- * The uncompressed source/destination image is stored in bottom-up (Windows,
- * OpenGL) order, not top-down (X11) order.
+ * Rows in the packed-pixel source/destination image are stored in bottom-up
+ * (Windows, OpenGL) order rather than in top-down (X11) order.
  */
 #define TJFLAG_BOTTOMUP  2
 /**
  * When decompressing an image that was compressed using chrominance
- * subsampling, use the fastest chrominance upsampling algorithm available in
- * the underlying codec.  The default is to use smooth upsampling, which
- * creates a smooth transition between neighboring chrominance components in
- * order to reduce upsampling artifacts in the decompressed image.
+ * subsampling, use the fastest chrominance upsampling algorithm available.
+ * The default is to use smooth upsampling, which creates a smooth transition
+ * between neighboring chrominance components in order to reduce upsampling
+ * artifacts in the decompressed image.
  */
 #define TJFLAG_FASTUPSAMPLE  256
 /**
- * Disable buffer (re)allocation.  If passed to one of the JPEG compression or
- * transform functions, this flag will cause those functions to generate an
- * error if the JPEG image buffer is invalid or too small rather than
- * attempting to allocate or reallocate that buffer.  This reproduces the
- * behavior of earlier versions of TurboJPEG.
+ * Disable JPEG buffer (re)allocation.  If passed to one of the JPEG
+ * compression or transform functions, this flag will cause those functions to
+ * generate an error if the JPEG destination buffer is invalid or too small,
+ * rather than attempt to allocate or reallocate that buffer.
  */
 #define TJFLAG_NOREALLOC  1024
 /**
- * Use the fastest DCT/IDCT algorithm available in the underlying codec.  The
- * default if this flag is not specified is implementation-specific.  For
- * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast
- * algorithm by default when compressing, because this has been shown to have
- * only a very slight effect on accuracy, but it uses the accurate algorithm
- * when decompressing, because this has been shown to have a larger effect.
+ * Use the fastest DCT/IDCT algorithm available.  The default if this flag is
+ * not specified is implementation-specific.  For example, the implementation
+ * of the TurboJPEG API in libjpeg-turbo uses the fast algorithm by default
+ * when compressing, because this has been shown to have only a very slight
+ * effect on accuracy, but it uses the accurate algorithm when decompressing,
+ * because this has been shown to have a larger effect.
  */
 #define TJFLAG_FASTDCT  2048
 /**
- * Use the most accurate DCT/IDCT algorithm available in the underlying codec.
- * The default if this flag is not specified is implementation-specific.  For
- * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast
- * algorithm by default when compressing, because this has been shown to have
- * only a very slight effect on accuracy, but it uses the accurate algorithm
- * when decompressing, because this has been shown to have a larger effect.
+ * Use the most accurate DCT/IDCT algorithm available.  The default if this
+ * flag is not specified is implementation-specific.  For example, the
+ * implementation of the TurboJPEG API in libjpeg-turbo uses the fast algorithm
+ * by default when compressing, because this has been shown to have only a very
+ * slight effect on accuracy, but it uses the accurate algorithm when
+ * decompressing, because this has been shown to have a larger effect.
  */
 #define TJFLAG_ACCURATEDCT  4096
 /**
  * Immediately discontinue the current compression/decompression/transform
- * operation if the underlying codec throws a warning (non-fatal error).  The
- * default behavior is to allow the operation to complete unless a fatal error
- * is encountered.
+ * operation if a warning (non-fatal error) occurs.  The default behavior is to
+ * allow the operation to complete unless a fatal error is encountered.
  */
 #define TJFLAG_STOPONWARNING  8192
 /**
@@ -441,8 +445,8 @@
  */
 enum TJERR {
   /**
-   * The error was non-fatal and recoverable, but the image may still be
-   * corrupt.
+   * The error was non-fatal and recoverable, but the destination image may
+   * still be corrupt.
    */
   TJERR_WARNING = 0,
   /**
@@ -509,9 +513,9 @@
 /**
  * This option will cause #tjTransform() to return an error if the transform is
  * not perfect.  Lossless transforms operate on MCU blocks, whose size depends
- * on the level of chrominance subsampling used (see #tjMCUWidth
- * and #tjMCUHeight.)  If the image's width or height is not evenly divisible
- * by the MCU block size, then there will be partial MCU blocks on the right
+ * on the level of chrominance subsampling used (see #tjMCUWidth and
+ * #tjMCUHeight.)  If the image's width or height is not evenly divisible by
+ * the MCU block size, then there will be partial MCU blocks on the right
  * and/or bottom edges.  It is not possible to move these partial MCU blocks to
  * the top or left of the image, so any transform that would require that is
  * "imperfect."  If this option is not specified, then any partial MCU blocks
@@ -530,29 +534,28 @@
  */
 #define TJXOPT_CROP  4
 /**
- * This option will discard the color data in the input image and produce
- * a grayscale output image.
+ * This option will discard the color data in the source image and produce a
+ * grayscale destination image.
  */
 #define TJXOPT_GRAY  8
 /**
  * This option will prevent #tjTransform() from outputting a JPEG image for
- * this particular transform (this can be used in conjunction with a custom
+ * this particular transform.  (This can be used in conjunction with a custom
  * filter to capture the transformed DCT coefficients without transcoding
  * them.)
  */
 #define TJXOPT_NOOUTPUT  16
 /**
- * This option will enable progressive entropy coding in the output image
+ * This option will enable progressive entropy coding in the JPEG image
  * generated by this particular transform.  Progressive entropy coding will
  * generally improve compression relative to baseline entropy coding (the
- * default), but it will reduce compression and decompression performance
- * considerably.
+ * default), but it will reduce decompression performance considerably.
  */
 #define TJXOPT_PROGRESSIVE  32
 /**
  * This option will prevent #tjTransform() from copying any extra markers
- * (including EXIF and ICC profile data) from the source image to the output
- * image.
+ * (including EXIF and ICC profile data) from the source image to the
+ * destination image.
  */
 #define TJXOPT_COPYNONE  64
 
@@ -586,12 +589,12 @@
    */
   int y;
   /**
-   * The width of the cropping region. Setting this to 0 is the equivalent of
+   * The width of the cropping region.  Setting this to 0 is the equivalent of
    * setting it to the width of the source JPEG image - x.
    */
   int w;
   /**
-   * The height of the cropping region. Setting this to 0 is the equivalent of
+   * The height of the cropping region.  Setting this to 0 is the equivalent of
    * setting it to the height of the source JPEG image - y.
    */
   int h;
@@ -610,7 +613,8 @@
    */
   int op;
   /**
-   * The bitwise OR of one of more of the @ref TJXOPT_CROP "transform options"
+   * The bitwise OR of one or more of the @ref TJXOPT_COPYNONE
+   * "transform options"
    */
   int options;
   /**
@@ -619,10 +623,10 @@
    */
   void *data;
   /**
-   * A callback function that can be used to modify the DCT coefficients
-   * after they are losslessly transformed but before they are transcoded to a
-   * new JPEG image.  This allows for custom filters or other transformations
-   * to be applied in the frequency domain.
+   * A callback function that can be used to modify the DCT coefficients after
+   * they are losslessly transformed but before they are transcoded to a new
+   * JPEG image.  This allows for custom filters or other transformations to be
+   * applied in the frequency domain.
    *
    * @param coeffs pointer to an array of transformed DCT coefficients.  (NOTE:
    * this pointer is not guaranteed to be valid once the callback returns, so
@@ -630,21 +634,21 @@
    * or library should make a copy of them within the body of the callback.)
    *
    * @param arrayRegion #tjregion structure containing the width and height of
-   * the array pointed to by <tt>coeffs</tt> as well as its offset relative to
-   * the component plane.  TurboJPEG implementations may choose to split each
+   * the array pointed to by `coeffs` as well as its offset relative to the
+   * component plane.  TurboJPEG implementations may choose to split each
    * component plane into multiple DCT coefficient arrays and call the callback
    * function once for each array.
    *
    * @param planeRegion #tjregion structure containing the width and height of
-   * the component plane to which <tt>coeffs</tt> belongs
+   * the component plane to which `coeffs` belongs
    *
-   * @param componentID ID number of the component plane to which
-   * <tt>coeffs</tt> belongs (Y, Cb, and Cr have, respectively, ID's of 0, 1,
-   * and 2 in typical JPEG images.)
+   * @param componentID ID number of the component plane to which `coeffs`
+   * belongs.  (Y, Cb, and Cr have, respectively, IDs of 0, 1, and 2 in
+   * typical JPEG images.)
    *
-   * @param transformID ID number of the transformed image to which
-   * <tt>coeffs</tt> belongs.  This is the same as the index of the transform
-   * in the <tt>transforms</tt> array that was passed to #tjTransform().
+   * @param transformID ID number of the transformed image to which `coeffs`
+   * belongs.  This is the same as the index of the transform in the
+   * `transforms` array that was passed to #tjTransform().
    *
    * @param transform a pointer to a #tjtransform structure that specifies the
    * parameters and/or cropping region for this transform
@@ -663,17 +667,17 @@
 
 
 /**
- * Pad the given width to the nearest 32-bit boundary
+ * Pad the given width to the nearest multiple of 4
  */
 #define TJPAD(width)  (((width) + 3) & (~3))
 
 /**
- * Compute the scaled value of <tt>dimension</tt> using the given scaling
- * factor.  This macro performs the integer equivalent of <tt>ceil(dimension *
- * scalingFactor)</tt>.
+ * Compute the scaled value of `dimension` using the given scaling factor.
+ * This macro performs the integer equivalent of `ceil(dimension *
+ * scalingFactor)`.
  */
 #define TJSCALED(dimension, scalingFactor) \
-  ((dimension * scalingFactor.num + scalingFactor.denom - 1) / \
+  (((dimension) * scalingFactor.num + scalingFactor.denom - 1) / \
    scalingFactor.denom)
 
 
@@ -685,27 +689,27 @@
 /**
  * Create a TurboJPEG compressor instance.
  *
- * @return a handle to the newly-created instance, or NULL if an error
- * occurred (see #tjGetErrorStr2().)
+ * @return a handle to the newly-created instance, or NULL if an error occurred
+ * (see #tjGetErrorStr2().)
  */
 DLLEXPORT tjhandle tjInitCompress(void);
 
 
 /**
- * Compress an RGB, grayscale, or CMYK image into a JPEG image.
+ * Compress a packed-pixel RGB, grayscale, or CMYK image into a JPEG image.
  *
  * @param handle a handle to a TurboJPEG compressor or transformer instance
  *
- * @param srcBuf pointer to an image buffer containing RGB, grayscale, or
- * CMYK pixels to be compressed
+ * @param srcBuf pointer to a buffer containing a packed-pixel RGB, grayscale,
+ * or CMYK source image to be compressed
  *
  * @param width width (in pixels) of the source image
  *
- * @param pitch bytes per line in the source image.  Normally, this should be
- * <tt>width * #tjPixelSize[pixelFormat]</tt> if the image is unpadded, or
- * <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each line of the image
- * is padded to the nearest 32-bit boundary, as is the case for Windows
- * bitmaps.  You can also be clever and use this parameter to skip lines, etc.
+ * @param pitch bytes per row in the source image.  Normally this should be
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>, if the image is unpadded, or
+ * <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each row of the image
+ * is padded to the nearest multiple of 4 bytes, as is the case for Windows
+ * bitmaps.  You can also be clever and use this parameter to skip rows, etc.
  * Setting this parameter to 0 is the equivalent of setting it to
  * <tt>width * #tjPixelSize[pixelFormat]</tt>.
  *
@@ -714,29 +718,28 @@
  * @param pixelFormat pixel format of the source image (see @ref TJPF
  * "Pixel formats".)
  *
- * @param jpegBuf address of a pointer to an image buffer that will receive the
- * JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer
- * to accommodate the size of the JPEG image.  Thus, you can choose to:
+ * @param jpegBuf address of a pointer to a byte buffer that will receive the
+ * JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer to
+ * accommodate the size of the JPEG image.  Thus, you can choose to:
  * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and
  * let TurboJPEG grow the buffer as needed,
- * -# set <tt>*jpegBuf</tt> to NULL to tell TurboJPEG to allocate the buffer
- * for you, or
+ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you,
+ * or
  * -# pre-allocate the buffer to a "worst case" size determined by calling
  * #tjBufSize().  This should ensure that the buffer never has to be
- * re-allocated (setting #TJFLAG_NOREALLOC guarantees that it won't be.)
+ * re-allocated.  (Setting #TJFLAG_NOREALLOC guarantees that it won't be.)
  * .
- * If you choose option 1, <tt>*jpegSize</tt> should be set to the size of your
+ * If you choose option 1, then `*jpegSize` should be set to the size of your
  * pre-allocated buffer.  In any case, unless you have set #TJFLAG_NOREALLOC,
- * you should always check <tt>*jpegBuf</tt> upon return from this function, as
- * it may have changed.
+ * you should always check `*jpegBuf` upon return from this function, as it may
+ * have changed.
  *
  * @param jpegSize pointer to an unsigned long variable that holds the size of
- * the JPEG image buffer.  If <tt>*jpegBuf</tt> points to a pre-allocated
- * buffer, then <tt>*jpegSize</tt> should be set to the size of the buffer.
- * Upon return, <tt>*jpegSize</tt> will contain the size of the JPEG image (in
- * bytes.)  If <tt>*jpegBuf</tt> points to a JPEG image buffer that is being
- * reused from a previous call to one of the JPEG compression functions, then
- * <tt>*jpegSize</tt> is ignored.
+ * the JPEG buffer.  If `*jpegBuf` points to a pre-allocated buffer, then
+ * `*jpegSize` should be set to the size of the buffer.  Upon return,
+ * `*jpegSize` will contain the size of the JPEG image (in bytes.)  If
+ * `*jpegBuf` points to a JPEG buffer that is being reused from a previous call
+ * to one of the JPEG compression functions, then `*jpegSize` is ignored.
  *
  * @param jpegSubsamp the level of chrominance subsampling to be used when
  * generating the JPEG image (see @ref TJSAMP
@@ -750,7 +753,7 @@
  *
  * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2()
  * and #tjGetErrorCode().)
-*/
+ */
 DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf,
                           int width, int pitch, int height, int pixelFormat,
                           unsigned char **jpegBuf, unsigned long *jpegSize,
@@ -758,55 +761,55 @@
 
 
 /**
- * Compress a YUV planar image into a JPEG image.
+ * Compress a unified planar YUV image into a JPEG image.
  *
  * @param handle a handle to a TurboJPEG compressor or transformer instance
  *
- * @param srcBuf pointer to an image buffer containing a YUV planar image to be
- * compressed.  The size of this buffer should match the value returned by
- * #tjBufSizeYUV2() for the given image width, height, padding, and level of
- * chrominance subsampling.  The Y, U (Cb), and V (Cr) image planes should be
- * stored sequentially in the source buffer (refer to @ref YUVnotes
- * "YUV Image Format Notes".)
+ * @param srcBuf pointer to a buffer containing a unified planar YUV source
+ * image to be compressed.  The size of this buffer should match the value
+ * returned by #tjBufSizeYUV2() for the given image width, height, row
+ * alignment, and level of chrominance subsampling.  The Y, U (Cb), and V (Cr)
+ * image planes should be stored sequentially in the buffer.  (Refer to
+ * @ref YUVnotes "YUV Image Format Notes".)
  *
  * @param width width (in pixels) of the source image.  If the width is not an
  * even multiple of the MCU block width (see #tjMCUWidth), then an intermediate
- * buffer copy will be performed within TurboJPEG.
+ * buffer copy will be performed.
  *
- * @param pad the line padding used in the source image.  For instance, if each
- * line in each plane of the YUV image is padded to the nearest multiple of 4
- * bytes, then <tt>pad</tt> should be set to 4.
+ * @param align row alignment (in bytes) of the source image (must be a power
+ * of 2.)  Setting this parameter to n indicates that each row in each plane of
+ * the source image is padded to the nearest multiple of n bytes
+ * (1 = unpadded.)
  *
  * @param height height (in pixels) of the source image.  If the height is not
  * an even multiple of the MCU block height (see #tjMCUHeight), then an
- * intermediate buffer copy will be performed within TurboJPEG.
+ * intermediate buffer copy will be performed.
  *
- * @param subsamp the level of chrominance subsampling used in the source
- * image (see @ref TJSAMP "Chrominance subsampling options".)
+ * @param subsamp the level of chrominance subsampling used in the source image
+ * (see @ref TJSAMP "Chrominance subsampling options".)
  *
- * @param jpegBuf address of a pointer to an image buffer that will receive the
+ * @param jpegBuf address of a pointer to a byte buffer that will receive the
  * JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer to
  * accommodate the size of the JPEG image.  Thus, you can choose to:
  * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and
  * let TurboJPEG grow the buffer as needed,
- * -# set <tt>*jpegBuf</tt> to NULL to tell TurboJPEG to allocate the buffer
- * for you, or
+ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you,
+ * or
  * -# pre-allocate the buffer to a "worst case" size determined by calling
  * #tjBufSize().  This should ensure that the buffer never has to be
- * re-allocated (setting #TJFLAG_NOREALLOC guarantees that it won't be.)
+ * re-allocated.  (Setting #TJFLAG_NOREALLOC guarantees that it won't be.)
  * .
- * If you choose option 1, <tt>*jpegSize</tt> should be set to the size of your
+ * If you choose option 1, then `*jpegSize` should be set to the size of your
  * pre-allocated buffer.  In any case, unless you have set #TJFLAG_NOREALLOC,
- * you should always check <tt>*jpegBuf</tt> upon return from this function, as
- * it may have changed.
+ * you should always check `*jpegBuf` upon return from this function, as it may
+ * have changed.
  *
  * @param jpegSize pointer to an unsigned long variable that holds the size of
- * the JPEG image buffer.  If <tt>*jpegBuf</tt> points to a pre-allocated
- * buffer, then <tt>*jpegSize</tt> should be set to the size of the buffer.
- * Upon return, <tt>*jpegSize</tt> will contain the size of the JPEG image (in
- * bytes.)  If <tt>*jpegBuf</tt> points to a JPEG image buffer that is being
- * reused from a previous call to one of the JPEG compression functions, then
- * <tt>*jpegSize</tt> is ignored.
+ * the JPEG buffer.  If `*jpegBuf` points to a pre-allocated buffer, then
+ * `*jpegSize` should be set to the size of the buffer.  Upon return,
+ * `*jpegSize` will contain the size of the JPEG image (in bytes.)  If
+ * `*jpegBuf` points to a JPEG buffer that is being reused from a previous call
+ * to one of the JPEG compression functions, then `*jpegSize` is ignored.
  *
  * @param jpegQual the image quality of the generated JPEG image (1 = worst,
  * 100 = best)
@@ -816,9 +819,9 @@
  *
  * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2()
  * and #tjGetErrorCode().)
-*/
+ */
 DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf,
-                                int width, int pad, int height, int subsamp,
+                                int width, int align, int height, int subsamp,
                                 unsigned char **jpegBuf,
                                 unsigned long *jpegSize, int jpegQual,
                                 int flags);
@@ -831,55 +834,54 @@
  *
  * @param srcPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes
  * (or just a Y plane, if compressing a grayscale image) that contain a YUV
- * image to be compressed.  These planes can be contiguous or non-contiguous in
- * memory.  The size of each plane should match the value returned by
- * #tjPlaneSizeYUV() for the given image width, height, strides, and level of
- * chrominance subsampling.  Refer to @ref YUVnotes "YUV Image Format Notes"
- * for more details.
+ * source image to be compressed.  These planes can be contiguous or
+ * non-contiguous in memory.  The size of each plane should match the value
+ * returned by #tjPlaneSizeYUV() for the given image width, height, strides,
+ * and level of chrominance subsampling.  Refer to @ref YUVnotes
+ * "YUV Image Format Notes" for more details.
  *
  * @param width width (in pixels) of the source image.  If the width is not an
  * even multiple of the MCU block width (see #tjMCUWidth), then an intermediate
- * buffer copy will be performed within TurboJPEG.
+ * buffer copy will be performed.
  *
  * @param strides an array of integers, each specifying the number of bytes per
- * line in the corresponding plane of the YUV source image.  Setting the stride
+ * row in the corresponding plane of the YUV source image.  Setting the stride
  * for any plane to 0 is the same as setting it to the plane width (see
- * @ref YUVnotes "YUV Image Format Notes".)  If <tt>strides</tt> is NULL, then
- * the strides for all planes will be set to their respective plane widths.
- * You can adjust the strides in order to specify an arbitrary amount of line
+ * @ref YUVnotes "YUV Image Format Notes".)  If `strides` is NULL, then the
+ * strides for all planes will be set to their respective plane widths.  You
+ * can adjust the strides in order to specify an arbitrary amount of row
  * padding in each plane or to create a JPEG image from a subregion of a larger
- * YUV planar image.
+ * planar YUV image.
  *
  * @param height height (in pixels) of the source image.  If the height is not
  * an even multiple of the MCU block height (see #tjMCUHeight), then an
- * intermediate buffer copy will be performed within TurboJPEG.
+ * intermediate buffer copy will be performed.
  *
- * @param subsamp the level of chrominance subsampling used in the source
- * image (see @ref TJSAMP "Chrominance subsampling options".)
+ * @param subsamp the level of chrominance subsampling used in the source image
+ * (see @ref TJSAMP "Chrominance subsampling options".)
  *
- * @param jpegBuf address of a pointer to an image buffer that will receive the
+ * @param jpegBuf address of a pointer to a byte buffer that will receive the
  * JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer to
  * accommodate the size of the JPEG image.  Thus, you can choose to:
  * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and
  * let TurboJPEG grow the buffer as needed,
- * -# set <tt>*jpegBuf</tt> to NULL to tell TurboJPEG to allocate the buffer
- * for you, or
+ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you,
+ * or
  * -# pre-allocate the buffer to a "worst case" size determined by calling
  * #tjBufSize().  This should ensure that the buffer never has to be
- * re-allocated (setting #TJFLAG_NOREALLOC guarantees that it won't be.)
+ * re-allocated.  (Setting #TJFLAG_NOREALLOC guarantees that it won't be.)
  * .
- * If you choose option 1, <tt>*jpegSize</tt> should be set to the size of your
+ * If you choose option 1, then `*jpegSize` should be set to the size of your
  * pre-allocated buffer.  In any case, unless you have set #TJFLAG_NOREALLOC,
- * you should always check <tt>*jpegBuf</tt> upon return from this function, as
- * it may have changed.
+ * you should always check `*jpegBuf` upon return from this function, as it may
+ * have changed.
  *
  * @param jpegSize pointer to an unsigned long variable that holds the size of
- * the JPEG image buffer.  If <tt>*jpegBuf</tt> points to a pre-allocated
- * buffer, then <tt>*jpegSize</tt> should be set to the size of the buffer.
- * Upon return, <tt>*jpegSize</tt> will contain the size of the JPEG image (in
- * bytes.)  If <tt>*jpegBuf</tt> points to a JPEG image buffer that is being
- * reused from a previous call to one of the JPEG compression functions, then
- * <tt>*jpegSize</tt> is ignored.
+ * the JPEG buffer.  If `*jpegBuf` points to a pre-allocated buffer, then
+ * `*jpegSize` should be set to the size of the buffer.  Upon return,
+ * `*jpegSize` will contain the size of the JPEG image (in bytes.)  If
+ * `*jpegBuf` points to a JPEG buffer that is being reused from a previous call
+ * to one of the JPEG compression functions, then `*jpegSize` is ignored.
  *
  * @param jpegQual the image quality of the generated JPEG image (1 = worst,
  * 100 = best)
@@ -889,7 +891,7 @@
  *
  * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2()
  * and #tjGetErrorCode().)
-*/
+ */
 DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle,
                                       const unsigned char **srcPlanes,
                                       int width, const int *strides,
@@ -903,11 +905,11 @@
  * The maximum size of the buffer (in bytes) required to hold a JPEG image with
  * the given parameters.  The number of bytes returned by this function is
  * larger than the size of the uncompressed source image.  The reason for this
- * is that the JPEG format uses 16-bit coefficients, and it is thus possible
- * for a very high-quality JPEG image with very high-frequency content to
- * expand rather than compress when converted to the JPEG format.  Such images
- * represent a very rare corner case, but since there is no way to predict the
- * size of a JPEG image prior to compression, the corner case has to be
+ * is that the JPEG format uses 16-bit coefficients, so it is possible for a
+ * very high-quality source image with very high-frequency content to expand
+ * rather than compress when converted to the JPEG format.  Such images
+ * represent very rare corner cases, but since there is no way to predict the
+ * size of a JPEG image prior to compression, the corner cases have to be
  * handled.
  *
  * @param width width (in pixels) of the image
@@ -925,23 +927,24 @@
 
 
 /**
- * The size of the buffer (in bytes) required to hold a YUV planar image with
- * the given parameters.
+ * The size of the buffer (in bytes) required to hold a unified planar YUV
+ * image with the given parameters.
  *
  * @param width width (in pixels) of the image
  *
- * @param pad the width of each line in each plane of the image is padded to
- * the nearest multiple of this number of bytes (must be a power of 2.)
+ * @param align row alignment (in bytes) of the image (must be a power of 2.)
+ * Setting this parameter to n specifies that each row in each plane of the
+ * image will be padded to the nearest multiple of n bytes (1 = unpadded.)
  *
  * @param height height (in pixels) of the image
  *
  * @param subsamp level of chrominance subsampling in the image (see
  * @ref TJSAMP "Chrominance subsampling options".)
  *
- * @return the size of the buffer (in bytes) required to hold the image, or
- * -1 if the arguments are out of bounds.
+ * @return the size of the buffer (in bytes) required to hold the image, or -1
+ * (converted to unsigned long) if the arguments are out of bounds.
  */
-DLLEXPORT unsigned long tjBufSizeYUV2(int width, int pad, int height,
+DLLEXPORT unsigned long tjBufSizeYUV2(int width, int align, int height,
                                       int subsamp);
 
 
@@ -954,7 +957,7 @@
  * @param width width (in pixels) of the YUV image.  NOTE: this is the width of
  * the whole image, not the plane width.
  *
- * @param stride bytes per line in the image plane.  Setting this to 0 is the
+ * @param stride bytes per row in the image plane.  Setting this to 0 is the
  * equivalent of setting it to the plane width.
  *
  * @param height height (in pixels) of the YUV image.  NOTE: this is the height
@@ -1005,23 +1008,23 @@
 
 
 /**
- * Encode an RGB or grayscale image into a YUV planar image.  This function
- * uses the accelerated color conversion routines in the underlying
- * codec but does not execute any of the other steps in the JPEG compression
- * process.
+ * Encode a packed-pixel RGB or grayscale image into a unified planar YUV
+ * image.  This function performs color conversion (which is accelerated in the
+ * libjpeg-turbo implementation) but does not execute any of the other steps in
+ * the JPEG compression process.
  *
  * @param handle a handle to a TurboJPEG compressor or transformer instance
  *
- * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels
- * to be encoded
+ * @param srcBuf pointer to a buffer containing a packed-pixel RGB or grayscale
+ * source image to be encoded
  *
  * @param width width (in pixels) of the source image
  *
- * @param pitch bytes per line in the source image.  Normally, this should be
- * <tt>width * #tjPixelSize[pixelFormat]</tt> if the image is unpadded, or
- * <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each line of the image
- * is padded to the nearest 32-bit boundary, as is the case for Windows
- * bitmaps.  You can also be clever and use this parameter to skip lines, etc.
+ * @param pitch bytes per row in the source image.  Normally this should be
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>, if the image is unpadded, or
+ * <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each row of the image
+ * is padded to the nearest multiple of 4 bytes, as is the case for Windows
+ * bitmaps.  You can also be clever and use this parameter to skip rows, etc.
  * Setting this parameter to 0 is the equivalent of setting it to
  * <tt>width * #tjPixelSize[pixelFormat]</tt>.
  *
@@ -1030,53 +1033,54 @@
  * @param pixelFormat pixel format of the source image (see @ref TJPF
  * "Pixel formats".)
  *
- * @param dstBuf pointer to an image buffer that will receive the YUV image.
- * Use #tjBufSizeYUV2() to determine the appropriate size for this buffer based
- * on the image width, height, padding, and level of chrominance subsampling.
- * The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the
- * buffer (refer to @ref YUVnotes "YUV Image Format Notes".)
+ * @param dstBuf pointer to a buffer that will receive the unified planar YUV
+ * image.  Use #tjBufSizeYUV2() to determine the appropriate size for this
+ * buffer based on the image width, height, row alignment, and level of
+ * chrominance subsampling.  The Y, U (Cb), and V (Cr) image planes will be
+ * stored sequentially in the buffer.  (Refer to @ref YUVnotes
+ * "YUV Image Format Notes".)
  *
- * @param pad the width of each line in each plane of the YUV image will be
- * padded to the nearest multiple of this number of bytes (must be a power of
- * 2.)  To generate images suitable for X Video, <tt>pad</tt> should be set to
- * 4.
+ * @param align row alignment (in bytes) of the YUV image (must be a power of
+ * 2.)  Setting this parameter to n will cause each row in each plane of the
+ * YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.)
+ * To generate images suitable for X Video, `align` should be set to 4.
  *
  * @param subsamp the level of chrominance subsampling to be used when
  * generating the YUV image (see @ref TJSAMP
  * "Chrominance subsampling options".)  To generate images suitable for X
- * Video, <tt>subsamp</tt> should be set to @ref TJSAMP_420.  This produces an
- * image compatible with the I420 (AKA "YUV420P") format.
+ * Video, `subsamp` should be set to @ref TJSAMP_420.  This produces an image
+ * compatible with the I420 (AKA "YUV420P") format.
  *
  * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT
  * "flags"
  *
  * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2()
  * and #tjGetErrorCode().)
-*/
+ */
 DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf,
                            int width, int pitch, int height, int pixelFormat,
-                           unsigned char *dstBuf, int pad, int subsamp,
+                           unsigned char *dstBuf, int align, int subsamp,
                            int flags);
 
 
 /**
- * Encode an RGB or grayscale image into separate Y, U (Cb), and V (Cr) image
- * planes.  This function uses the accelerated color conversion routines in the
- * underlying codec but does not execute any of the other steps in the JPEG
- * compression process.
+ * Encode a packed-pixel RGB or grayscale image into separate Y, U (Cb), and
+ * V (Cr) image planes.  This function performs color conversion (which is
+ * accelerated in the libjpeg-turbo implementation) but does not execute any of
+ * the other steps in the JPEG compression process.
  *
  * @param handle a handle to a TurboJPEG compressor or transformer instance
  *
- * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels
- * to be encoded
+ * @param srcBuf pointer to a buffer containing a packed-pixel RGB or grayscale
+ * source image to be encoded
  *
  * @param width width (in pixels) of the source image
  *
- * @param pitch bytes per line in the source image.  Normally, this should be
- * <tt>width * #tjPixelSize[pixelFormat]</tt> if the image is unpadded, or
- * <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each line of the image
- * is padded to the nearest 32-bit boundary, as is the case for Windows
- * bitmaps.  You can also be clever and use this parameter to skip lines, etc.
+ * @param pitch bytes per row in the source image.  Normally this should be
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>, if the image is unpadded, or
+ * <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each row of the image
+ * is padded to the nearest multiple of 4 bytes, as is the case for Windows
+ * bitmaps.  You can also be clever and use this parameter to skip rows, etc.
  * Setting this parameter to 0 is the equivalent of setting it to
  * <tt>width * #tjPixelSize[pixelFormat]</tt>.
  *
@@ -1093,26 +1097,26 @@
  * Refer to @ref YUVnotes "YUV Image Format Notes" for more details.
  *
  * @param strides an array of integers, each specifying the number of bytes per
- * line in the corresponding plane of the output image.  Setting the stride for
- * any plane to 0 is the same as setting it to the plane width (see
- * @ref YUVnotes "YUV Image Format Notes".)  If <tt>strides</tt> is NULL, then
- * the strides for all planes will be set to their respective plane widths.
- * You can adjust the strides in order to add an arbitrary amount of line
- * padding to each plane or to encode an RGB or grayscale image into a
- * subregion of a larger YUV planar image.
+ * row in the corresponding plane of the YUV image.  Setting the stride for any
+ * plane to 0 is the same as setting it to the plane width (see @ref YUVnotes
+ * "YUV Image Format Notes".)  If `strides` is NULL, then the strides for all
+ * planes will be set to their respective plane widths.  You can adjust the
+ * strides in order to add an arbitrary amount of row padding to each plane or
+ * to encode an RGB or grayscale image into a subregion of a larger planar YUV
+ * image.
  *
  * @param subsamp the level of chrominance subsampling to be used when
  * generating the YUV image (see @ref TJSAMP
  * "Chrominance subsampling options".)  To generate images suitable for X
- * Video, <tt>subsamp</tt> should be set to @ref TJSAMP_420.  This produces an
- * image compatible with the I420 (AKA "YUV420P") format.
+ * Video, `subsamp` should be set to @ref TJSAMP_420.  This produces an image
+ * compatible with the I420 (AKA "YUV420P") format.
  *
  * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT
  * "flags"
  *
  * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2()
  * and #tjGetErrorCode().)
-*/
+ */
 DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf,
                                 int width, int pitch, int height,
                                 int pixelFormat, unsigned char **dstPlanes,
@@ -1122,38 +1126,49 @@
 /**
  * Create a TurboJPEG decompressor instance.
  *
- * @return a handle to the newly-created instance, or NULL if an error
- * occurred (see #tjGetErrorStr2().)
-*/
+ * @return a handle to the newly-created instance, or NULL if an error occurred
+ * (see #tjGetErrorStr2().)
+ */
 DLLEXPORT tjhandle tjInitDecompress(void);
 
 
 /**
- * Retrieve information about a JPEG image without decompressing it.
+ * Retrieve information about a JPEG image without decompressing it, or prime
+ * the decompressor with quantization and Huffman tables.
  *
  * @param handle a handle to a TurboJPEG decompressor or transformer instance
  *
- * @param jpegBuf pointer to a buffer containing a JPEG image
+ * @param jpegBuf pointer to a byte buffer containing a JPEG image or an
+ * "abbreviated table specification" (AKA "tables-only") datastream.  Passing a
+ * tables-only datastream to this function primes the decompressor with
+ * quantization and Huffman tables that can be used when decompressing
+ * subsequent "abbreviated image" datastreams.  This is useful, for instance,
+ * when decompressing video streams in which all frames share the same
+ * quantization and Huffman tables.
  *
- * @param jpegSize size of the JPEG image (in bytes)
+ * @param jpegSize size of the JPEG image or tables-only datastream (in bytes)
  *
  * @param width pointer to an integer variable that will receive the width (in
- * pixels) of the JPEG image
+ * pixels) of the JPEG image.  If `jpegBuf` points to a tables-only datastream,
+ * then `width` is ignored.
  *
  * @param height pointer to an integer variable that will receive the height
- * (in pixels) of the JPEG image
+ * (in pixels) of the JPEG image.  If `jpegBuf` points to a tables-only
+ * datastream, then `height` is ignored.
  *
  * @param jpegSubsamp pointer to an integer variable that will receive the
  * level of chrominance subsampling used when the JPEG image was compressed
- * (see @ref TJSAMP "Chrominance subsampling options".)
+ * (see @ref TJSAMP "Chrominance subsampling options".)  If `jpegBuf` points to
+ * a tables-only datastream, then `jpegSubsamp` is ignored.
  *
  * @param jpegColorspace pointer to an integer variable that will receive one
  * of the JPEG colorspace constants, indicating the colorspace of the JPEG
- * image (see @ref TJCS "JPEG colorspaces".)
+ * image (see @ref TJCS "JPEG colorspaces".)  If `jpegBuf` points to a
+ * tables-only datastream, then `jpegColorspace` is ignored.
  *
  * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2()
  * and #tjGetErrorCode().)
-*/
+ */
 DLLEXPORT int tjDecompressHeader3(tjhandle handle,
                                   const unsigned char *jpegBuf,
                                   unsigned long jpegSize, int *width,
@@ -1162,58 +1177,60 @@
 
 
 /**
- * Returns a list of fractional scaling factors that the JPEG decompressor in
- * this implementation of TurboJPEG supports.
+ * Returns a list of fractional scaling factors that the JPEG decompressor
+ * supports.
  *
- * @param numscalingfactors pointer to an integer variable that will receive
+ * @param numScalingFactors pointer to an integer variable that will receive
  * the number of elements in the list
  *
  * @return a pointer to a list of fractional scaling factors, or NULL if an
  * error is encountered (see #tjGetErrorStr2().)
-*/
-DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numscalingfactors);
+ */
+DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numScalingFactors);
 
 
 /**
- * Decompress a JPEG image to an RGB, grayscale, or CMYK image.
+ * Decompress a JPEG image into a packed-pixel RGB, grayscale, or CMYK image.
  *
  * @param handle a handle to a TurboJPEG decompressor or transformer instance
  *
- * @param jpegBuf pointer to a buffer containing the JPEG image to decompress
+ * @param jpegBuf pointer to a byte buffer containing the JPEG image to
+ * decompress
  *
  * @param jpegSize size of the JPEG image (in bytes)
  *
- * @param dstBuf pointer to an image buffer that will receive the decompressed
- * image.  This buffer should normally be <tt>pitch * scaledHeight</tt> bytes
- * in size, where <tt>scaledHeight</tt> can be determined by calling
- * #TJSCALED() with the JPEG image height and one of the scaling factors
- * returned by #tjGetScalingFactors().  The <tt>dstBuf</tt> pointer may also be
- * used to decompress into a specific region of a larger buffer.
+ * @param dstBuf pointer to a buffer that will receive the packed-pixel
+ * decompressed image.  This buffer should normally be `pitch * scaledHeight`
+ * bytes in size, where `scaledHeight` can be determined by calling #TJSCALED()
+ * with the JPEG image height and one of the scaling factors returned by
+ * #tjGetScalingFactors().  The `dstBuf` pointer may also be used to decompress
+ * into a specific region of a larger buffer.
  *
  * @param width desired width (in pixels) of the destination image.  If this is
  * different than the width of the JPEG image being decompressed, then
  * TurboJPEG will use scaling in the JPEG decompressor to generate the largest
- * possible image that will fit within the desired width.  If <tt>width</tt> is
- * set to 0, then only the height will be considered when determining the
- * scaled image size.
+ * possible image that will fit within the desired width.  If `width` is set to
+ * 0, then only the height will be considered when determining the scaled image
+ * size.
  *
- * @param pitch bytes per line in the destination image.  Normally, this is
- * <tt>scaledWidth * #tjPixelSize[pixelFormat]</tt> if the decompressed image
- * is unpadded, else <tt>#TJPAD(scaledWidth * #tjPixelSize[pixelFormat])</tt>
- * if each line of the decompressed image is padded to the nearest 32-bit
- * boundary, as is the case for Windows bitmaps.  (NOTE: <tt>scaledWidth</tt>
- * can be determined by calling #TJSCALED() with the JPEG image width and one
- * of the scaling factors returned by #tjGetScalingFactors().)  You can also be
- * clever and use the pitch parameter to skip lines, etc.  Setting this
- * parameter to 0 is the equivalent of setting it to
+ * @param pitch bytes per row in the destination image.  Normally this should
+ * be set to <tt>scaledWidth * #tjPixelSize[pixelFormat]</tt>, if the
+ * destination image should be unpadded, or
+ * <tt>#TJPAD(scaledWidth * #tjPixelSize[pixelFormat])</tt> if each row of the
+ * destination image should be padded to the nearest multiple of 4 bytes, as is
+ * the case for Windows bitmaps.  (NOTE: `scaledWidth` can be determined by
+ * calling #TJSCALED() with the JPEG image width and one of the scaling factors
+ * returned by #tjGetScalingFactors().)  You can also be clever and use the
+ * pitch parameter to skip rows, etc.  Setting this parameter to 0 is the
+ * equivalent of setting it to
  * <tt>scaledWidth * #tjPixelSize[pixelFormat]</tt>.
  *
  * @param height desired height (in pixels) of the destination image.  If this
  * is different than the height of the JPEG image being decompressed, then
  * TurboJPEG will use scaling in the JPEG decompressor to generate the largest
- * possible image that will fit within the desired height.  If <tt>height</tt>
- * is set to 0, then only the width will be considered when determining the
- * scaled image size.
+ * possible image that will fit within the desired height.  If `height` is set
+ * to 0, then only the width will be considered when determining the scaled
+ * image size.
  *
  * @param pixelFormat pixel format of the destination image (see @ref
  * TJPF "Pixel formats".)
@@ -1231,44 +1248,45 @@
 
 
 /**
- * Decompress a JPEG image to a YUV planar image.  This function performs JPEG
- * decompression but leaves out the color conversion step, so a planar YUV
- * image is generated instead of an RGB image.
+ * Decompress a JPEG image into a unified planar YUV image.  This function
+ * performs JPEG decompression but leaves out the color conversion step, so a
+ * planar YUV image is generated instead of a packed-pixel image.
  *
  * @param handle a handle to a TurboJPEG decompressor or transformer instance
  *
- * @param jpegBuf pointer to a buffer containing the JPEG image to decompress
+ * @param jpegBuf pointer to a byte buffer containing the JPEG image to
+ * decompress
  *
  * @param jpegSize size of the JPEG image (in bytes)
  *
- * @param dstBuf pointer to an image buffer that will receive the YUV image.
- * Use #tjBufSizeYUV2() to determine the appropriate size for this buffer based
- * on the image width, height, padding, and level of subsampling.  The Y,
- * U (Cb), and V (Cr) image planes will be stored sequentially in the buffer
- * (refer to @ref YUVnotes "YUV Image Format Notes".)
+ * @param dstBuf pointer to a buffer that will receive the unified planar YUV
+ * decompressed image.  Use #tjBufSizeYUV2() to determine the appropriate size
+ * for this buffer based on the scaled image width, scaled image height, row
+ * alignment, and level of chrominance subsampling.  The Y, U (Cb), and V (Cr)
+ * image planes will be stored sequentially in the buffer.  (Refer to
+ * @ref YUVnotes "YUV Image Format Notes".)
  *
  * @param width desired width (in pixels) of the YUV image.  If this is
  * different than the width of the JPEG image being decompressed, then
  * TurboJPEG will use scaling in the JPEG decompressor to generate the largest
- * possible image that will fit within the desired width.  If <tt>width</tt> is
- * set to 0, then only the height will be considered when determining the
- * scaled image size.  If the scaled width is not an even multiple of the MCU
- * block width (see #tjMCUWidth), then an intermediate buffer copy will be
- * performed within TurboJPEG.
+ * possible image that will fit within the desired width.  If `width` is set to
+ * 0, then only the height will be considered when determining the scaled image
+ * size.  If the scaled width is not an even multiple of the MCU block width
+ * (see #tjMCUWidth), then an intermediate buffer copy will be performed.
  *
- * @param pad the width of each line in each plane of the YUV image will be
- * padded to the nearest multiple of this number of bytes (must be a power of
- * 2.)  To generate images suitable for X Video, <tt>pad</tt> should be set to
- * 4.
+ * @param align row alignment (in bytes) of the YUV image (must be a power of
+ * 2.)  Setting this parameter to n will cause each row in each plane of the
+ * YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.)
+ * To generate images suitable for X Video, `align` should be set to 4.
  *
  * @param height desired height (in pixels) of the YUV image.  If this is
  * different than the height of the JPEG image being decompressed, then
  * TurboJPEG will use scaling in the JPEG decompressor to generate the largest
- * possible image that will fit within the desired height.  If <tt>height</tt>
- * is set to 0, then only the width will be considered when determining the
- * scaled image size.  If the scaled height is not an even multiple of the MCU
- * block height (see #tjMCUHeight), then an intermediate buffer copy will be
- * performed within TurboJPEG.
+ * possible image that will fit within the desired height.  If `height` is set
+ * to 0, then only the width will be considered when determining the scaled
+ * image size.  If the scaled height is not an even multiple of the MCU block
+ * height (see #tjMCUHeight), then an intermediate buffer copy will be
+ * performed.
  *
  * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT
  * "flags"
@@ -1278,54 +1296,55 @@
  */
 DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf,
                                  unsigned long jpegSize, unsigned char *dstBuf,
-                                 int width, int pad, int height, int flags);
+                                 int width, int align, int height, int flags);
 
 
 /**
  * Decompress a JPEG image into separate Y, U (Cb), and V (Cr) image
  * planes.  This function performs JPEG decompression but leaves out the color
- * conversion step, so a planar YUV image is generated instead of an RGB image.
+ * conversion step, so a planar YUV image is generated instead of a
+ * packed-pixel image.
  *
  * @param handle a handle to a TurboJPEG decompressor or transformer instance
  *
- * @param jpegBuf pointer to a buffer containing the JPEG image to decompress
+ * @param jpegBuf pointer to a byte buffer containing the JPEG image to
+ * decompress
  *
  * @param jpegSize size of the JPEG image (in bytes)
  *
  * @param dstPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes
  * (or just a Y plane, if decompressing a grayscale image) that will receive
- * the YUV image.  These planes can be contiguous or non-contiguous in memory.
- * Use #tjPlaneSizeYUV() to determine the appropriate size for each plane based
- * on the scaled image width, scaled image height, strides, and level of
- * chrominance subsampling.  Refer to @ref YUVnotes "YUV Image Format Notes"
- * for more details.
+ * the decompressed image.  These planes can be contiguous or non-contiguous in
+ * memory.  Use #tjPlaneSizeYUV() to determine the appropriate size for each
+ * plane based on the scaled image width, scaled image height, strides, and
+ * level of chrominance subsampling.  Refer to @ref YUVnotes
+ * "YUV Image Format Notes" for more details.
  *
  * @param width desired width (in pixels) of the YUV image.  If this is
  * different than the width of the JPEG image being decompressed, then
  * TurboJPEG will use scaling in the JPEG decompressor to generate the largest
- * possible image that will fit within the desired width.  If <tt>width</tt> is
- * set to 0, then only the height will be considered when determining the
- * scaled image size.  If the scaled width is not an even multiple of the MCU
- * block width (see #tjMCUWidth), then an intermediate buffer copy will be
- * performed within TurboJPEG.
+ * possible image that will fit within the desired width.  If `width` is set to
+ * 0, then only the height will be considered when determining the scaled image
+ * size.  If the scaled width is not an even multiple of the MCU block width
+ * (see #tjMCUWidth), then an intermediate buffer copy will be performed.
  *
  * @param strides an array of integers, each specifying the number of bytes per
- * line in the corresponding plane of the output image.  Setting the stride for
- * any plane to 0 is the same as setting it to the scaled plane width (see
- * @ref YUVnotes "YUV Image Format Notes".)  If <tt>strides</tt> is NULL, then
- * the strides for all planes will be set to their respective scaled plane
- * widths.  You can adjust the strides in order to add an arbitrary amount of
- * line padding to each plane or to decompress the JPEG image into a subregion
- * of a larger YUV planar image.
+ * row in the corresponding plane of the YUV image.  Setting the stride for any
+ * plane to 0 is the same as setting it to the scaled plane width (see
+ * @ref YUVnotes "YUV Image Format Notes".)  If `strides` is NULL, then the
+ * strides for all planes will be set to their respective scaled plane widths.
+ * You can adjust the strides in order to add an arbitrary amount of row
+ * padding to each plane or to decompress the JPEG image into a subregion of a
+ * larger planar YUV image.
  *
  * @param height desired height (in pixels) of the YUV image.  If this is
  * different than the height of the JPEG image being decompressed, then
  * TurboJPEG will use scaling in the JPEG decompressor to generate the largest
- * possible image that will fit within the desired height.  If <tt>height</tt>
- * is set to 0, then only the width will be considered when determining the
- * scaled image size.  If the scaled height is not an even multiple of the MCU
- * block height (see #tjMCUHeight), then an intermediate buffer copy will be
- * performed within TurboJPEG.
+ * possible image that will fit within the desired height.  If `height` is set
+ * to 0, then only the width will be considered when determining the scaled
+ * image size.  If the scaled height is not an even multiple of the MCU block
+ * height (see #tjMCUHeight), then an intermediate buffer copy will be
+ * performed.
  *
  * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT
  * "flags"
@@ -1341,40 +1360,42 @@
 
 
 /**
- * Decode a YUV planar image into an RGB or grayscale image.  This function
- * uses the accelerated color conversion routines in the underlying
- * codec but does not execute any of the other steps in the JPEG decompression
- * process.
+ * Decode a unified planar YUV image into a packed-pixel RGB or grayscale
+ * image.  This function performs color conversion (which is accelerated in the
+ * libjpeg-turbo implementation) but does not execute any of the other steps in
+ * the JPEG decompression process.
  *
  * @param handle a handle to a TurboJPEG decompressor or transformer instance
  *
- * @param srcBuf pointer to an image buffer containing a YUV planar image to be
- * decoded.  The size of this buffer should match the value returned by
- * #tjBufSizeYUV2() for the given image width, height, padding, and level of
- * chrominance subsampling.  The Y, U (Cb), and V (Cr) image planes should be
- * stored sequentially in the source buffer (refer to @ref YUVnotes
- * "YUV Image Format Notes".)
+ * @param srcBuf pointer to a buffer containing a unified planar YUV source
+ * image to be decoded.  The size of this buffer should match the value
+ * returned by #tjBufSizeYUV2() for the given image width, height, row
+ * alignment, and level of chrominance subsampling.  The Y, U (Cb), and V (Cr)
+ * image planes should be stored sequentially in the source buffer.  (Refer to
+ * @ref YUVnotes "YUV Image Format Notes".)
  *
- * @param pad Use this parameter to specify that the width of each line in each
- * plane of the YUV source image is padded to the nearest multiple of this
- * number of bytes (must be a power of 2.)
+ * @param align row alignment (in bytes) of the YUV source image (must be a
+ * power of 2.)  Setting this parameter to n indicates that each row in each
+ * plane of the YUV source image is padded to the nearest multiple of n bytes
+ * (1 = unpadded.)
  *
  * @param subsamp the level of chrominance subsampling used in the YUV source
  * image (see @ref TJSAMP "Chrominance subsampling options".)
  *
- * @param dstBuf pointer to an image buffer that will receive the decoded
- * image.  This buffer should normally be <tt>pitch * height</tt> bytes in
- * size, but the <tt>dstBuf</tt> pointer can also be used to decode into a
- * specific region of a larger buffer.
+ * @param dstBuf pointer to a buffer that will receive the packed-pixel decoded
+ * image.  This buffer should normally be `pitch * height` bytes in size, but
+ * the `dstBuf` pointer can also be used to decode into a specific region of a
+ * larger buffer.
  *
  * @param width width (in pixels) of the source and destination images
  *
- * @param pitch bytes per line in the destination image.  Normally, this should
- * be <tt>width * #tjPixelSize[pixelFormat]</tt> if the destination image is
- * unpadded, or <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each line
- * of the destination image should be padded to the nearest 32-bit boundary, as
- * is the case for Windows bitmaps.  You can also be clever and use the pitch
- * parameter to skip lines, etc.  Setting this parameter to 0 is the equivalent
+ * @param pitch bytes per row in the destination image.  Normally this should
+ * be set to <tt>width * #tjPixelSize[pixelFormat]</tt>, if the destination
+ * image should be unpadded, or
+ * <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each row of the
+ * destination image should be padded to the nearest multiple of 4 bytes, as is
+ * the case for Windows bitmaps.  You can also be clever and use the pitch
+ * parameter to skip rows, etc.  Setting this parameter to 0 is the equivalent
  * of setting it to <tt>width * #tjPixelSize[pixelFormat]</tt>.
  *
  * @param height height (in pixels) of the source and destination images
@@ -1389,16 +1410,16 @@
  * and #tjGetErrorCode().)
  */
 DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf,
-                          int pad, int subsamp, unsigned char *dstBuf,
+                          int align, int subsamp, unsigned char *dstBuf,
                           int width, int pitch, int height, int pixelFormat,
                           int flags);
 
 
 /**
- * Decode a set of Y, U (Cb), and V (Cr) image planes into an RGB or grayscale
- * image.  This function uses the accelerated color conversion routines in the
- * underlying codec but does not execute any of the other steps in the JPEG
- * decompression process.
+ * Decode a set of Y, U (Cb), and V (Cr) image planes into a packed-pixel RGB
+ * or grayscale image.  This function performs color conversion (which is
+ * accelerated in the libjpeg-turbo implementation) but does not execute any of
+ * the other steps in the JPEG decompression process.
  *
  * @param handle a handle to a TurboJPEG decompressor or transformer instance
  *
@@ -1411,29 +1432,30 @@
  * details.
  *
  * @param strides an array of integers, each specifying the number of bytes per
- * line in the corresponding plane of the YUV source image.  Setting the stride
+ * row in the corresponding plane of the YUV source image.  Setting the stride
  * for any plane to 0 is the same as setting it to the plane width (see
- * @ref YUVnotes "YUV Image Format Notes".)  If <tt>strides</tt> is NULL, then
- * the strides for all planes will be set to their respective plane widths.
- * You can adjust the strides in order to specify an arbitrary amount of line
- * padding in each plane or to decode a subregion of a larger YUV planar image.
+ * @ref YUVnotes "YUV Image Format Notes".)  If `strides` is NULL, then the
+ * strides for all planes will be set to their respective plane widths.  You
+ * can adjust the strides in order to specify an arbitrary amount of row
+ * padding in each plane or to decode a subregion of a larger planar YUV image.
  *
  * @param subsamp the level of chrominance subsampling used in the YUV source
  * image (see @ref TJSAMP "Chrominance subsampling options".)
  *
- * @param dstBuf pointer to an image buffer that will receive the decoded
- * image.  This buffer should normally be <tt>pitch * height</tt> bytes in
- * size, but the <tt>dstBuf</tt> pointer can also be used to decode into a
- * specific region of a larger buffer.
+ * @param dstBuf pointer to a buffer that will receive the packed-pixel decoded
+ * image.  This buffer should normally be `pitch * height` bytes in size, but
+ * the `dstBuf` pointer can also be used to decode into a specific region of a
+ * larger buffer.
  *
  * @param width width (in pixels) of the source and destination images
  *
- * @param pitch bytes per line in the destination image.  Normally, this should
- * be <tt>width * #tjPixelSize[pixelFormat]</tt> if the destination image is
- * unpadded, or <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each line
- * of the destination image should be padded to the nearest 32-bit boundary, as
- * is the case for Windows bitmaps.  You can also be clever and use the pitch
- * parameter to skip lines, etc.  Setting this parameter to 0 is the equivalent
+ * @param pitch bytes per row in the destination image.  Normally this should
+ * be set to <tt>width * #tjPixelSize[pixelFormat]</tt>, if the destination
+ * image should be unpadded, or
+ * <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each row of the
+ * destination image should be padded to the nearest multiple of 4 bytes, as is
+ * the case for Windows bitmaps.  You can also be clever and use the pitch
+ * parameter to skip rows, etc.  Setting this parameter to 0 is the equivalent
  * of setting it to <tt>width * #tjPixelSize[pixelFormat]</tt>.
  *
  * @param height height (in pixels) of the source and destination images
@@ -1472,50 +1494,51 @@
  * transform requires reading and performing Huffman decoding on all of the
  * coefficients in the source image, regardless of the size of the destination
  * image.  Thus, this function provides a means of generating multiple
- * transformed images from the same source or  applying multiple
- * transformations simultaneously, in order to eliminate the need to read the
- * source coefficients multiple times.
+ * transformed images from the same source or applying multiple transformations
+ * simultaneously, in order to eliminate the need to read the source
+ * coefficients multiple times.
  *
  * @param handle a handle to a TurboJPEG transformer instance
  *
- * @param jpegBuf pointer to a buffer containing the JPEG source image to
+ * @param jpegBuf pointer to a byte buffer containing the JPEG source image to
  * transform
  *
  * @param jpegSize size of the JPEG source image (in bytes)
  *
  * @param n the number of transformed JPEG images to generate
  *
- * @param dstBufs pointer to an array of n image buffers.  <tt>dstBufs[i]</tt>
- * will receive a JPEG image that has been transformed using the parameters in
- * <tt>transforms[i]</tt>.  TurboJPEG has the ability to reallocate the JPEG
- * buffer to accommodate the size of the JPEG image.  Thus, you can choose to:
- * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and
- * let TurboJPEG grow the buffer as needed,
- * -# set <tt>dstBufs[i]</tt> to NULL to tell TurboJPEG to allocate the buffer
- * for you, or
+ * @param dstBufs pointer to an array of n byte buffers.  `dstBufs[i]` will
+ * receive a JPEG image that has been transformed using the parameters in
+ * `transforms[i]`.  TurboJPEG has the ability to reallocate the JPEG
+ * destination buffer to accommodate the size of the transformed JPEG image.
+ * Thus, you can choose to:
+ * -# pre-allocate the JPEG destination buffer with an arbitrary size using
+ * #tjAlloc() and let TurboJPEG grow the buffer as needed,
+ * -# set `dstBufs[i]` to NULL to tell TurboJPEG to allocate the buffer for
+ * you, or
  * -# pre-allocate the buffer to a "worst case" size determined by calling
  * #tjBufSize() with the transformed or cropped width and height.  Under normal
  * circumstances, this should ensure that the buffer never has to be
- * re-allocated (setting #TJFLAG_NOREALLOC guarantees that it won't be.)  Note,
- * however, that there are some rare cases (such as transforming images with a
- * large amount of embedded EXIF or ICC profile data) in which the output image
- * will be larger than the worst-case size, and #TJFLAG_NOREALLOC cannot be
- * used in those cases.
+ * re-allocated.  (Setting #TJFLAG_NOREALLOC guarantees that it won't be.)
+ * Note, however, that there are some rare cases (such as transforming images
+ * with a large amount of embedded EXIF or ICC profile data) in which the
+ * transformed JPEG image will be larger than the worst-case size, and
+ * #TJFLAG_NOREALLOC cannot be used in those cases.
  * .
- * If you choose option 1, <tt>dstSizes[i]</tt> should be set to the size of
- * your pre-allocated buffer.  In any case, unless you have set
- * #TJFLAG_NOREALLOC, you should always check <tt>dstBufs[i]</tt> upon return
- * from this function, as it may have changed.
+ * If you choose option 1, then `dstSizes[i]` should be set to the size of your
+ * pre-allocated buffer.  In any case, unless you have set #TJFLAG_NOREALLOC,
+ * you should always check `dstBufs[i]` upon return from this function, as it
+ * may have changed.
  *
  * @param dstSizes pointer to an array of n unsigned long variables that will
  * receive the actual sizes (in bytes) of each transformed JPEG image.  If
- * <tt>dstBufs[i]</tt> points to a pre-allocated buffer, then
- * <tt>dstSizes[i]</tt> should be set to the size of the buffer.  Upon return,
- * <tt>dstSizes[i]</tt> will contain the size of the JPEG image (in bytes.)
+ * `dstBufs[i]` points to a pre-allocated buffer, then `dstSizes[i]` should be
+ * set to the size of the buffer.  Upon return, `dstSizes[i]` will contain the
+ * size of the transformed JPEG image (in bytes.)
  *
  * @param transforms pointer to an array of n #tjtransform structures, each of
  * which specifies the transform parameters and/or cropping region for the
- * corresponding transformed output image.
+ * corresponding transformed JPEG image.
  *
  * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT
  * "flags"
@@ -1541,10 +1564,10 @@
 
 
 /**
- * Allocate an image buffer for use with TurboJPEG.  You should always use
- * this function to allocate the JPEG destination buffer(s) for the compression
- * and transform functions unless you are disabling automatic buffer
- * (re)allocation (by setting #TJFLAG_NOREALLOC.)
+ * Allocate a byte buffer for use with TurboJPEG.  You should always use this
+ * function to allocate the JPEG destination buffer(s) for the compression and
+ * transform functions unless you are disabling automatic buffer (re)allocation
+ * (by setting #TJFLAG_NOREALLOC.)
  *
  * @param bytes the number of bytes to allocate
  *
@@ -1557,44 +1580,43 @@
 
 
 /**
- * Load an uncompressed image from disk into memory.
+ * Load a packed-pixel image from disk into memory.
  *
- * @param filename name of a file containing an uncompressed image in Windows
+ * @param filename name of a file containing a packed-pixel image in Windows
  * BMP or PBMPLUS (PPM/PGM) format
  *
  * @param width pointer to an integer variable that will receive the width (in
- * pixels) of the uncompressed image
+ * pixels) of the packed-pixel image
  *
- * @param align row alignment of the image buffer to be returned (must be a
- * power of 2.)  For instance, setting this parameter to 4 will cause all rows
- * in the image buffer to be padded to the nearest 32-bit boundary, and setting
- * this parameter to 1 will cause all rows in the image buffer to be unpadded.
+ * @param align row alignment of the packed-pixel buffer to be returned (must
+ * be a power of 2.)  Setting this parameter to n will cause all rows in the
+ * buffer to be padded to the nearest multiple of n bytes (1 = unpadded.)
  *
  * @param height pointer to an integer variable that will receive the height
- * (in pixels) of the uncompressed image
+ * (in pixels) of the packed-pixel image
  *
  * @param pixelFormat pointer to an integer variable that specifies or will
- * receive the pixel format of the uncompressed image buffer.  The behavior of
- * #tjLoadImage() will vary depending on the value of <tt>*pixelFormat</tt>
- * passed to the function:
- * - @ref TJPF_UNKNOWN : The uncompressed image buffer returned by the function
- * will use the most optimal pixel format for the file type, and
- * <tt>*pixelFormat</tt> will contain the ID of this pixel format upon
- * successful return from the function.
- * - @ref TJPF_GRAY : Only PGM files and 8-bit BMP files with a grayscale
- * colormap can be loaded.
+ * receive the pixel format of the packed-pixel buffer.  The behavior of
+ * #tjLoadImage() will vary depending on the value of `*pixelFormat` passed to
+ * the function:
+ * - @ref TJPF_UNKNOWN : The packed-pixel buffer returned by this function will
+ * use the most optimal pixel format for the file type, and `*pixelFormat` will
+ * contain the ID of that pixel format upon successful return from this
+ * function.
+ * - @ref TJPF_GRAY : Only PGM files and 8-bit-per-pixel BMP files with a
+ * grayscale colormap can be loaded.
  * - @ref TJPF_CMYK : The RGB or grayscale pixels stored in the file will be
  * converted using a quick & dirty algorithm that is suitable only for testing
- * purposes (proper conversion between CMYK and other formats requires a color
- * management system.)
- * - Other @ref TJPF "pixel formats" : The uncompressed image buffer will use
- * the specified pixel format, and pixel format conversion will be performed if
+ * purposes.  (Proper conversion between CMYK and other formats requires a
+ * color management system.)
+ * - Other @ref TJPF "pixel formats" : The packed-pixel buffer will use the
+ * specified pixel format, and pixel format conversion will be performed if
  * necessary.
  *
  * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
  * "flags".
  *
- * @return a pointer to a newly-allocated buffer containing the uncompressed
+ * @return a pointer to a newly-allocated buffer containing the packed-pixel
  * image, converted to the chosen pixel format and with the chosen row
  * alignment, or NULL if an error occurred (see #tjGetErrorStr2().)  This
  * buffer should be freed using #tjFree().
@@ -1605,31 +1627,31 @@
 
 
 /**
- * Save an uncompressed image from memory to disk.
+ * Save a packed-pixel image from memory to disk.
  *
- * @param filename name of a file to which to save the uncompressed image.
- * The image will be stored in Windows BMP or PBMPLUS (PPM/PGM) format,
- * depending on the file extension.
+ * @param filename name of a file to which to save the packed-pixel image.  The
+ * image will be stored in Windows BMP or PBMPLUS (PPM/PGM) format, depending
+ * on the file extension.
  *
- * @param buffer pointer to an image buffer containing RGB, grayscale, or
- * CMYK pixels to be saved
+ * @param buffer pointer to a buffer containing a packed-pixel RGB, grayscale,
+ * or CMYK image to be saved
  *
- * @param width width (in pixels) of the uncompressed image
+ * @param width width (in pixels) of the packed-pixel image
  *
- * @param pitch bytes per line in the image buffer.  Setting this parameter to
- * 0 is the equivalent of setting it to
+ * @param pitch bytes per row in the packed-pixel image.  Setting this
+ * parameter to 0 is the equivalent of setting it to
  * <tt>width * #tjPixelSize[pixelFormat]</tt>.
  *
- * @param height height (in pixels) of the uncompressed image
+ * @param height height (in pixels) of the packed-pixel image
  *
- * @param pixelFormat pixel format of the image buffer (see @ref TJPF
+ * @param pixelFormat pixel format of the packed-pixel image (see @ref TJPF
  * "Pixel formats".)  If this parameter is set to @ref TJPF_GRAY, then the
- * image will be stored in PGM or 8-bit (indexed color) BMP format.  Otherwise,
- * the image will be stored in PPM or 24-bit BMP format.  If this parameter
- * is set to @ref TJPF_CMYK, then the CMYK pixels will be converted to RGB
- * using a quick & dirty algorithm that is suitable only for testing (proper
- * conversion between CMYK and other formats requires a color management
- * system.)
+ * image will be stored in PGM or 8-bit-per-pixel (indexed color) BMP format.
+ * Otherwise, the image will be stored in PPM or 24-bit-per-pixel BMP format.
+ * If this parameter is set to @ref TJPF_CMYK, then the CMYK pixels will be
+ * converted to RGB using a quick & dirty algorithm that is suitable only for
+ * testing purposes.  (Proper conversion between CMYK and other formats
+ * requires a color management system.)
  *
  * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
  * "flags".
@@ -1642,8 +1664,8 @@
 
 
 /**
- * Free an image buffer previously allocated by TurboJPEG.  You should always
- * use this function to free JPEG destination buffer(s) that were automatically
+ * Free a byte buffer previously allocated by TurboJPEG.  You should always use
+ * this function to free JPEG destination buffer(s) that were automatically
  * (re)allocated by the compression and transform functions or that were
  * manually allocated using #tjAlloc().
  *
@@ -1681,14 +1703,10 @@
 DLLEXPORT int tjGetErrorCode(tjhandle handle);
 
 
-/* Deprecated functions and macros */
-#define TJFLAG_FORCEMMX  8
-#define TJFLAG_FORCESSE  16
-#define TJFLAG_FORCESSE2  32
-#define TJFLAG_FORCESSE3  128
-
-
 /* Backward compatibility functions and macros (nothing to see here) */
+
+/* TurboJPEG 1.0+ */
+
 #define NUMSUBOPT  TJ_NUMSAMP
 #define TJ_444  TJSAMP_444
 #define TJ_422  TJSAMP_422
@@ -1704,46 +1722,55 @@
 #define TJ_ALPHAFIRST  64
 #define TJ_FORCESSE3  TJFLAG_FORCESSE3
 #define TJ_FASTUPSAMPLE  TJFLAG_FASTUPSAMPLE
-#define TJ_YUV  512
 
 DLLEXPORT unsigned long TJBUFSIZE(int width, int height);
 
-DLLEXPORT unsigned long TJBUFSIZEYUV(int width, int height, int jpegSubsamp);
-
-DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp);
-
 DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width,
                          int pitch, int height, int pixelSize,
                          unsigned char *dstBuf, unsigned long *compressedSize,
                          int jpegSubsamp, int jpegQual, int flags);
 
-DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width,
-                          int pitch, int height, int pixelSize,
-                          unsigned char *dstBuf, int subsamp, int flags);
-
-DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width,
-                           int pitch, int height, int pixelFormat,
-                           unsigned char *dstBuf, int subsamp, int flags);
-
-DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf,
-                                 unsigned long jpegSize, int *width,
-                                 int *height);
-
-DLLEXPORT int tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf,
-                                  unsigned long jpegSize, int *width,
-                                  int *height, int *jpegSubsamp);
-
 DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf,
                            unsigned long jpegSize, unsigned char *dstBuf,
                            int width, int pitch, int height, int pixelSize,
                            int flags);
 
+DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf,
+                                 unsigned long jpegSize, int *width,
+                                 int *height);
+
+DLLEXPORT char *tjGetErrorStr(void);
+
+/* TurboJPEG 1.1+ */
+
+#define TJ_YUV  512
+
+DLLEXPORT unsigned long TJBUFSIZEYUV(int width, int height, int jpegSubsamp);
+
+DLLEXPORT int tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf,
+                                  unsigned long jpegSize, int *width,
+                                  int *height, int *jpegSubsamp);
+
 DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf,
                                 unsigned long jpegSize, unsigned char *dstBuf,
                                 int flags);
 
-DLLEXPORT char *tjGetErrorStr(void);
+DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width,
+                          int pitch, int height, int pixelSize,
+                          unsigned char *dstBuf, int subsamp, int flags);
 
+/* TurboJPEG 1.2+ */
+
+#define TJFLAG_FORCEMMX  8
+#define TJFLAG_FORCESSE  16
+#define TJFLAG_FORCESSE2  32
+#define TJFLAG_FORCESSE3  128
+
+DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp);
+
+DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width,
+                           int pitch, int height, int pixelFormat,
+                           unsigned char *dstBuf, int subsamp, int flags);
 
 /**
  * @}
diff --git a/usage.txt b/usage.txt
index f7fa3c0..0b6036a 100644
--- a/usage.txt
+++ b/usage.txt
@@ -25,7 +25,7 @@
 We provide two programs, cjpeg to compress an image file into JPEG format,
 and djpeg to decompress a JPEG file back into a conventional image format.
 
-On Unix-like systems, you say:
+On most systems, you say:
         cjpeg [switches] [imagefile] >jpegfile
 or
         djpeg [switches] [jpegfile]  >imagefile
@@ -34,19 +34,19 @@
 standard error).  These conventions are handy for piping images between
 programs.
 
-On most non-Unix systems, you say:
+If you defined TWO_FILE_COMMANDLINE when compiling the programs, you can
+instead say:
         cjpeg [switches] imagefile jpegfile
 or
         djpeg [switches] jpegfile  imagefile
 i.e., both the input and output files are named on the command line.  This
 style is a little more foolproof, and it loses no functionality if you don't
-have pipes.  (You can get this style on Unix too, if you prefer, by defining
-TWO_FILE_COMMANDLINE when you compile the programs; see install.txt.)
+have pipes.
 
 You can also say:
         cjpeg [switches] -outfile jpegfile  imagefile
 or
-        djpeg [switches] -outfile imagefile  jpegfile
+        djpeg [switches] -outfile imagefile jpegfile
 This syntax works on all systems, so it is useful for scripts.
 
 The currently supported image file formats are: PPM (PBMPLUS color format),
@@ -72,12 +72,9 @@
                           Quality is 0 (worst) to 100 (best); default is 75.
                           (See below for more info.)
 
-        -grayscale      Create monochrome JPEG file from color input.
-                        Be sure to use this switch when compressing a grayscale
-                        BMP or GIF file, because cjpeg isn't bright enough to
-                        notice whether a BMP or GIF file uses only shades of
-                        gray.  By saying -grayscale, you'll get a smaller JPEG
-                        file that takes less time to process.
+        -grayscale      Create monochrome JPEG file from color input.  By
+                        saying -grayscale, you'll get a smaller JPEG file that
+                        takes less time to process.
 
         -rgb            Create RGB JPEG file.
                         Using this switch suppresses the conversion from RGB
@@ -146,8 +143,8 @@
 assigned to components with the -qslots option (see the "wizard" switches
 below.)
 
-JPEG  files  generated  with separate luminance and chrominance quality are
-fully compliant with standard JPEG decoders.
+JPEG files generated with separate luminance and chrominance quality are fully
+compliant with standard JPEG decoders.
 
 CAUTION: For this setting to be useful, be sure to pass an argument of
 -sample 1x1 to cjpeg to disable chrominance subsampling.  Otherwise, the
@@ -221,7 +218,7 @@
                         space is needed, an error will occur.
 
         -verbose        Enable debug printout.  More -v's give more printout.
-        or  -debug      Also, version information is printed at startup.
+        or -debug       Also, version information is printed at startup.
 
 The -restart option inserts extra markers that allow a JPEG decoder to
 resynchronize after a transmission error.  Without restart markers, any damage
@@ -470,9 +467,10 @@
 can be removed.  See the -copy option for specifics.
 
 jpegtran uses a command line syntax similar to cjpeg or djpeg.
-On Unix-like systems, you say:
+On most systems, you say:
         jpegtran [switches] [inputfile] >outputfile
-On most non-Unix systems, you say:
+If you defined TWO_FILE_COMMANDLINE when compiling the program, you can instead
+say:
         jpegtran [switches] inputfile outputfile
 where both the input and output files are JPEG files.
 
@@ -601,6 +599,9 @@
         -copy comments  Copy only comment markers.  This setting copies
                         comments from the source file but discards any other
                         metadata.
+        -copy icc       Copy only ICC profile markers.  This setting copies the
+                        ICC profile from the source file but discards any other
+                        metadata.
         -copy all       Copy all extra markers.  This setting preserves
                         miscellaneous markers found in the source file, such
                         as JFIF thumbnails, Exif data, and Photoshop settings.
@@ -649,13 +650,13 @@
 file by directing wrjpgcom's output back into it; on most systems this will
 just destroy your file.
 
-The command line syntax for wrjpgcom is similar to cjpeg's.  On Unix-like
-systems, it is
+The command line syntax for wrjpgcom is similar to cjpeg's.  On most systems,
+it is
         wrjpgcom [switches] [inputfilename]
 The output file is written to standard output.  The input file comes from
 the named file, or from standard input if no input file is named.
 
-On most non-Unix systems, the syntax is
+If you defined TWO_FILE_COMMANDLINE when compiling the program, the syntax is:
         wrjpgcom [switches] inputfilename outputfilename
 where both input and output file names must be given explicitly.
 
diff --git a/wizard.txt b/wizard.txt
index c57fe38..0e155f9 100644
--- a/wizard.txt
+++ b/wizard.txt
@@ -149,7 +149,15 @@
 Ss and Se values other than 0 and 63 appear.  (The -progressive switch is
 not needed to specify this; in fact, it is ignored when -scans appears.)
 The scan script must meet the JPEG restrictions on progression sequences.
-(cjpeg checks that the spec's requirements are obeyed.)
+(cjpeg checks that the spec's requirements are obeyed.)  More specifically:
+
+    * An AC scan cannot include coefficients from more than one component.
+
+    * An AC scan for a particular component must be preceded by a DC scan
+      that includes the same component.
+
+    * Only the first AC scan that includes a particular coefficient for a
+      particular component can include more than one bit from that coefficient.
 
 Scan script files are free format, in that arbitrary whitespace can appear
 between numbers and around punctuation.  Also, comments can be included: a
diff --git a/wrbmp.c b/wrbmp.c
index 408a722..45fff68 100644
--- a/wrbmp.c
+++ b/wrbmp.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright (C) 2013, Linaro Limited.
- * Copyright (C) 2014-2015, 2017, 2019, D. R. Commander.
+ * Copyright (C) 2014-2015, 2017, 2019, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -121,7 +121,7 @@
   inptr = dest->pub.buffer[0];
 
   if (cinfo->out_color_space == JCS_EXT_BGR) {
-    MEMCOPY(outptr, inptr, dest->row_width);
+    memcpy(outptr, inptr, dest->row_width);
     outptr += cinfo->output_width * 3;
   } else if (cinfo->out_color_space == JCS_RGB565) {
     boolean big_endian = is_big_endian();
@@ -165,7 +165,7 @@
     *outptr++ = 0;
 
   if (!dest->use_inversion_array)
-    (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->row_width);
+    fwrite(dest->iobuffer, 1, dest->row_width, dest->pub.output_file);
 }
 
 METHODDEF(void)
@@ -191,7 +191,7 @@
 
   /* Transfer data. */
   inptr = dest->pub.buffer[0];
-  MEMCOPY(outptr, inptr, cinfo->output_width);
+  memcpy(outptr, inptr, cinfo->output_width);
   outptr += cinfo->output_width;
 
   /* Zero out the pad bytes. */
@@ -200,7 +200,7 @@
     *outptr++ = 0;
 
   if (!dest->use_inversion_array)
-    (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->row_width);
+    fwrite(dest->iobuffer, 1, dest->row_width, dest->pub.output_file);
 }
 
 
@@ -256,8 +256,8 @@
   bfSize = headersize + (long)dest->row_width * (long)cinfo->output_height;
 
   /* Set unused fields of header to 0 */
-  MEMZERO(bmpfileheader, sizeof(bmpfileheader));
-  MEMZERO(bmpinfoheader, sizeof(bmpinfoheader));
+  memset(bmpfileheader, 0, sizeof(bmpfileheader));
+  memset(bmpinfoheader, 0, sizeof(bmpinfoheader));
 
   /* Fill the file header */
   bmpfileheader[0] = 0x42;      /* first 2 bytes are ASCII 'B', 'M' */
@@ -281,9 +281,9 @@
   PUT_2B(bmpinfoheader, 32, cmap_entries); /* biClrUsed */
   /* we leave biClrImportant = 0 */
 
-  if (JFWRITE(dest->pub.output_file, bmpfileheader, 14) != (size_t)14)
+  if (fwrite(bmpfileheader, 1, 14, dest->pub.output_file) != (size_t)14)
     ERREXIT(cinfo, JERR_FILE_WRITE);
-  if (JFWRITE(dest->pub.output_file, bmpinfoheader, 40) != (size_t)40)
+  if (fwrite(bmpinfoheader, 1, 40, dest->pub.output_file) != (size_t)40)
     ERREXIT(cinfo, JERR_FILE_WRITE);
 
   if (cmap_entries > 0)
@@ -325,8 +325,8 @@
   bfSize = headersize + (long)dest->row_width * (long)cinfo->output_height;
 
   /* Set unused fields of header to 0 */
-  MEMZERO(bmpfileheader, sizeof(bmpfileheader));
-  MEMZERO(bmpcoreheader, sizeof(bmpcoreheader));
+  memset(bmpfileheader, 0, sizeof(bmpfileheader));
+  memset(bmpcoreheader, 0, sizeof(bmpcoreheader));
 
   /* Fill the file header */
   bmpfileheader[0] = 0x42;      /* first 2 bytes are ASCII 'B', 'M' */
@@ -342,9 +342,9 @@
   PUT_2B(bmpcoreheader, 8, 1);  /* bcPlanes - must be 1 */
   PUT_2B(bmpcoreheader, 10, bits_per_pixel); /* bcBitCount */
 
-  if (JFWRITE(dest->pub.output_file, bmpfileheader, 14) != (size_t)14)
+  if (fwrite(bmpfileheader, 1, 14, dest->pub.output_file) != (size_t)14)
     ERREXIT(cinfo, JERR_FILE_WRITE);
-  if (JFWRITE(dest->pub.output_file, bmpcoreheader, 12) != (size_t)12)
+  if (fwrite(bmpcoreheader, 1, 12, dest->pub.output_file) != (size_t)12)
     ERREXIT(cinfo, JERR_FILE_WRITE);
 
   if (cmap_entries > 0)
@@ -456,7 +456,7 @@
         ((j_common_ptr)cinfo, dest->whole_image, row - 1, (JDIMENSION)1,
          FALSE);
       data_ptr = image_ptr[0];
-      (void)JFWRITE(outfile, data_ptr, dest->row_width);
+      fwrite(data_ptr, 1, dest->row_width, outfile);
     }
     if (progress != NULL)
       progress->completed_extra_passes++;
diff --git a/wrgif.c b/wrgif.c
index 82a2429..620a3ba 100644
--- a/wrgif.c
+++ b/wrgif.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 2015-2019 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015, 2017, D. R. Commander.
+ * Copyright (C) 2015, 2017, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -114,8 +114,8 @@
 {
   if (dinfo->bytesinpkt > 0) {  /* never write zero-length packet */
     dinfo->packetbuf[0] = (char)dinfo->bytesinpkt++;
-    if (JFWRITE(dinfo->pub.output_file, dinfo->packetbuf, dinfo->bytesinpkt) !=
-        (size_t)dinfo->bytesinpkt)
+    if (fwrite(dinfo->packetbuf, 1, dinfo->bytesinpkt,
+               dinfo->pub.output_file) != (size_t)dinfo->bytesinpkt)
       ERREXIT(dinfo->cinfo, JERR_FILE_WRITE);
     dinfo->bytesinpkt = 0;
   }
@@ -169,7 +169,7 @@
 /* Fill the hash table with empty entries */
 {
   /* It's sufficient to zero hash_code[] */
-  MEMZERO(dinfo->hash_code, HSIZE * sizeof(code_int));
+  memset(dinfo->hash_code, 0, HSIZE * sizeof(code_int));
 }
 
 
diff --git a/wrjpgcom.1 b/wrjpgcom.1
deleted file mode 100644
index a255cab..0000000
--- a/wrjpgcom.1
+++ /dev/null
@@ -1,103 +0,0 @@
-.TH WRJPGCOM 1 "15 June 1995"
-.SH NAME
-wrjpgcom \- insert text comments into a JPEG file
-.SH SYNOPSIS
-.B wrjpgcom
-[
-.B \-replace
-]
-[
-.BI \-comment " text"
-]
-[
-.BI \-cfile " name"
-]
-[
-.I filename
-]
-.LP
-.SH DESCRIPTION
-.LP
-.B wrjpgcom
-reads the named JPEG/JFIF file, or the standard input if no file is named,
-and generates a new JPEG/JFIF file on standard output.  A comment block is
-added to the file.
-.PP
-The JPEG standard allows "comment" (COM) blocks to occur within a JPEG file.
-Although the standard doesn't actually define what COM blocks are for, they
-are widely used to hold user-supplied text strings.  This lets you add
-annotations, titles, index terms, etc to your JPEG files, and later retrieve
-them as text.  COM blocks do not interfere with the image stored in the JPEG
-file.  The maximum size of a COM block is 64K, but you can have as many of
-them as you like in one JPEG file.
-.PP
-.B wrjpgcom
-adds a COM block, containing text you provide, to a JPEG file.
-Ordinarily, the COM block is added after any existing COM blocks; but you
-can delete the old COM blocks if you wish.
-.SH OPTIONS
-Switch names may be abbreviated, and are not case sensitive.
-.TP
-.B \-replace
-Delete any existing COM blocks from the file.
-.TP
-.BI \-comment " text"
-Supply text for new COM block on command line.
-.TP
-.BI \-cfile " name"
-Read text for new COM block from named file.
-.PP
-If you have only one line of comment text to add, you can provide it on the
-command line with
-.BR \-comment .
-The comment text must be surrounded with quotes so that it is treated as a
-single argument.  Longer comments can be read from a text file.
-.PP
-If you give neither
-.B \-comment
-nor
-.BR \-cfile,
-then
-.B wrjpgcom
-will read the comment text from standard input.  (In this case an input image
-file name MUST be supplied, so that the source JPEG file comes from somewhere
-else.)  You can enter multiple lines, up to 64KB worth.  Type an end-of-file
-indicator (usually control-D) to terminate the comment text entry.
-.PP
-.B wrjpgcom
-will not add a COM block if the provided comment string is empty.  Therefore
-\fB\-replace \-comment ""\fR can be used to delete all COM blocks from a file.
-.SH EXAMPLES
-.LP
-Add a short comment to in.jpg, producing out.jpg:
-.IP
-.B wrjpgcom \-c
-\fI"View of my back yard" in.jpg
-.B >
-.I out.jpg
-.PP
-Attach a long comment previously stored in comment.txt:
-.IP
-.B wrjpgcom
-.I in.jpg
-.B <
-.I comment.txt
-.B >
-.I out.jpg
-.PP
-or equivalently
-.IP
-.B wrjpgcom
-.B -cfile
-.I comment.txt
-.B <
-.I in.jpg
-.B >
-.I out.jpg
-.SH SEE ALSO
-.BR cjpeg (1),
-.BR djpeg (1),
-.BR jpegtran (1),
-.BR rdjpgcom (1)
-.SH AUTHOR
-Independent JPEG Group
diff --git a/wrjpgcom.c b/wrjpgcom.c
index 8a4e741..060925f 100644
--- a/wrjpgcom.c
+++ b/wrjpgcom.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2014, D. R. Commander.
+ * Copyright (C) 2014, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -14,12 +14,13 @@
  * JPEG markers.
  */
 
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
 #define JPEG_CJPEG_DJPEG        /* to get the command-line config symbols */
 #include "jinclude.h"           /* get auto-config symbols, <stdio.h> */
 
-#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc() */
-extern void *malloc();
-#endif
 #include <ctype.h>              /* to declare isupper(), tolower() */
 #ifdef USE_SETMODE
 #include <fcntl.h>              /* to declare setmode()'s parameter macros */
@@ -27,16 +28,6 @@
 #include <io.h>                 /* to declare setmode() */
 #endif
 
-#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
-#ifdef __MWERKS__
-#include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>            /* ... and this */
-#endif
-#ifdef THINK_C
-#include <console.h>            /* Think declares it here */
-#endif
-#endif
-
 #ifdef DONT_USE_B_MODE          /* define mode parameters for fopen() */
 #define READ_BINARY     "r"
 #define WRITE_BINARY    "w"
@@ -414,11 +405,6 @@
   unsigned int comment_length = 0;
   int marker;
 
-  /* On Mac, fetch a command line. */
-#ifdef USE_CCOMMAND
-  argc = ccommand(&argv);
-#endif
-
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
     progname = "wrjpgcom";      /* in case C library doesn't provide it */
diff --git a/wrppm.c b/wrppm.c
index 3081ec3..57c8aaf 100644
--- a/wrppm.c
+++ b/wrppm.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * Modified 2009 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2017, 2019-2020, D. R. Commander.
+ * Copyright (C) 2017, 2019-2020, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -92,7 +92,7 @@
 {
   ppm_dest_ptr dest = (ppm_dest_ptr)dinfo;
 
-  (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width);
+  fwrite(dest->iobuffer, 1, dest->buffer_width, dest->pub.output_file);
 }
 
 
@@ -115,13 +115,13 @@
   ptr = dest->pub.buffer[0];
   bufferptr = dest->iobuffer;
 #if BITS_IN_JSAMPLE == 8
-  MEMCOPY(bufferptr, ptr, dest->samples_per_row);
+  memcpy(bufferptr, ptr, dest->samples_per_row);
 #else
   for (col = dest->samples_per_row; col > 0; col--) {
     PUTPPMSAMPLE(bufferptr, *ptr++);
   }
 #endif
-  (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width);
+  fwrite(dest->iobuffer, 1, dest->buffer_width, dest->pub.output_file);
 }
 
 
@@ -149,7 +149,7 @@
     PUTPPMSAMPLE(bufferptr, ptr[bindex]);
     ptr += ps;
   }
-  (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width);
+  fwrite(dest->iobuffer, 1, dest->buffer_width, dest->pub.output_file);
 }
 
 
@@ -175,7 +175,7 @@
     PUTPPMSAMPLE(bufferptr, g);
     PUTPPMSAMPLE(bufferptr, b);
   }
-  (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width);
+  fwrite(dest->iobuffer, 1, dest->buffer_width, dest->pub.output_file);
 }
 
 
@@ -205,7 +205,7 @@
     PUTPPMSAMPLE(bufferptr, color_map1[pixval]);
     PUTPPMSAMPLE(bufferptr, color_map2[pixval]);
   }
-  (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width);
+  fwrite(dest->iobuffer, 1, dest->buffer_width, dest->pub.output_file);
 }
 
 
@@ -224,7 +224,7 @@
   for (col = cinfo->output_width; col > 0; col--) {
     PUTPPMSAMPLE(bufferptr, color_map[*ptr++]);
   }
-  (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width);
+  fwrite(dest->iobuffer, 1, dest->buffer_width, dest->pub.output_file);
 }
 
 
diff --git a/wrtarga.c b/wrtarga.c
index 7a654ff..67ca1f0 100644
--- a/wrtarga.c
+++ b/wrtarga.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2017, 2019, D. R. Commander.
+ * Copyright (C) 2017, 2019, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -51,7 +51,7 @@
   char targaheader[18];
 
   /* Set unused fields of header to 0 */
-  MEMZERO(targaheader, sizeof(targaheader));
+  memset(targaheader, 0, sizeof(targaheader));
 
   if (num_colors > 0) {
     targaheader[1] = 1;         /* color map type 1 */
@@ -79,7 +79,7 @@
     }
   }
 
-  if (JFWRITE(dinfo->output_file, targaheader, 18) != (size_t)18)
+  if (fwrite(targaheader, 1, 18, dinfo->output_file) != (size_t)18)
     ERREXIT(cinfo, JERR_FILE_WRITE);
 }
 
@@ -107,7 +107,7 @@
     outptr[2] = inptr[0];
     inptr += 3, outptr += 3;
   }
-  (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width);
+  fwrite(dest->iobuffer, 1, dest->buffer_width, dest->pub.output_file);
 }
 
 METHODDEF(void)
@@ -121,8 +121,8 @@
 
   inptr = dest->pub.buffer[0];
   outptr = dest->iobuffer;
-  MEMCOPY(outptr, inptr, cinfo->output_width);
-  (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width);
+  memcpy(outptr, inptr, cinfo->output_width);
+  fwrite(dest->iobuffer, 1, dest->buffer_width, dest->pub.output_file);
 }
 
 
@@ -146,7 +146,7 @@
   for (col = cinfo->output_width; col > 0; col--) {
     *outptr++ = color_map0[*inptr++];
   }
-  (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width);
+  fwrite(dest->iobuffer, 1, dest->buffer_width, dest->pub.output_file);
 }