Upgrade libopus to v1.5

This project was upgraded with external_updater.
Usage: tools/external_updater/updater.sh update external/libopus
For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md

Test: TreeHugger
Change-Id: I5e8ccb33e729bd87984107ac9fc99987d8ff79f4
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 0000000..28f3797
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,389 @@
+package {
+    default_applicable_licenses: ["external_libopus_license"],
+}
+
+// Added automatically by a large-scale-change that took the approach of
+// 'apply every license found to every target'. While this makes sure we respect
+// every license restriction, it may not be entirely correct.
+//
+// e.g. GPL in an MIT project might only apply to the contrib/ directory.
+//
+// Please consider splitting the single license below into multiple licenses,
+// taking care not to lose any license_kind information, and overriding the
+// default license using the 'licenses: [...]' property on targets as needed.
+//
+// For unused files, consider creating a 'filegroup' with "//visibility:private"
+// to attach the license to, and including a comment whether the files may be
+// used in the current project.
+//
+// large-scale-change included anything that looked like it might be a license
+// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc.
+//
+// Please consider removing redundant or irrelevant files from 'license_text:'.
+// http://go/android-license-faq
+license {
+    name: "external_libopus_license",
+    visibility: [":__subpackages__"],
+    license_kinds: [
+        "SPDX-license-identifier-Apache-2.0",
+        "SPDX-license-identifier-BSD",
+    ],
+    license_text: [
+        "COPYING",
+        "NOTICE",
+    ],
+}
+
+cc_library {
+    name: "libopus",
+    vendor_available: true,
+    host_supported: true,
+
+    export_include_dirs: ["include"],
+
+    local_include_dirs: [
+        "src",
+        "silk",
+        "celt",
+        "silk/fixed",
+    ],
+
+    sanitize: {
+        integer_overflow: true,
+        misc_undefined: ["bounds"],
+        blocklist: "libopus_blocklist.txt",
+    },
+
+    srcs: [
+        // CELT_SOURCES
+        "celt/bands.c",
+        "celt/celt.c",
+        "celt/celt_encoder.c",
+        "celt/celt_decoder.c",
+        "celt/cwrs.c",
+        "celt/entcode.c",
+        "celt/entdec.c",
+        "celt/entenc.c",
+        "celt/kiss_fft.c",
+        "celt/laplace.c",
+        "celt/mathops.c",
+        "celt/mdct.c",
+        "celt/modes.c",
+        "celt/pitch.c",
+        "celt/celt_lpc.c",
+        "celt/quant_bands.c",
+        "celt/rate.c",
+        "celt/vq.c",
+
+        // SILK_SOURCES
+        "silk/CNG.c",
+        "silk/code_signs.c",
+        "silk/init_decoder.c",
+        "silk/decode_core.c",
+        "silk/decode_frame.c",
+        "silk/decode_parameters.c",
+        "silk/decode_indices.c",
+        "silk/decode_pulses.c",
+        "silk/decoder_set_fs.c",
+        "silk/dec_API.c",
+        "silk/enc_API.c",
+        "silk/encode_indices.c",
+        "silk/encode_pulses.c",
+        "silk/gain_quant.c",
+        "silk/interpolate.c",
+        "silk/LP_variable_cutoff.c",
+        "silk/NLSF_decode.c",
+        "silk/NSQ.c",
+        "silk/NSQ_del_dec.c",
+        "silk/PLC.c",
+        "silk/shell_coder.c",
+        "silk/tables_gain.c",
+        "silk/tables_LTP.c",
+        "silk/tables_NLSF_CB_NB_MB.c",
+        "silk/tables_NLSF_CB_WB.c",
+        "silk/tables_other.c",
+        "silk/tables_pitch_lag.c",
+        "silk/tables_pulses_per_block.c",
+        "silk/VAD.c",
+        "silk/control_audio_bandwidth.c",
+        "silk/quant_LTP_gains.c",
+        "silk/VQ_WMat_EC.c",
+        "silk/HP_variable_cutoff.c",
+        "silk/NLSF_encode.c",
+        "silk/NLSF_VQ.c",
+        "silk/NLSF_unpack.c",
+        "silk/NLSF_del_dec_quant.c",
+        "silk/process_NLSFs.c",
+        "silk/stereo_LR_to_MS.c",
+        "silk/stereo_MS_to_LR.c",
+        "silk/check_control_input.c",
+        "silk/control_SNR.c",
+        "silk/init_encoder.c",
+        "silk/control_codec.c",
+        "silk/A2NLSF.c",
+        "silk/ana_filt_bank_1.c",
+        "silk/biquad_alt.c",
+        "silk/bwexpander_32.c",
+        "silk/bwexpander.c",
+        "silk/debug.c",
+        "silk/decode_pitch.c",
+        "silk/inner_prod_aligned.c",
+        "silk/lin2log.c",
+        "silk/log2lin.c",
+        "silk/LPC_analysis_filter.c",
+        "silk/LPC_fit.c",
+        "silk/LPC_inv_pred_gain.c",
+        "silk/table_LSF_cos.c",
+        "silk/NLSF2A.c",
+        "silk/NLSF_stabilize.c",
+        "silk/NLSF_VQ_weights_laroia.c",
+        "silk/pitch_est_tables.c",
+        "silk/resampler.c",
+        "silk/resampler_down2_3.c",
+        "silk/resampler_down2.c",
+        "silk/resampler_private_AR2.c",
+        "silk/resampler_private_down_FIR.c",
+        "silk/resampler_private_IIR_FIR.c",
+        "silk/resampler_private_up2_HQ.c",
+        "silk/resampler_rom.c",
+        "silk/sigm_Q15.c",
+        "silk/sort.c",
+        "silk/sum_sqr_shift.c",
+        "silk/stereo_decode_pred.c",
+        "silk/stereo_encode_pred.c",
+        "silk/stereo_find_predictor.c",
+        "silk/stereo_quant_pred.c",
+
+        // SILK_SOURCES_FIXED
+        "silk/fixed/LTP_analysis_filter_FIX.c",
+        "silk/fixed/LTP_scale_ctrl_FIX.c",
+        "silk/fixed/corrMatrix_FIX.c",
+        "silk/fixed/encode_frame_FIX.c",
+        "silk/fixed/find_LPC_FIX.c",
+        "silk/fixed/find_LTP_FIX.c",
+        "silk/fixed/find_pitch_lags_FIX.c",
+        "silk/fixed/find_pred_coefs_FIX.c",
+        "silk/fixed/noise_shape_analysis_FIX.c",
+        "silk/fixed/process_gains_FIX.c",
+        "silk/fixed/regularize_correlations_FIX.c",
+        "silk/fixed/residual_energy16_FIX.c",
+        "silk/fixed/residual_energy_FIX.c",
+        "silk/fixed/warped_autocorrelation_FIX.c",
+        "silk/fixed/apply_sine_window_FIX.c",
+        "silk/fixed/autocorr_FIX.c",
+        "silk/fixed/burg_modified_FIX.c",
+        "silk/fixed/k2a_FIX.c",
+        "silk/fixed/k2a_Q16_FIX.c",
+        "silk/fixed/pitch_analysis_core_FIX.c",
+        "silk/fixed/vector_ops_FIX.c",
+        "silk/fixed/schur64_FIX.c",
+        "silk/fixed/schur_FIX.c",
+
+        // OPUS_SOURCES
+        "src/mapping_matrix.c",
+        "src/opus.c",
+        "src/opus_decoder.c",
+        "src/opus_encoder.c",
+        "src/opus_multistream.c",
+        "src/opus_multistream_encoder.c",
+        "src/opus_multistream_decoder.c",
+        "src/opus_projection_encoder.c",
+        "src/opus_projection_decoder.c",
+        "src/repacketizer.c",
+        "src/extensions.c",
+        // OPUS_SOURCES_FLOAT
+        "src/analysis.c",
+        "src/mlp.c",
+        "src/mlp_data.c",
+    ],
+
+    cflags: [
+        "-DNULL=0",
+        "-DSOCKLEN_T=socklen_t",
+        "-DLOCALE_NOT_USED",
+        "-D_LARGEFILE_SOURCE=1",
+        "-D_FILE_OFFSET_BITS=64",
+        "-Drestrict=",
+        "-D__EMX__",
+        "-DOPUS_BUILD",
+        "-DFIXED_POINT",
+        "-DUSE_ALLOCA",
+        "-DSIMD_EXTRA_ALLOC_BYTES=16",
+        "-DHAVE_LRINT",
+        "-DHAVE_LRINTF",
+        "-DENABLE_HARDENING",
+        "-O2",
+        "-fno-math-errno",
+        "-Wall",
+        "-Werror",
+    ],
+    cppflags: [
+        "-DBSD=1",
+        "-ffast-math",
+        "-O2",
+        "-funroll-loops",
+    ],
+
+    arch: {
+        arm: {
+            srcs: [
+                // CELT_SOURCES_ARM
+                "celt/arm/armcpu.c",
+                "celt/arm/arm_celt_map.c",
+
+                // DSP, MEDIA and NEON instructions are in the same assembler
+                // file - thus we need to include it even if NEON is not
+                // supported on target platform.
+                // CELT_SOURCES_ARM_ASM
+                "celt/arm/celt_pitch_xcorr_arm_gnu.s",
+
+                // CELT_AM_SOURCES_ARM_ASM
+                "celt/arm/armopts_gnu.s",
+            ],
+
+            cflags: [
+                "-DOPUS_ARM_ASM",
+                "-DOPUS_ARM_INLINE_ASM",
+                "-DOPUS_ARM_MAY_HAVE_EDSP",
+                "-DOPUS_ARM_INLINE_EDSP",
+                "-DOPUS_ARM_MAY_HAVE_MEDIA",
+                "-DOPUS_ARM_INLINE_MEDIA",
+                "-DOPUS_ARM_MAY_HAVE_NEON",
+                "-DOPUS_HAVE_RTCD",
+            ],
+
+            // Note: OPUS enhanced DSP/NEON implementation is not yet
+            // compatible with arm64.  Only add the appropriate defines for
+            // 32-bit arm architecture.
+            neon: {
+                srcs: [
+                    // CELT_SOURCES_ARM_NEON_INTR
+                    "celt/arm/celt_neon_intr.c",
+                    "celt/arm/pitch_neon_intr.c",
+
+                    // SILK_SOURCES_ARM_NEON_INTR,
+                    "silk/arm/arm_silk_map.c",
+                    "silk/arm/biquad_alt_neon_intr.c",
+                    "silk/arm/LPC_inv_pred_gain_neon_intr.c",
+                    "silk/arm/NSQ_del_dec_neon_intr.c",
+                    "silk/arm/NSQ_neon.c",
+
+                    // SILK_SOURCES_FIXED_ARM_NEON_INTR,
+                    "silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c",
+                ],
+
+                cflags: [
+                    "-DOPUS_ARM_MAY_HAVE_NEON",
+                    "-DOPUS_ARM_MAY_HAVE_NEON_INTR",
+                    "-DOPUS_ARM_PRESUME_NEON",
+                    "-DOPUS_ARM_INLINE_NEON",
+                ],
+            },
+
+        },
+
+        x86: {
+            ssse3: {
+                srcs: [
+                    // CELT_SOURCES_SSE
+                    "celt/x86/x86cpu.c",
+                    "celt/x86/x86_celt_map.c",
+                    "celt/x86/pitch_sse.c",
+
+                    // CELT_SOURCES_SSE2
+                    "celt/x86/pitch_sse2.c",
+                    "celt/x86/vq_sse2.c",
+                ],
+
+                cflags: [
+                    "-DOPUS_X86_MAY_HAVE_SSE",
+                    "-DOPUS_X86_PRESUME_SSE",
+                    "-DOPUS_X86_MAY_HAVE_SSE2",
+                    "-DOPUS_X86_PRESUME_SSE2",
+                ],
+            },
+
+            sse4_1: {
+                srcs: [
+                    // CELT_SOURCES_SSE4_1
+                    "celt/x86/celt_lpc_sse4_1.c",
+                    "celt/x86/pitch_sse4_1.c",
+
+                    // SILK_SOURCES_SSE4_1
+                    "silk/x86/NSQ_sse4_1.c",
+                    "silk/x86/NSQ_del_dec_sse4_1.c",
+                    "silk/x86/x86_silk_map.c",
+                    "silk/x86/VAD_sse4_1.c",
+                    "silk/x86/VQ_WMat_EC_sse4_1.c",
+
+                    // SILK_SOURCES_FIXED_SSE4_1
+                    "silk/fixed/x86/vector_ops_FIX_sse4_1.c",
+                    "silk/fixed/x86/burg_modified_FIX_sse4_1.c",
+                ],
+
+                cflags: [
+                    "-DOPUS_X86_MAY_HAVE_SSE4_1",
+                    "-DOPUS_X86_PRESUME_SSE4_1",
+                ],
+            },
+        },
+
+        x86_64: {
+            ssse3: {
+                srcs: [
+                    // CELT_SOURCES_SSE
+                    "celt/x86/x86cpu.c",
+                    "celt/x86/x86_celt_map.c",
+                    "celt/x86/pitch_sse.c",
+
+                    // CELT_SOURCES_SSE2
+                    "celt/x86/pitch_sse2.c",
+                    "celt/x86/vq_sse2.c",
+                ],
+
+                cflags: [
+                    "-DOPUS_X86_MAY_HAVE_SSE",
+                    "-DOPUS_X86_PRESUME_SSE",
+                    "-DOPUS_X86_MAY_HAVE_SSE2",
+                    "-DOPUS_X86_PRESUME_SSE2",
+                ],
+            },
+
+            sse4_1: {
+                srcs: [
+                    // CELT_SOURCES_SSE4_1
+                    "celt/x86/celt_lpc_sse4_1.c",
+                    "celt/x86/pitch_sse4_1.c",
+
+                    // SILK_SOURCES_SSE4_1
+                    "silk/x86/NSQ_sse4_1.c",
+                    "silk/x86/NSQ_del_dec_sse4_1.c",
+                    "silk/x86/x86_silk_map.c",
+                    "silk/x86/VAD_sse4_1.c",
+                    "silk/x86/VQ_WMat_EC_sse4_1.c",
+
+                    // SILK_SOURCES_FIXED_SSE4_1
+                    "silk/fixed/x86/vector_ops_FIX_sse4_1.c",
+                    "silk/fixed/x86/burg_modified_FIX_sse4_1.c",
+                ],
+
+                cflags: [
+                    "-DOPUS_X86_MAY_HAVE_SSE4_1",
+                    "-DOPUS_X86_PRESUME_SSE4_1",
+                ],
+            },
+        },
+    },
+
+    target: {
+        darwin: {
+            enabled: false,
+        },
+    },
+    apex_available: [
+        "//apex_available:platform", // used by libstagefright_soft_opusdec
+        "com.android.media.swcodec",
+        "com.android.btservices",
+    ],
+    min_sdk_version: "29",
+}
diff --git a/CleanSpec.mk b/CleanSpec.mk
new file mode 100644
index 0000000..d531442
--- /dev/null
+++ b/CleanSpec.mk
@@ -0,0 +1,49 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# If you don't need to do a full clean build but would like to touch
+# a file or delete some intermediate files, add a clean step to the end
+# of the list.  These steps will only be run once, if they haven't been
+# run before.
+#
+# E.g.:
+#     $(call add-clean-step, touch -c external/sqlite/sqlite3.h)
+#     $(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/STATIC_LIBRARIES/libz_intermediates)
+#
+# Always use "touch -c" and "rm -f" or "rm -rf" to gracefully deal with
+# files that are missing or have been moved.
+#
+# Use $(PRODUCT_OUT) to get to the "out/target/product/blah/" directory.
+# Use $(OUT_DIR) to refer to the "out" directory.
+#
+# If you need to re-do something that's already mentioned, just copy
+# the command and add it to the bottom of the list.  E.g., if a change
+# that you made last week required touching a file and a change you
+# made today requires touching the same file, just copy the old
+# touch step and add it to the end of the list.
+#
+# ************************************************
+# NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST
+# ************************************************
+
+# For example:
+#$(call add-clean-step, rm -rf $(OUT_DIR)/target/common/obj/APPS/AndroidTests_intermediates)
+#$(call add-clean-step, rm -rf $(OUT_DIR)/target/common/obj/JAVA_LIBRARIES/core_intermediates)
+#$(call add-clean-step, find $(OUT_DIR) -type f -name "IGTalkSession*" -print0 | xargs -0 rm -f)
+#$(call add-clean-step, rm -rf $(PRODUCT_OUT)/data/*)
+
+# ************************************************
+# NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST
+# ************************************************
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..e6cc785
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,22 @@
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update external/libopus
+# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md
+
+name: "libopus"
+description: "Android fork of the opus library."
+third_party {
+  license_type: NOTICE
+  security {
+    tag: "NVD-CPE2.3:cpe:/a:opus-codec:opus:1.0.3"
+  }
+  last_upgrade_date {
+    year: 2024
+    month: 3
+    day: 12
+  }
+  identifier {
+    type: "Git"
+    value: "https://gitlab.xiph.org/xiph/opus.git"
+    version: "v1.5"
+  }
+}
diff --git a/.gitmodules b/MODULE_LICENSE_BSD
similarity index 100%
rename from .gitmodules
rename to MODULE_LICENSE_BSD
diff --git a/NOTICE b/NOTICE
new file mode 100644
index 0000000..108afb2
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,27 @@
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+- Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+- Neither the name of Internet Society, IETF or IETF Trust, nor the 
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..c836410
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1,4 @@
+# owners for external/libopus
+include platform/frameworks/av:/media/janitors/codec_OWNERS
+flim@google.com
+essick@google.com
diff --git a/PREUPLOAD.cfg b/PREUPLOAD.cfg
new file mode 100644
index 0000000..ecf8b8e
--- /dev/null
+++ b/PREUPLOAD.cfg
@@ -0,0 +1,2 @@
+[Hook Scripts]
+mainline_hook = ${REPO_ROOT}/frameworks/av/tools/mainline_hook_project.sh
diff --git a/README.android b/README.android
new file mode 100644
index 0000000..06fdc0e
--- /dev/null
+++ b/README.android
@@ -0,0 +1,7 @@
+* current source is based on libopus 1.5 (https://gitlab.xiph.org/xiph/opus.git, tag v1.5)
+* libopus is BSD-licensed - http://www.opus-codec.org/license/
+
+Updating:
+* Run "convert_android_asm.sh" from the root of the library (external/libopus).
+  This uses 'arm2gnu.pl' included in libopus to convert ARM ASM files to GNU ASM
+  files for building under the Android NDK.
diff --git a/celt/arm/armopts_gnu.s b/celt/arm/armopts_gnu.s
new file mode 100644
index 0000000..c7082fc
--- /dev/null
+++ b/celt/arm/armopts_gnu.s
@@ -0,0 +1,38 @@
+    .syntax unified
+/* Copyright (C) 2013 Mozilla Corporation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+@ Set the following to 1 if we have EDSP instructions
+@  (LDRD/STRD, etc., ARMv5E and later).
+ .set OPUS_ARM_MAY_HAVE_EDSP, 1
+
+@ Set the following to 1 if we have ARMv6 media instructions.
+ .set OPUS_ARM_MAY_HAVE_MEDIA, 1
+
+@ Set the following to 1 if we have NEON (some ARMv7)
+ .set OPUS_ARM_MAY_HAVE_NEON, 1
+
+@ END:
diff --git a/celt/arm/celt_pitch_xcorr_arm_gnu.s b/celt/arm/celt_pitch_xcorr_arm_gnu.s
new file mode 100644
index 0000000..31b0c65
--- /dev/null
+++ b/celt/arm/celt_pitch_xcorr_arm_gnu.s
@@ -0,0 +1,555 @@
+    .syntax unified
+@ Copyright (c) 2007-2008 CSIRO
+@ Copyright (c) 2007-2009 Xiph.Org Foundation
+@ Copyright (c) 2013      Parrot
+@ Written by Aurélien Zanelli
+@
+@ Redistribution and use in source and binary forms, with or without
+@ modification, are permitted provided that the following conditions
+@ are met:
+@
+@ - Redistributions of source code must retain the above copyright
+@ notice, this list of conditions and the following disclaimer.
+@
+@ - Redistributions in binary form must reproduce the above copyright
+@ notice, this list of conditions and the following disclaimer in the
+@ documentation and/or other materials provided with the distribution.
+@
+@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+@ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+@ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    .text;   .p2align 2;   .arch armv7-a
+   .fpu neon
+   .object_arch armv4t
+
+  .include "celt/arm/armopts_gnu.s"
+
+ .if OPUS_ARM_MAY_HAVE_EDSP
+  .global celt_pitch_xcorr_edsp
+ .endif
+
+ .if OPUS_ARM_MAY_HAVE_NEON
+  .global celt_pitch_xcorr_neon
+ .endif
+
+ .if OPUS_ARM_MAY_HAVE_NEON
+
+@ Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
+	.type	xcorr_kernel_neon, %function; xcorr_kernel_neon: @ PROC
+xcorr_kernel_neon_start:
+  @ input:
+  @   r3     = int         len
+  @   r4     = opus_val16 *x
+  @   r5     = opus_val16 *y
+  @   q0     = opus_val32  sum[4]
+  @ output:
+  @   q0     = opus_val32  sum[4]
+  @ preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
+  @ internal usage:
+  @   r12 = int j
+  @   d3  = y_3|y_2|y_1|y_0
+  @   q2  = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
+  @   q3  = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
+  @   q8  = scratch
+  @
+  @ Load y[0...3]
+  @ This requires len>0 to always be valid (which we assert in the C code).
+  VLD1.16      {d5}, [r5]!
+  SUBS         r12, r3, #8
+  BLE xcorr_kernel_neon_process4
+@ Process 8 samples at a time.
+@ This loop loads one y value more than we actually need. Therefore we have to
+@ stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
+@ reading past the end of the array.
+xcorr_kernel_neon_process8:
+  @ This loop has 19 total instructions (10 cycles to issue, minimum), with
+  @ - 2 cycles of ARM instructions,
+  @ - 10 cycles of load/store/byte permute instructions, and
+  @ - 9 cycles of data processing instructions.
+  @ On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
+  @ latter two categories, meaning the whole loop should run in 10 cycles per
+  @ iteration, barring cache misses.
+  @
+  @ Load x[0...7]
+  VLD1.16      {d6, d7}, [r4]!
+  @ Unlike VMOV, VAND is a data processing instruction (and doesn't get
+  @ assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
+  VAND         d3, d5, d5
+  SUBS         r12, r12, #8
+  @ Load y[4...11]
+  VLD1.16      {d4, d5}, [r5]!
+  VMLAL.S16    q0, d3, d6[0]
+  VEXT.16      d16, d3, d4, #1
+  VMLAL.S16    q0, d4, d7[0]
+  VEXT.16      d17, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d3, d4, #2
+  VMLAL.S16    q0, d17, d7[1]
+  VEXT.16      d17, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d3, d4, #3
+  VMLAL.S16    q0, d17, d7[2]
+  VEXT.16      d17, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+  VMLAL.S16    q0, d17, d7[3]
+  BGT xcorr_kernel_neon_process8
+@ Process 4 samples here if we have > 4 left (still reading one extra y value).
+xcorr_kernel_neon_process4:
+  ADDS         r12, r12, #4
+  BLE xcorr_kernel_neon_process2
+  @ Load x[0...3]
+  VLD1.16      d6, [r4]!
+  @ Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #4
+  @ Load y[4...7]
+  VLD1.16      d5, [r5]!
+  VMLAL.S16    q0, d4, d6[0]
+  VEXT.16      d16, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+@ Process 2 samples here if we have > 2 left (still reading one extra y value).
+xcorr_kernel_neon_process2:
+  ADDS         r12, r12, #2
+  BLE xcorr_kernel_neon_process1
+  @ Load x[0...1]
+  VLD2.16      {d6[],d7[]}, [r4]!
+  @ Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #2
+  @ Load y[4...5]
+  VLD1.32      {d5[]}, [r5]!
+  VMLAL.S16    q0, d4, d6
+  VEXT.16      d16, d4, d5, #1
+  @ Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
+  @ instead of VEXT, since it's a data-processing instruction.
+  VSRI.64      d5, d4, #32
+  VMLAL.S16    q0, d16, d7
+@ Process 1 sample using the extra y value we loaded above.
+xcorr_kernel_neon_process1:
+  @ Load next *x
+  VLD1.16      {d6[]}, [r4]!
+  ADDS         r12, r12, #1
+  @ y[0...3] are left in d5 from prior iteration(s) (if any)
+  VMLAL.S16    q0, d5, d6
+  MOVLE        pc, lr
+@ Now process 1 last sample, not reading ahead.
+  @ Load last *y
+  VLD1.16      {d4[]}, [r5]!
+  VSRI.64      d4, d5, #16
+  @ Load last *x
+  VLD1.16      {d6[]}, [r4]!
+  VMLAL.S16    q0, d4, d6
+  MOV          pc, lr
+	.size xcorr_kernel_neon, .-xcorr_kernel_neon  @ ENDP
+
+@ opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
+@  opus_val32 *xcorr, int len, int max_pitch, int arch)
+	.type	celt_pitch_xcorr_neon, %function; celt_pitch_xcorr_neon: @ PROC
+  @ input:
+  @   r0  = opus_val16 *_x
+  @   r1  = opus_val16 *_y
+  @   r2  = opus_val32 *xcorr
+  @   r3  = int         len
+  @ output:
+  @   r0  = int         maxcorr
+  @ internal usage:
+  @   r4  = opus_val16 *x (for xcorr_kernel_neon())
+  @   r5  = opus_val16 *y (for xcorr_kernel_neon())
+  @   r6  = int         max_pitch
+  @   r12 = int         j
+  @   q15 = int         maxcorr[4] (q15 is not used by xcorr_kernel_neon())
+  @ ignored:
+  @         int         arch
+  STMFD        sp!, {r4-r6, lr}
+  LDR          r6, [sp, #16]
+  VMOV.S32     q15, #1
+  @ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+  SUBS         r6, r6, #4
+  BLT celt_pitch_xcorr_neon_process4_done
+celt_pitch_xcorr_neon_process4:
+  @ xcorr_kernel_neon parameters:
+  @ r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
+  MOV          r4, r0
+  MOV          r5, r1
+  VEOR         q0, q0, q0
+  @ xcorr_kernel_neon only modifies r4, r5, r12, q0...q3, and q8 (scratch).
+  @ So we don't save/restore any other registers.
+  BL xcorr_kernel_neon_start
+  SUBS         r6, r6, #4
+  VST1.32      {q0}, [r2]!
+  @ _y += 4
+  ADD          r1, r1, #8
+  VMAX.S32     q15, q15, q0
+  @ if (max_pitch >= 4) goto celt_pitch_xcorr_neon_process4
+  BGE celt_pitch_xcorr_neon_process4
+@ We have less than 4 sums left to compute.
+celt_pitch_xcorr_neon_process4_done:
+  ADDS         r6, r6, #4
+  @ Reduce maxcorr to a single value
+  VMAX.S32     d30, d30, d31
+  VPMAX.S32    d30, d30, d30
+  @ if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
+  BLE celt_pitch_xcorr_neon_done
+@ Now compute each remaining sum one at a time.
+celt_pitch_xcorr_neon_process_remaining:
+  MOV          r4, r0
+  MOV          r5, r1
+  VMOV.I32     q0, #0
+  SUBS         r12, r3, #8
+  BLT celt_pitch_xcorr_neon_process_remaining4
+@ Sum terms 8 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop8:
+  @ Load x[0...7]
+  VLD1.16      {q1}, [r4]!
+  @ Load y[0...7]
+  VLD1.16      {q2}, [r5]!
+  SUBS         r12, r12, #8
+  VMLAL.S16    q0, d4, d2
+  VMLAL.S16    q0, d5, d3
+  BGE celt_pitch_xcorr_neon_process_remaining_loop8
+@ Sum terms 4 at a time.
+celt_pitch_xcorr_neon_process_remaining4:
+  ADDS         r12, r12, #4
+  BLT celt_pitch_xcorr_neon_process_remaining4_done
+  @ Load x[0...3]
+  VLD1.16      {d2}, [r4]!
+  @ Load y[0...3]
+  VLD1.16      {d3}, [r5]!
+  SUB          r12, r12, #4
+  VMLAL.S16    q0, d3, d2
+celt_pitch_xcorr_neon_process_remaining4_done:
+  @ Reduce the sum to a single value.
+  VADD.S32     d0, d0, d1
+  VPADDL.S32   d0, d0
+  ADDS         r12, r12, #4
+  BLE celt_pitch_xcorr_neon_process_remaining_loop_done
+@ Sum terms 1 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop1:
+  VLD1.16      {d2[]}, [r4]!
+  VLD1.16      {d3[]}, [r5]!
+  SUBS         r12, r12, #1
+  VMLAL.S16    q0, d2, d3
+  BGT celt_pitch_xcorr_neon_process_remaining_loop1
+celt_pitch_xcorr_neon_process_remaining_loop_done:
+  VST1.32      {d0[0]}, [r2]!
+  VMAX.S32     d30, d30, d0
+  SUBS         r6, r6, #1
+  @ _y++
+  ADD          r1, r1, #2
+  @ if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
+  BGT celt_pitch_xcorr_neon_process_remaining
+celt_pitch_xcorr_neon_done:
+  VMOV.32      r0, d30[0]
+  LDMFD        sp!, {r4-r6, pc}
+	.size celt_pitch_xcorr_neon, .-celt_pitch_xcorr_neon  @ ENDP
+
+ .endif
+
+ .if OPUS_ARM_MAY_HAVE_EDSP
+
+@ This will get used on ARMv7 devices without NEON, so it has been optimized
+@ to take advantage of dual-issuing where possible.
+	.type	xcorr_kernel_edsp, %function; xcorr_kernel_edsp: @ PROC
+xcorr_kernel_edsp_start:
+  @ input:
+  @   r3      = int         len
+  @   r4      = opus_val16 *_x (must be 32-bit aligned)
+  @   r5      = opus_val16 *_y (must be 32-bit aligned)
+  @   r6...r9 = opus_val32  sum[4]
+  @ output:
+  @   r6...r9 = opus_val32  sum[4]
+  @ preserved: r0-r5
+  @ internal usage
+  @   r2      = int         j
+  @   r12,r14 = opus_val16  x[4]
+  @   r10,r11 = opus_val16  y[4]
+  STMFD        sp!, {r2,r4,r5,lr} @ r2, r4 and r5 are modified below
+  LDR          r10, [r5], #4      @ Load y[0...1]
+  SUBS         r2, r3, #4         @ j = len-4
+  LDR          r11, [r5], #4      @ Load y[2...3]
+  BLE xcorr_kernel_edsp_process4_done
+  LDR          r12, [r4], #4      @ Load x[0...1]
+  @ Stall
+xcorr_kernel_edsp_process4:
+  @ The multiplies must issue from pipeline 0, and can't dual-issue with each
+  @ other. Every other instruction here dual-issues with a multiply, and is
+  @ thus "free". There should be no stalls in the body of the loop.
+  SMLABB       r6, r12, r10, r6   @ sum[0] = MAC16_16(sum[0],x_0,y_0)
+  LDR          r14, [r4], #4      @ Load x[2...3]
+  SMLABT       r7, r12, r10, r7   @ sum[1] = MAC16_16(sum[1],x_0,y_1)
+  SUBS         r2, r2, #4         @ j-=4
+  SMLABB       r8, r12, r11, r8   @ sum[2] = MAC16_16(sum[2],x_0,y_2)
+  SMLABT       r9, r12, r11, r9   @ sum[3] = MAC16_16(sum[3],x_0,y_3)
+  SMLATT       r6, r12, r10, r6   @ sum[0] = MAC16_16(sum[0],x_1,y_1)
+  LDR          r10, [r5], #4      @ Load y[4...5]
+  SMLATB       r7, r12, r11, r7   @ sum[1] = MAC16_16(sum[1],x_1,y_2)
+  SMLATT       r8, r12, r11, r8   @ sum[2] = MAC16_16(sum[2],x_1,y_3)
+  SMLATB       r9, r12, r10, r9   @ sum[3] = MAC16_16(sum[3],x_1,y_4)
+  LDRGT        r12, [r4], #4      @ Load x[0...1]
+  SMLABB       r6, r14, r11, r6   @ sum[0] = MAC16_16(sum[0],x_2,y_2)
+  SMLABT       r7, r14, r11, r7   @ sum[1] = MAC16_16(sum[1],x_2,y_3)
+  SMLABB       r8, r14, r10, r8   @ sum[2] = MAC16_16(sum[2],x_2,y_4)
+  SMLABT       r9, r14, r10, r9   @ sum[3] = MAC16_16(sum[3],x_2,y_5)
+  SMLATT       r6, r14, r11, r6   @ sum[0] = MAC16_16(sum[0],x_3,y_3)
+  LDR          r11, [r5], #4      @ Load y[6...7]
+  SMLATB       r7, r14, r10, r7   @ sum[1] = MAC16_16(sum[1],x_3,y_4)
+  SMLATT       r8, r14, r10, r8   @ sum[2] = MAC16_16(sum[2],x_3,y_5)
+  SMLATB       r9, r14, r11, r9   @ sum[3] = MAC16_16(sum[3],x_3,y_6)
+  BGT xcorr_kernel_edsp_process4
+xcorr_kernel_edsp_process4_done:
+  ADDS         r2, r2, #4         @ j += 4: samples left for the tail below
+  BLE xcorr_kernel_edsp_done
+  LDRH         r12, [r4], #2      @ r12 = *x++
+  SUBS         r2, r2, #1         @ j--
+  @ Stall
+  SMLABB       r6, r12, r10, r6   @ sum[0] = MAC16_16(sum[0],x,y_0)
+  LDRHGT       r14, [r4], #2      @ r14 = *x++
+  SMLABT       r7, r12, r10, r7   @ sum[1] = MAC16_16(sum[1],x,y_1)
+  SMLABB       r8, r12, r11, r8   @ sum[2] = MAC16_16(sum[2],x,y_2)
+  SMLABT       r9, r12, r11, r9   @ sum[3] = MAC16_16(sum[3],x,y_3)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r10, r6   @ sum[0] = MAC16_16(sum[0],x,y_1)
+  SUBS         r2, r2, #1         @ j--
+  SMLABB       r7, r14, r11, r7   @ sum[1] = MAC16_16(sum[1],x,y_2)
+  LDRH         r10, [r5], #2      @ r10 = y_4 = *y++
+  SMLABT       r8, r14, r11, r8   @ sum[2] = MAC16_16(sum[2],x,y_3)
+  LDRHGT       r12, [r4], #2      @ r12 = *x++
+  SMLABB       r9, r14, r10, r9   @ sum[3] = MAC16_16(sum[3],x,y_4)
+  BLE xcorr_kernel_edsp_done
+  SMLABB       r6, r12, r11, r6   @ sum[0] = MAC16_16(sum[0],tmp,y_2)
+  CMP          r2, #1             @ Flags as for j--; r2 is reloaded with y_5 below
+  SMLABT       r7, r12, r11, r7   @ sum[1] = MAC16_16(sum[1],tmp,y_3)
+  LDRH         r2, [r5], #2       @ r2 = y_5 = *y++
+  SMLABB       r8, r12, r10, r8   @ sum[2] = MAC16_16(sum[2],tmp,y_4)
+  LDRHGT       r14, [r4]          @ r14 = *x
+  SMLABB       r9, r12, r2, r9    @ sum[3] = MAC16_16(sum[3],tmp,y_5)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r11, r6   @ sum[0] = MAC16_16(sum[0],tmp,y_3)
+  LDRH         r11, [r5]          @ r11 = y_6 = *y
+  SMLABB       r7, r14, r10, r7   @ sum[1] = MAC16_16(sum[1],tmp,y_4)
+  SMLABB       r8, r14, r2, r8    @ sum[2] = MAC16_16(sum[2],tmp,y_5)
+  SMLABB       r9, r14, r11, r9   @ sum[3] = MAC16_16(sum[3],tmp,y_6)
+xcorr_kernel_edsp_done:
+  LDMFD        sp!, {r2,r4,r5,pc}
+	.size xcorr_kernel_edsp, .-xcorr_kernel_edsp  @ ENDP
+
+	.type	celt_pitch_xcorr_edsp, %function; celt_pitch_xcorr_edsp: @ PROC
+  @ input:
+  @   r0  = opus_val16 *_x (must be 32-bit aligned)
+  @   r1  = opus_val16 *_y (only needs to be 16-bit aligned)
+  @   r2  = opus_val32 *xcorr
+  @   r3  = int         len
+  @ output:
+  @   r0  = maxcorr
+  @ internal usage
+  @   r4  = opus_val16 *x
+  @   r5  = opus_val16 *y
+  @   r6  = opus_val32  sum0
+  @   r7  = opus_val32  sum1
+  @   r8  = opus_val32  sum2
+  @   r9  = opus_val32  sum3
+  @   r1  = int         max_pitch
+  @   r12 = int         j
+  @ ignored:
+  @         int         arch
+  STMFD        sp!, {r4-r11, lr}
+  MOV          r5, r1
+  LDR          r1, [sp, #36]        @ r1 = max_pitch (stack arg above the 9 saved regs)
+  MOV          r4, r0
+  TST          r5, #3               @ Is _y already 32-bit aligned?
+  @ maxcorr = 1
+  MOV          r0, #1
+  BEQ          celt_pitch_xcorr_edsp_process1u_done
+@ Compute one sum at the start to make y 32-bit aligned.
+  SUBS         r12, r3, #4
+  @ r14 = sum = 0
+  MOV          r14, #0
+  LDRH         r8, [r5], #2
+  BLE celt_pitch_xcorr_edsp_process1u_loop4_done
+  LDR          r6, [r4], #4
+  MOV          r8, r8, LSL #16
+celt_pitch_xcorr_edsp_process1u_loop4:
+  LDR          r9, [r5], #4
+  SMLABT       r14, r6, r8, r14     @ sum = MAC16_16(sum, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLATB       r14, r6, r9, r14     @ sum = MAC16_16(sum, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLABT       r14, r7, r9, r14     @ sum = MAC16_16(sum, x_2, y_2)
+  SUBS         r12, r12, #4         @ j-=4
+  SMLATB       r14, r7, r8, r14     @ sum = MAC16_16(sum, x_3, y_3)
+  LDRGT        r6, [r4], #4
+  BGT celt_pitch_xcorr_edsp_process1u_loop4
+  MOV          r8, r8, LSR #16
+celt_pitch_xcorr_edsp_process1u_loop4_done:
+  ADDS         r12, r12, #4
+celt_pitch_xcorr_edsp_process1u_loop1:
+  LDRHGE       r6, [r4], #2
+  @ Stall
+  SMLABBGE     r14, r6, r8, r14    @ sum = MAC16_16(sum, *x, *y)
+  SUBSGE       r12, r12, #1
+  LDRHGT       r8, [r5], #2
+  BGT celt_pitch_xcorr_edsp_process1u_loop1
+  @ Restore _x
+  SUB          r4, r4, r3, LSL #1
+  @ Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  @ maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  ADD          r5, r5, #2
+  MOVLT        r0, r14
+  SUBS         r1, r1, #1
+  @ xcorr[i] = sum
+  STR          r14, [r2], #4
+  BLE celt_pitch_xcorr_edsp_done
+celt_pitch_xcorr_edsp_process1u_done:
+  @ if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
+  SUBS         r1, r1, #4
+  BLT celt_pitch_xcorr_edsp_process2
+celt_pitch_xcorr_edsp_process4:
+  @ xcorr_kernel_edsp parameters:
+  @ r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
+  MOV          r6, #0
+  MOV          r7, #0
+  MOV          r8, #0
+  MOV          r9, #0
+  BL xcorr_kernel_edsp_start  @ xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
+  @ maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
+  CMP          r0, r6
+  @ _y+=4
+  ADD          r5, r5, #8
+  MOVLT        r0, r6
+  CMP          r0, r7
+  MOVLT        r0, r7
+  CMP          r0, r8
+  MOVLT        r0, r8
+  CMP          r0, r9
+  MOVLT        r0, r9
+  STMIA        r2!, {r6-r9}
+  SUBS         r1, r1, #4
+  BGE celt_pitch_xcorr_edsp_process4
+celt_pitch_xcorr_edsp_process2:
+  ADDS         r1, r1, #2           @ r1 holds max_pitch-4 here; is max_pitch < 2?
+  BLT celt_pitch_xcorr_edsp_process1a
+  SUBS         r12, r3, #4
+  @ {r10, r11} = {sum0, sum1} = {0, 0}
+  MOV          r10, #0
+  MOV          r11, #0
+  LDR          r8, [r5], #4
+  BLE celt_pitch_xcorr_edsp_process2_loop_done
+  LDR          r6, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process2_loop4:
+  SMLABB       r10, r6, r8, r10     @ sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLABT       r11, r6, r8, r11     @ sum1 = MAC16_16(sum1, x_0, y_1)
+  SUBS         r12, r12, #4         @ j-=4
+  SMLATT       r10, r6, r8, r10     @ sum0 = MAC16_16(sum0, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLATB       r11, r6, r9, r11     @ sum1 = MAC16_16(sum1, x_1, y_2)
+  LDRGT        r6, [r4], #4
+  SMLABB       r10, r7, r9, r10     @ sum0 = MAC16_16(sum0, x_2, y_2)
+  SMLABT       r11, r7, r9, r11     @ sum1 = MAC16_16(sum1, x_2, y_3)
+  SMLATT       r10, r7, r9, r10     @ sum0 = MAC16_16(sum0, x_3, y_3)
+  LDRGT        r9, [r5], #4
+  SMLATB       r11, r7, r8, r11     @ sum1 = MAC16_16(sum1, x_3, y_4)
+  BGT celt_pitch_xcorr_edsp_process2_loop4
+celt_pitch_xcorr_edsp_process2_loop_done:
+  ADDS         r12, r12, #2
+  BLE  celt_pitch_xcorr_edsp_process2_1
+  LDR          r6, [r4], #4
+  @ Stall
+  SMLABB       r10, r6, r8, r10     @ sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r9, [r5], #4
+  SMLABT       r11, r6, r8, r11     @ sum1 = MAC16_16(sum1, x_0, y_1)
+  SUB          r12, r12, #2
+  SMLATT       r10, r6, r8, r10     @ sum0 = MAC16_16(sum0, x_1, y_1)
+  MOV          r8, r9
+  SMLATB       r11, r6, r9, r11     @ sum1 = MAC16_16(sum1, x_1, y_2)
+celt_pitch_xcorr_edsp_process2_1:
+  LDRH         r6, [r4], #2
+  ADDS         r12, r12, #1
+  @ Stall
+  SMLABB       r10, r6, r8, r10     @ sum0 = MAC16_16(sum0, x_0, y_0)
+  LDRHGT       r7, [r4], #2
+  SMLABT       r11, r6, r8, r11     @ sum1 = MAC16_16(sum1, x_0, y_1)
+  BLE celt_pitch_xcorr_edsp_process2_done
+  LDRH         r9, [r5], #2
+  SMLABT       r10, r7, r8, r10     @ sum0 = MAC16_16(sum0, x_1, y_1)
+  SMLABB       r11, r7, r9, r11     @ sum1 = MAC16_16(sum1, x_1, y_2)
+celt_pitch_xcorr_edsp_process2_done:
+  @ Restore _x
+  SUB          r4, r4, r3, LSL #1
+  @ Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  @ maxcorr = max(maxcorr, sum0)
+  CMP          r0, r10
+  ADD          r5, r5, #2
+  MOVLT        r0, r10
+  SUB          r1, r1, #2
+  @ maxcorr = max(maxcorr, sum1)
+  CMP          r0, r11
+  @ xcorr[i] = sum0
+  STR          r10, [r2], #4
+  MOVLT        r0, r11
+  STR          r11, [r2], #4        @ xcorr[i+1] = sum1
+celt_pitch_xcorr_edsp_process1a:
+  ADDS         r1, r1, #1           @ r1 holds max_pitch-2 here; is max_pitch < 1?
+  BLT celt_pitch_xcorr_edsp_done
+  SUBS         r12, r3, #4
+  @ r14 = sum = 0
+  MOV          r14, #0
+  BLT celt_pitch_xcorr_edsp_process1a_loop_done
+  LDR          r6, [r4], #4
+  LDR          r8, [r5], #4
+  LDR          r7, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process1a_loop4:
+  SMLABB       r14, r6, r8, r14     @ sum = MAC16_16(sum, x_0, y_0)
+  SUBS         r12, r12, #4         @ j-=4
+  SMLATT       r14, r6, r8, r14     @ sum = MAC16_16(sum, x_1, y_1)
+  LDRGE        r6, [r4], #4
+  SMLABB       r14, r7, r9, r14     @ sum = MAC16_16(sum, x_2, y_2)
+  LDRGE        r8, [r5], #4
+  SMLATT       r14, r7, r9, r14     @ sum = MAC16_16(sum, x_3, y_3)
+  LDRGE        r7, [r4], #4
+  LDRGE        r9, [r5], #4
+  BGE celt_pitch_xcorr_edsp_process1a_loop4
+celt_pitch_xcorr_edsp_process1a_loop_done:
+  ADDS         r12, r12, #2
+  LDRGE        r6, [r4], #4
+  LDRGE        r8, [r5], #4
+  @ Stall
+  SMLABBGE     r14, r6, r8, r14     @ sum = MAC16_16(sum, x_0, y_0)
+  SUBGE        r12, r12, #2
+  SMLATTGE     r14, r6, r8, r14     @ sum = MAC16_16(sum, x_1, y_1)
+  ADDS         r12, r12, #1
+  LDRHGE       r6, [r4], #2
+  LDRHGE       r8, [r5], #2
+  @ Stall
+  SMLABBGE     r14, r6, r8, r14     @ sum = MAC16_16(sum, *x, *y)
+  @ maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  @ xcorr[i] = sum
+  STR          r14, [r2], #4
+  MOVLT        r0, r14
+celt_pitch_xcorr_edsp_done:
+  LDMFD        sp!, {r4-r11, pc}
+	.size celt_pitch_xcorr_edsp, .-celt_pitch_xcorr_edsp  @ ENDP
+
+ .endif
+
+@ END:
+    .section	.note.GNU-stack,"",%progbits
diff --git a/celt/stack_alloc.h b/celt/stack_alloc.h
index e2739bd..b9ac0ab 100644
--- a/celt/stack_alloc.h
+++ b/celt/stack_alloc.h
@@ -88,10 +88,22 @@
  * @param type Type of element
  */
 
+#ifndef SIMD_EXTRA_ALLOC_BYTES
+#error define SIMD_EXTRA_ALLOC_BYTES appropriately in your makefile
+/*
+ * Useful values:
+ * 0  for an all-scalar processor, which should never over-read the arrays
+ * 16 for an implementation using ARM Neon or X86 SSE4 instructions, which work
+ *    with blocks of 16 bytes (128 bits)
+ */
+#endif
+
 #if defined(VAR_ARRAYS)
 
 #define VARDECL(type, var)
-#define ALLOC(var, size, type) type var[size]
+/* Pad each array with SIMD_EXTRA_ALLOC_BYTES so SIMD over-reads stay inside it. */
+#define ALLOC(var, size, type) type var[(size) + ((SIMD_EXTRA_ALLOC_BYTES)/sizeof(type))]
+
 #define SAVE_STACK
 #define RESTORE_STACK
 #define ALLOC_STACK
@@ -103,9 +115,11 @@
 #define VARDECL(type, var) type *var
 
 # ifdef _WIN32
-#  define ALLOC(var, size, type) var = ((type*)_alloca(sizeof(type)*(size)))
+#  define ALLOC(var, size, type) var = \
+                                 ((type*)_alloca(sizeof(type)*(size) + SIMD_EXTRA_ALLOC_BYTES))
 # else
-#  define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
+#  define ALLOC(var, size, type) var = \
+                                 ((type*)alloca(sizeof(type)*(size) + SIMD_EXTRA_ALLOC_BYTES))
 # endif
 
 #define SAVE_STACK
@@ -151,6 +165,11 @@
 
 #endif /* ENABLE_VALGRIND */
 
+// this path has NOT been modified to be safe in the face of SIMD over-reads
+#if SIMD_EXTRA_ALLOC_BYTES != 0
+#error  "ALLOC() is not updated in this configuration to provide for SIMD over-reads"
+#endif
+
 #include "os_support.h"
 #define VARDECL(type, var) type *var
 #define ALLOC(var, size, type) var = PUSH(global_stack, size, type)
diff --git a/convert_android_asm.sh b/convert_android_asm.sh
new file mode 100755
index 0000000..ea3d198
--- /dev/null
+++ b/convert_android_asm.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+set -e
+ASM_CONVERTER="./celt/arm/arm2gnu.pl"
+
+if [[ ! -x "${ASM_CONVERTER}" ]]; then
+  echo "This script should be run from external/libopus." >&2
+  exit 1
+fi
+
+while IFS= read -r file; do
+  # This check is required because the ASM conversion script doesn't seem to be
+  # idempotent.
+  if [[ ! "${file}" =~ .*_gnu\.s$ ]]; then
+    gnu_file="${file%.s}_gnu.s"
+    "${ASM_CONVERTER}" "${file}" > "${gnu_file}"
+    # The ASM conversion script replaces includes with *-gnu.S. So, replace
+    # occurrences of "*-gnu.S" with "*_gnu.s".
+    sed -i "s/-gnu\.S/_gnu\.s/g" "${gnu_file}"
+    rm -f "${file}"
+  fi
+done < <(find . -iname '*.s')
+
+# Generate armopts_gnu.s from armopts.s.in
+sed \
+  -e "s/@OPUS_ARM_MAY_HAVE_EDSP@/1/g" \
+  -e "s/@OPUS_ARM_MAY_HAVE_MEDIA@/1/g" \
+  -e "s/@OPUS_ARM_MAY_HAVE_NEON@/1/g" \
+  -e "s/@OPUS_ARM_MAY_HAVE_NEON_INTR@/1/g" \
+  celt/arm/armopts.s.in > celt/arm/armopts.s.temp
+"${ASM_CONVERTER}" "celt/arm/armopts.s.temp" > "celt/arm/armopts_gnu.s"
+rm "celt/arm/armopts.s.temp"
+echo "Converted all ASM files and generated armopts_gnu.s successfully."
diff --git a/fuzzer/Android.bp b/fuzzer/Android.bp
new file mode 100644
index 0000000..be47f44
--- /dev/null
+++ b/fuzzer/Android.bp
@@ -0,0 +1,107 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+package {
+    // http://go/android-license-faq
+    // A large-scale-change added 'default_applicable_licenses' to import
+    // the below license kinds from "external_libopus_license":
+    //   SPDX-license-identifier-Apache-2.0
+    default_applicable_licenses: ["external_libopus_license"],
+}
+
+cc_defaults {
+    name: "opus_fuzz_defaults",
+    host_supported: true,
+
+    static_libs: [
+        "libopus",
+    ],
+
+    fuzz_config: {
+        cc: [
+            "android-media-fuzzing-reports@google.com",
+        ],
+        componentid: 155276,
+        hotlists: [
+            "4593311",
+        ],
+        description: "The fuzzer targets the APIs of libopus",
+        vector: "remote",
+        service_privilege: "constrained",
+        users: "multi_user",
+        fuzzed_code_usage: "shipped",
+    },
+}
+
+cc_fuzz {
+    name: "opus_dec_fuzzer",
+
+    srcs: [
+        "opus_dec_fuzzer.cpp",
+    ],
+
+    defaults: [
+        "opus_fuzz_defaults",
+    ],
+}
+
+cc_fuzz {
+    name: "opus_multistream_dec_fuzzer",
+
+    srcs: [
+        "opus_dec_fuzzer.cpp",
+    ],
+
+    cflags: [
+        "-DMULTISTREAM",
+    ],
+
+    defaults: [
+        "opus_fuzz_defaults",
+    ],
+}
+
+cc_fuzz {
+    name: "opus_enc_fuzzer",
+
+    srcs: [
+        "opus_enc_fuzzer.cpp",
+    ],
+
+    defaults: [
+        "opus_fuzz_defaults",
+    ],
+}
+
+cc_fuzz {
+    name: "opus_multistream_enc_fuzzer",
+
+    srcs: [
+        "opus_enc_fuzzer.cpp",
+    ],
+
+    cflags: [
+        "-DMULTISTREAM",
+    ],
+
+    defaults: [
+        "opus_fuzz_defaults",
+    ],
+}
diff --git a/fuzzer/README.md b/fuzzer/README.md
new file mode 100644
index 0000000..42b9057
--- /dev/null
+++ b/fuzzer/README.md
@@ -0,0 +1,152 @@
+# Fuzzer for libopus decoder
+
+## Plugin Design Considerations
+The fuzzer plugin for opus decoder is designed based on the understanding of the
+codec and tries to achieve the following:
+
+##### Maximize code coverage
+
+This fuzzer provides support for both single stream and multi stream inputs,
+thus enabling fuzzing of the APIs provided for single stream as well as multi
+stream.
+
+Following arguments are passed to OPUS_DEC_CREATE_API:
+
+1. Sampling frequency (parameter name: `Fs`)
+2. Number of channels (parameter name: `channels`)
+
+| Parameter| Valid Values| Configured Value|
+|------------- |-------------| ----- |
+| `Fs` | `8000 ` `12000 ` `16000 ` `24000 ` `48000 ` | Derived from Byte-9 of input stream|
+| `channels`   | `1 ` `2 ` | Derived from Byte-9 of input stream |
+
+##### Maximize utilization of input data
+The plugin feeds the entire input data to the codec. Frame sizes are determined only
+after the call to extractor, so in absence of call to extractor,
+we feed the entire data to the decoder.
+This ensures that the plugin tolerates any kind of input (empty, huge,
+malformed, etc.) and doesn't `exit()` on any input, thereby increasing the
+chance of identifying vulnerabilities.
+
+## Build
+
+This describes steps to build the opus_dec_fuzzer and opus_multistream_dec_fuzzer binaries.
+
+## Android
+
+### Steps to build
+Build the fuzzer
+```
+  $ mm -j$(nproc) opus_dec_fuzzer
+  $ mm -j$(nproc) opus_multistream_dec_fuzzer
+```
+
+### Steps to run
+Create a directory CORPUS_DIR and copy some opus files to that folder.
+Push this directory to device.
+
+To run on device
+```
+  $ adb sync data
+  $ adb shell /data/fuzz/arm64/opus_dec_fuzzer/opus_dec_fuzzer CORPUS_DIR
+  $ adb shell /data/fuzz/arm64/opus_multistream_dec_fuzzer/opus_multistream_dec_fuzzer CORPUS_DIR
+```
+To run on host
+```
+  $ $ANDROID_HOST_OUT/fuzz/x86_64/opus_dec_fuzzer/opus_dec_fuzzer CORPUS_DIR
+  $ $ANDROID_HOST_OUT/fuzz/x86_64/opus_multistream_dec_fuzzer/opus_multistream_dec_fuzzer CORPUS_DIR
+```
+
+# Fuzzer for libopus encoder
+
+## Plugin Design Considerations
+The fuzzer plugin for opus encoder is designed based on the understanding of the
+codec and tries to achieve the following:
+
+##### Maximize code coverage
+
+This fuzzer provides support for both single stream and multi stream inputs,
+thus enabling fuzzing of the APIs provided for single stream as well as multi
+stream.
+Following arguments are passed to OPUS_ENC_CREATE_API:
+
+1. Sampling rate (parameter name: `sampleRate`)
+2. Number of channels (parameter name: `channels`)
+
+| Parameter| Valid Values| Configured Value|
+|------------- |-------------| ----- |
+| `sampleRate` | `8000 ` `12000 ` `16000 ` `24000 ` `48000 ` | An index ranging from 0-4 is calculated using first byte of data which is used to assign value to `sampleRate` from array `kSampleRates`|
+| `channels` (single stream)   | `1 ` `2 ` | Calculated using second byte of data |
+| `channels` (multi stream)   | In range `1 `- `255 ` | Calculated using second byte of data |
+
+Following arguments are passed to OPUS_ENC_CTL_API:
+
+1. OPUS_SET_BITRATE (parameter name: `bitRate`)
+2. OPUS_SET_COMPLEXITY (parameter name: `complexity`)
+3. OPUS_SET_APPLICATION (parameter name: `application`)
+4. OPUS_SET_DTX (parameter name: `setDTX`)
+5. OPUS_SET_SIGNAL (parameter name: `signal`)
+6. OPUS_SET_VBR (parameter name: `setVBR`)
+7. OPUS_SET_VBR_CONSTRAINT (parameter name: `setVBRConstraint`)
+8. OPUS_SET_FORCE_CHANNELS (parameter name: `forceChannel`)
+9. OPUS_SET_MAX_BANDWIDTH (parameter name: `maxBandwidth`)
+10. OPUS_SET_INBAND_FEC (parameter name: `setInbandFec`)
+11. OPUS_SET_PACKET_LOSS_PERC (parameter name: `pktLoss`)
+12. OPUS_SET_LSB_DEPTH (parameter name: `lsbDepth`)
+13. OPUS_SET_PREDICTION_DISABLED (parameter name: `setPredDisable`)
+14. OPUS_SET_EXPERT_FRAME_DURATION (parameter name: `frameSizeEnum`)
+
+| Parameter| Valid Values| Configured Value|
+|------------- |-------------| ----- |
+| `bitRate`   | In range `500 ` to `512000 ` | Calculated using all bits of 3rd, 4th and 5th byte of data |
+| `complexity`   |0.`0 ` 1.`1 ` 2.`2 ` 3.`3 ` 4.`4 ` 5.`5 ` 6.`6 ` 7.`7 ` 8.`8 ` 9.`9 ` 10.`10 ` | Calculated using all bits of 6th byte of data |
+| `application`   | 0.`OPUS_APPLICATION_VOIP ` 1.`OPUS_APPLICATION_AUDIO ` 2.`OPUS_APPLICATION_RESTRICTED_LOWDELAY ` | Calculated using all bits of 7th byte of data |
+| `setDTX`   | 0.`0 ` 1.`1 ` | Calculated using bit 0 of 8th byte of data |
+| `signal`   | 0.`OPUS_AUTO ` 1.`OPUS_SIGNAL_VOICE ` 2.`OPUS_SIGNAL_MUSIC ` | Calculated using bit 0 and bit 1 of 9th byte of data |
+| `setVBR`   | 0.`0 ` 1.`1 `  | Calculated using bit 0 of 10th byte of data |
+| `setVBRConstraint`   | 0.`0 ` 1.`1 ` | Calculated using bit 0 of 11th byte of data |
+| `forceChannel`   | 0.`OPUS_AUTO ` 1.`1 ` 2.`2 ` | Calculated using all bits of 12th byte of data |
+| `maxBandwidth`   | 0.`OPUS_BANDWIDTH_NARROWBAND ` 1.`OPUS_BANDWIDTH_MEDIUMBAND ` 2.`OPUS_BANDWIDTH_WIDEBAND ` 3.`OPUS_BANDWIDTH_SUPERWIDEBAND ` 4.`OPUS_BANDWIDTH_FULLBAND ` | Calculated using all bits of 13th byte of data |
+| `setInbandFec`   | 0.`0 ` 1.`1 ` | Calculated using bit 0 of 14th byte of data |
+| `pktLoss`   | 0.`0 ` 1.`1 ` 2.`2 ` 3.`5 `| Calculated using all bits of 15th byte of data |
+| `lsbDepth`   | 0.`8 ` 1.`24 ` | Calculated using bit 0 of 16th byte of data |
+| `setPredDisable`   | 0.`0 ` 1.`1 ` | Calculated using bit 0 of 17th byte of data |
+| `frameSizeEnum`   | 0.`OPUS_FRAMESIZE_2_5_MS ` 1.`OPUS_FRAMESIZE_5_MS ` 2.`OPUS_FRAMESIZE_10_MS ` 3.`OPUS_FRAMESIZE_20_MS ` 4.`OPUS_FRAMESIZE_40_MS ` 5.`OPUS_FRAMESIZE_60_MS ` 6.`OPUS_FRAMESIZE_80_MS ` 7.`OPUS_FRAMESIZE_100_MS ` 8.`OPUS_FRAMESIZE_120_MS ` | Calculated using all bits of 18th byte of data |
+
+
+##### Maximize utilization of input data
+The plugin feeds the entire input data to the codec. When the buffer size is not a multiple of mNumPcmBytesPerInputFrame, the input is accumulated until the expected number of bytes is available and is then sent to the encoder. This ensures that the plugin tolerates any kind of input (empty, huge,
+malformed, etc.) and doesn't `exit()` on any input, thereby increasing the chance of identifying vulnerabilities.
+
+## Build
+
+This describes steps to build opus_enc_fuzzer and opus_multistream_enc_fuzzer.
+
+## Android
+
+### Steps to build
+Build the fuzzer
+```
+  $ mm -j$(nproc) opus_enc_fuzzer
+  $ mm -j$(nproc) opus_multistream_enc_fuzzer
+```
+
+### Steps to run
+Create a directory CORPUS_DIR and copy some raw media files to that folder.
+Push this directory to device.
+
+To run on device
+```
+  $ adb sync data
+  $ adb shell /data/fuzz/arm64/opus_enc_fuzzer/opus_enc_fuzzer CORPUS_DIR
+  $ adb shell /data/fuzz/arm64/opus_multistream_enc_fuzzer/opus_multistream_enc_fuzzer CORPUS_DIR
+```
+To run on host
+```
+  $ $ANDROID_HOST_OUT/fuzz/x86_64/opus_enc_fuzzer/opus_enc_fuzzer CORPUS_DIR
+  $ $ANDROID_HOST_OUT/fuzz/x86_64/opus_multistream_enc_fuzzer/opus_multistream_enc_fuzzer CORPUS_DIR
+```
+
+## References:
+ * http://llvm.org/docs/LibFuzzer.html
+ * https://github.com/google/oss-fuzz
diff --git a/fuzzer/opus_dec_fuzzer.cpp b/fuzzer/opus_dec_fuzzer.cpp
new file mode 100644
index 0000000..23bf69e
--- /dev/null
+++ b/fuzzer/opus_dec_fuzzer.cpp
@@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <opus.h>
+
+/* 4 bytes: packet length, 4 bytes: encoder final range */
+constexpr int kSetupByteOffset = 8;
+constexpr int kMaxFrameSample = 5760;
+const int kSamplingRates[] = {8000, 12000, 16000, 24000, 48000};
+constexpr int kNumberSamplingRates = sizeof(kSamplingRates) / sizeof(kSamplingRates[0]);
+
+#ifdef MULTISTREAM
+#include "opus_multistream.h"
+#define OPUS_DEC_DATA_TYPE OpusMSDecoder
+#define OPUS_DEC_DECODE_API opus_multistream_decode
+#define OPUS_DEC_CREATE_API ms_opus_decoder_create
+#define OPUS_DEC_DESTROY_API opus_multistream_decoder_destroy
+static OpusMSDecoder *ms_opus_decoder_create(opus_int32 Fs, int channels, int *error) {
+  /* A single stream, which is coupled exactly when two channels are requested. */
+  const int numStreams = 1, numCoupled = (channels == 2) ? 1 : 0;
+  unsigned char channelMap[256] = {0, 1};  // identity for the first two channels
+  return opus_multistream_decoder_create(Fs, channels, numStreams, numCoupled, channelMap, error);
+}
+#else
+#define OPUS_DEC_DATA_TYPE OpusDecoder
+#define OPUS_DEC_DECODE_API opus_decode
+#define OPUS_DEC_CREATE_API opus_decoder_create
+#define OPUS_DEC_DESTROY_API opus_decoder_destroy
+#endif
+
+class Codec {
+ public:
+  Codec() = default;
+  ~Codec() { deInitDecoder(); }  // releases the decoder and the PCM buffer
+  bool initDecoder(const uint8_t *data);  // false if decoder creation or PCM allocation fails
+  void decodeFrames(const uint8_t *data, size_t size);  // one decode call over the whole input
+  void deInitDecoder();  // destroys mDec, frees mPcm and resets both to nullptr
+
+ private:
+  int mSamplingRate;  // set by initDecoder()
+  int mNoOfChannels;  // set by initDecoder(); always 1 or 2
+  OPUS_DEC_DATA_TYPE *mDec = nullptr;
+  opus_int16 *mPcm = nullptr;  // output buffer: kMaxFrameSample samples per channel
+};
+
+bool Codec::initDecoder(const uint8_t *data) {
+  // Skip the 8-byte header (packet length + encoder final range) to reach the
+  // TOC byte, from which the decoder configuration is derived.
+  const uint8_t *toc = data + kSetupByteOffset;
+
+  // Map the packet bandwidth onto kSamplingRates; fall back to 8 kHz when the
+  // resulting index is out of range.
+  const int rateIdx = opus_packet_get_bandwidth(toc) - OPUS_BANDWIDTH_NARROWBAND;
+  const bool rateIdxValid = (rateIdx >= 0) && (rateIdx < kNumberSamplingRates);
+  mSamplingRate = rateIdxValid ? kSamplingRates[rateIdx] : 8000;
+
+  // Only mono and stereo are accepted; anything else is treated as mono.
+  const int packetChannels = opus_packet_get_nb_channels(toc);
+  mNoOfChannels = (packetChannels == 2) ? 2 : 1;
+
+  int status;
+  mDec = OPUS_DEC_CREATE_API(mSamplingRate, mNoOfChannels, &status);
+  if (mDec == nullptr || status != OPUS_OK) {
+    return false;
+  }
+
+  // Zero-filled output buffer holding kMaxFrameSample samples per channel.
+  const size_t pcmBytes = sizeof(*mPcm) * kMaxFrameSample * mNoOfChannels;
+  mPcm = static_cast<opus_int16 *>(malloc(pcmBytes));
+  if (mPcm == nullptr) {
+    return false;
+  }
+  memset(mPcm, 0x0, pcmBytes);
+  return true;
+}
+
+void Codec::deInitDecoder() {
+  // Tear down the decoder first, then its output buffer.
+  OPUS_DEC_DESTROY_API(mDec);
+  mDec = nullptr;
+
+  free(mPcm);  // free(nullptr) is a no-op, so no guard is needed
+  mPcm = nullptr;
+}
+
+void Codec::decodeFrames(const uint8_t *data, size_t size) {
+  (void)OPUS_DEC_DECODE_API(mDec, data, size, mPcm, kMaxFrameSample, 0 /*decode_fec: off*/);
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  // initDecoder() reads data[kSetupByteOffset], so shorter inputs are rejected.
+  if (size < kSetupByteOffset + 1) {
+    return 0;
+  }
+
+  // Automatic storage: ~Codec() releases the decoder and PCM buffer on every
+  // path, and the old null check was dead (plain `new` never returns nullptr).
+  Codec codec;
+  if (codec.initDecoder(data)) {
+    codec.decodeFrames(data, size);
+  }
+  return 0;
+}
diff --git a/fuzzer/opus_enc_fuzzer.cpp b/fuzzer/opus_enc_fuzzer.cpp
new file mode 100644
index 0000000..c2258fd
--- /dev/null
+++ b/fuzzer/opus_enc_fuzzer.cpp
@@ -0,0 +1,314 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+#include <algorithm>
+
+#include "opus.h"
+
+using namespace std;  // supplies the unqualified size() and min() used in this file
+
+constexpr int kFrameDuration = 50;   // used as a divisor (sampleRate / kFrameDuration), not a time
+constexpr int kMaxPacket = 1500;     // byte capacity of the buffer that receives encoded output
+constexpr int kMinBitRate = 500;     // lower bound of the bitrate range handed to the encoder
+constexpr int kMaxBitRate = 512000;  // upper bound of the bitrate range handed to the encoder
+
+constexpr opus_int32 kSampleRates[] = {8000, 12000, 16000, 24000, 48000};
+constexpr size_t kSampleRatesSize = size(kSampleRates);
+
+#ifndef MULTISTREAM
+constexpr int kChannels[] = {1, 2};
+constexpr size_t kChannelsSize = size(kChannels);
+#endif
+
+constexpr int kApplications[] = {OPUS_APPLICATION_VOIP, OPUS_APPLICATION_AUDIO,
+                                 OPUS_APPLICATION_RESTRICTED_LOWDELAY};
+constexpr size_t kApplicationsSize = size(kApplications);
+
+constexpr int kSignals[] = {OPUS_AUTO, OPUS_SIGNAL_VOICE, OPUS_SIGNAL_MUSIC};
+constexpr size_t kSignalsSize = size(kSignals);
+
+constexpr int kSetDTX[] = {0, 1};
+constexpr size_t kSetDTXSize = size(kSetDTX);
+
+constexpr int kSetVBR[] = {0, 1};
+constexpr size_t kSetVBRSize = size(kSetVBR);
+
+constexpr int kSetInbandFec[] = {0, 1};
+constexpr size_t kSetInbandFecSize = size(kSetInbandFec);
+
+constexpr int kSetVBRConstraint[] = {0, 1};
+constexpr size_t kSetVBRConstraintSize = size(kSetVBRConstraint);
+
+constexpr int kSetPredDisable[] = {0, 1};
+constexpr size_t kSetPredDisableSize = size(kSetPredDisable);
+
+constexpr int kComplexities[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+constexpr size_t kComplexitiesSize = size(kComplexities);
+
+constexpr int kForceChannels[] = {OPUS_AUTO, 1, 2};
+constexpr size_t kForceChannelsSize = size(kForceChannels);
+
+constexpr int kMaxBandwidths[] = {OPUS_BANDWIDTH_NARROWBAND, OPUS_BANDWIDTH_MEDIUMBAND,
+                                  OPUS_BANDWIDTH_WIDEBAND, OPUS_BANDWIDTH_SUPERWIDEBAND,
+                                  OPUS_BANDWIDTH_FULLBAND};
+constexpr size_t kMaxBandwidthsSize = size(kMaxBandwidths);
+
+constexpr int kPacketLossPerc[] = {0, 1, 2, 5};  // the only loss percentages this fuzzer selects
+constexpr size_t kPacketLossPercSize = size(kPacketLossPerc);
+
+constexpr int kLsbDepths[] = {8, 24};  // the only two depths this fuzzer selects
+constexpr size_t kLsbDepthsSize = size(kLsbDepths);
+
+constexpr int kFrameDurations[] = {
+    OPUS_FRAMESIZE_2_5_MS, OPUS_FRAMESIZE_5_MS,   OPUS_FRAMESIZE_10_MS,
+    OPUS_FRAMESIZE_20_MS,  OPUS_FRAMESIZE_40_MS,  OPUS_FRAMESIZE_60_MS,
+    OPUS_FRAMESIZE_80_MS,  OPUS_FRAMESIZE_100_MS, OPUS_FRAMESIZE_120_MS};
+constexpr size_t kFrameDurationsSize = size(kFrameDurations);
+
+#ifdef MULTISTREAM
+#include "opus_multistream.h"
+#define OPUS_ENC_DATA_TYPE OpusMSEncoder
+#define OPUS_ENC_ENCODE_API opus_multistream_encode
+#define OPUS_ENC_CTL_API opus_multistream_encoder_ctl
+#define OPUS_ENC_CREATE_API ms_opus_encoder_create
+#define OPUS_ENC_DESTROY_API opus_multistream_encoder_destroy
+static OpusMSEncoder* ms_opus_encoder_create(opus_int32 sampleRate, int channels, int application,
+                                             int* error) {  // identity-mapped multistream create
+    unsigned char* mapping = (unsigned char*)malloc(sizeof(unsigned char) * channels);
+    if (!mapping) {
+        *error = OPUS_ALLOC_FAIL;  // a real (negative) Opus error code rather than a bare 1
+        return nullptr;
+    }
+    for (int ch = 0; ch < channels; ++ch) {  // int index: cannot wrap the way unsigned char can
+        mapping[ch] = (unsigned char)ch;     // channel i maps to index i
+    }
+    OpusMSEncoder* enc = opus_multistream_encoder_create(sampleRate, channels, 1, channels - 1,
+                                                         mapping, application, error);
+    free(mapping);  // the temporary table is released on success and on failure alike
+    return enc;
+}
+#else
+#define OPUS_ENC_DATA_TYPE OpusEncoder
+#define OPUS_ENC_ENCODE_API opus_encode
+#define OPUS_ENC_CTL_API opus_encoder_ctl
+#define OPUS_ENC_CREATE_API opus_encoder_create
+#define OPUS_ENC_DESTROY_API opus_encoder_destroy
+#endif
+
+enum {
+    IDX_SAMPLE_RATE_INDEX = 0,  // every IDX_* is the offset of the input byte driving one setting
+    IDX_CHANNEL,
+    IDX_BIT_RATE_1,  // _1, _2 and _3 are combined into one 24-bit value, _1 most significant
+    IDX_BIT_RATE_2,
+    IDX_BIT_RATE_3,
+    IDX_COMPLEXITY,
+    IDX_APPLICATION,
+    IDX_SET_DTX,
+    IDX_SET_SIGNAL,
+    IDX_SET_VBR,
+    IDX_SET_VBR_CONSTRAINT,
+    IDX_FORCE_CHANNEL_INDEX,
+    IDX_SET_MAX_BANDWIDTH,
+    IDX_SET_INBAND_FEC,
+    IDX_SET_PACKET_LOSS_PERC,
+    IDX_SET_LSB_DEPTH,
+    IDX_SET_PREDICTION_DISABLED,
+    IDX_FRAME_ENUM,
+    IDX_LAST  // number of configuration bytes; inputs shorter than this are not processed
+};
+
+template <typename type1, typename type2, typename type3>  // folds data into [min, max]
+auto generateNumberInRangeFromData(type1 data, type2 min, type3 max) -> decltype(max) {
+    return (data % (1 + max - min)) + min;  // assumes non-negative data and max >= min
+}
+
+class Codec {
+   public:
+    ~Codec() { deInitEncoder(); }  // releases the encoder if one was created
+    bool initEncoder(uint8_t** dataPtr, size_t* sizePtr);  // on success skips IDX_LAST bytes
+    void encodeFrames(const uint8_t* data, size_t size);   // feeds data to the encoder as int16 PCM
+    void deInitEncoder();
+
+   private:
+    OPUS_ENC_DATA_TYPE* mEncoder = nullptr;
+    int mChannels = 0;
+    int mNumSamplesPerFrame = 0;           // sampleRate / kFrameDuration, set in initEncoder()
+    size_t mFrameSize = 0;                 // get_frame_size() result for the chosen duration
+    size_t mNumPcmBytesPerInputFrame = 0;  // mChannels * mNumSamplesPerFrame * sizeof(int16_t)
+};
+
+/**
+ * Converts an OPUS_FRAMESIZE_* selector into a frame length in samples.
+ *
+ * @param frameSizeEnum one of the OPUS_FRAMESIZE_*_MS constants
+ * @param samplingRate  sampling rate the frame length is computed for
+ * @return the integer sample count of that duration at samplingRate, or 0
+ *         when frameSizeEnum is none of the handled constants
+ */
+int get_frame_size(int frameSizeEnum, int samplingRate) {
+    switch (frameSizeEnum) {
+        // Durations up to 40 ms divide the rate directly.
+        case OPUS_FRAMESIZE_2_5_MS:
+            return samplingRate / 400;
+        case OPUS_FRAMESIZE_5_MS:
+            return samplingRate / 200;
+        case OPUS_FRAMESIZE_10_MS:
+            return samplingRate / 100;
+        case OPUS_FRAMESIZE_20_MS:
+            return samplingRate / 50;
+        case OPUS_FRAMESIZE_40_MS:
+            return samplingRate / 25;
+        // Longer durations are whole multiples of 20 ms (samplingRate / 50).
+        case OPUS_FRAMESIZE_60_MS:
+            return 3 * samplingRate / 50;
+        case OPUS_FRAMESIZE_80_MS:
+            return 4 * samplingRate / 50;
+        case OPUS_FRAMESIZE_100_MS:
+            return 5 * samplingRate / 50;
+        case OPUS_FRAMESIZE_120_MS:
+            return 6 * samplingRate / 50;
+        // Anything else is reported as 0.
+        default:
+            return 0;
+    }
+}
+
+bool Codec::initEncoder(uint8_t** dataPtr, size_t* sizePtr) {
+    const uint8_t* cfg = *dataPtr;  // one configuration byte per IDX_* slot
+
+    // Sampling rate: an entry of kSampleRates.
+    const opus_int32 sampleRate = kSampleRates[cfg[IDX_SAMPLE_RATE_INDEX] % kSampleRatesSize];
+
+#ifdef MULTISTREAM
+    mChannels = generateNumberInRangeFromData(cfg[IDX_CHANNEL], 1, 255);
+#else
+    // Channel count: an entry of kChannels.
+    mChannels = kChannels[cfg[IDX_CHANNEL] % kChannelsSize];
+#endif
+
+    mNumSamplesPerFrame = sampleRate / kFrameDuration;
+    mNumPcmBytesPerInputFrame = mChannels * mNumSamplesPerFrame * sizeof(int16_t);
+
+    const int application = kApplications[cfg[IDX_APPLICATION] % kApplicationsSize];
+    int status = 0;
+    mEncoder = OPUS_ENC_CREATE_API(sampleRate, mChannels, application, &status);
+    if (status != 0) {
+        return false;
+    }
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_APPLICATION(application));
+
+    // Complexity: an entry of kComplexities.
+    const int complexity = kComplexities[cfg[IDX_COMPLEXITY] % kComplexitiesSize];
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_COMPLEXITY(complexity));
+
+    // DTX switch: an entry of kSetDTX.
+    const int dtx = kSetDTX[cfg[IDX_SET_DTX] % kSetDTXSize];
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_DTX(dtx));
+
+    // Signal type: an entry of kSignals.
+    const int signalType = kSignals[cfg[IDX_SET_SIGNAL] % kSignalsSize];
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_SIGNAL(signalType));
+
+    // VBR switch: an entry of kSetVBR.
+    const int vbr = kSetVBR[cfg[IDX_SET_VBR] % kSetVBRSize];
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_VBR(vbr));
+
+    // VBR constraint switch: an entry of kSetVBRConstraint.
+    const int cvbr = kSetVBRConstraint[cfg[IDX_SET_VBR_CONSTRAINT] % kSetVBRConstraintSize];
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_VBR_CONSTRAINT(cvbr));
+
+    // Three bytes are packed, most significant first, then mapped into [kMinBitRate, kMaxBitRate].
+    const uint32_t packed =
+        (cfg[IDX_BIT_RATE_1] << 16) | (cfg[IDX_BIT_RATE_2] << 8) | cfg[IDX_BIT_RATE_3];
+    const uint32_t bitRate = generateNumberInRangeFromData(packed, kMinBitRate, kMaxBitRate);
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_BITRATE(bitRate));
+
+    // Forced channel count: an entry of kForceChannels, capped at the encoder's channel count.
+    const int requested = kForceChannels[cfg[IDX_FORCE_CHANNEL_INDEX] % kForceChannelsSize];
+    const int forceChannel = min(requested, mChannels);
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_FORCE_CHANNELS(forceChannel));
+
+    // Maximum bandwidth: an entry of kMaxBandwidths.
+    const opus_int32 maxBw = kMaxBandwidths[cfg[IDX_SET_MAX_BANDWIDTH] % kMaxBandwidthsSize];
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_MAX_BANDWIDTH(maxBw));
+
+    // In-band FEC switch: an entry of kSetInbandFec.
+    const int inbandFec = kSetInbandFec[cfg[IDX_SET_INBAND_FEC] % kSetInbandFecSize];
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_INBAND_FEC(inbandFec));
+
+    // Packet loss percentage: an entry of kPacketLossPerc.
+    const int lossPerc = kPacketLossPerc[cfg[IDX_SET_PACKET_LOSS_PERC] % kPacketLossPercSize];
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_PACKET_LOSS_PERC(lossPerc));
+
+    // LSB depth: an entry of kLsbDepths.
+    const int lsbDepth = kLsbDepths[cfg[IDX_SET_LSB_DEPTH] % kLsbDepthsSize];
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_LSB_DEPTH(lsbDepth));
+
+    // Prediction-disabled switch: an entry of kSetPredDisable.
+    const int noPred = kSetPredDisable[cfg[IDX_SET_PREDICTION_DISABLED] % kSetPredDisableSize];
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_PREDICTION_DISABLED(noPred));
+
+    // Frame duration selector: an entry of kFrameDurations.
+    const int durationId = kFrameDurations[cfg[IDX_FRAME_ENUM] % kFrameDurationsSize];
+    OPUS_ENC_CTL_API(mEncoder, OPUS_SET_EXPERT_FRAME_DURATION(durationId));
+
+    mFrameSize = get_frame_size(durationId, sampleRate);
+    if (!mFrameSize) {
+        return false;
+    }
+
+    // Step past the configuration bytes so that encoding only sees the remaining input.
+    *dataPtr += IDX_LAST;
+    *sizePtr -= IDX_LAST;
+    return true;
+}
+
+void Codec::encodeFrames(const uint8_t* data, size_t size) {
+    // The input is read as interleaved 16-bit PCM. The encode API takes the frame length per
+    // channel, so each call is given mFrameSize and consumes mFrameSize * mChannels samples.
+    const opus_int16* pcm = (const opus_int16*)data;
+    const size_t total = size / sizeof(opus_int16);
+    unsigned char packet[kMaxPacket];
+    size_t offset = 0;
+    do {
+        // A short tail is clamped to whole per-channel samples that really are in the buffer.
+        const size_t perChannel = min(mFrameSize, (total - offset) / mChannels);
+        (void)OPUS_ENC_ENCODE_API(mEncoder, pcm + offset, perChannel, packet, kMaxPacket);
+        offset += mFrameSize * mChannels;
+    } while (offset < total);
+}
+
+void Codec::deInitEncoder() {
+    if (mEncoder != nullptr) {  // a null handle means there is nothing to release
+        OPUS_ENC_DESTROY_API(mEncoder);
+    }
+    mEncoder = nullptr;  // a repeated call must not destroy the encoder again
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+    // Bytes [0, IDX_LAST) configure the encoder; initEncoder() moves data/size past them.
+    if (size >= IDX_LAST) {
+        Codec encoder;
+        if (encoder.initEncoder(const_cast<uint8_t**>(&data), &size)) {
+            encoder.encodeFrames(data, size);
+        }
+    }
+    return 0;
+}
diff --git a/include/opus_projection.h b/include/opus_projection.h
index 9dabf4e..b27453c 100644
--- a/include/opus_projection.h
+++ b/include/opus_projection.h
@@ -412,7 +412,7 @@
   * malloc.
   * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
   * @see opus_projection_decoder_create
-  * @see opus_projection_deocder_get_size
+  * @see opus_projection_decoder_get_size
   * @param st <tt>OpusProjectionDecoder*</tt>: Projection encoder state to initialize.
   * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz).
   *                                This must be one of 8000, 12000, 16000,
diff --git a/libopus_blocklist.txt b/libopus_blocklist.txt
new file mode 100644
index 0000000..84c19ee
--- /dev/null
+++ b/libopus_blocklist.txt
@@ -0,0 +1,59 @@
+[integer]
+# celt/celt_decoder.c:1055:61: 0 - 1 cannot be represented in type 'unsigned int'
+fun:celt_decode_with_ec
+# celt/celt_encoder.c:2171:75: 0 - 1 cannot be represented in type 'unsigned int'
+fun:celt_encode_with_ec
+fun:celt_lcg_rand
+# celt/entcode.h:131: negation of 100 cannot be represented in type 'opus_uint32'
+fun:celt_udiv
+# celt/mdct.c:273
+# celt/mdct.c:274
+# celt/mdct.c:304
+# celt/mdct.c:305
+# celt/mdct.c:315
+# celt/mdct.c:316
+# celt/mdct.c:336
+# celt/mdct.c:337
+fun:clt_mdct_backward_c
+fun:ec_dec_init
+# celt/entdec.c:143
+fun:ec_decode
+# celt/entdec.c:150
+fun:ec_decode_bin
+# silk/NSQ_del_dec.c:537:38: -242159836 - 2132528648 cannot be represented in type 'int'
+fun:silk_noise_shape_quantizer_del_dec
+# silk/NSQ.c:265:25: 1318152552 + 1068143768 cannot be represented in type 'int'
+fun:silk_noise_shape_quantizer
+# silk/x86/NSQ_del_dec_sse4_1.c:571:28: 1162446838 - -1165932966 cannot be represented in type 'int'
+fun:silk_noise_shape_quantizer_del_dec_sse4_1
+# silk/fixed/x86/burg_modified_FIX_sse4_1.c:277: 1940085720 + 252655088 cannot be represented
+# in type 'int'
+fun:silk_burg_modified_sse4_1
+# silk/fixed/burg_modified_FIX.c:181 1940085720 + 252655088 cannot be represented in type 'int'
+fun:silk_burg_modified_c
+src:*/celt/kiss_fft.c
+
+# assembly optimizations that know what they are doing
+fun:silk_SMULWB_armv4
+fun:silk_SMULWT_armv4
+fun:silk_SMULWW_armv4
+fun:silk_SMLAWW_armv4
+#
+fun:silk_SMULWB_armv5e
+fun:silk_SMLAWB_armv5e
+fun:silk_SMULWT_armv5e
+fun:silk_SMLAWT_armv5e
+fun:silk_SMULBB_armv5e
+fun:silk_SMLABB_armv5e
+fun:silk_SMULBT_armv5e
+fun:silk_SMLABT_armv5e
+fun:silk_ADD_SAT32_armv5e
+fun:silk_SUB_SAT32_armv5e
+fun:silk_CLZ16_armv5
+fun:silk_CLZ32_armv5
+
+
+# Performance related
+fun:exp_rotation1
+fun:haar1
+fun:celt_preemphasis
diff --git a/silk/SigProc_FIX.h b/silk/SigProc_FIX.h
index fbdfa82..65fe6a0 100644
--- a/silk/SigProc_FIX.h
+++ b/silk/SigProc_FIX.h
@@ -448,13 +448,29 @@
 
 /* Adds two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
    (just standard two's complement implementation-specific behaviour) */
-#define silk_ADD32_ovflw(a, b)              ((opus_int32)((opus_uint32)(a) + (opus_uint32)(b)))
+static OPUS_INLINE opus_int32 silk_ADD32_ovflw(opus_int32 a, opus_int32 b) {
+    opus_int32 wrapped;  /* receives a + b reduced to 32 bits */
+    (void)__builtin_add_overflow(a, b, &wrapped);  /* overflow flag deliberately unused */
+    return wrapped;
+}
+
 /* Subtractss two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
    (just standard two's complement implementation-specific behaviour) */
-#define silk_SUB32_ovflw(a, b)              ((opus_int32)((opus_uint32)(a) - (opus_uint32)(b)))
+static OPUS_INLINE opus_int32 silk_SUB32_ovflw(opus_int32 a, opus_int32 b) {
+    opus_int32 wrapped;  /* receives a - b reduced to 32 bits */
+    (void)__builtin_sub_overflow(a, b, &wrapped);  /* overflow flag deliberately unused */
+    return wrapped;
+}
 
 /* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */
-#define silk_MLA_ovflw(a32, b32, c32)       silk_ADD32_ovflw((a32), (opus_uint32)(b32) * (opus_uint32)(c32))
+/* .. also ignoring multiply overflows; caller has comment about this happening occasionally */
+static OPUS_INLINE opus_int32 silk_MLA_ovflw(opus_int32 a, opus_int32 b, opus_int32 c) {
+    opus_int32 product, total;
+    (void)__builtin_mul_overflow(b, c, &product);      /* b * c, reduced to 32 bits */
+    (void)__builtin_add_overflow(a, product, &total);  /* a + b * c, reduced to 32 bits */
+    return total;
+}
+
 #define silk_SMLABB_ovflw(a32, b32, c32)    (silk_ADD32_ovflw((a32) , ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))))
 
 #define silk_DIV32_16(a32, b16)             ((opus_int32)((a32) / (b16)))
@@ -496,7 +512,12 @@
 /* Add with saturation for positive input values */
 #define silk_ADD_POS_SAT8(a, b)             ((((a)+(b)) & 0x80)                 ? silk_int8_MAX  : ((a)+(b)))
 #define silk_ADD_POS_SAT16(a, b)            ((((a)+(b)) & 0x8000)               ? silk_int16_MAX : ((a)+(b)))
-#define silk_ADD_POS_SAT32(a, b)            ((((opus_uint32)(a)+(opus_uint32)(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
+static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int32 a, opus_int32 b) {
+    opus_int32 sum;
+    /* the maximum is returned whenever a + b does not fit in 32 bits */
+    const int overflowed = __builtin_add_overflow(a, b, &sum);
+    return overflowed ? silk_int32_MAX : sum;
+}
 
 #define silk_LSHIFT8(a, shift)              ((opus_int8)((opus_uint8)(a)<<(shift)))         /* shift >= 0, shift < 8  */
 #define silk_LSHIFT16(a, shift)             ((opus_int16)((opus_uint16)(a)<<(shift)))       /* shift >= 0, shift < 16 */
diff --git a/silk/macros.h b/silk/macros.h
index 3c67b6e..00ccca3 100644
--- a/silk/macros.h
+++ b/silk/macros.h
@@ -33,6 +33,7 @@
 #endif
 
 #include "opus_types.h"
+#include "typedef.h"
 #include "opus_defines.h"
 #include "arch.h"
 
@@ -96,13 +97,31 @@
 #endif
 
 /* add/subtract with output saturated */
-#define silk_ADD_SAT32(a, b)             ((((opus_uint32)(a) + (opus_uint32)(b)) & 0x80000000) == 0 ?                              \
-                                        ((((a) & (b)) & 0x80000000) != 0 ? silk_int32_MIN : (a)+(b)) :   \
-                                        ((((a) | (b)) & 0x80000000) == 0 ? silk_int32_MAX : (a)+(b)) )
+/* use clang builtin overflow detectors */
+static OPUS_INLINE opus_int32 silk_ADD_SAT32(opus_int32 a, opus_int32 b) {
+    opus_int32 sum;
+
+    if (!__builtin_add_overflow(a, b, &sum)) {
+        return sum;  /* a + b fits in 32 bits */
+    }
+
+    /* An addition can only overflow when both operands share a sign, so the
+     * sign of a selects the bound: negative saturates low, otherwise high. */
+    return (a < 0) ? silk_int32_MIN : silk_int32_MAX;
+}
 
-#define silk_SUB_SAT32(a, b)             ((((opus_uint32)(a)-(opus_uint32)(b)) & 0x80000000) == 0 ?                                        \
-                                        (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) :    \
-                                        ((((a)^0x80000000) & (b)  & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
+/* use clang builtin overflow detectors */
+static OPUS_INLINE opus_int32 silk_SUB_SAT32(opus_int32 a, opus_int32 b) {
+    opus_int32 diff;
+
+    if (!__builtin_sub_overflow(a, b, &diff)) {
+        return diff;  /* a - b fits in 32 bits */
+    }
+
+    /* A subtraction overflows low only when a is negative and high only when
+     * a is non-negative, so the sign of a selects the saturation bound. */
+    return (a < 0) ? silk_int32_MIN : silk_int32_MAX;
+}
 
 #if defined(MIPSr1_ASM)
 #include "mips/macros_mipsr1.h"
diff --git a/src/analysis.c b/src/analysis.c
index 1f58013..b6f13b0 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -149,7 +149,10 @@
         out32_hp  = ADD32( out32_hp, X );
         S[ 2 ] = ADD32( -in32, X );
 
-        hp_ener += out32_hp*(opus_val64)out32_hp;
+        if(__builtin_add_overflow(hp_ener, out32_hp*(opus_val64)out32_hp, &hp_ener))
+        {
+           hp_ener = UINT64_MAX;
+        }
         /* Add, convert back to int16 and store to output */
         out[ k ] = HALF32(out32);
     }