refmvs: Fix buffer overread in save_tmvs() asm

The refmvs_block struct is only 12 bytes in size, but the asm accesses
it using 16-byte unaligned loads.

A 16-byte load of the last element would therefore read 4 bytes past the
end of the allocated buffer, so pad the allocation size by 4 bytes.

Bug: 331840375
Bug: 335203555
Test: treehugger

Change-Id: I02a93fc12bf3cfc20dda546a70f62ee29430a9a7
diff --git a/src/refmvs.c b/src/refmvs.c
index 200afeb..1da024b 100644
--- a/src/refmvs.c
+++ b/src/refmvs.c
@@ -817,7 +817,9 @@
     if (r_stride != rf->r_stride || n_tile_rows != rf->n_tile_rows) {
         if (rf->r) dav1d_freep_aligned(&rf->r);
         const int uses_2pass = n_tile_threads > 1 && n_frame_threads > 1;
-        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, sizeof(*rf->r) * 35 * r_stride * n_tile_rows * (1 + uses_2pass), 64);
+        /* sizeof(refmvs_block) == 12 but it's accessed using 16-byte loads in asm,
+         * so add 4 bytes of padding to avoid buffer overreads. */
+        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, sizeof(*rf->r) * 35 * r_stride * n_tile_rows * (1 + uses_2pass) + 4, 64);
         if (!rf->r) return DAV1D_ERR(ENOMEM);
         rf->r_stride = r_stride;
     }