Fix the 64-bit LLSC (load-linked) implementation when VEX_HWCAPS_ARM_NEON is set.

This is a backport of the following patch by Igor Saenko:
https://bugs.kde.org/show_bug.cgi?id=266035#c43

Change-Id: I95e1591568c2a90ae4b49556abca695d1a9ec7a2
diff --git a/main/VEX/priv/host_arm_isel.c b/main/VEX/priv/host_arm_isel.c
index c8488a2..45c6f37 100644
--- a/main/VEX/priv/host_arm_isel.c
+++ b/main/VEX/priv/host_arm_isel.c
@@ -5835,12 +5835,20 @@
             return;
          } else if (ty == Ity_I64) {
             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
-            HReg dstHi, dstLo;
             addInstr(env, mk_iMOVds_RR(hregARM_R0(), raddr));
             addInstr(env, ARMInstr_LdrEX(8 /* 64-bit */));
-            lookupIRTemp64(&dstHi, &dstLo, env, res);
-            addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R2()) );
-            addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R3()) );
+            if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+                HReg tmp = lookupIRTemp(env, res);
+                addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R2(),
+                        hregARM_R3()));
+            } else {
+                HReg dstHi, dstLo;
+                /* The loaded value is in r2 and r3.  Park it in the
+                   register-pair associated with res. */
+                lookupIRTemp64( &dstHi, &dstLo, env, res);
+                addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R2()) );
+                addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R3()) );
+            }
             return;
          }
          /* else fall thru; is unhandled */