src/tools/miri/src/shims/x86/sse41.rs - toolchain/rustc - Git at Google

 use rustc_middle::mir;
 use rustc_span::Symbol;
 use rustc_target::abi::Size;
 use rustc_target::spec::abi::Abi;

 use crate::*;
 use shims::foreign_items::EmulateForeignItemResult;

 // Attaches the SSE4.1 shims below to the Miri interpreter context.
 impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
 /// Shims for the LLVM intrinsics whose link names start with `llvm.x86.sse41.`.
 pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
     crate::MiriInterpCxExt<'mir, 'tcx>
 {
     /// Emulates the SSE4.1 intrinsic named by `link_name`, reading its
     /// operands from `args` and writing the result to `dest`.
     ///
     /// `link_name` must start with `llvm.x86.sse41.`; the caller is expected
     /// to have checked this already, and this function panics otherwise.
     ///
     /// Returns `NeedsJumping` when the intrinsic was emulated and
     /// `NotSupported` when the name is not one of the intrinsics handled here.
     /// Errors from argument checking and from reading or writing interpreter
     /// memory are propagated.
     fn emulate_x86_sse41_intrinsic(
         &mut self,
         link_name: Symbol,
         abi: Abi,
         args: &[OpTy<'tcx, Provenance>],
         dest: &PlaceTy<'tcx, Provenance>,
     ) -> InterpResult<'tcx, EmulateForeignItemResult> {
         let this = self.eval_context_mut();
         // Prefix should have already been checked.
         let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse41.").unwrap();

         match unprefixed_name {
             // Used to implement the _mm_insert_ps function.
             // Takes one element of `right` and inserts it into `left` and
             // optionally zeroes some elements. Source index is specified
             // in bits `6..=7` of `imm`, destination index is specified in
             // bits `4..=5` of `imm`, and the `i`th bit specifies whether
             // element `i` is zeroed.
             "insertps" => {
                 let [left, right, imm] =
                     this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                 let (left, left_len) = this.operand_to_simd(left)?;
                 let (right, right_len) = this.operand_to_simd(right)?;
                 let (dest, dest_len) = this.place_to_simd(dest)?;

                 assert_eq!(dest_len, left_len);
                 assert_eq!(dest_len, right_len);
                 // Keeps the `1 << i` shifts below within the 8 bits of `imm`.
                 assert!(dest_len <= 4);

                 let imm = this.read_scalar(imm)?.to_u8()?;
                 let src_index = u64::from((imm >> 6) & 0b11);
                 let dst_index = u64::from((imm >> 4) & 0b11);

                 let src_value = this.read_immediate(&this.project_index(&right, src_index)?)?;

                 for i in 0..dest_len {
                     let dest = this.project_index(&dest, i)?;

                     // The zero mask is checked first, so it takes priority
                     // over the inserted element.
                     if imm & (1 << i) != 0 {
                         // zeroed
                         this.write_scalar(Scalar::from_u32(0), &dest)?;
                     } else if i == dst_index {
                         // copy from `right` at specified index
                         this.write_immediate(*src_value, &dest)?;
                     } else {
                         // copy from `left`
                         this.copy_op(
                             &this.project_index(&left, i)?,
                             &dest,
                             /*allow_transmute*/ false,
                         )?;
                     }
                 }
             }
             // Used to implement the _mm_packus_epi32 function.
             // Concatenates two 32-bit signed integer vectors and converts
             // the result to a 16-bit unsigned integer vector with saturation.
             "packusdw" => {
                 let [left, right] =
                     this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                 let (left, left_len) = this.operand_to_simd(left)?;
                 let (right, right_len) = this.operand_to_simd(right)?;
                 let (dest, dest_len) = this.place_to_simd(dest)?;

                 assert_eq!(left_len, right_len);
                 assert_eq!(dest_len, left_len.checked_mul(2).unwrap());

                 for i in 0..left_len {
                     let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i32()?;
                     let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i32()?;
                     // Elements of `left` fill the first half of `dest`,
                     // elements of `right` fill the second half.
                     let left_dest = this.project_index(&dest, i)?;
                     let right_dest = this.project_index(&dest, i.checked_add(left_len).unwrap())?;

                     // Saturate: negative values become 0, values above
                     // `u16::MAX` become `u16::MAX`.
                     let left_res =
                         u16::try_from(left).unwrap_or(if left < 0 { 0 } else { u16::MAX });
                     let right_res =
                         u16::try_from(right).unwrap_or(if right < 0 { 0 } else { u16::MAX });

                     this.write_scalar(Scalar::from_u16(left_res), &left_dest)?;
                     this.write_scalar(Scalar::from_u16(right_res), &right_dest)?;
                 }
             }
             // Used to implement the _mm_dp_ps and _mm_dp_pd functions.
             // Conditionally multiplies the packed floating-point elements in
             // `left` and `right` using the high 4 bits in `imm`, sums the
             // products, and conditionally stores the sum in `dest` using the low
             // 4 bits of `imm`.
             "dpps" | "dppd" => {
                 let [left, right, imm] =
                     this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                 let (left, left_len) = this.operand_to_simd(left)?;
                 let (right, right_len) = this.operand_to_simd(right)?;
                 let (dest, dest_len) = this.place_to_simd(dest)?;

                 assert_eq!(left_len, right_len);
                 assert!(dest_len <= 4);

                 let imm = this.read_scalar(imm)?.to_u8()?;

                 let element_layout = left.layout.field(this, 0);

                 // Calculate dot product
                 // Elements are floating point numbers, but we can use `from_int`
                 // because the representation of 0.0 is all zero bits.
                 let mut sum = ImmTy::from_int(0u8, element_layout);
                 for i in 0..left_len {
                     // Bit `i + 4` of `imm` selects whether element `i`
                     // contributes to the sum.
                     if imm & (1 << i.checked_add(4).unwrap()) != 0 {
                         let left = this.read_immediate(&this.project_index(&left, i)?)?;
                         let right = this.read_immediate(&this.project_index(&right, i)?)?;

                         let mul = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?;
                         sum = this.wrapping_binary_op(mir::BinOp::Add, &sum, &mul)?;
                     }
                 }

                 // Write to destination (conditioned to imm)
                 for i in 0..dest_len {
                     let dest = this.project_index(&dest, i)?;

                     if imm & (1 << i) != 0 {
                         this.write_immediate(*sum, &dest)?;
                     } else {
                         this.write_scalar(Scalar::from_int(0u8, element_layout.size), &dest)?;
                     }
                 }
             }
             // Used to implement the _mm_floor_ss, _mm_ceil_ss and _mm_round_ss
             // functions. Rounds the first element of `right` according to `rounding`
             // and copies the remaining elements from `left`.
             "round.ss" => {
                 let [left, right, rounding] =
                     this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                 round_first::<rustc_apfloat::ieee::Single>(this, left, right, rounding, dest)?;
             }
             // Used to implement the _mm_floor_sd, _mm_ceil_sd and _mm_round_sd
             // functions. Rounds the first element of `right` according to `rounding`
             // and copies the remaining elements from `left`.
             "round.sd" => {
                 let [left, right, rounding] =
                     this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                 round_first::<rustc_apfloat::ieee::Double>(this, left, right, rounding, dest)?;
             }
             // Used to implement the _mm_minpos_epu16 function.
             // Finds the minimum unsigned 16-bit integer in `op` and
             // returns its value and position.
             "phminposuw" => {
                 let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                 let (op, op_len) = this.operand_to_simd(op)?;
                 let (dest, dest_len) = this.place_to_simd(dest)?;

                 // Find minimum. The strict `<` keeps the lowest index when
                 // several elements share the minimum value.
                 let mut min_value = u16::MAX;
                 let mut min_index = 0;
                 for i in 0..op_len {
                     let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u16()?;
                     if op < min_value {
                         min_value = op;
                         min_index = i;
                     }
                 }

                 // Write value and index
                 this.write_scalar(Scalar::from_u16(min_value), &this.project_index(&dest, 0)?)?;
                 this.write_scalar(
                     Scalar::from_u16(min_index.try_into().unwrap()),
                     &this.project_index(&dest, 1)?,
                 )?;
                 // Fill remaining with zeros
                 for i in 2..dest_len {
                     this.write_scalar(Scalar::from_u16(0), &this.project_index(&dest, i)?)?;
                 }
             }
             // Used to implement the _mm_mpsadbw_epu8 function.
             // Compute the sum of absolute differences of quadruplets of unsigned
             // 8-bit integers in `left` and `right`, and store the 16-bit results
             // in `dest`. Quadruplets are selected from `left` and `right` with
             // offsets specified in `imm`.
             // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8
             "mpsadbw" => {
                 let [left, right, imm] =
                     this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                 let (left, left_len) = this.operand_to_simd(left)?;
                 let (right, right_len) = this.operand_to_simd(right)?;
                 let (dest, dest_len) = this.place_to_simd(dest)?;

                 assert_eq!(left_len, right_len);
                 assert_eq!(left_len, dest_len.checked_mul(2).unwrap());

                 let imm = this.read_scalar(imm)?.to_u8()?;
                 // Bit 2 of `imm` specifies the offset for indices of `left`.
                 // The offset is 0 when the bit is 0 or 4 when the bit is 1.
                 let left_offset = u64::from((imm >> 2) & 1).checked_mul(4).unwrap();
                 // Bits 0..=1 of `imm` specify the offset for indices of
                 // `right` in blocks of 4 elements.
                 let right_offset = u64::from(imm & 0b11).checked_mul(4).unwrap();

                 for i in 0..dest_len {
                     // The window into `left` slides by one element per
                     // result; the quadruplet of `right` stays fixed.
                     let left_offset = left_offset.checked_add(i).unwrap();
                     let mut res: u16 = 0;
                     for j in 0..4 {
                         let left = this
                             .read_scalar(
                                 &this.project_index(&left, left_offset.checked_add(j).unwrap())?,
                             )?
                             .to_u8()?;
                         let right = this
                             .read_scalar(
                                 &this
                                     .project_index(&right, right_offset.checked_add(j).unwrap())?,
                             )?
                             .to_u8()?;
                         res = res.checked_add(left.abs_diff(right).into()).unwrap();
                     }
                     this.write_scalar(Scalar::from_u16(res), &this.project_index(&dest, i)?)?;
                 }
             }
             // Used to implement the _mm_testz_si128, _mm_testc_si128
             // and _mm_testnzc_si128 functions.
             // Tests `op & mask == 0`, `op & mask == mask` or
             // `op & mask != 0 && op & mask != mask`, where each comparison
             // is made on the whole vector rather than element by element.
             "ptestz" | "ptestc" | "ptestnzc" => {
                 let [op, mask] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

                 let (op, op_len) = this.operand_to_simd(op)?;
                 let (mask, mask_len) = this.operand_to_simd(mask)?;

                 assert_eq!(op_len, mask_len);

                 // `all_zero`: `op & mask` is zero across the whole vector.
                 // `masked_set`: every bit set in `mask` is also set in `op`.
                 // Both flags must be accumulated over all elements before they
                 // are combined; evaluating the `ptestnzc` condition per
                 // element would wrongly require every element to be
                 // "mixed" on its own.
                 let mut all_zero = true;
                 let mut masked_set = true;
                 for i in 0..op_len {
                     let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u64()?;
                     let mask = this.read_scalar(&this.project_index(&mask, i)?)?.to_u64()?;
                     all_zero &= (op & mask) == 0;
                     masked_set &= (op & mask) == mask;
                 }

                 let res = match unprefixed_name {
                     "ptestz" => all_zero,
                     "ptestc" => masked_set,
                     "ptestnzc" => !all_zero && !masked_set,
                     _ => unreachable!(),
                 };

                 this.write_scalar(Scalar::from_i32(res.into()), dest)?;
             }
             _ => return Ok(EmulateForeignItemResult::NotSupported),
         }
         Ok(EmulateForeignItemResult::NeedsJumping)
     }
 }

 // Rounds the first element of `right` according to `rounding`
 // and copies the remaining elements from `left`.
 fn round_first<'tcx, F: rustc_apfloat::Float>(
     this: &mut crate::MiriInterpCx<'_, 'tcx>,
     left: &OpTy<'tcx, Provenance>,
     right: &OpTy<'tcx, Provenance>,
     rounding: &OpTy<'tcx, Provenance>,
     dest: &PlaceTy<'tcx, Provenance>,
 ) -> InterpResult<'tcx, ()> {
     let (left, left_len) = this.operand_to_simd(left)?;
     let (right, right_len) = this.operand_to_simd(right)?;
     let (dest, dest_len) = this.place_to_simd(dest)?;

     assert_eq!(dest_len, left_len);
     assert_eq!(dest_len, right_len);

     // The fourth bit of `rounding` only affects the SSE status
     // register, which cannot be accessed from Miri (or from Rust,
     // for that matter), so we can ignore it.
     let rounding = match this.read_scalar(rounding)?.to_i32()? & !0b1000 {
         // When the third bit is 0, the rounding mode is determined by the
         // first two bits.
         0b000 => rustc_apfloat::Round::NearestTiesToEven,
         0b001 => rustc_apfloat::Round::TowardNegative,
         0b010 => rustc_apfloat::Round::TowardPositive,
         0b011 => rustc_apfloat::Round::TowardZero,
         // When the third bit is 1, the rounding mode is determined by the
         // SSE status register. Since we do not support modifying it from
         // Miri (or Rust), we assume it to be at its default mode (round-to-nearest).
         0b100..=0b111 => rustc_apfloat::Round::NearestTiesToEven,
         rounding => throw_unsup_format!("unsupported rounding mode 0x{rounding:02x}"),
     };

     let op0: F = this.read_scalar(&this.project_index(&right, 0)?)?.to_float()?;
     let res = op0.round_to_integral(rounding).value;
     this.write_scalar(
         Scalar::from_uint(res.to_bits(), Size::from_bits(F::BITS)),
         &this.project_index(&dest, 0)?,
     )?;

     for i in 1..dest_len {
         this.copy_op(
             &this.project_index(&left, i)?,
             &this.project_index(&dest, i)?,
             /*allow_transmute*/ false,
         )?;
     }

     Ok(())
 }
	use rustc_middle::mir;
	use rustc_span::Symbol;
	use rustc_target::abi::Size;
	use rustc_target::spec::abi::Abi;

	use crate::*;
	use shims::foreign_items::EmulateForeignItemResult;

	// Attaches the SSE4.1 shims below to the Miri interpreter context.
	impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
	/// Shims for the LLVM intrinsics whose link names start with `llvm.x86.sse41.`.
	pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
	    crate::MiriInterpCxExt<'mir, 'tcx>
	{
	    /// Emulates the SSE4.1 intrinsic named by `link_name`, reading its
	    /// operands from `args` and writing the result to `dest`.
	    ///
	    /// `link_name` must start with `llvm.x86.sse41.`; the caller is expected
	    /// to have checked this already, and this function panics otherwise.
	    ///
	    /// Returns `NeedsJumping` when the intrinsic was emulated and
	    /// `NotSupported` when the name is not one of the intrinsics handled here.
	    /// Errors from argument checking and from reading or writing interpreter
	    /// memory are propagated.
	    fn emulate_x86_sse41_intrinsic(
	        &mut self,
	        link_name: Symbol,
	        abi: Abi,
	        args: &[OpTy<'tcx, Provenance>],
	        dest: &PlaceTy<'tcx, Provenance>,
	    ) -> InterpResult<'tcx, EmulateForeignItemResult> {
	        let this = self.eval_context_mut();
	        // Prefix should have already been checked.
	        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse41.").unwrap();

	        match unprefixed_name {
	            // Used to implement the _mm_insert_ps function.
	            // Takes one element of `right` and inserts it into `left` and
	            // optionally zeroes some elements. Source index is specified
	            // in bits `6..=7` of `imm`, destination index is specified in
	            // bits `4..=5` of `imm`, and the `i`th bit specifies whether
	            // element `i` is zeroed.
	            "insertps" => {
	                let [left, right, imm] =
	                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

	                let (left, left_len) = this.operand_to_simd(left)?;
	                let (right, right_len) = this.operand_to_simd(right)?;
	                let (dest, dest_len) = this.place_to_simd(dest)?;

	                assert_eq!(dest_len, left_len);
	                assert_eq!(dest_len, right_len);
	                // Keeps the `1 << i` shifts below within the 8 bits of `imm`.
	                assert!(dest_len <= 4);

	                let imm = this.read_scalar(imm)?.to_u8()?;
	                let src_index = u64::from((imm >> 6) & 0b11);
	                let dst_index = u64::from((imm >> 4) & 0b11);

	                let src_value = this.read_immediate(&this.project_index(&right, src_index)?)?;

	                for i in 0..dest_len {
	                    let dest = this.project_index(&dest, i)?;

	                    // The zero mask is checked first, so it takes priority
	                    // over the inserted element.
	                    if imm & (1 << i) != 0 {
	                        // zeroed
	                        this.write_scalar(Scalar::from_u32(0), &dest)?;
	                    } else if i == dst_index {
	                        // copy from `right` at specified index
	                        this.write_immediate(*src_value, &dest)?;
	                    } else {
	                        // copy from `left`
	                        this.copy_op(
	                            &this.project_index(&left, i)?,
	                            &dest,
	                            /*allow_transmute*/ false,
	                        )?;
	                    }
	                }
	            }
	            // Used to implement the _mm_packus_epi32 function.
	            // Concatenates two 32-bit signed integer vectors and converts
	            // the result to a 16-bit unsigned integer vector with saturation.
	            "packusdw" => {
	                let [left, right] =
	                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

	                let (left, left_len) = this.operand_to_simd(left)?;
	                let (right, right_len) = this.operand_to_simd(right)?;
	                let (dest, dest_len) = this.place_to_simd(dest)?;

	                assert_eq!(left_len, right_len);
	                assert_eq!(dest_len, left_len.checked_mul(2).unwrap());

	                for i in 0..left_len {
	                    let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i32()?;
	                    let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i32()?;
	                    // Elements of `left` fill the first half of `dest`,
	                    // elements of `right` fill the second half.
	                    let left_dest = this.project_index(&dest, i)?;
	                    let right_dest = this.project_index(&dest, i.checked_add(left_len).unwrap())?;

	                    // Saturate: negative values become 0, values above
	                    // `u16::MAX` become `u16::MAX`.
	                    let left_res =
	                        u16::try_from(left).unwrap_or(if left < 0 { 0 } else { u16::MAX });
	                    let right_res =
	                        u16::try_from(right).unwrap_or(if right < 0 { 0 } else { u16::MAX });

	                    this.write_scalar(Scalar::from_u16(left_res), &left_dest)?;
	                    this.write_scalar(Scalar::from_u16(right_res), &right_dest)?;
	                }
	            }
	            // Used to implement the _mm_dp_ps and _mm_dp_pd functions.
	            // Conditionally multiplies the packed floating-point elements in
	            // `left` and `right` using the high 4 bits in `imm`, sums the
	            // products, and conditionally stores the sum in `dest` using the low
	            // 4 bits of `imm`.
	            "dpps" | "dppd" => {
	                let [left, right, imm] =
	                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

	                let (left, left_len) = this.operand_to_simd(left)?;
	                let (right, right_len) = this.operand_to_simd(right)?;
	                let (dest, dest_len) = this.place_to_simd(dest)?;

	                assert_eq!(left_len, right_len);
	                assert!(dest_len <= 4);

	                let imm = this.read_scalar(imm)?.to_u8()?;

	                let element_layout = left.layout.field(this, 0);

	                // Calculate dot product
	                // Elements are floating point numbers, but we can use `from_int`
	                // because the representation of 0.0 is all zero bits.
	                let mut sum = ImmTy::from_int(0u8, element_layout);
	                for i in 0..left_len {
	                    // Bit `i + 4` of `imm` selects whether element `i`
	                    // contributes to the sum.
	                    if imm & (1 << i.checked_add(4).unwrap()) != 0 {
	                        let left = this.read_immediate(&this.project_index(&left, i)?)?;
	                        let right = this.read_immediate(&this.project_index(&right, i)?)?;

	                        let mul = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?;
	                        sum = this.wrapping_binary_op(mir::BinOp::Add, &sum, &mul)?;
	                    }
	                }

	                // Write to destination (conditioned to imm)
	                for i in 0..dest_len {
	                    let dest = this.project_index(&dest, i)?;

	                    if imm & (1 << i) != 0 {
	                        this.write_immediate(*sum, &dest)?;
	                    } else {
	                        this.write_scalar(Scalar::from_int(0u8, element_layout.size), &dest)?;
	                    }
	                }
	            }
	            // Used to implement the _mm_floor_ss, _mm_ceil_ss and _mm_round_ss
	            // functions. Rounds the first element of `right` according to `rounding`
	            // and copies the remaining elements from `left`.
	            "round.ss" => {
	                let [left, right, rounding] =
	                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

	                round_first::<rustc_apfloat::ieee::Single>(this, left, right, rounding, dest)?;
	            }
	            // Used to implement the _mm_floor_sd, _mm_ceil_sd and _mm_round_sd
	            // functions. Rounds the first element of `right` according to `rounding`
	            // and copies the remaining elements from `left`.
	            "round.sd" => {
	                let [left, right, rounding] =
	                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

	                round_first::<rustc_apfloat::ieee::Double>(this, left, right, rounding, dest)?;
	            }
	            // Used to implement the _mm_minpos_epu16 function.
	            // Finds the minimum unsigned 16-bit integer in `op` and
	            // returns its value and position.
	            "phminposuw" => {
	                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

	                let (op, op_len) = this.operand_to_simd(op)?;
	                let (dest, dest_len) = this.place_to_simd(dest)?;

	                // Find minimum. The strict `<` keeps the lowest index when
	                // several elements share the minimum value.
	                let mut min_value = u16::MAX;
	                let mut min_index = 0;
	                for i in 0..op_len {
	                    let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u16()?;
	                    if op < min_value {
	                        min_value = op;
	                        min_index = i;
	                    }
	                }

	                // Write value and index
	                this.write_scalar(Scalar::from_u16(min_value), &this.project_index(&dest, 0)?)?;
	                this.write_scalar(
	                    Scalar::from_u16(min_index.try_into().unwrap()),
	                    &this.project_index(&dest, 1)?,
	                )?;
	                // Fill remaining with zeros
	                for i in 2..dest_len {
	                    this.write_scalar(Scalar::from_u16(0), &this.project_index(&dest, i)?)?;
	                }
	            }
	            // Used to implement the _mm_mpsadbw_epu8 function.
	            // Compute the sum of absolute differences of quadruplets of unsigned
	            // 8-bit integers in `left` and `right`, and store the 16-bit results
	            // in `dest`. Quadruplets are selected from `left` and `right` with
	            // offsets specified in `imm`.
	            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8
	            "mpsadbw" => {
	                let [left, right, imm] =
	                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

	                let (left, left_len) = this.operand_to_simd(left)?;
	                let (right, right_len) = this.operand_to_simd(right)?;
	                let (dest, dest_len) = this.place_to_simd(dest)?;

	                assert_eq!(left_len, right_len);
	                assert_eq!(left_len, dest_len.checked_mul(2).unwrap());

	                let imm = this.read_scalar(imm)?.to_u8()?;
	                // Bit 2 of `imm` specifies the offset for indices of `left`.
	                // The offset is 0 when the bit is 0 or 4 when the bit is 1.
	                let left_offset = u64::from((imm >> 2) & 1).checked_mul(4).unwrap();
	                // Bits 0..=1 of `imm` specify the offset for indices of
	                // `right` in blocks of 4 elements.
	                let right_offset = u64::from(imm & 0b11).checked_mul(4).unwrap();

	                for i in 0..dest_len {
	                    // The window into `left` slides by one element per
	                    // result; the quadruplet of `right` stays fixed.
	                    let left_offset = left_offset.checked_add(i).unwrap();
	                    let mut res: u16 = 0;
	                    for j in 0..4 {
	                        let left = this
	                            .read_scalar(
	                                &this.project_index(&left, left_offset.checked_add(j).unwrap())?,
	                            )?
	                            .to_u8()?;
	                        let right = this
	                            .read_scalar(
	                                &this
	                                    .project_index(&right, right_offset.checked_add(j).unwrap())?,
	                            )?
	                            .to_u8()?;
	                        res = res.checked_add(left.abs_diff(right).into()).unwrap();
	                    }
	                    this.write_scalar(Scalar::from_u16(res), &this.project_index(&dest, i)?)?;
	                }
	            }
	            // Used to implement the _mm_testz_si128, _mm_testc_si128
	            // and _mm_testnzc_si128 functions.
	            // Tests `op & mask == 0`, `op & mask == mask` or
	            // `op & mask != 0 && op & mask != mask`, where each comparison
	            // is made on the whole vector rather than element by element.
	            "ptestz" | "ptestc" | "ptestnzc" => {
	                let [op, mask] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

	                let (op, op_len) = this.operand_to_simd(op)?;
	                let (mask, mask_len) = this.operand_to_simd(mask)?;

	                assert_eq!(op_len, mask_len);

	                // `all_zero`: `op & mask` is zero across the whole vector.
	                // `masked_set`: every bit set in `mask` is also set in `op`.
	                // Both flags must be accumulated over all elements before they
	                // are combined; evaluating the `ptestnzc` condition per
	                // element would wrongly require every element to be
	                // "mixed" on its own.
	                let mut all_zero = true;
	                let mut masked_set = true;
	                for i in 0..op_len {
	                    let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u64()?;
	                    let mask = this.read_scalar(&this.project_index(&mask, i)?)?.to_u64()?;
	                    all_zero &= (op & mask) == 0;
	                    masked_set &= (op & mask) == mask;
	                }

	                let res = match unprefixed_name {
	                    "ptestz" => all_zero,
	                    "ptestc" => masked_set,
	                    "ptestnzc" => !all_zero && !masked_set,
	                    _ => unreachable!(),
	                };

	                this.write_scalar(Scalar::from_i32(res.into()), dest)?;
	            }
	            _ => return Ok(EmulateForeignItemResult::NotSupported),
	        }
	        Ok(EmulateForeignItemResult::NeedsJumping)
	    }
	}

	/// Rounds the first element of `right` according to `rounding`, stores it
	/// in the first element of `dest`, and copies the remaining elements of
	/// `dest` from `left`.
	///
	/// `F` is the floating-point type of the vector elements. Panics if the
	/// three vectors do not all have the same number of elements.
	fn round_first<'tcx, F: rustc_apfloat::Float>(
	    this: &mut crate::MiriInterpCx<'_, 'tcx>,
	    left: &OpTy<'tcx, Provenance>,
	    right: &OpTy<'tcx, Provenance>,
	    rounding: &OpTy<'tcx, Provenance>,
	    dest: &PlaceTy<'tcx, Provenance>,
	) -> InterpResult<'tcx, ()> {
	    let (left, left_len) = this.operand_to_simd(left)?;
	    let (right, right_len) = this.operand_to_simd(right)?;
	    let (dest, dest_len) = this.place_to_simd(dest)?;

	    assert_eq!(dest_len, left_len);
	    assert_eq!(dest_len, right_len);

	    // The fourth bit of `rounding` only affects the SSE status
	    // register, which cannot be accessed from Miri (or from Rust,
	    // for that matter), so we can ignore it.
	    let rounding = match this.read_scalar(rounding)?.to_i32()? & !0b1000 {
	        // When the third bit is 0, the rounding mode is determined by the
	        // first two bits.
	        0b000 => rustc_apfloat::Round::NearestTiesToEven,
	        0b001 => rustc_apfloat::Round::TowardNegative,
	        0b010 => rustc_apfloat::Round::TowardPositive,
	        0b011 => rustc_apfloat::Round::TowardZero,
	        // When the third bit is 1, the rounding mode is determined by the
	        // SSE status register. Since we do not support modifying it from
	        // Miri (or Rust), we assume it to be at its default mode (round-to-nearest).
	        0b100..=0b111 => rustc_apfloat::Round::NearestTiesToEven,
	        rounding => throw_unsup_format!("unsupported rounding mode 0x{rounding:02x}"),
	    };

	    // Round the first element of `right` and write it to the first
	    // element of `dest`.
	    let op0: F = this.read_scalar(&this.project_index(&right, 0)?)?.to_float()?;
	    let res = op0.round_to_integral(rounding).value;
	    this.write_scalar(
	        Scalar::from_uint(res.to_bits(), Size::from_bits(F::BITS)),
	        &this.project_index(&dest, 0)?,
	    )?;

	    // The remaining elements are copied from `left` unchanged.
	    for i in 1..dest_len {
	        this.copy_op(
	            &this.project_index(&left, i)?,
	            &this.project_index(&dest, i)?,
	            /*allow_transmute*/ false,
	        )?;
	    }

	    Ok(())
	}