compiler/rustc_codegen_gcc/src/intrinsic/simd.rs - toolchain/rustc - Git at Google

 use gccjit::ToRValue;
 use gccjit::{BinaryOp, RValue, Type};
 #[cfg(feature = "master")]
 use gccjit::{ComparisonOp, UnaryOp};

 use rustc_codegen_ssa::base::compare_simd_types;
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
 #[cfg(feature = "master")]
 use rustc_codegen_ssa::errors::ExpectedPointerMutability;
 use rustc_codegen_ssa::errors::InvalidMonomorphization;
 use rustc_codegen_ssa::mir::operand::OperandRef;
 use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::{BaseTypeMethods, BuilderMethods};
 use rustc_hir as hir;
 use rustc_middle::span_bug;
 use rustc_middle::ty::layout::HasTyCtxt;
 use rustc_middle::ty::{self, Ty};
 use rustc_span::{sym, Span, Symbol};
 use rustc_target::abi::Align;

 use crate::builder::Builder;
 #[cfg(feature = "master")]
 use crate::context::CodegenCx;

 pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
     bx: &mut Builder<'a, 'gcc, 'tcx>,
     name: Symbol,
     callee_ty: Ty<'tcx>,
     args: &[OperandRef<'tcx, RValue<'gcc>>],
     ret_ty: Ty<'tcx>,
     llret_ty: Type<'gcc>,
     span: Span,
 ) -> Result<RValue<'gcc>, ()> {
     // macros for error handling:
     macro_rules! return_error {
         ($err:expr) => {{
             bx.sess().emit_err($err);
             return Err(());
         }};
     }
     macro_rules! require {
         ($cond:expr, $err:expr) => {
             if !$cond {
                 return_error!($err);
             }
         };
     }
     macro_rules! require_simd {
         ($ty: expr, $diag: expr) => {
             require!($ty.is_simd(), $diag)
         };
     }

     let tcx = bx.tcx();
     let sig =
         tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), callee_ty.fn_sig(tcx));
     let arg_tys = sig.inputs();

     if name == sym::simd_select_bitmask {
         require_simd!(
             arg_tys[1],
             InvalidMonomorphization::SimdArgument { span, name, ty: arg_tys[1] }
         );
         let (len, _) = arg_tys[1].simd_size_and_type(bx.tcx());

         let expected_int_bits = (len.max(8) - 1).next_power_of_two();
         let expected_bytes = len / 8 + ((len % 8 > 0) as u64);

         let mask_ty = arg_tys[0];
         let mut mask = match mask_ty.kind() {
             ty::Int(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
             ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
             ty::Array(elem, len)
                 if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
                     && len.try_eval_target_usize(bx.tcx, ty::ParamEnv::reveal_all())
                         == Some(expected_bytes) =>
             {
                 let place = PlaceRef::alloca(bx, args[0].layout);
                 args[0].val.store(bx, place);
                 let int_ty = bx.type_ix(expected_bytes * 8);
                 let ptr = bx.pointercast(place.llval, bx.cx.type_ptr_to(int_ty));
                 bx.load(int_ty, ptr, Align::ONE)
             }
             _ => return_error!(InvalidMonomorphization::InvalidBitmask {
                 span,
                 name,
                 mask_ty,
                 expected_int_bits,
                 expected_bytes
             }),
         };

         let arg1 = args[1].immediate();
         let arg1_type = arg1.get_type();
         let arg1_vector_type = arg1_type.unqualified().dyncast_vector().expect("vector type");
         let arg1_element_type = arg1_vector_type.get_element_type();

         // NOTE: since the arguments can be vectors of floats, make sure the mask is a vector of
         // integer.
         let mask_element_type = bx.type_ix(arg1_element_type.get_size() as u64 * 8);
         let vector_mask_type =
             bx.context.new_vector_type(mask_element_type, arg1_vector_type.get_num_units() as u64);

         let mut elements = vec![];
         let one = bx.context.new_rvalue_one(mask.get_type());
         for _ in 0..len {
             let element = bx.context.new_cast(None, mask & one, mask_element_type);
             elements.push(element);
             mask = mask >> one;
         }
         let vector_mask = bx.context.new_rvalue_from_vector(None, vector_mask_type, &elements);

         return Ok(bx.vector_select(vector_mask, arg1, args[2].immediate()));
     }

     // every intrinsic below takes a SIMD vector as its first argument
     require_simd!(arg_tys[0], InvalidMonomorphization::SimdInput { span, name, ty: arg_tys[0] });
     let in_ty = arg_tys[0];

     let comparison = match name {
         sym::simd_eq => Some(hir::BinOpKind::Eq),
         sym::simd_ne => Some(hir::BinOpKind::Ne),
         sym::simd_lt => Some(hir::BinOpKind::Lt),
         sym::simd_le => Some(hir::BinOpKind::Le),
         sym::simd_gt => Some(hir::BinOpKind::Gt),
         sym::simd_ge => Some(hir::BinOpKind::Ge),
         _ => None,
     };

     let (in_len, in_elem) = arg_tys[0].simd_size_and_type(bx.tcx());
     if let Some(cmp_op) = comparison {
         require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });

         let (out_len, out_ty) = ret_ty.simd_size_and_type(bx.tcx());
         require!(
             in_len == out_len,
             InvalidMonomorphization::ReturnLengthInputType {
                 span,
                 name,
                 in_len,
                 in_ty,
                 ret_ty,
                 out_len
             }
         );
         require!(
             bx.type_kind(bx.element_type(llret_ty)) == TypeKind::Integer,
             InvalidMonomorphization::ReturnIntegerType { span, name, ret_ty, out_ty }
         );

         let arg1 = args[0].immediate();
         // NOTE: we get different vector types for the same vector type and libgccjit doesn't
         // compare them as equal, so bitcast.
         // FIXME(antoyo): allow comparing vector types as equal in libgccjit.
         let arg2 = bx.context.new_bitcast(None, args[1].immediate(), arg1.get_type());
         return Ok(compare_simd_types(bx, arg1, arg2, in_elem, llret_ty, cmp_op));
     }

     if name == sym::simd_shuffle {
         // Make sure this is actually an array, since typeck only checks the length-suffixed
         // version of this intrinsic.
         let n: u64 = match args[2].layout.ty.kind() {
             ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => {
                 len.try_eval_target_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else(
                     || span_bug!(span, "could not evaluate shuffle index array length"),
                 )
             }
             _ => return_error!(InvalidMonomorphization::SimdShuffle {
                 span,
                 name,
                 ty: args[2].layout.ty
             }),
         };
         require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });

         let (out_len, out_ty) = ret_ty.simd_size_and_type(bx.tcx());
         require!(
             out_len == n,
             InvalidMonomorphization::ReturnLength { span, name, in_len: n, ret_ty, out_len }
         );
         require!(
             in_elem == out_ty,
             InvalidMonomorphization::ReturnElement { span, name, in_elem, in_ty, ret_ty, out_ty }
         );

         let vector = args[2].immediate();

         return Ok(bx.shuffle_vector(args[0].immediate(), args[1].immediate(), vector));
     }

     #[cfg(feature = "master")]
     if name == sym::simd_insert {
         require!(
             in_elem == arg_tys[2],
             InvalidMonomorphization::InsertedType {
                 span,
                 name,
                 in_elem,
                 in_ty,
                 out_ty: arg_tys[2]
             }
         );
         let vector = args[0].immediate();
         let index = args[1].immediate();
         let value = args[2].immediate();
         let variable = bx.current_func().new_local(None, vector.get_type(), "new_vector");
         bx.llbb().add_assignment(None, variable, vector);
         let lvalue = bx.context.new_vector_access(None, variable.to_rvalue(), index);
         // TODO(antoyo): if simd_insert is constant, use BIT_REF.
         bx.llbb().add_assignment(None, lvalue, value);
         return Ok(variable.to_rvalue());
     }

     #[cfg(feature = "master")]
     if name == sym::simd_extract {
         require!(
             ret_ty == in_elem,
             InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
         );
         let vector = args[0].immediate();
         return Ok(bx.context.new_vector_access(None, vector, args[1].immediate()).to_rvalue());
     }

     if name == sym::simd_select {
         let m_elem_ty = in_elem;
         let m_len = in_len;
         require_simd!(
             arg_tys[1],
             InvalidMonomorphization::SimdArgument { span, name, ty: arg_tys[1] }
         );
         let (v_len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
         require!(
             m_len == v_len,
             InvalidMonomorphization::MismatchedLengths { span, name, m_len, v_len }
         );
         match m_elem_ty.kind() {
             ty::Int(_) => {}
             _ => return_error!(InvalidMonomorphization::MaskType { span, name, ty: m_elem_ty }),
         }
         return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
     }

     #[cfg(feature = "master")]
     if name == sym::simd_cast || name == sym::simd_as {
         require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });
         let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
         require!(
             in_len == out_len,
             InvalidMonomorphization::ReturnLengthInputType {
                 span,
                 name,
                 in_len,
                 in_ty,
                 ret_ty,
                 out_len
             }
         );
         // casting cares about nominal type, not just structural type
         if in_elem == out_elem {
             return Ok(args[0].immediate());
         }

         enum Style {
             Float,
             Int,
             Unsupported,
         }

         let in_style = match in_elem.kind() {
             ty::Int(_) | ty::Uint(_) => Style::Int,
             ty::Float(_) => Style::Float,
             _ => Style::Unsupported,
         };

         let out_style = match out_elem.kind() {
             ty::Int(_) | ty::Uint(_) => Style::Int,
             ty::Float(_) => Style::Float,
             _ => Style::Unsupported,
         };

         match (in_style, out_style) {
             (Style::Unsupported, Style::Unsupported) => {
                 require!(
                     false,
                     InvalidMonomorphization::UnsupportedCast {
                         span,
                         name,
                         in_ty,
                         in_elem,
                         ret_ty,
                         out_elem
                     }
                 );
             }
             _ => return Ok(bx.context.convert_vector(None, args[0].immediate(), llret_ty)),
         }
     }

     macro_rules! arith_binary {
         ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => {
             $(if name == sym::$name {
                 match in_elem.kind() {
                     $($(ty::$p(_))|* => {
                         return Ok(bx.$call(args[0].immediate(), args[1].immediate()))
                     })*
                     _ => {},
                 }
                 return_error!(InvalidMonomorphization::UnsupportedOperation { span, name, in_ty, in_elem })
             })*
         }
     }

     if name == sym::simd_bitmask {
         // The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a
         // vector mask and returns the most significant bit (MSB) of each lane in the form
         // of either:
         // * an unsigned integer
         // * an array of `u8`
         // If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits.
         //
         // The bit order of the result depends on the byte endianness, LSB-first for little
         // endian and MSB-first for big endian.

         let vector = args[0].immediate();
         // TODO(antoyo): dyncast_vector should not require a call to unqualified.
         let vector_type = vector.get_type().unqualified().dyncast_vector().expect("vector type");
         let elem_type = vector_type.get_element_type();

         let expected_int_bits = in_len.max(8);
         let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);

         // FIXME(antoyo): that's not going to work for masks bigger than 128 bits.
         let result_type = bx.type_ix(expected_int_bits);
         let mut result = bx.context.new_rvalue_zero(result_type);

         let elem_size = elem_type.get_size() * 8;
         let sign_shift = bx.context.new_rvalue_from_int(elem_type, elem_size as i32 - 1);
         let one = bx.context.new_rvalue_one(elem_type);

         let mut shift = 0;
         for i in 0..in_len {
             let elem =
                 bx.extract_element(vector, bx.context.new_rvalue_from_int(bx.int_type, i as i32));
             let shifted = elem >> sign_shift;
             let masked = shifted & one;
             result = result
                 | (bx.context.new_cast(None, masked, result_type)
                     << bx.context.new_rvalue_from_int(result_type, shift));
             shift += 1;
         }

         match ret_ty.kind() {
             ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {
                 // Zero-extend iN to the bitmask type:
                 return Ok(result);
             }
             ty::Array(elem, len)
                 if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
                     && len.try_eval_target_usize(bx.tcx, ty::ParamEnv::reveal_all())
                         == Some(expected_bytes) =>
             {
                 // Zero-extend iN to the array length:
                 let ze = bx.zext(result, bx.type_ix(expected_bytes * 8));

                 // Convert the integer to a byte array
                 let ptr = bx.alloca(bx.type_ix(expected_bytes * 8), Align::ONE);
                 bx.store(ze, ptr, Align::ONE);
                 let array_ty = bx.type_array(bx.type_i8(), expected_bytes);
                 let ptr = bx.pointercast(ptr, bx.cx.type_ptr_to(array_ty));
                 return Ok(bx.load(array_ty, ptr, Align::ONE));
             }
             _ => return_error!(InvalidMonomorphization::CannotReturn {
                 span,
                 name,
                 ret_ty,
                 expected_int_bits,
                 expected_bytes
             }),
         }
     }

     fn simd_simple_float_intrinsic<'gcc, 'tcx>(
         name: Symbol,
         in_elem: Ty<'_>,
         in_ty: Ty<'_>,
         in_len: u64,
         bx: &mut Builder<'_, 'gcc, 'tcx>,
         span: Span,
         args: &[OperandRef<'tcx, RValue<'gcc>>],
     ) -> Result<RValue<'gcc>, ()> {
         macro_rules! return_error {
             ($err:expr) => {{
                 bx.sess().emit_err($err);
                 return Err(());
             }};
         }
         let (elem_ty_str, elem_ty) = if let ty::Float(f) = in_elem.kind() {
             let elem_ty = bx.cx.type_float_from_ty(*f);
             match f.bit_width() {
                 32 => ("f", elem_ty),
                 64 => ("", elem_ty),
                 _ => {
                     return_error!(InvalidMonomorphization::FloatingPointVector {
                         span,
                         name,
                         f_ty: *f,
                         in_ty
                     });
                 }
             }
         } else {
             return_error!(InvalidMonomorphization::FloatingPointType { span, name, in_ty });
         };

         let vec_ty = bx.cx.type_vector(elem_ty, in_len);

         let intr_name = match name {
             sym::simd_ceil => "ceil",
             sym::simd_fabs => "fabs", // TODO(antoyo): pand with 170141183420855150465331762880109871103
             sym::simd_fcos => "cos",
             sym::simd_fexp2 => "exp2",
             sym::simd_fexp => "exp",
             sym::simd_flog10 => "log10",
             sym::simd_flog2 => "log2",
             sym::simd_flog => "log",
             sym::simd_floor => "floor",
             sym::simd_fma => "fma",
             sym::simd_fpowi => "__builtin_powi",
             sym::simd_fpow => "pow",
             sym::simd_fsin => "sin",
             sym::simd_fsqrt => "sqrt",
             sym::simd_round => "round",
             sym::simd_trunc => "trunc",
             _ => return_error!(InvalidMonomorphization::UnrecognizedIntrinsic { span, name }),
         };
         let builtin_name = format!("{}{}", intr_name, elem_ty_str);
         let funcs = bx.cx.functions.borrow();
         let function = funcs
             .get(&builtin_name)
             .unwrap_or_else(|| panic!("unable to find builtin function {}", builtin_name));

         // TODO(antoyo): add platform-specific behavior here for architectures that have these
         // intrinsics as instructions (for instance, gpus)
         let mut vector_elements = vec![];
         for i in 0..in_len {
             let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
             // we have to treat fpowi specially, since fpowi's second argument is always an i32
             let arguments = if name == sym::simd_fpowi {
                 vec![
                     bx.extract_element(args[0].immediate(), index).to_rvalue(),
                     args[1].immediate(),
                 ]
             } else {
                 args.iter()
                     .map(|arg| bx.extract_element(arg.immediate(), index).to_rvalue())
                     .collect()
             };
             vector_elements.push(bx.context.new_call(None, *function, &arguments));
         }
         let c = bx.context.new_rvalue_from_vector(None, vec_ty, &vector_elements);
         Ok(c)
     }

     if std::matches!(
         name,
         sym::simd_ceil
             | sym::simd_fabs
             | sym::simd_fcos
             | sym::simd_fexp2
             | sym::simd_fexp
             | sym::simd_flog10
             | sym::simd_flog2
             | sym::simd_flog
             | sym::simd_floor
             | sym::simd_fma
             | sym::simd_fpow
             | sym::simd_fpowi
             | sym::simd_fsin
             | sym::simd_fsqrt
             | sym::simd_round
             | sym::simd_trunc
     ) {
         return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
     }

     #[cfg(feature = "master")]
     fn vector_ty<'gcc, 'tcx>(
         cx: &CodegenCx<'gcc, 'tcx>,
         elem_ty: Ty<'tcx>,
         vec_len: u64,
     ) -> Type<'gcc> {
         // FIXME: use cx.layout_of(ty).llvm_type() ?
         let elem_ty = match *elem_ty.kind() {
             ty::Int(v) => cx.type_int_from_ty(v),
             ty::Uint(v) => cx.type_uint_from_ty(v),
             ty::Float(v) => cx.type_float_from_ty(v),
             _ => unreachable!(),
         };
         cx.type_vector(elem_ty, vec_len)
     }

     #[cfg(feature = "master")]
     fn gather<'a, 'gcc, 'tcx>(
         default: RValue<'gcc>,
         pointers: RValue<'gcc>,
         mask: RValue<'gcc>,
         pointer_count: usize,
         bx: &mut Builder<'a, 'gcc, 'tcx>,
         in_len: u64,
         underlying_ty: Ty<'tcx>,
         invert: bool,
     ) -> RValue<'gcc> {
         let vector_type = if pointer_count > 1 {
             bx.context.new_vector_type(bx.usize_type, in_len)
         } else {
             vector_ty(bx, underlying_ty, in_len)
         };
         let elem_type = vector_type.dyncast_vector().expect("vector type").get_element_type();

         let mut values = vec![];
         for i in 0..in_len {
             let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64);
             let int = bx.context.new_vector_access(None, pointers, index).to_rvalue();

             let ptr_type = elem_type.make_pointer();
             let ptr = bx.context.new_bitcast(None, int, ptr_type);
             let value = ptr.dereference(None).to_rvalue();
             values.push(value);
         }

         let vector = bx.context.new_rvalue_from_vector(None, vector_type, &values);

         let mut mask_types = vec![];
         let mut mask_values = vec![];
         for i in 0..in_len {
             let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64);
             mask_types.push(bx.context.new_field(None, bx.i32_type, "m"));
             let mask_value = bx.context.new_vector_access(None, mask, index).to_rvalue();
             let masked = bx.context.new_rvalue_from_int(bx.i32_type, in_len as i32) & mask_value;
             let value = index + masked;
             mask_values.push(value);
         }
         let mask_type = bx.context.new_struct_type(None, "mask_type", &mask_types);
         let mask = bx.context.new_struct_constructor(None, mask_type.as_type(), None, &mask_values);

         if invert {
             bx.shuffle_vector(vector, default, mask)
         } else {
             bx.shuffle_vector(default, vector, mask)
         }
     }

     #[cfg(feature = "master")]
     if name == sym::simd_gather {
         // simd_gather(values: <N x T>, pointers: <N x *_ T>,
         //             mask: <N x i{M}>) -> <N x T>
         // * N: number of elements in the input vectors
         // * T: type of the element to load
         // * M: any integer width is supported, will be truncated to i1

         // All types must be simd vector types
         require_simd!(in_ty, InvalidMonomorphization::SimdFirst { span, name, ty: in_ty });
         require_simd!(
             arg_tys[1],
             InvalidMonomorphization::SimdSecond { span, name, ty: arg_tys[1] }
         );
         require_simd!(
             arg_tys[2],
             InvalidMonomorphization::SimdThird { span, name, ty: arg_tys[2] }
         );
         require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty });

         // Of the same length:
         let (out_len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
         let (out_len2, _) = arg_tys[2].simd_size_and_type(bx.tcx());
         require!(
             in_len == out_len,
             InvalidMonomorphization::SecondArgumentLength {
                 span,
                 name,
                 in_len,
                 in_ty,
                 arg_ty: arg_tys[1],
                 out_len
             }
         );
         require!(
             in_len == out_len2,
             InvalidMonomorphization::ThirdArgumentLength {
                 span,
                 name,
                 in_len,
                 in_ty,
                 arg_ty: arg_tys[2],
                 out_len: out_len2
             }
         );

         // The return type must match the first argument type
         require!(
             ret_ty == in_ty,
             InvalidMonomorphization::ExpectedReturnType { span, name, in_ty, ret_ty }
         );

         // This counts how many pointers
         fn ptr_count(t: Ty<'_>) -> usize {
             match t.kind() {
                 ty::RawPtr(p) => 1 + ptr_count(p.ty),
                 _ => 0,
             }
         }

         // Non-ptr type
         fn non_ptr(t: Ty<'_>) -> Ty<'_> {
             match t.kind() {
                 ty::RawPtr(p) => non_ptr(p.ty),
                 _ => t,
             }
         }

         // The second argument must be a simd vector with an element type that's a pointer
         // to the element type of the first argument
         let (_, element_ty0) = arg_tys[0].simd_size_and_type(bx.tcx());
         let (_, element_ty1) = arg_tys[1].simd_size_and_type(bx.tcx());
         let (pointer_count, underlying_ty) = match element_ty1.kind() {
             ty::RawPtr(p) if p.ty == in_elem => (ptr_count(element_ty1), non_ptr(element_ty1)),
             _ => {
                 require!(
                     false,
                     InvalidMonomorphization::ExpectedElementType {
                         span,
                         name,
                         expected_element: element_ty1,
                         second_arg: arg_tys[1],
                         in_elem,
                         in_ty,
                         mutability: ExpectedPointerMutability::Not,
                     }
                 );
                 unreachable!();
             }
         };
         assert!(pointer_count > 0);
         assert_eq!(pointer_count - 1, ptr_count(element_ty0));
         assert_eq!(underlying_ty, non_ptr(element_ty0));

         // The element type of the third argument must be a signed integer type of any width:
         let (_, element_ty2) = arg_tys[2].simd_size_and_type(bx.tcx());
         match element_ty2.kind() {
             ty::Int(_) => (),
             _ => {
                 require!(
                     false,
                     InvalidMonomorphization::ThirdArgElementType {
                         span,
                         name,
                         expected_element: element_ty2,
                         third_arg: arg_tys[2]
                     }
                 );
             }
         }

         return Ok(gather(
             args[0].immediate(),
             args[1].immediate(),
             args[2].immediate(),
             pointer_count,
             bx,
             in_len,
             underlying_ty,
             false,
         ));
     }

     #[cfg(feature = "master")]
     if name == sym::simd_scatter {
         // simd_scatter(values: <N x T>, pointers: <N x *mut T>,
         //             mask: <N x i{M}>) -> ()
         // * N: number of elements in the input vectors
         // * T: type of the element to load
         // * M: any integer width is supported, will be truncated to i1

         // All types must be simd vector types
         require_simd!(in_ty, InvalidMonomorphization::SimdFirst { span, name, ty: in_ty });
         require_simd!(
             arg_tys[1],
             InvalidMonomorphization::SimdSecond { span, name, ty: arg_tys[1] }
         );
         require_simd!(
             arg_tys[2],
             InvalidMonomorphization::SimdThird { span, name, ty: arg_tys[2] }
         );

         // Of the same length:
         let (element_len1, _) = arg_tys[1].simd_size_and_type(bx.tcx());
         let (element_len2, _) = arg_tys[2].simd_size_and_type(bx.tcx());
         require!(
             in_len == element_len1,
             InvalidMonomorphization::SecondArgumentLength {
                 span,
                 name,
                 in_len,
                 in_ty,
                 arg_ty: arg_tys[1],
                 out_len: element_len1
             }
         );
         require!(
             in_len == element_len2,
             InvalidMonomorphization::ThirdArgumentLength {
                 span,
                 name,
                 in_len,
                 in_ty,
                 arg_ty: arg_tys[2],
                 out_len: element_len2
             }
         );

         // This counts how many pointers
         fn ptr_count(t: Ty<'_>) -> usize {
             match t.kind() {
                 ty::RawPtr(p) => 1 + ptr_count(p.ty),
                 _ => 0,
             }
         }

         // Non-ptr type
         fn non_ptr(t: Ty<'_>) -> Ty<'_> {
             match t.kind() {
                 ty::RawPtr(p) => non_ptr(p.ty),
                 _ => t,
             }
         }

         // The second argument must be a simd vector with an element type that's a pointer
         // to the element type of the first argument
         let (_, element_ty0) = arg_tys[0].simd_size_and_type(bx.tcx());
         let (_, element_ty1) = arg_tys[1].simd_size_and_type(bx.tcx());
         let (_, element_ty2) = arg_tys[2].simd_size_and_type(bx.tcx());
         let (pointer_count, underlying_ty) = match element_ty1.kind() {
             ty::RawPtr(p) if p.ty == in_elem && p.mutbl == hir::Mutability::Mut => {
                 (ptr_count(element_ty1), non_ptr(element_ty1))
             }
             _ => {
                 require!(
                     false,
                     InvalidMonomorphization::ExpectedElementType {
                         span,
                         name,
                         expected_element: element_ty1,
                         second_arg: arg_tys[1],
                         in_elem,
                         in_ty,
                         mutability: ExpectedPointerMutability::Mut,
                     }
                 );
                 unreachable!();
             }
         };
         assert!(pointer_count > 0);
         assert_eq!(pointer_count - 1, ptr_count(element_ty0));
         assert_eq!(underlying_ty, non_ptr(element_ty0));

         // The element type of the third argument must be a signed integer type of any width:
         match element_ty2.kind() {
             ty::Int(_) => (),
             _ => {
                 require!(
                     false,
                     InvalidMonomorphization::ThirdArgElementType {
                         span,
                         name,
                         expected_element: element_ty2,
                         third_arg: arg_tys[2]
                     }
                 );
             }
         }

         let result = gather(
             args[0].immediate(),
             args[1].immediate(),
             args[2].immediate(),
             pointer_count,
             bx,
             in_len,
             underlying_ty,
             true,
         );

         let pointers = args[1].immediate();

         let vector_type = if pointer_count > 1 {
             bx.context.new_vector_type(bx.usize_type, in_len)
         } else {
             vector_ty(bx, underlying_ty, in_len)
         };
         let elem_type = vector_type.dyncast_vector().expect("vector type").get_element_type();

         for i in 0..in_len {
             let index = bx.context.new_rvalue_from_int(bx.int_type, i as i32);
             let value = bx.context.new_vector_access(None, result, index);

             let int = bx.context.new_vector_access(None, pointers, index).to_rvalue();
             let ptr_type = elem_type.make_pointer();
             let ptr = bx.context.new_bitcast(None, int, ptr_type);
             bx.llbb().add_assignment(None, ptr.dereference(None), value);
         }

         return Ok(bx.context.new_rvalue_zero(bx.i32_type));
     }

     arith_binary! {
         simd_add: Uint, Int => add, Float => fadd;
         simd_sub: Uint, Int => sub, Float => fsub;
         simd_mul: Uint, Int => mul, Float => fmul;
         simd_div: Uint => udiv, Int => sdiv, Float => fdiv;
         simd_rem: Uint => urem, Int => srem, Float => frem;
         simd_shl: Uint, Int => shl;
         simd_shr: Uint => lshr, Int => ashr;
         simd_and: Uint, Int => and;
         simd_or: Uint, Int => or; // FIXME(antoyo): calling `or` might not work on vectors.
         simd_xor: Uint, Int => xor;
         simd_fmin: Float => vector_fmin;
         simd_fmax: Float => vector_fmax;
     }

     macro_rules! arith_unary {
         ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => {
             $(if name == sym::$name {
                 match in_elem.kind() {
                     $($(ty::$p(_))|* => {
                         return Ok(bx.$call(args[0].immediate()))
                     })*
                     _ => {},
                 }
                 return_error!(InvalidMonomorphization::UnsupportedOperation { span, name, in_ty, in_elem })
             })*
         }
     }

     arith_unary! {
         simd_neg: Int => neg, Float => fneg;
     }

     #[cfg(feature = "master")]
     if name == sym::simd_saturating_add || name == sym::simd_saturating_sub {
         let lhs = args[0].immediate();
         let rhs = args[1].immediate();
         let is_add = name == sym::simd_saturating_add;
         let ptr_bits = bx.tcx().data_layout.pointer_size.bits() as _;
         let (signed, elem_width, elem_ty) = match *in_elem.kind() {
             ty::Int(i) => (true, i.bit_width().unwrap_or(ptr_bits) / 8, bx.cx.type_int_from_ty(i)),
             ty::Uint(i) => {
                 (false, i.bit_width().unwrap_or(ptr_bits) / 8, bx.cx.type_uint_from_ty(i))
             }
             _ => {
                 return_error!(InvalidMonomorphization::ExpectedVectorElementType {
                     span,
                     name,
                     expected_element: arg_tys[0].simd_size_and_type(bx.tcx()).1,
                     vector_type: arg_tys[0],
                 });
             }
         };

         let result = match (signed, is_add) {
             (false, true) => {
                 let res = lhs + rhs;
                 let cmp = bx.context.new_comparison(None, ComparisonOp::LessThan, res, lhs);
                 res | cmp
             }
             (true, true) => {
                 // Algorithm from: https://codereview.stackexchange.com/questions/115869/saturated-signed-addition
                 // TODO(antoyo): improve using conditional operators if possible.
                 // TODO(antoyo): dyncast_vector should not require a call to unqualified.
                 let arg_type = lhs.get_type().unqualified();
                 // TODO(antoyo): convert lhs and rhs to unsigned.
                 let sum = lhs + rhs;
                 let vector_type = arg_type.dyncast_vector().expect("vector type");
                 let unit = vector_type.get_num_units();
                 let a = bx.context.new_rvalue_from_int(elem_ty, ((elem_width as i32) << 3) - 1);
                 let width = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![a; unit]);

                 let xor1 = lhs ^ rhs;
                 let xor2 = lhs ^ sum;
                 let and =
                     bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, xor1) & xor2;
                 let mask = and >> width;

                 let one = bx.context.new_rvalue_one(elem_ty);
                 let ones =
                     bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![one; unit]);
                 let shift1 = ones << width;
                 let shift2 = sum >> width;
                 let mask_min = shift1 ^ shift2;

                 let and1 =
                     bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, mask) & sum;
                 let and2 = mask & mask_min;

                 and1 + and2
             }
             (false, false) => {
                 let res = lhs - rhs;
                 let cmp = bx.context.new_comparison(None, ComparisonOp::LessThanEquals, res, lhs);
                 res & cmp
             }
             (true, false) => {
                 // TODO(antoyo): dyncast_vector should not require a call to unqualified.
                 let arg_type = lhs.get_type().unqualified();
                 // TODO(antoyo): this uses the same algorithm from saturating add, but add the
                 // negative of the right operand. Find a proper subtraction algorithm.
                 let rhs = bx.context.new_unary_op(None, UnaryOp::Minus, arg_type, rhs);

                 // TODO(antoyo): convert lhs and rhs to unsigned.
                 let sum = lhs + rhs;
                 let vector_type = arg_type.dyncast_vector().expect("vector type");
                 let unit = vector_type.get_num_units();
                 let a = bx.context.new_rvalue_from_int(elem_ty, ((elem_width as i32) << 3) - 1);
                 let width = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![a; unit]);

                 let xor1 = lhs ^ rhs;
                 let xor2 = lhs ^ sum;
                 let and =
                     bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, xor1) & xor2;
                 let mask = and >> width;

                 let one = bx.context.new_rvalue_one(elem_ty);
                 let ones =
                     bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![one; unit]);
                 let shift1 = ones << width;
                 let shift2 = sum >> width;
                 let mask_min = shift1 ^ shift2;

                 let and1 =
                     bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, mask) & sum;
                 let and2 = mask & mask_min;

                 and1 + and2
             }
         };

         return Ok(result);
     }

     macro_rules! arith_red {
         ($name:ident : $vec_op:expr, $float_reduce:ident, $ordered:expr, $op:ident,
          $identity:expr) => {
             if name == sym::$name {
                 require!(
                     ret_ty == in_elem,
                     InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
                 );
                 return match in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => {
                         let r = bx.vector_reduce_op(args[0].immediate(), $vec_op);
                         if $ordered {
                             // if overflow occurs, the result is the
                             // mathematical result modulo 2^n:
                             Ok(bx.$op(args[1].immediate(), r))
                         } else {
                             Ok(bx.vector_reduce_op(args[0].immediate(), $vec_op))
                         }
                     }
                     ty::Float(_) => {
                         if $ordered {
                             // ordered arithmetic reductions take an accumulator
                             let acc = args[1].immediate();
                             Ok(bx.$float_reduce(acc, args[0].immediate()))
                         } else {
                             Ok(bx.vector_reduce_op(args[0].immediate(), $vec_op))
                         }
                     }
                     _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                         span,
                         name,
                         symbol: sym::$name,
                         in_ty,
                         in_elem,
                         ret_ty
                     }),
                 };
             }
         };
     }

     arith_red!(
         simd_reduce_add_unordered: BinaryOp::Plus,
         vector_reduce_fadd_fast,
         false,
         add,
         0.0 // TODO: Use this argument.
     );
     arith_red!(
         simd_reduce_mul_unordered: BinaryOp::Mult,
         vector_reduce_fmul_fast,
         false,
         mul,
         1.0
     );
     arith_red!(
         simd_reduce_add_ordered: BinaryOp::Plus,
         vector_reduce_fadd,
         true,
         add,
         0.0
     );
     arith_red!(
         simd_reduce_mul_ordered: BinaryOp::Mult,
         vector_reduce_fmul,
         true,
         mul,
         1.0
     );

     macro_rules! minmax_red {
         ($name:ident: $int_red:ident, $float_red:ident) => {
             if name == sym::$name {
                 require!(
                     ret_ty == in_elem,
                     InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
                 );
                 return match in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => Ok(bx.$int_red(args[0].immediate())),
                     ty::Float(_) => Ok(bx.$float_red(args[0].immediate())),
                     _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                         span,
                         name,
                         symbol: sym::$name,
                         in_ty,
                         in_elem,
                         ret_ty
                     }),
                 };
             }
         };
     }

     minmax_red!(simd_reduce_min: vector_reduce_min, vector_reduce_fmin);
     minmax_red!(simd_reduce_max: vector_reduce_max, vector_reduce_fmax);
     // TODO(sadlerap): revisit these intrinsics to generate more optimal reductions
     minmax_red!(simd_reduce_min_nanless: vector_reduce_min, vector_reduce_fmin);
     minmax_red!(simd_reduce_max_nanless: vector_reduce_max, vector_reduce_fmax);

     macro_rules! bitwise_red {
         ($name:ident : $op:expr, $boolean:expr) => {
             if name == sym::$name {
                 let input = if !$boolean {
                     require!(
                         ret_ty == in_elem,
                         InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
                     );
                     args[0].immediate()
                 } else {
                     match in_elem.kind() {
                         ty::Int(_) | ty::Uint(_) => {}
                         _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                             span,
                             name,
                             symbol: sym::$name,
                             in_ty,
                             in_elem,
                             ret_ty
                         }),
                     }

                     args[0].immediate()
                 };
                 return match in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => {
                         let r = bx.vector_reduce_op(input, $op);
                         Ok(if !$boolean {
                             r
                         } else {
                             bx.icmp(
                                 IntPredicate::IntNE,
                                 r,
                                 bx.context.new_rvalue_zero(r.get_type()),
                             )
                         })
                     }
                     _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                         span,
                         name,
                         symbol: sym::$name,
                         in_ty,
                         in_elem,
                         ret_ty
                     }),
                 };
             }
         };
     }

     bitwise_red!(simd_reduce_and: BinaryOp::BitwiseAnd, false);
     bitwise_red!(simd_reduce_or: BinaryOp::BitwiseOr, false);
     bitwise_red!(simd_reduce_xor: BinaryOp::BitwiseXor, false);
     bitwise_red!(simd_reduce_all: BinaryOp::BitwiseAnd, true);
     bitwise_red!(simd_reduce_any: BinaryOp::BitwiseOr, true);

     unimplemented!("simd {}", name);
 }