// vendor/encoding_rs/src/utf_8.rs - toolchain/rustc - Git at Google

 // Copyright Mozilla Foundation. See the COPYRIGHT
 // file at the top-level directory of this distribution.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

 use super::*;
 use crate::ascii::ascii_to_basic_latin;
 use crate::ascii::basic_latin_to_ascii;
 use crate::ascii::validate_ascii;
 use crate::handles::*;
 use crate::mem::convert_utf16_to_utf8_partial;
 use crate::variant::*;

 // Branch-prediction hints used by the hot loops in this file.
 //
 // With the `simd-accel` feature the compiler intrinsics are imported under
 // these names; without it, identity functions with the same names and
 // signatures are defined so call sites stay the same.
 #[cfg(feature = "simd-accel")]
 use ::core::intrinsics::unlikely;
 #[cfg(feature = "simd-accel")]
 use ::core::intrinsics::likely;

 /// Fallback for the `unlikely` intrinsic: returns `b` unchanged.
 #[cfg(not(feature = "simd-accel"))]
 #[inline(always)]
 fn unlikely(b: bool) -> bool {
     b
 }

 /// Fallback for the `likely` intrinsic: returns `b` unchanged.
 #[cfg(not(feature = "simd-accel"))]
 #[inline(always)]
 fn likely(b: bool) -> bool {
     b
 }

 /// Wrapper around the lookup table used by the UTF-8 validation code in
 /// this file.
 ///
 /// The code in this file reads `table` in two ways:
 ///
 /// * `table[second]`, where `second` is the byte that follows a lead byte
 ///   (indices `0..=0xFF`);
 /// * `table[lead + 0x80]`, where `lead` is a non-ASCII lead byte
 ///   (indices `0x100..=0x17F`).
 ///
 /// The two entries are ANDed together, and the sequence checks only pass
 /// when that AND is zero.
 #[repr(align(64))] // Align to cache lines
 pub struct Utf8Data {
     /// 256 entries indexed by second byte followed by 128 entries indexed by
     /// `lead + 0x80`.
     pub table: [u8; 384],
 }

 // BEGIN GENERATED CODE. PLEASE DO NOT EDIT.
 // Instead, please regenerate using generate-encoding-data.py

 // The table instance consulted by the validators and converters in this file.
 //
 // The first 256 entries are looked up by the byte following a lead byte; the
 // last 128 entries are looked up by `lead + 0x80` for a lead byte >= 0x80.
 // The callers AND one entry from each half and treat a non-zero result as
 // "this second byte is not allowed after this lead byte".
 //
 // NOTE: this comment sits inside the generated region; it has to be carried
 // over to generate-encoding-data.py to survive regeneration.
 pub static UTF8_DATA: Utf8Data = Utf8Data {
     table: [
         252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
         252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
         252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
         252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
         252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
         252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
         252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
         252, 252, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 148, 148, 148,
         148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 164, 164, 164, 164, 164,
         164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164,
         164, 164, 164, 164, 164, 164, 164, 164, 164, 252, 252, 252, 252, 252, 252, 252, 252, 252,
         252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
         252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
         252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
         252, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
         4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
         8, 8, 8, 8, 8, 8, 8, 16, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 32, 8, 8, 64, 8, 8, 8, 128, 4,
         4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     ],
 };

 // END GENERATED CODE

 /// Returns the length of the longest prefix of `src` that this function
 /// accepts as valid UTF-8.
 ///
 /// The result is `src.len()` when all of `src` is accepted. Otherwise it is
 /// the index of the lead byte of the first sequence that is rejected or
 /// that is cut short by the end of `src`.
 pub fn utf8_valid_up_to(src: &[u8]) -> usize {
     // Number of bytes of `src` accepted so far.
     let mut read = 0;
     'outer: loop {
         // Skip a run of ASCII. `validate_ascii` is defined elsewhere; as used
         // here, `None` means no non-ASCII byte was found in the remainder and
         // `Some((non_ascii, consumed))` yields the first non-ASCII byte
         // together with the number of bytes in front of it.
         let mut byte = {
             let src_remaining = &src[read..];
             match validate_ascii(src_remaining) {
                 None => {
                     return src.len();
                 }
                 Some((non_ascii, consumed)) => {
                     read += consumed;
                     non_ascii
                 }
             }
         };
         // Check for the longest sequence to avoid checking twice for the
         // multi-byte sequences. This can't overflow with 64-bit address space,
         // because full 64 bits aren't in use. In the 32-bit PAE case, for this
         // to overflow would mean that the source slice would be so large that
         // the address space of the process would not have space for any code.
         // Therefore, the slice cannot be so long that this would overflow.
         if likely(read + 4 <= src.len()) {
             // Invariant for every iteration of `'inner` and `'three`:
             // `read + 4 <= src.len()` and `byte == src[read]` is non-ASCII.
             // Every `continue 'inner` / `continue 'three` below is preceded
             // by a fresh `read + 4 <= src.len()` check, which is what makes
             // the unchecked reads at `read + 1 ..= read + 3` in bounds.
             'inner: loop {
                 // At this point, `byte` is not included in `read`, because we
                 // don't yet know that a) the UTF-8 sequence is valid and b) that there
                 // is output space if it is an astral sequence.
                 // Inspecting the lead byte directly is faster than what the
                 // std lib does!
                 if likely(in_inclusive_range8(byte, 0xC2, 0xDF)) {
                     // Two-byte
                     // SAFETY: `read + 1 < src.len()` by the loop invariant.
                     let second = unsafe { *(src.get_unchecked(read + 1)) };
                     if !in_inclusive_range8(second, 0x80, 0xBF) {
                         break 'outer;
                     }
                     read += 2;

                     // Next lead (manually inlined)
                     if likely(read + 4 <= src.len()) {
                         // SAFETY: `read < src.len()` by the check just above.
                         byte = unsafe { *(src.get_unchecked(read)) };
                         if byte < 0x80 {
                             read += 1;
                             continue 'outer;
                         }
                         continue 'inner;
                     }
                     break 'inner;
                 }
                 if likely(byte < 0xF0) {
                     // Also entered for bytes 0x80..=0xC1, whose table entry
                     // (4) intersects every second-byte entry, so the check
                     // below rejects them.
                     'three: loop {
                         // Three-byte
                         // SAFETY: `read + 2 < src.len()` by the loop invariant.
                         let second = unsafe { *(src.get_unchecked(read + 1)) };
                         let third = unsafe { *(src.get_unchecked(read + 2)) };
                         // The AND of the two table entries must be zero and
                         // the top two bits of `third` must be `10`.
                         // SAFETY (table): `byte` is a `u8`, so
                         // `byte as usize + 0x80 <= 0x17F < 384`.
                         if ((UTF8_DATA.table[usize::from(second)]
                             & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                             | (third >> 6))
                             != 2
                         {
                             break 'outer;
                         }
                         read += 3;

                         // Next lead (manually inlined)
                         if likely(read + 4 <= src.len()) {
                             // SAFETY: `read < src.len()` by the check just above.
                             byte = unsafe { *(src.get_unchecked(read)) };
                             if in_inclusive_range8(byte, 0xE0, 0xEF) {
                                 continue 'three;
                             }
                             if likely(byte < 0x80) {
                                 read += 1;
                                 continue 'outer;
                             }
                             continue 'inner;
                         }
                         break 'inner;
                     }
                 }
                 // Four-byte
                 // SAFETY: `read + 3 < src.len()` by the loop invariant.
                 let second = unsafe { *(src.get_unchecked(read + 1)) };
                 let third = unsafe { *(src.get_unchecked(read + 2)) };
                 let fourth = unsafe { *(src.get_unchecked(read + 3)) };
                 // 0x202 == 2 (top bits of `third` are `10`) | 0x200 (top bits
                 // of `fourth` are `10`), with a zero table intersection.
                 // SAFETY (table): `byte as usize + 0x80 <= 0x17F < 384`.
                 if (u16::from(
                     UTF8_DATA.table[usize::from(second)]
                         & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) },
                 ) | u16::from(third >> 6)
                     | (u16::from(fourth & 0xC0) << 2))
                     != 0x202
                 {
                     break 'outer;
                 }
                 read += 4;

                 // Next lead
                 if likely(read + 4 <= src.len()) {
                     // SAFETY: `read < src.len()` by the check just above.
                     byte = unsafe { *(src.get_unchecked(read)) };
                     if byte < 0x80 {
                         read += 1;
                         continue 'outer;
                     }
                     continue 'inner;
                 }
                 break 'inner;
             }
         }
         // We can't have a complete 4-byte sequence, but we could still have
         // one to three shorter sequences.
         // Fewer than four bytes remain here, so this part uses checked
         // indexing throughout.
         'tail: loop {
             // >= is better for bound check elision than ==
             if read >= src.len() {
                 break 'outer;
             }
             byte = src[read];
             // At this point, `byte` is not included in `read`, because we
             // don't yet know that a) the UTF-8 sequence is valid and b) that there
             // is output space if it is an astral sequence.
             // Inspecting the lead byte directly is faster than what the
             // std lib does!
             if byte < 0x80 {
                 read += 1;
                 continue 'tail;
             }
             if in_inclusive_range8(byte, 0xC2, 0xDF) {
                 // Two-byte
                 let new_read = read + 2;
                 if new_read > src.len() {
                     // Sequence is cut short by the end of `src`.
                     break 'outer;
                 }
                 let second = src[read + 1];
                 if !in_inclusive_range8(second, 0x80, 0xBF) {
                     break 'outer;
                 }
                 read += 2;
                 continue 'tail;
             }
             // We need to exclude four-byte lead bytes here, because the
             // lead-byte half of `UTF8_DATA.table` also has entries for them,
             // so the three-byte check below would not reject them by itself.
             if byte < 0xF0 {
                 // Three-byte
                 let new_read = read + 3;
                 if new_read > src.len() {
                     // Sequence is cut short by the end of `src`.
                     break 'outer;
                 }
                 let second = src[read + 1];
                 let third = src[read + 2];
                 // SAFETY (table): `byte as usize + 0x80 <= 0x17F < 384`.
                 if ((UTF8_DATA.table[usize::from(second)]
                     & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                     | (third >> 6))
                     != 2
                 {
                     break 'outer;
                 }
                 read += 3;
                 // `'tail` handles sequences shorter than 4, so
                 // there can't be another sequence after this one.
                 break 'outer;
             }
             // A lead byte >= 0xF0 cannot start a complete sequence in the
             // fewer than four bytes that remain.
             break 'outer;
         }
     }
     read
 }

 /// Converts UTF-8 from `src` into UTF-16 code units in `dst` until the
 /// first sequence that is rejected or cut short, until `src` is used up,
 /// or until `dst` cannot take the next character.
 ///
 /// Returns `(read, written)`: the number of bytes consumed from `src` and
 /// the number of code units stored in `dst`. Only complete, accepted
 /// sequences are counted in `read`.
 #[cfg_attr(feature = "cargo-clippy", allow(never_loop, cyclomatic_complexity))]
 pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usize, usize) {
     let mut read = 0;
     let mut written = 0;
     'outer: loop {
         // Copy a run of ASCII. `ascii_to_basic_latin` is defined elsewhere;
         // as used here, `None` means all `length` units were copied and
         // `Some((non_ascii, consumed))` yields the first non-ASCII byte and
         // the number of units copied before it.
         let mut byte = {
             let src_remaining = &src[read..];
             let dst_remaining = &mut dst[written..];
             let length = ::core::cmp::min(src_remaining.len(), dst_remaining.len());
             // SAFETY: `length` does not exceed the length of either slice.
             // NOTE(review): the callee's exact contract is not visible here —
             // confirm against `crate::ascii`.
             match unsafe {
                 ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
             } {
                 None => {
                     read += length;
                     written += length;
                     break 'outer;
                 }
                 Some((non_ascii, consumed)) => {
                     read += consumed;
                     written += consumed;
                     non_ascii
                 }
             }
         };
         // Check for the longest sequence to avoid checking twice for the
         // multi-byte sequences. This can't overflow with 64-bit address space,
         // because full 64 bits aren't in use. In the 32-bit PAE case, for this
         // to overflow would mean that the source slice would be so large that
         // the address space of the process would not have space for any code.
         // Therefore, the slice cannot be so long that this would overflow.
         if likely(read + 4 <= src.len()) {
             // Invariant for every iteration of `'inner` and `'three`:
             // `read + 4 <= src.len()`, `byte == src[read]` is non-ASCII and
             // `written < dst.len()`. Each `continue 'inner` / `continue 'three`
             // below is preceded by a `written == dst.len()` bail-out and a
             // fresh `read + 4 <= src.len()` check.
             'inner: loop {
                 // At this point, `byte` is not included in `read`, because we
                 // don't yet know that a) the UTF-8 sequence is valid and b) that there
                 // is output space if it is an astral sequence.
                 // We know, thanks to `ascii_to_basic_latin` that there is output
                 // space for at least one UTF-16 code unit, so no need to check
                 // for output space in the BMP cases.
                 // Inspecting the lead byte directly is faster than what the
                 // std lib does!
                 if likely(in_inclusive_range8(byte, 0xC2, 0xDF)) {
                     // Two-byte
                     // SAFETY: `read + 1 < src.len()` by the loop invariant.
                     let second = unsafe { *(src.get_unchecked(read + 1)) };
                     if !in_inclusive_range8(second, 0x80, 0xBF) {
                         break 'outer;
                     }
                     // SAFETY: `written < dst.len()` by the loop invariant.
                     unsafe {
                         *(dst.get_unchecked_mut(written)) =
                             ((u16::from(byte) & 0x1F) << 6) | (u16::from(second) & 0x3F)
                     };
                     read += 2;
                     written += 1;

                     // Next lead (manually inlined)
                     if written == dst.len() {
                         break 'outer;
                     }
                     if likely(read + 4 <= src.len()) {
                         // SAFETY: `read < src.len()` by the check just above.
                         byte = unsafe { *(src.get_unchecked(read)) };
                         if byte < 0x80 {
                             // SAFETY: `written != dst.len()` was checked above.
                             unsafe { *(dst.get_unchecked_mut(written)) = u16::from(byte) };
                             read += 1;
                             written += 1;
                             continue 'outer;
                         }
                         continue 'inner;
                     }
                     break 'inner;
                 }
                 if likely(byte < 0xF0) {
                     // Also entered for bytes 0x80..=0xC1, whose table entry
                     // (4) intersects every second-byte entry, so the check
                     // below rejects them.
                     'three: loop {
                         // Three-byte
                         // SAFETY: `read + 2 < src.len()` by the loop invariant.
                         let second = unsafe { *(src.get_unchecked(read + 1)) };
                         let third = unsafe { *(src.get_unchecked(read + 2)) };
                         // The AND of the two table entries must be zero and
                         // the top two bits of `third` must be `10`.
                         // SAFETY (table): `byte` is a `u8`, so
                         // `byte as usize + 0x80 <= 0x17F < 384`.
                         if ((UTF8_DATA.table[usize::from(second)]
                             & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                             | (third >> 6))
                             != 2
                         {
                             break 'outer;
                         }
                         let point = ((u16::from(byte) & 0xF) << 12)
                             | ((u16::from(second) & 0x3F) << 6)
                             | (u16::from(third) & 0x3F);
                         // SAFETY: `written < dst.len()` by the loop invariant.
                         unsafe { *(dst.get_unchecked_mut(written)) = point };
                         read += 3;
                         written += 1;

                         // Next lead (manually inlined)
                         if written == dst.len() {
                             break 'outer;
                         }
                         if likely(read + 4 <= src.len()) {
                             // SAFETY: `read < src.len()` by the check just above.
                             byte = unsafe { *(src.get_unchecked(read)) };
                             if in_inclusive_range8(byte, 0xE0, 0xEF) {
                                 continue 'three;
                             }
                             if likely(byte < 0x80) {
                                 // SAFETY: `written != dst.len()` was checked above.
                                 unsafe { *(dst.get_unchecked_mut(written)) = u16::from(byte) };
                                 read += 1;
                                 written += 1;
                                 continue 'outer;
                             }
                             continue 'inner;
                         }
                         break 'inner;
                     }
                 }
                 // Four-byte
                 // A surrogate pair needs two output slots; bail out if only
                 // one is left (`written < dst.len()` holds here).
                 if written + 1 == dst.len() {
                     break 'outer;
                 }
                 // SAFETY: `read + 3 < src.len()` by the loop invariant.
                 let second = unsafe { *(src.get_unchecked(read + 1)) };
                 let third = unsafe { *(src.get_unchecked(read + 2)) };
                 let fourth = unsafe { *(src.get_unchecked(read + 3)) };
                 // 0x202 == 2 (top bits of `third` are `10`) | 0x200 (top bits
                 // of `fourth` are `10`), with a zero table intersection.
                 // SAFETY (table): `byte as usize + 0x80 <= 0x17F < 384`.
                 if (u16::from(
                     UTF8_DATA.table[usize::from(second)]
                         & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) },
                 ) | u16::from(third >> 6)
                     | (u16::from(fourth & 0xC0) << 2))
                     != 0x202
                 {
                     break 'outer;
                 }
                 let point = ((u32::from(byte) & 0x7) << 18)
                     | ((u32::from(second) & 0x3F) << 12)
                     | ((u32::from(third) & 0x3F) << 6)
                     | (u32::from(fourth) & 0x3F);
                 // 0xD7C0 == 0xD800 - (0x10000 >> 10), so this is the high
                 // surrogate for `point`.
                 // SAFETY: `written + 1 < dst.len()` by the check at the top
                 // of the four-byte case.
                 unsafe { *(dst.get_unchecked_mut(written)) = (0xD7C0 + (point >> 10)) as u16 };
                 // Low surrogate: 0xDC00 plus the low ten bits of `point`.
                 unsafe {
                     *(dst.get_unchecked_mut(written + 1)) = (0xDC00 + (point & 0x3FF)) as u16
                 };
                 read += 4;
                 written += 2;

                 // Next lead
                 if written == dst.len() {
                     break 'outer;
                 }
                 if likely(read + 4 <= src.len()) {
                     // SAFETY: `read < src.len()` by the check just above.
                     byte = unsafe { *(src.get_unchecked(read)) };
                     if byte < 0x80 {
                         // SAFETY: `written != dst.len()` was checked above.
                         unsafe { *(dst.get_unchecked_mut(written)) = u16::from(byte) };
                         read += 1;
                         written += 1;
                         continue 'outer;
                     }
                     continue 'inner;
                 }
                 break 'inner;
             }
         }
         // We can't have a complete 4-byte sequence, but we could still have
         // one to three shorter sequences.
         // Fewer than four bytes remain here, so this part uses checked
         // indexing throughout.
         'tail: loop {
             // >= is better for bound check elision than ==
             if read >= src.len() || written >= dst.len() {
                 break 'outer;
             }
             byte = src[read];
             // At this point, `byte` is not included in `read`, because we
             // don't yet know that a) the UTF-8 sequence is valid and b) that there
             // is output space if it is an astral sequence.
             // Inspecting the lead byte directly is faster than what the
             // std lib does!
             if byte < 0x80 {
                 dst[written] = u16::from(byte);
                 read += 1;
                 written += 1;
                 continue 'tail;
             }
             if in_inclusive_range8(byte, 0xC2, 0xDF) {
                 // Two-byte
                 let new_read = read + 2;
                 if new_read > src.len() {
                     // Sequence is cut short by the end of `src`.
                     break 'outer;
                 }
                 let second = src[read + 1];
                 if !in_inclusive_range8(second, 0x80, 0xBF) {
                     break 'outer;
                 }
                 dst[written] = ((u16::from(byte) & 0x1F) << 6) | (u16::from(second) & 0x3F);
                 read += 2;
                 written += 1;
                 continue 'tail;
             }
             // We need to exclude four-byte lead bytes here, because the
             // lead-byte half of `UTF8_DATA.table` also has entries for them,
             // so the three-byte check below would not reject them by itself.
             if byte < 0xF0 {
                 // Three-byte
                 let new_read = read + 3;
                 if new_read > src.len() {
                     // Sequence is cut short by the end of `src`.
                     break 'outer;
                 }
                 let second = src[read + 1];
                 let third = src[read + 2];
                 // SAFETY (table): `byte as usize + 0x80 <= 0x17F < 384`.
                 if ((UTF8_DATA.table[usize::from(second)]
                     & unsafe { *(UTF8_DATA.table.get_unchecked(byte as usize + 0x80)) })
                     | (third >> 6))
                     != 2
                 {
                     break 'outer;
                 }
                 let point = ((u16::from(byte) & 0xF) << 12)
                     | ((u16::from(second) & 0x3F) << 6)
                     | (u16::from(third) & 0x3F);
                 dst[written] = point;
                 read += 3;
                 written += 1;
                 // `'tail` handles sequences shorter than 4, so
                 // there can't be another sequence after this one.
                 break 'outer;
             }
             // A lead byte >= 0xF0 cannot start a complete sequence in the
             // fewer than four bytes that remain.
             break 'outer;
         }
     }
     (read, written)
 }

 /// State of a UTF-8 decoder that can be suspended in the middle of a
 /// multi-byte sequence.
 ///
 /// The decoder is in its neutral state when `bytes_needed` is 0.
 pub struct Utf8Decoder {
     /// Bits of the code point accumulated so far for the pending sequence.
     code_point: u32,
     /// Continuation bytes accepted so far for the pending sequence; the lead
     /// byte is not counted. 0 in the neutral state.
     bytes_seen: usize,
     /// Continuation bytes the pending sequence requires in total (1, 2 or
     /// 3); the lead byte is not counted. 0 in the neutral state.
     bytes_needed: usize,
     /// Smallest value accepted for the next continuation byte (inclusive).
     lower_boundary: u8,
     /// Largest value accepted for the next continuation byte (inclusive).
     upper_boundary: u8,
 }

 impl Utf8Decoder {
     /// Creates a decoder in the neutral state: no pending sequence and the
     /// default continuation-byte range `0x80..=0xBF`.
     pub fn new_inner() -> Utf8Decoder {
         Utf8Decoder {
             code_point: 0,
             bytes_seen: 0,
             bytes_needed: 0,
             lower_boundary: 0x80u8,
             upper_boundary: 0xBFu8,
         }
     }

     /// Creates a neutral-state decoder wrapped in `VariantDecoder::Utf8`.
     pub fn new() -> VariantDecoder {
         VariantDecoder::Utf8(Utf8Decoder::new_inner())
     }

     /// Returns `true` when no multi-byte sequence is pending.
     pub fn in_neutral_state(&self) -> bool {
         self.bytes_needed == 0
     }

     /// Number of already-consumed bytes that belong to the pending sequence:
     /// 0 in the neutral state, otherwise the lead byte plus the continuation
     /// bytes seen so far.
     fn extra_from_state(&self) -> usize {
         if self.bytes_needed == 0 {
             0
         } else {
             self.bytes_seen + 1
         }
     }

     /// Returns `byte_length + 1 + extra_from_state()`, or `None` if that
     /// overflows `usize`.
     pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
         byte_length.checked_add(1 + self.extra_from_state())
     }

     /// Returns `byte_length + 3 + extra_from_state()`, or `None` if that
     /// overflows `usize`.
     pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
         byte_length.checked_add(3 + self.extra_from_state())
     }

     /// Returns `3 + 3 * (byte_length + extra_from_state())`.
     ///
     /// `checked_add` and `checked_mul` are helpers defined elsewhere;
     /// presumably they yield `None` on overflow — confirm at their
     /// definition.
     pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
         checked_add(
             3,
             checked_mul(3, byte_length.checked_add(self.extra_from_state())),
         )
     }

     // `decoder_functions!` is defined elsewhere. The roles given to its block
     // arguments in the comments below are inferred from the blocks' contents
     // — confirm against the macro definition.
     decoder_functions!(
         // First block: intentionally empty.
         {},
         // Second block: bulk-copy path.
         {
             // This is the fast path. The rest runs only at the
             // start and end for partial sequences.
             if self.bytes_needed == 0 {
                 dest.copy_utf8_up_to_invalid_from(&mut source);
             }
         },
         // Third block: a sequence is still pending when the input ends.
         // Report the pending bytes (lead + continuations seen) as malformed
         // and clear the pending-sequence counters.
         // NOTE(review): unlike the invalid-continuation branch further down,
         // this branch does not restore `lower_boundary` / `upper_boundary`.
         // Presumably harmless because the decoder is not used again after
         // the end of the stream — confirm.
         {
             if self.bytes_needed != 0 {
                 let bad_bytes = (self.bytes_seen + 1) as u8;
                 self.code_point = 0;
                 self.bytes_needed = 0;
                 self.bytes_seen = 0;
                 return (
                     DecoderResult::Malformed(bad_bytes, 0),
                     src_consumed,
                     dest.written(),
                 );
             }
         },
         // Fourth block: byte-at-a-time state machine operating on `b`.
         {
             if self.bytes_needed == 0 {
                 // Neutral state: `b` is either ASCII, a lead byte or invalid.
                 if b < 0x80u8 {
                     destination_handle.write_ascii(b);
                     continue;
                 }
                 if b < 0xC2u8 {
                     // 0x80..=0xC1 cannot start a sequence.
                     return (
                         DecoderResult::Malformed(1, 0),
                         unread_handle.consumed(),
                         destination_handle.written(),
                     );
                 }
                 if b < 0xE0u8 {
                     // Two-byte lead: one continuation byte needed.
                     self.bytes_needed = 1;
                     self.code_point = u32::from(b) & 0x1F;
                     continue;
                 }
                 if b < 0xF0u8 {
                     // Three-byte lead. 0xE0 and 0xED narrow the range of the
                     // first continuation byte.
                     if b == 0xE0u8 {
                         self.lower_boundary = 0xA0u8;
                     } else if b == 0xEDu8 {
                         self.upper_boundary = 0x9Fu8;
                     }
                     self.bytes_needed = 2;
                     self.code_point = u32::from(b) & 0xF;
                     continue;
                 }
                 if b < 0xF5u8 {
                     // Four-byte lead. 0xF0 and 0xF4 narrow the range of the
                     // first continuation byte.
                     if b == 0xF0u8 {
                         self.lower_boundary = 0x90u8;
                     } else if b == 0xF4u8 {
                         self.upper_boundary = 0x8Fu8;
                     }
                     self.bytes_needed = 3;
                     self.code_point = u32::from(b) & 0x7;
                     continue;
                 }
                 // 0xF5..=0xFF cannot start a sequence.
                 return (
                     DecoderResult::Malformed(1, 0),
                     unread_handle.consumed(),
                     destination_handle.written(),
                 );
             }
             // self.bytes_needed != 0
             if !(b >= self.lower_boundary && b <= self.upper_boundary) {
                 // `b` is not an acceptable continuation byte. Report the
                 // bytes of the pending sequence as malformed, return to the
                 // neutral state and hand `b` back via `unread()`.
                 let bad_bytes = (self.bytes_seen + 1) as u8;
                 self.code_point = 0;
                 self.bytes_needed = 0;
                 self.bytes_seen = 0;
                 self.lower_boundary = 0x80u8;
                 self.upper_boundary = 0xBFu8;
                 return (
                     DecoderResult::Malformed(bad_bytes, 0),
                     unread_handle.unread(),
                     destination_handle.written(),
                 );
             }
             // Accepted continuation byte. A narrowed range applies only to
             // the first continuation byte, so restore the default range.
             self.lower_boundary = 0x80u8;
             self.upper_boundary = 0xBFu8;
             self.code_point = (self.code_point << 6) | (u32::from(b) & 0x3F);
             self.bytes_seen += 1;
             if self.bytes_seen != self.bytes_needed {
                 continue;
             }
             // Sequence complete: three continuation bytes means a four-byte
             // sequence, which is written through `write_astral`.
             if self.bytes_needed == 3 {
                 destination_handle.write_astral(self.code_point);
             } else {
                 destination_handle.write_bmp_excl_ascii(self.code_point as u16);
             }
             self.code_point = 0;
             self.bytes_needed = 0;
             self.bytes_seen = 0;
             continue;
         },
         self,
         src_consumed,
         dest,
         source,
         b,
         destination_handle,
         unread_handle,
         check_space_astral
     );
 }

 /// Converts UTF-16 code units from `src` into UTF-8 bytes in `dst`,
 /// writing `EF BF BD` (U+FFFD) in place of each unpaired surrogate.
 ///
 /// Returns `(read, written)`: the number of code units consumed from `src`
 /// and the number of bytes stored in `dst`. The function returns when
 /// `src` is used up, or when a code unit still has to be converted and
 /// `dst` has fewer than four bytes left for a non-ASCII unit (or no byte
 /// left for an ASCII unit).
 ///
 /// # Panics
 ///
 /// Panics if `written + 4` overflows `usize`.
 #[cfg_attr(feature = "cargo-clippy", allow(never_loop))]
 #[inline(never)]
 pub fn convert_utf16_to_utf8_partial_inner(src: &[u16], dst: &mut [u8]) -> (usize, usize) {
     let mut read = 0;
     let mut written = 0;
     'outer: loop {
         // Copy a run of ASCII. `basic_latin_to_ascii` is defined elsewhere;
         // as used here, `None` means all `length` units were copied and
         // `Some((non_ascii, consumed))` yields the first non-ASCII unit and
         // the number of units copied before it.
         let mut unit = {
             let src_remaining = &src[read..];
             let dst_remaining = &mut dst[written..];
             // The smaller of the two remaining lengths.
             let length = if dst_remaining.len() < src_remaining.len() {
                 dst_remaining.len()
             } else {
                 src_remaining.len()
             };
             // SAFETY: `length` does not exceed the length of either slice.
             // NOTE(review): the callee's exact contract is not visible here —
             // confirm against `crate::ascii`.
             match unsafe {
                 basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
             } {
                 None => {
                     read += length;
                     written += length;
                     return (read, written);
                 }
                 Some((non_ascii, consumed)) => {
                     read += consumed;
                     written += consumed;
                     non_ascii
                 }
             }
         };
         // On every iteration of `'inner`, `unit == src[read]` is a non-ASCII
         // code unit that has not been counted in `read` yet.
         'inner: loop {
             // The following loop is only broken out of as a goto forward.
             loop {
                 // Unfortunately, this check isn't enough for the compiler to elide
                 // the bound checks on writes to dst, which is why they are manually
                 // elided, which makes a measurable difference.
                 // Four bytes is the longest output for one iteration, so
                 // after this check every unchecked write below is in bounds.
                 if written.checked_add(4).unwrap() > dst.len() {
                     return (read, written);
                 }
                 read += 1;
                 if unit < 0x800 {
                     // Two-byte sequence.
                     // SAFETY: `written + 4 <= dst.len()` was checked above.
                     unsafe {
                         *(dst.get_unchecked_mut(written)) = (unit >> 6) as u8 | 0xC0u8;
                         written += 1;
                         *(dst.get_unchecked_mut(written)) = (unit & 0x3F) as u8 | 0x80u8;
                         written += 1;
                     }
                     break;
                 }
                 // After the wrapping subtraction, surrogates map to
                 // 0..=0x7FF and everything else to a larger value.
                 let unit_minus_surrogate_start = unit.wrapping_sub(0xD800);
                 if likely(unit_minus_surrogate_start > (0xDFFF - 0xD800)) {
                     // Non-surrogate: three-byte sequence.
                     // SAFETY: `written + 4 <= dst.len()` was checked above.
                     unsafe {
                         *(dst.get_unchecked_mut(written)) = (unit >> 12) as u8 | 0xE0u8;
                         written += 1;
                         *(dst.get_unchecked_mut(written)) = ((unit & 0xFC0) >> 6) as u8 | 0x80u8;
                         written += 1;
                         *(dst.get_unchecked_mut(written)) = (unit & 0x3F) as u8 | 0x80u8;
                         written += 1;
                     }
                     break;
                 }
                 if likely(unit_minus_surrogate_start <= (0xDBFF - 0xD800)) {
                     // high surrogate
                     // read > src.len() is impossible, but using
                     // >= instead of == allows the compiler to elide a bound check.
                     if read >= src.len() {
                         debug_assert_eq!(read, src.len());
                         // Unpaired surrogate at the end of the buffer.
                         // SAFETY: `written + 4 <= dst.len()` was checked above.
                         unsafe {
                             *(dst.get_unchecked_mut(written)) = 0xEFu8;
                             written += 1;
                             *(dst.get_unchecked_mut(written)) = 0xBFu8;
                             written += 1;
                             *(dst.get_unchecked_mut(written)) = 0xBDu8;
                             written += 1;
                         }
                         return (read, written);
                     }
                     let second = src[read];
                     let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
                     if likely(second_minus_low_surrogate_start <= (0xDFFF - 0xDC00)) {
                         // The next code unit is a low surrogate. Advance position.
                         read += 1;
                         // Combine the pair into a scalar value: the
                         // subtracted constant removes both surrogate bases
                         // and adds the 0x10000 offset in one step.
                         let astral = (u32::from(unit) << 10) + u32::from(second)
                             - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);
                         // Four-byte sequence.
                         // SAFETY: `written + 4 <= dst.len()` was checked above.
                         unsafe {
                             *(dst.get_unchecked_mut(written)) = (astral >> 18) as u8 | 0xF0u8;
                             written += 1;
                             *(dst.get_unchecked_mut(written)) =
                                 ((astral & 0x3F000u32) >> 12) as u8 | 0x80u8;
                             written += 1;
                             *(dst.get_unchecked_mut(written)) =
                                 ((astral & 0xFC0u32) >> 6) as u8 | 0x80u8;
                             written += 1;
                             *(dst.get_unchecked_mut(written)) = (astral & 0x3F) as u8 | 0x80u8;
                             written += 1;
                         }
                         break;
                     }
                     // The next code unit is not a low surrogate. Don't advance
                     // position and treat the high surrogate as unpaired.
                     // Fall through
                 }
                 // Unpaired surrogate: either a low surrogate, or a high
                 // surrogate that fell through from above. Write U+FFFD.
                 // SAFETY: `written + 4 <= dst.len()` was checked above.
                 unsafe {
                     *(dst.get_unchecked_mut(written)) = 0xEFu8;
                     written += 1;
                     *(dst.get_unchecked_mut(written)) = 0xBFu8;
                     written += 1;
                     *(dst.get_unchecked_mut(written)) = 0xBDu8;
                     written += 1;
                 }
                 break;
             }
             // Now see if the next unit is Basic Latin
             // read > src.len() is impossible, but using
             // >= instead of == allows the compiler to elide a bound check.
             if read >= src.len() {
                 debug_assert_eq!(read, src.len());
                 return (read, written);
             }
             unit = src[read];
             if unlikely(unit < 0x80) {
                 // written > dst.len() is impossible, but using
                 // >= instead of == allows the compiler to elide a bound check.
                 if written >= dst.len() {
                     debug_assert_eq!(written, dst.len());
                     return (read, written);
                 }
                 dst[written] = unit as u8;
                 read += 1;
                 written += 1;
                 // Mysteriously, adding a punctuation check here makes
                 // the expected beneficiary cases *slower*!
                 continue 'outer;
             }
             continue 'inner;
         }
     }
 }

 /// Cold-path tail of the UTF-16 to UTF-8 conversion, used when `dst` is
 /// almost full.
 ///
 /// Following the comments in the body, the caller is expected to hand in a
 /// `dst` with at most three free bytes, so a four-byte (astral) sequence can
 /// never fit. `src` must be non-empty because its first unit is read
 /// unconditionally.
 ///
 /// Returns `(read, written)`: the number of UTF-16 code units consumed from
 /// `src` and the number of bytes stored into `dst`. An unpaired surrogate is
 /// written as U+FFFD; a valid surrogate pair is left unconsumed.
 #[inline(never)]
 pub fn convert_utf16_to_utf8_partial_tail(src: &[u16], dst: &mut [u8]) -> (usize, usize) {
     // Everything below is cold code!
     let mut consumed = 0;
     let mut produced = 0;
     let mut unit = src[consumed];
     // At most 3 output slots are available, so an astral character
     // will not fit.
     if unit < 0x800 {
         // Once we start with a one- or two-byte character, fewer than three
         // slots remain afterwards, so this loop only ever leaves via `return`.
         loop {
             match unit {
                 0..=0x7F => {
                     if produced >= dst.len() {
                         return (consumed, produced);
                     }
                     consumed += 1;
                     dst[produced] = unit as u8;
                     produced += 1;
                 }
                 0x80..=0x7FF => {
                     if produced + 2 > dst.len() {
                         return (consumed, produced);
                     }
                     consumed += 1;
                     dst[produced] = (unit >> 6) as u8 | 0xC0u8;
                     dst[produced + 1] = (unit & 0x3F) as u8 | 0x80u8;
                     produced += 2;
                 }
                 // Needs three bytes, which can no longer fit.
                 _ => return (consumed, produced),
             }
             // consumed > src.len() is impossible, but using
             // >= instead of == allows the compiler to elide a bound check.
             if consumed >= src.len() {
                 debug_assert_eq!(consumed, src.len());
                 return (consumed, produced);
             }
             unit = src[consumed];
         }
     }
     // The unit may be an unpaired surrogate, but either way three output
     // slots are required.
     if produced + 3 > dst.len() {
         return (consumed, produced);
     }
     consumed += 1;
     let surrogate_offset = unit.wrapping_sub(0xD800);
     if surrogate_offset <= (0xDFFF - 0xD800) {
         let is_high = surrogate_offset <= (0xDBFF - 0xD800);
         if is_high
             && consumed < src.len()
             && in_inclusive_range16(src[consumed], 0xDC00, 0xDFFF)
         {
             // Valid surrogate pair, but we know it won't fit: hand the
             // high surrogate back to the caller.
             return (consumed - 1, produced);
         }
         // Unpaired high or unpaired low surrogate.
         unit = 0xFFFD;
     }
     let encoded = [
         (unit >> 12) as u8 | 0xE0u8,
         ((unit >> 6) & 0x3F) as u8 | 0x80u8,
         (unit & 0x3F) as u8 | 0x80u8,
     ];
     dst[produced..produced + 3].copy_from_slice(&encoded);
     produced += 3;
     debug_assert_eq!(produced, dst.len());
     (consumed, produced)
 }

 /// UTF-8 encoder. A field-less unit struct, so it carries no state of its own.
 pub struct Utf8Encoder;

 impl Utf8Encoder {
     /// Builds an `Encoder` for `encoding` whose variant is
     /// `VariantEncoder::Utf8`.
     pub fn new(encoding: &'static Encoding) -> Encoder {
         let variant = VariantEncoder::Utf8(Utf8Encoder);
         Encoder::new(encoding, variant)
     }

     /// Upper bound on the output size for `u16_length` UTF-16 code units:
     /// three bytes per unit, or `None` if that multiplication overflows.
     pub fn max_buffer_length_from_utf16_without_replacement(
         &self,
         u16_length: usize,
     ) -> Option<usize> {
         3usize.checked_mul(u16_length)
     }

     /// Upper bound on the output size for `byte_length` bytes of UTF-8
     /// input: the same number of bytes.
     pub fn max_buffer_length_from_utf8_without_replacement(
         &self,
         byte_length: usize,
     ) -> Option<usize> {
         Some(byte_length)
     }

     /// Encodes UTF-16 `src` into `dst` by delegating to
     /// `convert_utf16_to_utf8_partial`.
     ///
     /// Returns `(result, read, written)`. The result is `InputEmpty` when
     /// every unit of `src` was consumed and `OutputFull` otherwise. `_last`
     /// is ignored.
     pub fn encode_from_utf16_raw(
         &mut self,
         src: &[u16],
         dst: &mut [u8],
         _last: bool,
     ) -> (EncoderResult, usize, usize) {
         let (read, written) = convert_utf16_to_utf8_partial(src, dst);
         let status = if read != src.len() {
             EncoderResult::OutputFull
         } else {
             EncoderResult::InputEmpty
         };
         (status, read, written)
     }

     /// Copies UTF-8 `src` into `dst`, truncating at a character boundary
     /// when `dst` is too small.
     ///
     /// Returns `(result, read, written)`; `read` and `written` are always
     /// equal because the bytes are copied verbatim. `_last` is ignored.
     pub fn encode_from_utf8_raw(
         &mut self,
         src: &str,
         dst: &mut [u8],
         _last: bool,
     ) -> (EncoderResult, usize, usize) {
         let bytes = src.as_bytes();
         let capacity = dst.len();
         if bytes.len() <= capacity {
             // Everything fits: one straight copy.
             let total = bytes.len();
             dst[..total].copy_from_slice(bytes);
             return (EncoderResult::InputEmpty, total, total);
         }
         // `capacity < bytes.len()` here, so `capacity` indexes into `src`.
         // Move back until we find a UTF-8 sequence boundary.
         let mut cut = capacity;
         while !src.is_char_boundary(cut) {
             cut -= 1;
         }
         dst[..cut].copy_from_slice(&bytes[..cut]);
         (EncoderResult::OutputFull, cut, cut)
     }
 }

 // Any copyright to the test code below this comment is dedicated to the
 // Public Domain. http://creativecommons.org/publicdomain/zero/1.0/

 #[cfg(all(test, feature = "alloc"))]
 mod tests {
     use super::super::testing::*;
     use super::super::*;

     //    fn decode_utf8_to_utf16(bytes: &[u8], expect: &[u16]) {
     //        decode_to_utf16_without_replacement(UTF_8, bytes, expect);
     //    }

     /// Forwards to the shared `decode_to_utf8` test helper with the encoding
     /// fixed to `UTF_8`.
     // NOTE(review): presumably asserts that decoding `bytes` yields `expect`;
     // the helper lives in the `testing` module — confirm there.
     fn decode_utf8_to_utf8(bytes: &[u8], expect: &str) {
         decode_to_utf8(UTF_8, bytes, expect);
     }

     /// Round-trip check for well-formed input: passes the bytes of `string`
     /// as the input and `string` itself as the expected result.
     fn decode_valid_utf8(string: &str) {
         decode_utf8_to_utf8(string.as_bytes(), string);
     }

     /// Forwards to the shared `encode_from_utf16` test helper with the
     /// encoding fixed to `UTF_8`.
     // NOTE(review): presumably asserts that encoding `string` yields
     // `expect`; the helper lives in the `testing` module — confirm there.
     fn encode_utf8_from_utf16(string: &[u16], expect: &[u8]) {
         encode_from_utf16(UTF_8, string, expect);
     }

     /// Forwards to the shared `encode_from_utf8` test helper with the
     /// encoding fixed to `UTF_8`.
     // NOTE(review): presumably asserts that encoding `string` yields
     // `expect`; the helper lives in the `testing` module — confirm there.
     fn encode_utf8_from_utf8(string: &str, expect: &[u8]) {
         encode_from_utf8(UTF_8, string, expect);
     }

     /// Encodes `string` into a `limit`-byte buffer and checks the outcome,
     /// then repeats the encode into a 64-byte patterned buffer to verify that
     /// nothing beyond the reported write position was touched.
     ///
     /// * `expect` - text whose UTF-8 bytes must equal the written output.
     /// * `expect_result` - the `EncoderResult` the first encode must return;
     ///   when it is `InputEmpty`, all of `string` must also have been read.
     fn encode_utf8_from_utf16_with_output_limit(
         string: &[u16],
         expect: &str,
         limit: usize,
         expect_result: EncoderResult,
     ) {
         // Pass 1: exactly `limit` bytes of room.
         let mut limited = Vec::new();
         limited.resize(limit, 0u8);
         let mut first_encoder = UTF_8.new_encoder();
         let (result, read, written) =
             first_encoder.encode_from_utf16_without_replacement(string, &mut limited, false);
         assert_eq!(result, expect_result);
         if expect_result == EncoderResult::InputEmpty {
             assert_eq!(read, string.len());
         }
         assert_eq!(&limited[..written], expect.as_bytes());

         // Pass 2: a 64-byte buffer where every byte holds its own index, so
         // any write past the reported length shows up as a mismatch.
         let mut canary: Vec<u8> = (0u8..64).collect();
         let mut second_encoder = UTF_8.new_encoder();
         let (_, _, untouched_from) =
             second_encoder.encode_from_utf16_without_replacement(string, &mut canary, false);
         for (offset, &byte) in canary.iter().enumerate().skip(untouched_from) {
             assert_eq!(usize::from(byte), offset);
         }
     }

     /// Decoding cases for UTF-8: well-formed text, truncated sequences, lone
     /// continuations, overlong forms, surrogates and range boundaries.
     #[test]
     fn test_utf8_decode() {
         // Short local names keep the case list compact:
         // `round_trip(text)` - well-formed text must decode to itself.
         // `check(bytes, expect)` - `bytes` must decode to `expect`.
         let round_trip = decode_valid_utf8;
         let check = decode_utf8_to_utf8;

         // Empty
         round_trip("");
         // ASCII
         round_trip("ab");
         // Low BMP
         round_trip("a\u{E4}Z");
         // High BMP
         round_trip("a\u{2603}Z");
         // Astral
         round_trip("a\u{1F4A9}Z");
         // Low BMP with last byte missing
         check(b"a\xC3Z", "a\u{FFFD}Z");
         check(b"a\xC3", "a\u{FFFD}");
         // High BMP with last byte missing
         check(b"a\xE2\x98Z", "a\u{FFFD}Z");
         check(b"a\xE2\x98", "a\u{FFFD}");
         // Astral with last byte missing
         check(b"a\xF0\x9F\x92Z", "a\u{FFFD}Z");
         check(b"a\xF0\x9F\x92", "a\u{FFFD}");
         // Lone highest continuation
         check(b"a\xBFZ", "a\u{FFFD}Z");
         check(b"a\xBF", "a\u{FFFD}");
         // Two lone highest continuations
         check(b"a\xBF\xBFZ", "a\u{FFFD}\u{FFFD}Z");
         check(b"a\xBF\xBF", "a\u{FFFD}\u{FFFD}");
         // Low BMP followed by lowest lone continuation
         check(b"a\xC3\xA4\x80Z", "a\u{E4}\u{FFFD}Z");
         check(b"a\xC3\xA4\x80", "a\u{E4}\u{FFFD}");
         // Low BMP followed by highest lone continuation
         check(b"a\xC3\xA4\xBFZ", "a\u{E4}\u{FFFD}Z");
         check(b"a\xC3\xA4\xBF", "a\u{E4}\u{FFFD}");
         // High BMP followed by lowest lone continuation
         check(b"a\xE2\x98\x83\x80Z", "a\u{2603}\u{FFFD}Z");
         check(b"a\xE2\x98\x83\x80", "a\u{2603}\u{FFFD}");
         // High BMP followed by highest lone continuation
         check(b"a\xE2\x98\x83\xBFZ", "a\u{2603}\u{FFFD}Z");
         check(b"a\xE2\x98\x83\xBF", "a\u{2603}\u{FFFD}");
         // Astral followed by lowest lone continuation
         check(b"a\xF0\x9F\x92\xA9\x80Z", "a\u{1F4A9}\u{FFFD}Z");
         check(b"a\xF0\x9F\x92\xA9\x80", "a\u{1F4A9}\u{FFFD}");
         // Astral followed by highest lone continuation
         check(b"a\xF0\x9F\x92\xA9\xBFZ", "a\u{1F4A9}\u{FFFD}Z");
         check(b"a\xF0\x9F\x92\xA9\xBF", "a\u{1F4A9}\u{FFFD}");

         // Boundary conditions
         // Lowest single-byte
         round_trip("Z\x00");
         round_trip("Z\x00Z");
         // Lowest single-byte as two-byte overlong sequence
         check(b"a\xC0\x80", "a\u{FFFD}\u{FFFD}");
         check(b"a\xC0\x80Z", "a\u{FFFD}\u{FFFD}Z");
         // Lowest single-byte as three-byte overlong sequence
         check(b"a\xE0\x80\x80", "a\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xE0\x80\x80Z", "a\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // Lowest single-byte as four-byte overlong sequence
         check(b"a\xF0\x80\x80\x80", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xF0\x80\x80\x80Z", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // One below lowest single-byte
         check(b"a\xFF", "a\u{FFFD}");
         check(b"a\xFFZ", "a\u{FFFD}Z");
         // Highest single-byte
         round_trip("a\x7F");
         round_trip("a\x7FZ");
         // Highest single-byte as two-byte overlong sequence
         check(b"a\xC1\xBF", "a\u{FFFD}\u{FFFD}");
         check(b"a\xC1\xBFZ", "a\u{FFFD}\u{FFFD}Z");
         // Highest single-byte as three-byte overlong sequence
         check(b"a\xE0\x81\xBF", "a\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xE0\x81\xBFZ", "a\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // Highest single-byte as four-byte overlong sequence
         check(b"a\xF0\x80\x81\xBF", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xF0\x80\x81\xBFZ", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // One past highest single byte (also lone continuation)
         check(b"a\x80Z", "a\u{FFFD}Z");
         check(b"a\x80", "a\u{FFFD}");
         // Two lone continuations
         check(b"a\x80\x80Z", "a\u{FFFD}\u{FFFD}Z");
         check(b"a\x80\x80", "a\u{FFFD}\u{FFFD}");
         // Three lone continuations
         check(b"a\x80\x80\x80Z", "a\u{FFFD}\u{FFFD}\u{FFFD}Z");
         check(b"a\x80\x80\x80", "a\u{FFFD}\u{FFFD}\u{FFFD}");
         // Four lone continuations
         check(b"a\x80\x80\x80\x80Z", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}Z");
         check(b"a\x80\x80\x80\x80", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
         // Lowest two-byte
         check(b"a\xC2\x80", "a\u{0080}");
         check(b"a\xC2\x80Z", "a\u{0080}Z");
         // Lowest two-byte as three-byte overlong sequence
         check(b"a\xE0\x82\x80", "a\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xE0\x82\x80Z", "a\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // Lowest two-byte as four-byte overlong sequence
         check(b"a\xF0\x80\x82\x80", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xF0\x80\x82\x80Z", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // Lead one below lowest two-byte
         check(b"a\xC1\x80", "a\u{FFFD}\u{FFFD}");
         check(b"a\xC1\x80Z", "a\u{FFFD}\u{FFFD}Z");
         // Trail one below lowest two-byte
         check(b"a\xC2\x7F", "a\u{FFFD}\u{007F}");
         check(b"a\xC2\x7FZ", "a\u{FFFD}\u{007F}Z");
         // Highest two-byte
         check(b"a\xDF\xBF", "a\u{07FF}");
         check(b"a\xDF\xBFZ", "a\u{07FF}Z");
         // Highest two-byte as three-byte overlong sequence
         check(b"a\xE0\x9F\xBF", "a\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xE0\x9F\xBFZ", "a\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // Highest two-byte as four-byte overlong sequence
         check(b"a\xF0\x80\x9F\xBF", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xF0\x80\x9F\xBFZ", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // Lowest three-byte
         check(b"a\xE0\xA0\x80", "a\u{0800}");
         check(b"a\xE0\xA0\x80Z", "a\u{0800}Z");
         // Lowest three-byte as four-byte overlong sequence
         check(b"a\xF0\x80\xA0\x80", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xF0\x80\xA0\x80Z", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // Highest below surrogates
         check(b"a\xED\x9F\xBF", "a\u{D7FF}");
         check(b"a\xED\x9F\xBFZ", "a\u{D7FF}Z");
         // Highest below surrogates as four-byte overlong sequence
         check(b"a\xF0\x8D\x9F\xBF", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xF0\x8D\x9F\xBFZ", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // First surrogate
         check(b"a\xED\xA0\x80", "a\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xED\xA0\x80Z", "a\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // First surrogate as four-byte overlong sequence
         check(b"a\xF0\x8D\xA0\x80", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xF0\x8D\xA0\x80Z", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // Last surrogate
         check(b"a\xED\xBF\xBF", "a\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xED\xBF\xBFZ", "a\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // Last surrogate as four-byte overlong sequence
         check(b"a\xF0\x8D\xBF\xBF", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xF0\x8D\xBF\xBFZ", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // Lowest above surrogates
         check(b"a\xEE\x80\x80", "a\u{E000}");
         check(b"a\xEE\x80\x80Z", "a\u{E000}Z");
         // Lowest above surrogates as four-byte overlong sequence
         check(b"a\xF0\x8E\x80\x80", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xF0\x8E\x80\x80Z", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // Highest three-byte
         check(b"a\xEF\xBF\xBF", "a\u{FFFF}");
         check(b"a\xEF\xBF\xBFZ", "a\u{FFFF}Z");
         // Highest three-byte as four-byte overlong sequence
         check(b"a\xF0\x8F\xBF\xBF", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xF0\x8F\xBF\xBFZ", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}Z");
         // Lowest four-byte
         check(b"a\xF0\x90\x80\x80", "a\u{10000}");
         check(b"a\xF0\x90\x80\x80Z", "a\u{10000}Z");
         // Highest four-byte
         check(b"a\xF4\x8F\xBF\xBF", "a\u{10FFFF}");
         check(b"a\xF4\x8F\xBF\xBFZ", "a\u{10FFFF}Z");
         // One past highest four-byte
         check(b"a\xF4\x90\x80\x80", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}");
         check(b"a\xF4\x90\x80\x80Z", "a\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}Z");

         // Highest four-byte with last byte replaced with 0xFF
         check(b"a\xF4\x8F\xBF\xFF", "a\u{FFFD}\u{FFFD}");
         check(b"a\xF4\x8F\xBF\xFFZ", "a\u{FFFD}\u{FFFD}Z");
     }

     /// Encoding cases: empty input, range boundaries, surrogate pairs and
     /// unpaired surrogates (expected as U+FFFD).
     #[test]
     fn test_utf8_encode() {
         // Empty input through both entry points.
         encode_utf8_from_utf16(&[], b"");
         encode_utf8_from_utf8("", b"");

         // Each UTF-16 input must encode to the UTF-8 bytes of `text`.
         let encodes_to =
             |units: &[u16], text: &str| encode_utf8_from_utf16(units, text.as_bytes());

         encodes_to(&[0x0000], "\u{0000}");
         encodes_to(&[0x007F], "\u{007F}");
         encodes_to(&[0x0080], "\u{0080}");
         encodes_to(&[0x07FF], "\u{07FF}");
         encodes_to(&[0x0800], "\u{0800}");
         encodes_to(&[0xD7FF], "\u{D7FF}");
         encodes_to(&[0xD800], "\u{FFFD}");
         encodes_to(&[0xD800, 0x0062], "\u{FFFD}\u{0062}");
         encodes_to(&[0xDFFF], "\u{FFFD}");
         encodes_to(&[0xDFFF, 0x0062], "\u{FFFD}\u{0062}");
         encodes_to(&[0xE000], "\u{E000}");
         encodes_to(&[0xFFFF], "\u{FFFF}");
         encodes_to(&[0xD800, 0xDC00], "\u{10000}");
         encodes_to(&[0xDBFF, 0xDFFF], "\u{10FFFF}");
         encodes_to(&[0xDC00, 0xDEDE], "\u{FFFD}\u{FFFD}");
     }

     /// Exercises UTF-16 to UTF-8 encoding against output buffers that are
     /// either exactly large enough or one byte too small.
     #[test]
     fn test_encode_utf8_from_utf16_with_output_limit() {
         // `fits`: the encode is expected to report `InputEmpty`.
         let fits = |units: &[u16], expect: &str, limit: usize| {
             let outcome = EncoderResult::InputEmpty;
             encode_utf8_from_utf16_with_output_limit(units, expect, limit, outcome)
         };
         // `overflows`: the encode is expected to report `OutputFull`.
         let overflows = |units: &[u16], expect: &str, limit: usize| {
             let outcome = EncoderResult::OutputFull;
             encode_utf8_from_utf16_with_output_limit(units, expect, limit, outcome)
         };

         // A single character, with exactly enough room.
         fits(&[0x0062], "\u{62}", 1);
         fits(&[0x00A7], "\u{A7}", 2);
         fits(&[0x2603], "\u{2603}", 3);
         fits(&[0xD83D, 0xDCA9], "\u{1F4A9}", 4);

         // A single character, one byte short.
         overflows(&[0x00A7], "", 1);
         overflows(&[0x2603], "", 2);
         overflows(&[0xD83D, 0xDCA9], "", 3);

         // ASCII followed by each width.
         fits(&[0x0063, 0x0062], "\u{63}\u{62}", 2);
         fits(&[0x0063, 0x00A7], "\u{63}\u{A7}", 3);
         fits(&[0x0063, 0x2603], "\u{63}\u{2603}", 4);
         fits(&[0x0063, 0xD83D, 0xDCA9], "\u{63}\u{1F4A9}", 5);

         overflows(&[0x0063, 0x00A7], "\u{63}", 2);
         overflows(&[0x0063, 0x2603], "\u{63}", 3);
         overflows(&[0x0063, 0xD83D, 0xDCA9], "\u{63}", 4);

         // Two-byte character followed by each width.
         fits(&[0x00B6, 0x0062], "\u{B6}\u{62}", 3);
         fits(&[0x00B6, 0x00A7], "\u{B6}\u{A7}", 4);
         fits(&[0x00B6, 0x2603], "\u{B6}\u{2603}", 5);
         fits(&[0x00B6, 0xD83D, 0xDCA9], "\u{B6}\u{1F4A9}", 6);

         overflows(&[0x00B6, 0x00A7], "\u{B6}", 3);
         overflows(&[0x00B6, 0x2603], "\u{B6}", 4);
         overflows(&[0x00B6, 0xD83D, 0xDCA9], "\u{B6}", 5);

         // Three-byte character followed by each width.
         fits(&[0x263A, 0x0062], "\u{263A}\u{62}", 4);
         fits(&[0x263A, 0x00A7], "\u{263A}\u{A7}", 5);
         fits(&[0x263A, 0x2603], "\u{263A}\u{2603}", 6);
         fits(&[0x263A, 0xD83D, 0xDCA9], "\u{263A}\u{1F4A9}", 7);

         overflows(&[0x263A, 0x00A7], "\u{263A}", 4);
         overflows(&[0x263A, 0x2603], "\u{263A}", 5);
         overflows(&[0x263A, 0xD83D, 0xDCA9], "\u{263A}", 6);

         // Four-byte character followed by each width.
         fits(&[0xD83D, 0xDE0E, 0x0062], "\u{1F60E}\u{62}", 5);
         fits(&[0xD83D, 0xDE0E, 0x00A7], "\u{1F60E}\u{A7}", 6);
         fits(&[0xD83D, 0xDE0E, 0x2603], "\u{1F60E}\u{2603}", 7);
         fits(&[0xD83D, 0xDE0E, 0xD83D, 0xDCA9], "\u{1F60E}\u{1F4A9}", 8);

         overflows(&[0xD83D, 0xDE0E, 0x00A7], "\u{1F60E}", 5);
         overflows(&[0xD83D, 0xDE0E, 0x2603], "\u{1F60E}", 6);
         overflows(&[0xD83D, 0xDE0E, 0xD83D, 0xDCA9], "\u{1F60E}", 7);

         // Longer mixes ending in ASCII.
         fits(&[0x0063, 0x00B6, 0x0062, 0x0062], "\u{63}\u{B6}\u{62}\u{62}", 5);
         overflows(&[0x0063, 0x00B6, 0x0062, 0x0062], "\u{63}\u{B6}\u{62}", 4);

         fits(&[0x0063, 0x00B6, 0x0062, 0x0062, 0x0062], "\u{63}\u{B6}\u{62}\u{62}\u{62}", 6);
         overflows(&[0x0063, 0x00B6, 0x0062, 0x0062, 0x0062], "\u{63}\u{B6}\u{62}\u{62}", 5);

         fits(&[0x263A, 0x0062, 0x0062], "\u{263A}\u{62}\u{62}", 5);
         overflows(&[0x263A, 0x0062, 0x0062], "\u{263A}\u{62}", 4);

         fits(&[0x263A, 0x0062, 0x0062, 0x0062], "\u{263A}\u{62}\u{62}\u{62}", 6);
         overflows(&[0x263A, 0x0062, 0x0062, 0x0062], "\u{263A}\u{62}\u{62}", 5);

         // Mixes involving a two-byte character near the end.
         fits(&[0x0063, 0x00B6, 0x00A7], "\u{63}\u{B6}\u{A7}", 5);
         overflows(&[0x0063, 0x00B6, 0x00A7], "\u{63}\u{B6}", 4);

         fits(&[0x0063, 0x00B6, 0x00A7, 0x0062], "\u{63}\u{B6}\u{A7}\u{62}", 6);
         overflows(&[0x0063, 0x00B6, 0x00A7, 0x0062], "\u{63}\u{B6}\u{A7}", 5);

         fits(&[0x263A, 0x00A7, 0x0062], "\u{263A}\u{A7}\u{62}", 6);
         overflows(&[0x263A, 0x00A7, 0x0062], "\u{263A}\u{A7}", 5);

         fits(&[0x0063, 0x00B6, 0x0062, 0x00A7], "\u{63}\u{B6}\u{62}\u{A7}", 6);
         overflows(&[0x0063, 0x00B6, 0x0062, 0x00A7], "\u{63}\u{B6}\u{62}", 5);

         fits(&[0x263A, 0x0062, 0x00A7], "\u{263A}\u{62}\u{A7}", 6);
         overflows(&[0x263A, 0x0062, 0x00A7], "\u{263A}\u{62}", 5);

         // Mixes ending in a three-byte character.
         fits(&[0x0063, 0x00B6, 0x2603], "\u{63}\u{B6}\u{2603}", 6);
         overflows(&[0x0063, 0x00B6, 0x2603], "\u{63}\u{B6}", 5);

         fits(&[0x263A, 0x2603], "\u{263A}\u{2603}", 6);
         overflows(&[0x263A, 0x2603], "\u{263A}", 5);

         // Mixes ending in an unpaired high surrogate.
         fits(&[0x0063, 0x00B6, 0xD83D], "\u{63}\u{B6}\u{FFFD}", 6);
         overflows(&[0x0063, 0x00B6, 0xD83D], "\u{63}\u{B6}", 5);

         fits(&[0x263A, 0xD83D], "\u{263A}\u{FFFD}", 6);
         overflows(&[0x263A, 0xD83D], "\u{263A}", 5);

         // Mixes ending in an unpaired low surrogate.
         fits(&[0x0063, 0x00B6, 0xDCA9], "\u{63}\u{B6}\u{FFFD}", 6);
         overflows(&[0x0063, 0x00B6, 0xDCA9], "\u{63}\u{B6}", 5);

         fits(&[0x263A, 0xDCA9], "\u{263A}\u{FFFD}", 6);
         overflows(&[0x263A, 0xDCA9], "\u{263A}", 5);
     }

     /// A buffer sized by `max_buffer_length_from_utf16_without_replacement`
     /// must be large enough to take the whole input.
     #[test]
     fn test_utf8_max_length_from_utf16() {
         let input = [0x2C9Fu16, 0x2CA9u16, 0x2CA3u16, 0x2C9Fu16];
         let mut output = [0u8; 13];
         let mut encoder = UTF_8.new_encoder();
         let needed = encoder
             .max_buffer_length_from_utf16_without_replacement(input.len())
             .unwrap();
         let window = &mut output[..needed];
         let (result, _, _) = encoder.encode_from_utf16_without_replacement(&input, window, true);
         assert_eq!(result, EncoderResult::InputEmpty);
     }

     /// Feeds the three bytes EF BF BE one at a time. Nothing may be written
     /// until the final byte, which must yield the single unit 0xFFFE without
     /// errors.
     #[test]
     fn test_decode_bom_prefixed_split_byte_triple() {
         let mut output = [0u16; 20];
         let mut decoder = UTF_8.new_decoder();
         // (input chunk, `last` flag, expected units written)
         let steps = [
             (b"\xEF", false, 0usize),
             (b"\xBF", false, 0),
             (b"\xBE", true, 1),
         ];
         for &(chunk, last, expect_written) in steps.iter() {
             let needed = decoder.max_utf16_buffer_length(1).unwrap();
             let (result, read, written, had_errors) =
                 decoder.decode_to_utf16(chunk, &mut output[..needed], last);
             assert_eq!(result, CoderResult::InputEmpty);
             assert_eq!(read, 1);
             assert_eq!(written, expect_written);
             assert!(!had_errors);
         }
         assert_eq!(output[0], 0xFFFE);
     }

     /// Feeds EF then BC with the stream ending after the second byte: the
     /// incomplete sequence must come out as one U+FFFD with the error flag set.
     #[test]
     fn test_decode_bom_prefixed_split_byte_pair() {
         let mut output = [0u16; 20];
         let mut decoder = UTF_8.new_decoder();

         // First byte: consumed, nothing emitted yet.
         let room = decoder.max_utf16_buffer_length(1).unwrap();
         let (status, consumed, produced, malformed) =
             decoder.decode_to_utf16(b"\xEF", &mut output[..room], false);
         assert_eq!(status, CoderResult::InputEmpty);
         assert_eq!(consumed, 1);
         assert_eq!(produced, 0);
         assert!(!malformed);

         // Second byte with `last` set: the truncated sequence is replaced.
         let room = decoder.max_utf16_buffer_length(1).unwrap();
         let (status, consumed, produced, malformed) =
             decoder.decode_to_utf16(b"\xBC", &mut output[..room], true);
         assert_eq!(status, CoderResult::InputEmpty);
         assert_eq!(consumed, 1);
         assert_eq!(produced, 1);
         assert!(malformed);
         assert_eq!(output[0], 0xFFFD);
     }

     /// A lone EF byte at the end of the stream must decode to one U+FFFD and
     /// set the error flag.
     #[test]
     fn test_decode_bom_prefix() {
         let mut decoder = UTF_8.new_decoder();
         let mut output = [0u16; 20];
         let room = decoder.max_utf16_buffer_length(1).unwrap();
         let (status, consumed, produced, malformed) =
             decoder.decode_to_utf16(b"\xEF", &mut output[..room], true);
         assert_eq!(status, CoderResult::InputEmpty);
         assert_eq!(consumed, 1);
         assert_eq!(produced, 1);
         assert!(malformed);
         assert_eq!(output[0], 0xFFFD);
     }

     /// With room for a single UTF-16 unit, decoding "\u{E4}a" must emit the
     /// first character, consume its two bytes and report `OutputFull`.
     #[test]
     fn test_tail() {
         let mut decoder = UTF_8.new_decoder_without_bom_handling();
         let mut output = [0u16; 1];
         let input = "\u{E4}a".as_bytes();
         let (status, consumed, produced, malformed) =
             decoder.decode_to_utf16(input, &mut output, false);
         assert_eq!(status, CoderResult::OutputFull);
         assert_eq!(consumed, 2);
         assert_eq!(produced, 1);
         assert!(!malformed);
         assert_eq!(output[0], 0x00E4);
     }
 }