// vendor/regex-1.7.3/tests/test_default.rs - toolchain/rustc - Git at Google

 #![cfg_attr(feature = "pattern", feature(pattern))]

 use regex;

 // Due to macro scoping rules, this definition only applies for the modules
 // defined below. Effectively, it allows us to use the same tests for both
 // native and dynamic regexes.
 //
 // This is also used to test the various matching engines. This one exercises
 // the normal code path which automatically chooses the engine based on the
 // regex and the input. Other dynamic tests explicitly set the engine to use.
 macro_rules! regex_new {
     // Expands to a block whose value is whatever `Regex::new($re)` returns.
     // Nothing is unwrapped here, so the caller decides how to handle an error.
     ($re:expr) => {{
         // The import lives inside the expansion's own block.
         use regex::Regex;
         Regex::new($re)
     }};
 }

 macro_rules! regex {
     // Shorthand for `regex_new!($re).unwrap()`: yields the compiled value and
     // panics if `regex_new!` produced an error.
     ($re:expr) => {
         regex_new!($re).unwrap()
     };
 }

 macro_rules! regex_set_new {
     // Expands to a block whose value is whatever `RegexSet::new($re)` returns.
     // Nothing is unwrapped here, so the caller decides how to handle an error.
     ($re:expr) => {{
         // The import lives inside the expansion's own block.
         use regex::RegexSet;
         RegexSet::new($re)
     }};
 }

 macro_rules! regex_set {
     // Shorthand for `regex_set_new!($res).unwrap()`: yields the built set and
     // panics if `regex_set_new!` produced an error.
     ($res:expr) => {
         regex_set_new!($res).unwrap()
     };
 }

 // Must come before other module definitions.
 // NOTE(review): presumably these two files define further test macros, which
 // are only visible to code that follows them textually - confirm against the
 // included files.
 include!("macros_str.rs");
 include!("macros.rs");

 // Test modules compiled unconditionally.
 mod api;
 mod api_str;
 mod crazy;
 mod flags;
 mod fowler;
 mod misc;
 mod multiline;
 mod noparse;
 mod regression;
 mod regression_fuzz;
 mod replace;
 mod searcher;
 mod set;
 mod shortest_match;
 mod suffix_reverse;
 // Compiled only when the `unicode` Cargo feature is enabled.
 #[cfg(feature = "unicode")]
 mod unicode;
 // Both word-boundary modules are gated on the `unicode-perl` feature.
 #[cfg(feature = "unicode-perl")]
 mod word_boundary;
 #[cfg(feature = "unicode-perl")]
 mod word_boundary_unicode;

 // Every pattern below switches Unicode mode off with `(?-u)`. The test
 // requires `Regex::new` to reject each of them.
 #[test]
 fn disallow_non_utf8() {
     // True when the default string-regex constructor refuses `pattern`.
     let is_rejected = |pattern: &str| regex::Regex::new(pattern).is_err();

     assert!(is_rejected(r"(?-u)\xFF"));
     assert!(is_rejected(r"(?-u)."));
     assert!(is_rejected(r"(?-u)[\xFF]"));
     assert!(is_rejected(r"(?-u)☃"));
 }

 // With default settings, `Regex::new` must refuse the `\0` escape.
 #[test]
 fn disallow_octal() {
     let outcome = regex::Regex::new(r"\0");
     assert!(outcome.is_err());
 }

 // The same `\0` pattern must build once the builder's `octal` option is
 // switched on.
 #[test]
 fn allow_octal() {
     let built = regex::RegexBuilder::new(r"\0").octal(true).build();
     assert!(built.is_ok());
 }

 // Compile-time check that the public regex, set and builder types implement
 // `Send`, `Sync`, `UnwindSafe` and `RefUnwindSafe`.
 #[test]
 fn oibits() {
     use regex::bytes;
     use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
     use std::panic::{RefUnwindSafe, UnwindSafe};

     // Type-checks only when `T` implements all four marker traits; the call
     // itself does nothing at run time.
     fn assert_markers<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}

     // Single-pattern types from the crate root.
     assert_markers::<Regex>();
     assert_markers::<RegexBuilder>();

     // Their counterparts in the `bytes` module.
     assert_markers::<bytes::Regex>();
     assert_markers::<bytes::RegexBuilder>();

     // Set types from the crate root.
     assert_markers::<RegexSet>();
     assert_markers::<RegexSetBuilder>();

     // Set types from the `bytes` module.
     assert_markers::<bytes::RegexSet>();
     assert_markers::<bytes::RegexSetBuilder>();
 }

 // See: https://github.com/rust-lang/regex/issues/568
 //
 // Builds a trivial regex inside `catch_unwind` and throws the outcome away;
 // the test passes as long as this compiles and returns.
 #[test]
 fn oibits_regression() {
     let build_regex = || regex::Regex::new("a").unwrap();
     let _ = std::panic::catch_unwind(build_regex);
 }

 // See: https://github.com/rust-lang/regex/issues/750
 //
 // Pins the in-memory size of the four public matcher types to 16 bytes. The
 // check is compiled only for targets with 64-bit pointers.
 #[test]
 #[cfg(target_pointer_width = "64")]
 fn regex_is_reasonably_small() {
     const EXPECTED_BYTES: usize = 16;

     assert_eq!(EXPECTED_BYTES, std::mem::size_of::<regex::Regex>());
     assert_eq!(EXPECTED_BYTES, std::mem::size_of::<regex::RegexSet>());
     assert_eq!(EXPECTED_BYTES, std::mem::size_of::<regex::bytes::Regex>());
     assert_eq!(EXPECTED_BYTES, std::mem::size_of::<regex::bytes::RegexSet>());
 }

 // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
 // See: CVE-2022-24713
 //
 // We test that our regex compiler will correctly return a "too big" error when
 // we try to use a very large repetition on an *empty* sub-expression.
 //
 // At the time this test was written, the regex compiler does not represent
 // empty sub-expressions with any bytecode instructions. In effect, it's an
 // "optimization" to leave them out, since they would otherwise correspond
 // to an unconditional JUMP in the regex bytecode (i.e., an unconditional
 // epsilon transition in the NFA graph). Therefore, an empty sub-expression
 // represents an interesting case for the compiler's size limits. Since it
 // doesn't actually contribute any additional memory to the compiled regex
 // instructions, the size limit machinery never detects it. Instead, it just
 // dumbly tries to compile the empty sub-expression N times, where N is the
 // repetition size.
 //
 // When N is very large, this will cause the compiler to essentially spin and
 // do nothing for a decently large amount of time. It causes the regex to take
 // quite a bit of time to compile, despite the concrete syntax of the regex
 // being quite small.
 //
 // The degree to which this is actually a problem is somewhat of a judgment
 // call. Some regexes simply take a long time to compile. But in general, you
 // should be able to reasonably control this by setting lower or higher size
 // limits on the compiled object size. But this mitigation doesn't work at all
 // for this case.
 //
 // This particular test is somewhat narrow. It merely checks that regex
 // compilation will, at some point, return a "too big" error. Before the
 // fix landed, this test would eventually fail because the regex would be
 // successfully compiled (after enough time elapsed). So while this test
 // doesn't check that we exit in a reasonable amount of time, it does at least
 // check that we are properly returning an error at some point.
 #[test]
 fn big_empty_regex_fails() {
     // An empty group repeated 4294967295 (= `u32::MAX`) times must be
     // reported as an error rather than compiled.
     assert!(regex::Regex::new("(?:){4294967295}").is_err());
 }

 // "Billion laughs" variant of the large-empty-repetition case: instead of one
 // huge count, six nested `{64}` repetitions are stacked on an empty group,
 // which multiplies out to 64^6 repetitions.
 #[test]
 fn big_empty_reps_chain_regex_fails() {
     assert!(regex::Regex::new("(?:){64}{64}{64}{64}{64}{64}").is_err());
 }

 // A zero-length sub-expression can also be introduced with `{0}`: `x{0}`
 // repeats `x` zero times, and the huge outer repetition is then applied to
 // that. Compilation must fail here as well.
 #[test]
 fn big_zero_reps_regex_fails() {
     assert!(regex::Regex::new(r"x{0}{4294967295}").is_err());
 }

 // For completeness: `(?:|)` is an alternation whose two branches are both
 // empty. Repeating it 4294967295 times must also be rejected.
 #[test]
 fn empty_alt_regex_fails() {
     assert!(regex::Regex::new(r"(?:|){4294967295}").is_err());
 }

 // Regression test for: https://github.com/rust-lang/regex/issues/969
 //
 // Searching "ababcd" from offset 4 must report a match ending at offset 6,
 // both through `shortest_match_at` and through `find_at`.
 #[test]
 fn regression_i969() {
     let haystack = "ababcd";
     let start = 4;
     let pattern = regex::Regex::new(r"c.*d\z").unwrap();

     let shortest_end = pattern.shortest_match_at(haystack, start);
     assert_eq!(Some(6), shortest_end);

     let found_end = pattern.find_at(haystack, start).map(|found| found.end());
     assert_eq!(Some(6), found_end);
 }
	#![cfg_attr(feature = "pattern", feature(pattern))]

	use regex;

	// Due to macro scoping rules, this definition only applies for the modules
	// defined below. Effectively, it allows us to use the same tests for both
	// native and dynamic regexes.
	//
	// This is also used to test the various matching engines. This one exercises
	// the normal code path which automatically chooses the engine based on the
	// regex and the input. Other dynamic tests explicitly set the engine to use.
	macro_rules! regex_new {
	// Expands to a block whose value is whatever `Regex::new($re)` returns.
	// Nothing is unwrapped here, so the caller decides how to handle an error.
	($re:expr) => {{
	// The import lives inside the expansion's own block.
	use regex::Regex;
	Regex::new($re)
	}};
	}

	macro_rules! regex {
	// Shorthand for `regex_new!($re).unwrap()`: yields the compiled value and
	// panics if `regex_new!` produced an error.
	($re:expr) => {
	regex_new!($re).unwrap()
	};
	}

	macro_rules! regex_set_new {
	// Expands to a block whose value is whatever `RegexSet::new($re)` returns.
	// Nothing is unwrapped here, so the caller decides how to handle an error.
	($re:expr) => {{
	// The import lives inside the expansion's own block.
	use regex::RegexSet;
	RegexSet::new($re)
	}};
	}

	macro_rules! regex_set {
	// Shorthand for `regex_set_new!($res).unwrap()`: yields the built set and
	// panics if `regex_set_new!` produced an error.
	($res:expr) => {
	regex_set_new!($res).unwrap()
	};
	}

	// Must come before other module definitions.
	// NOTE(review): presumably these two files define further test macros, which
	// are only visible to code that follows them textually - confirm against the
	// included files.
	include!("macros_str.rs");
	include!("macros.rs");

	// Test modules compiled unconditionally.
	mod api;
	mod api_str;
	mod crazy;
	mod flags;
	mod fowler;
	mod misc;
	mod multiline;
	mod noparse;
	mod regression;
	mod regression_fuzz;
	mod replace;
	mod searcher;
	mod set;
	mod shortest_match;
	mod suffix_reverse;
	// Compiled only when the `unicode` Cargo feature is enabled.
	#[cfg(feature = "unicode")]
	mod unicode;
	// Both word-boundary modules are gated on the `unicode-perl` feature.
	#[cfg(feature = "unicode-perl")]
	mod word_boundary;
	#[cfg(feature = "unicode-perl")]
	mod word_boundary_unicode;

	// Every pattern in the table switches Unicode mode off with `(?-u)`. The
	// test requires `Regex::new` to reject each of them.
	#[test]
	fn disallow_non_utf8() {
	    let rejected = [r"(?-u)\xFF", r"(?-u).", r"(?-u)[\xFF]", r"(?-u)☃"];
	    for pattern in rejected.iter() {
	        // Name the offending pattern so a failure is easy to locate.
	        assert!(
	            regex::Regex::new(pattern).is_err(),
	            "pattern {:?} should have been rejected",
	            pattern
	        );
	    }
	}

	// With default settings, `Regex::new` must refuse the `\0` escape.
	#[test]
	fn disallow_octal() {
	    let pattern = r"\0";
	    let compiled = regex::Regex::new(pattern);
	    assert!(compiled.is_err());
	}

	// The same `\0` pattern must build once the builder's `octal` option is
	// switched on.
	#[test]
	fn allow_octal() {
	    let pattern = r"\0";
	    let compiled = regex::RegexBuilder::new(pattern).octal(true).build();
	    assert!(compiled.is_ok());
	}

	// Compile-time check that the public regex, set and builder types implement
	// `Send`, `Sync`, `UnwindSafe` and `RefUnwindSafe`.
	#[test]
	fn oibits() {
	    use regex::bytes;
	    use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
	    use std::panic::{RefUnwindSafe, UnwindSafe};

	    // Each helper type-checks only when `T` has the named marker trait and
	    // does nothing at run time.
	    fn assert_send<T: Send>() {}
	    fn assert_sync<T: Sync>() {}
	    fn assert_unwind_safe<T: UnwindSafe>() {}
	    fn assert_ref_unwind_safe<T: RefUnwindSafe>() {}

	    // Applies all four checks, in the order above, to every listed type.
	    macro_rules! assert_all_markers {
	        ($($ty:ty),+ $(,)?) => {$(
	            assert_send::<$ty>();
	            assert_sync::<$ty>();
	            assert_unwind_safe::<$ty>();
	            assert_ref_unwind_safe::<$ty>();
	        )+};
	    }

	    assert_all_markers!(
	        Regex,
	        RegexBuilder,
	        bytes::Regex,
	        bytes::RegexBuilder,
	        RegexSet,
	        RegexSetBuilder,
	        bytes::RegexSet,
	        bytes::RegexSetBuilder,
	    );
	}

	// See: https://github.com/rust-lang/regex/issues/568
	//
	// Builds a trivial regex inside `catch_unwind` and throws the outcome away;
	// the test passes as long as this compiles and returns.
	#[test]
	fn oibits_regression() {
	    use regex::Regex;
	    use std::panic;

	    // A zero-argument closure is written `|| ...`; the pipes must not be
	    // backslash-escaped.
	    let _ = panic::catch_unwind(|| Regex::new("a").unwrap());
	}

	// See: https://github.com/rust-lang/regex/issues/750
	//
	// Pins the in-memory size of the four public matcher types to 16 bytes. The
	// check is compiled only for targets with 64-bit pointers.
	#[test]
	#[cfg(target_pointer_width = "64")]
	fn regex_is_reasonably_small() {
	    use std::mem::size_of;

	    use regex::bytes;
	    use regex::{Regex, RegexSet};

	    // (label, measured size) for each type under test.
	    let measured = [
	        ("Regex", size_of::<Regex>()),
	        ("RegexSet", size_of::<RegexSet>()),
	        ("bytes::Regex", size_of::<bytes::Regex>()),
	        ("bytes::RegexSet", size_of::<bytes::RegexSet>()),
	    ];
	    for (label, size) in measured.iter() {
	        assert_eq!(16, *size, "unexpected size for {}", label);
	    }
	}

	// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
	// See: CVE-2022-24713
	//
	// We test that our regex compiler will correctly return a "too big" error when
	// we try to use a very large repetition on an empty sub-expression.
	//
	// At the time this test was written, the regex compiler does not represent
	// empty sub-expressions with any bytecode instructions. In effect, it's an
	// "optimization" to leave them out, since they would otherwise correspond
	// to an unconditional JUMP in the regex bytecode (i.e., an unconditional
	// epsilon transition in the NFA graph). Therefore, an empty sub-expression
	// represents an interesting case for the compiler's size limits. Since it
	// doesn't actually contribute any additional memory to the compiled regex
	// instructions, the size limit machinery never detects it. Instead, it just
	// dumbly tries to compile the empty sub-expression N times, where N is the
	// repetition size.
	//
	// When N is very large, this will cause the compiler to essentially spin and
	// do nothing for a decently large amount of time. It causes the regex to take
	// quite a bit of time to compile, despite the concrete syntax of the regex
	// being quite small.
	//
	// The degree to which this is actually a problem is somewhat of a judgment
	// call. Some regexes simply take a long time to compile. But in general, you
	// should be able to reasonably control this by setting lower or higher size
	// limits on the compiled object size. But this mitigation doesn't work at all
	// for this case.
	//
	// This particular test is somewhat narrow. It merely checks that regex
	// compilation will, at some point, return a "too big" error. Before the
	// fix landed, this test would eventually fail because the regex would be
	// successfully compiled (after enough time elapsed). So while this test
	// doesn't check that we exit in a reasonable amount of time, it does at least
	// check that we are properly returning an error at some point.
	#[test]
	fn big_empty_regex_fails() {
	    // An empty group repeated 4294967295 (= `u32::MAX`) times.
	    let pattern = "(?:){4294967295}";
	    let outcome = regex::Regex::new(pattern);
	    assert!(outcome.is_err());
	}

	// "Billion laughs" variant of the large-empty-repetition case: instead of one
	// huge count, six nested `{64}` repetitions are stacked on an empty group,
	// which multiplies out to 64^6 repetitions.
	#[test]
	fn big_empty_reps_chain_regex_fails() {
	    let pattern = "(?:){64}{64}{64}{64}{64}{64}";
	    let outcome = regex::Regex::new(pattern);
	    assert!(outcome.is_err());
	}

	// A zero-length sub-expression can also be introduced with `{0}`: `x{0}`
	// repeats `x` zero times, and the huge outer repetition is then applied to
	// that. Compilation must fail here as well.
	#[test]
	fn big_zero_reps_regex_fails() {
	    let pattern = r"x{0}{4294967295}";
	    let outcome = regex::Regex::new(pattern);
	    assert!(outcome.is_err());
	}

	// Testing another case for completeness: an alternation whose two branches
	// are both empty, repeated 4294967295 times, must be rejected.
	#[test]
	fn empty_alt_regex_fails() {
	    use regex::Regex;

	    // The `|` must stay unescaped: inside this raw string `\|` would match a
	    // literal pipe character, and the sub-expression would no longer be
	    // empty.
	    let result = Regex::new(r"(?:|){4294967295}");
	    assert!(result.is_err());
	}

	// Regression test for: https://github.com/rust-lang/regex/issues/969
	//
	// Searching "ababcd" from offset 4 must report a match ending at offset 6,
	// both through `shortest_match_at` and through `find_at`.
	#[test]
	fn regression_i969() {
	    use regex::Regex;

	    let re = Regex::new(r"c.*d\z").unwrap();
	    assert_eq!(Some(6), re.shortest_match_at("ababcd", 4));
	    // Closure parameters are delimited by plain `|`; the pipes must not be
	    // backslash-escaped.
	    assert_eq!(Some(6), re.find_at("ababcd", 4).map(|m| m.end()));
	}