blob: 19a319af1127585fb4e6783f2023203c200dedc8 [file] [log] [blame]
#![cfg_attr(feature = "pattern", feature(pattern))]
use regex;
// Due to macro scoping rules, this definition only applies for the modules
// defined below. Effectively, it allows us to use the same tests for both
// native and dynamic regexes.
//
// This is also used to test the various matching engines. This one exercises
// the normal code path which automatically chooses the engine based on the
// regex and the input. Other dynamic tests explicitly set the engine to use.
macro_rules! regex_new {
    // Compiles `$pattern` with the default `Regex` type and hands back the
    // result of `Regex::new` untouched, so callers can also exercise
    // patterns that are expected to fail.
    ($pattern:expr) => {
        ::regex::Regex::new($pattern)
    };
}
macro_rules! regex {
    // Shorthand for tests that need a working regex: delegates to
    // `regex_new!` and unwraps, so an invalid pattern panics the test.
    ($pattern:expr) => {
        regex_new!($pattern).unwrap()
    };
}
macro_rules! regex_set_new {
    // Builds a `RegexSet` from `$patterns` and hands back the result of
    // `RegexSet::new` untouched, leaving error handling to the caller.
    ($patterns:expr) => {
        ::regex::RegexSet::new($patterns)
    };
}
macro_rules! regex_set {
    // Shorthand for tests that need a working set: delegates to
    // `regex_set_new!` and unwraps, so an invalid pattern panics the test.
    ($patterns:expr) => {
        regex_set_new!($patterns).unwrap()
    };
}
// Must come before other module definitions.
// These files are textually included into this crate root (rather than being
// declared as modules), so whatever they define lives at the same scope as the
// `regex!`-family macros above.
include!("macros_str.rs");
include!("macros.rs");
// Test modules. They are declared after the macro definitions above, so the
// macro invocations inside them resolve to this file's definitions.
mod api;
mod api_str;
mod crazy;
mod flags;
mod fowler;
mod misc;
mod multiline;
mod noparse;
mod regression;
mod regression_fuzz;
mod replace;
mod searcher;
mod set;
mod shortest_match;
mod suffix_reverse;
// Only compiled when the `unicode` feature is enabled.
#[cfg(feature = "unicode")]
mod unicode;
// The two word-boundary modules are only compiled when the `unicode-perl`
// feature is enabled.
#[cfg(feature = "unicode-perl")]
mod word_boundary;
#[cfg(feature = "unicode-perl")]
mod word_boundary_unicode;
#[test]
fn disallow_non_utf8() {
    // Every pattern below switches the `u` flag off with `(?-u)`; the
    // `&str`-based `Regex` constructor is expected to refuse each of them.
    let rejects = |pattern: &str| regex::Regex::new(pattern).is_err();

    assert!(rejects(r"(?-u)\xFF"));
    assert!(rejects(r"(?-u)."));
    assert!(rejects(r"(?-u)[\xFF]"));
    assert!(rejects(r"(?-u)☃"));
}
#[test]
fn disallow_octal() {
    // With default settings the `\0` escape must not compile.
    let compiled = regex::Regex::new(r"\0");
    assert!(compiled.is_err());
}
#[test]
fn allow_octal() {
    // The same `\0` escape compiles once the builder's `octal` option is
    // switched on.
    let compiled = regex::RegexBuilder::new(r"\0").octal(true).build();
    assert!(compiled.is_ok());
}
#[test]
fn oibits() {
    use regex::bytes;
    use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
    use std::panic::{RefUnwindSafe, UnwindSafe};

    // Compile-time check only: instantiating this function for `T` fails to
    // build unless `T` implements all four auto traits. Nothing happens at
    // run time. ("oibits" is the historical name for auto traits: opt-in
    // built-in traits.)
    fn assert_auto_traits<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}

    // `&str`-based API.
    assert_auto_traits::<Regex>();
    assert_auto_traits::<RegexBuilder>();
    assert_auto_traits::<RegexSet>();
    assert_auto_traits::<RegexSetBuilder>();

    // `&[u8]`-based API.
    assert_auto_traits::<bytes::Regex>();
    assert_auto_traits::<bytes::RegexBuilder>();
    assert_auto_traits::<bytes::RegexSet>();
    assert_auto_traits::<bytes::RegexSetBuilder>();
}
// See: https://github.com/rust-lang/regex/issues/568
#[test]
fn oibits_regression() {
    use regex::Regex;
    use std::panic;

    // The point of this test is that the call below compiles: a closure
    // that builds and returns a `Regex` must be accepted by `catch_unwind`.
    // The outcome of the call itself is deliberately discarded.
    let build = || Regex::new("a").unwrap();
    let _ = panic::catch_unwind(build);
}
// See: https://github.com/rust-lang/regex/issues/750
#[test]
#[cfg(target_pointer_width = "64")]
fn regex_is_reasonably_small() {
    use regex::bytes;
    use regex::{Regex, RegexSet};
    use std::mem::size_of;

    // Expected `size_of` (in bytes) for every public regex handle on targets
    // with 64-bit pointers; the `cfg` above restricts the test accordingly.
    const EXPECTED_SIZE: usize = 16;

    assert_eq!(EXPECTED_SIZE, size_of::<Regex>());
    assert_eq!(EXPECTED_SIZE, size_of::<RegexSet>());
    assert_eq!(EXPECTED_SIZE, size_of::<bytes::Regex>());
    assert_eq!(EXPECTED_SIZE, size_of::<bytes::RegexSet>());
}
// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
// See: CVE-2022-24713
//
// We test that our regex compiler will correctly return a "too big" error when
// we try to use a very large repetition on an *empty* sub-expression.
//
// At the time this test was written, the regex compiler does not represent
// empty sub-expressions with any bytecode instructions. In effect, it's an
// "optimization" to leave them out, since they would otherwise correspond
// to an unconditional JUMP in the regex bytecode (i.e., an unconditional
// epsilon transition in the NFA graph). Therefore, an empty sub-expression
// represents an interesting case for the compiler's size limits. Since it
// doesn't actually contribute any additional memory to the compiled regex
// instructions, the size limit machinery never detects it. Instead, it just
// dumbly tries to compile the empty sub-expression N times, where N is the
// repetition size.
//
// When N is very large, this will cause the compiler to essentially spin and
// do nothing for a decently large amount of time. It causes the regex to take
// quite a bit of time to compile, despite the concrete syntax of the regex
// being quite small.
//
// The degree to which this is actually a problem is somewhat of a judgment
// call. Some regexes simply take a long time to compile. But in general, you
// should be able to reasonably control this by setting lower or higher size
// limits on the compiled object size. But this mitigation doesn't work at all
// for this case.
//
// This particular test is somewhat narrow. It merely checks that regex
// compilation will, at some point, return a "too big" error. Before the
// fix landed, this test would eventually fail because the regex would be
// successfully compiled (after enough time elapsed). So while this test
// doesn't check that we exit in a reasonable amount of time, it does at least
// check that we are properly returning an error at some point.
#[test]
fn big_empty_regex_fails() {
    use regex::Regex;

    // An empty group under one enormous counted repetition must be rejected
    // by the compiler instead of eventually compiling successfully.
    assert!(Regex::new("(?:){4294967295}").is_err());
}
// Below is a "billion laughs" variant of the previous test case.
#[test]
fn big_empty_reps_chain_regex_fails() {
    use regex::Regex;

    // Same empty group, but the huge repetition count is reached by stacking
    // six small `{64}` counters on top of each other.
    assert!(Regex::new("(?:){64}{64}{64}{64}{64}{64}").is_err());
}
// Below is another situation where a zero-length sub-expression can be
// introduced.
#[test]
fn big_zero_reps_regex_fails() {
    use regex::Regex;

    // `x{0}` is the zero-length sub-expression here; repeating it an
    // enormous number of times must still produce an error.
    assert!(Regex::new(r"x{0}{4294967295}").is_err());
}
// Testing another case for completeness.
#[test]
fn empty_alt_regex_fails() {
    use regex::Regex;

    // An alternation whose branches are both empty, under an enormous
    // counted repetition, must also be rejected.
    assert!(Regex::new(r"(?:|){4294967295}").is_err());
}
// Regression test for: https://github.com/rust-lang/regex/issues/969
#[test]
fn regression_i969() {
    use regex::Regex;

    // The search begins at byte offset 4, which is where "cd" starts in the
    // haystack. Both search entry points must report a match ending at 6.
    let haystack = "ababcd";
    let start = 4;
    let re = Regex::new(r"c.*d\z").unwrap();

    let shortest_end = re.shortest_match_at(haystack, start);
    assert_eq!(Some(6), shortest_end);

    let find_end = re.find_at(haystack, start).map(|m| m.end());
    assert_eq!(Some(6), find_end);
}