src/tools/clippy/clippy_lints/src/tabs_in_doc_comments.rs - toolchain/rustc - Git at Google

 use clippy_utils::diagnostics::span_lint_and_sugg;
 use rustc_ast::ast;
 use rustc_errors::Applicability;
 use rustc_lint::{EarlyContext, EarlyLintPass};
 use rustc_session::{declare_lint_pass, declare_tool_lint};
 use rustc_span::{BytePos, Span};

 declare_clippy_lint! {
     /// ### What it does
     /// Checks doc comments for usage of tab characters.
     ///
     /// ### Why is this bad?
     /// The rust style-guide promotes spaces instead of tabs for indentation.
     /// To keep a consistent view on the source, also doc comments should not have tabs.
     /// Also, explaining ascii-diagrams containing tabs can get displayed incorrectly when the
     /// display settings of the author and reader differ.
     ///
     /// ### Example
     /// ```no_run
     /// ///
     /// /// Struct to hold two strings:
     /// /// 	- first		one
     /// /// 	- second	one
     /// pub struct DoubleString {
     ///    ///
     ///    /// 	- First String:
     ///    /// 		- needs to be inside here
     ///    first_string: String,
     ///    ///
     ///    /// 	- Second String:
     ///    /// 		- needs to be inside here
     ///    second_string: String,
     ///}
     /// ```
     ///
     /// Will be converted to:
     /// ```no_run
     /// ///
     /// /// Struct to hold two strings:
     /// ///     - first        one
     /// ///     - second    one
     /// pub struct DoubleString {
     ///    ///
     ///    ///     - First String:
     ///    ///         - needs to be inside here
     ///    first_string: String,
     ///    ///
     ///    ///     - Second String:
     ///    ///         - needs to be inside here
     ///    second_string: String,
     ///}
     /// ```
     #[clippy::version = "1.41.0"]
     pub TABS_IN_DOC_COMMENTS,
     style,
     "using tabs in doc comments is not recommended"
 }

 declare_lint_pass!(TabsInDocComments => [TABS_IN_DOC_COMMENTS]);

 impl TabsInDocComments {
     fn warn_if_tabs_in_doc(cx: &EarlyContext<'_>, attr: &ast::Attribute) {
         if let ast::AttrKind::DocComment(_, comment) = attr.kind {
             let comment = comment.as_str();

             for (lo, hi) in get_chunks_of_tabs(comment) {
                 // +3 skips the opening delimiter
                 let new_span = Span::new(
                     attr.span.lo() + BytePos(3 + lo),
                     attr.span.lo() + BytePos(3 + hi),
                     attr.span.ctxt(),
                     attr.span.parent(),
                 );
                 span_lint_and_sugg(
                     cx,
                     TABS_IN_DOC_COMMENTS,
                     new_span,
                     "using tabs in doc comments is not recommended",
                     "consider using four spaces per tab",
                     "    ".repeat((hi - lo) as usize),
                     Applicability::MaybeIncorrect,
                 );
             }
         }
     }
 }

 impl EarlyLintPass for TabsInDocComments {
     fn check_attribute(&mut self, cx: &EarlyContext<'_>, attribute: &ast::Attribute) {
         Self::warn_if_tabs_in_doc(cx, attribute);
     }
 }

 ///
 /// scans the string for groups of tabs and returns the start(inclusive) and end positions
 /// (exclusive) of all groups
 /// e.g. "sd\tasd\t\taa" will be converted to [(2, 3), (6, 8)] as
 ///       012 3456 7 89
 ///         ^-^  ^---^
 fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> {
     let line_length_way_to_long = "doc comment longer than 2^32 chars";
     let mut spans: Vec<(u32, u32)> = vec![];
     let mut current_start: u32 = 0;

     // tracker to decide if the last group of tabs is not closed by a non-tab character
     let mut is_active = false;

     // Note that we specifically need the char _byte_ indices here, not the positional indexes
     // within the char array to deal with multi-byte characters properly. `char_indices` does
     // exactly that. It provides an iterator over tuples of the form `(byte position, char)`.
     let char_indices: Vec<_> = the_str.char_indices().collect();

     if let [(_, '\t')] = char_indices.as_slice() {
         return vec![(0, 1)];
     }

     for entry in char_indices.windows(2) {
         match entry {
             [(_, '\t'), (_, '\t')] => {
                 // either string starts with double tab, then we have to set it active,
                 // otherwise is_active is true anyway
                 is_active = true;
             },
             [(_, _), (index_b, '\t')] => {
                 // as ['\t', '\t'] is excluded, this has to be a start of a tab group,
                 // set indices accordingly
                 is_active = true;
                 current_start = u32::try_from(*index_b).unwrap();
             },
             [(_, '\t'), (index_b, _)] => {
                 // this now has to be an end of the group, hence we have to push a new tuple
                 is_active = false;
                 spans.push((current_start, u32::try_from(*index_b).unwrap()));
             },
             _ => {},
         }
     }

     // only possible when tabs are at the end, insert last group
     if is_active {
         spans.push((
             current_start,
             u32::try_from(char_indices.last().unwrap().0 + 1).expect(line_length_way_to_long),
         ));
     }

     spans
 }

 #[cfg(test)]
 mod tests_for_get_chunks_of_tabs {
     use super::get_chunks_of_tabs;

     #[test]
     fn test_unicode_han_string() {
         let res = get_chunks_of_tabs(" \u{4f4d}\t");

         assert_eq!(res, vec![(4, 5)]);
     }

     #[test]
     fn test_empty_string() {
         let res = get_chunks_of_tabs("");

         assert_eq!(res, vec![]);
     }

     #[test]
     fn test_simple() {
         let res = get_chunks_of_tabs("sd\t\t\taa");

         assert_eq!(res, vec![(2, 5)]);
     }

     #[test]
     fn test_only_t() {
         let res = get_chunks_of_tabs("\t\t");

         assert_eq!(res, vec![(0, 2)]);
     }

     #[test]
     fn test_only_one_t() {
         let res = get_chunks_of_tabs("\t");

         assert_eq!(res, vec![(0, 1)]);
     }

     #[test]
     fn test_double() {
         let res = get_chunks_of_tabs("sd\tasd\t\taa");

         assert_eq!(res, vec![(2, 3), (6, 8)]);
     }

     #[test]
     fn test_start() {
         let res = get_chunks_of_tabs("\t\taa");

         assert_eq!(res, vec![(0, 2)]);
     }

     #[test]
     fn test_end() {
         let res = get_chunks_of_tabs("aa\t\t");

         assert_eq!(res, vec![(2, 4)]);
     }

     #[test]
     fn test_start_single() {
         let res = get_chunks_of_tabs("\taa");

         assert_eq!(res, vec![(0, 1)]);
     }

     #[test]
     fn test_end_single() {
         let res = get_chunks_of_tabs("aa\t");

         assert_eq!(res, vec![(2, 3)]);
     }

     #[test]
     fn test_no_tabs() {
         let res = get_chunks_of_tabs("dsfs");

         assert_eq!(res, vec![]);
     }
 }
	use clippy_utils::diagnostics::span_lint_and_sugg;
	use rustc_ast::ast;
	use rustc_errors::Applicability;
	use rustc_lint::{EarlyContext, EarlyLintPass};
	use rustc_session::{declare_lint_pass, declare_tool_lint};
	use rustc_span::{BytePos, Span};

	declare_clippy_lint! {
	/// ### What it does
	/// Checks doc comments for usage of tab characters.
	///
	/// ### Why is this bad?
	/// The rust style-guide promotes spaces instead of tabs for indentation.
	/// To keep a consistent view on the source, also doc comments should not have tabs.
	/// Also, explaining ascii-diagrams containing tabs can get displayed incorrectly when the
	/// display settings of the author and reader differ.
	///
	/// ### Example
	/// ```no_run
	/// ///
	/// /// Struct to hold two strings:
	/// /// - first one
	/// /// - second one
	/// pub struct DoubleString {
	/// ///
	/// /// - First String:
	/// /// - needs to be inside here
	/// first_string: String,
	/// ///
	/// /// - Second String:
	/// /// - needs to be inside here
	/// second_string: String,
	///}
	/// ```
	///
	/// Will be converted to:
	/// ```no_run
	/// ///
	/// /// Struct to hold two strings:
	/// /// - first one
	/// /// - second one
	/// pub struct DoubleString {
	/// ///
	/// /// - First String:
	/// /// - needs to be inside here
	/// first_string: String,
	/// ///
	/// /// - Second String:
	/// /// - needs to be inside here
	/// second_string: String,
	///}
	/// ```
	#[clippy::version = "1.41.0"]
	pub TABS_IN_DOC_COMMENTS,
	style,
	"using tabs in doc comments is not recommended"
	}

	declare_lint_pass!(TabsInDocComments => [TABS_IN_DOC_COMMENTS]);

	impl TabsInDocComments {
	fn warn_if_tabs_in_doc(cx: &EarlyContext<'_>, attr: &ast::Attribute) {
	if let ast::AttrKind::DocComment(_, comment) = attr.kind {
	let comment = comment.as_str();

	for (lo, hi) in get_chunks_of_tabs(comment) {
	// +3 skips the opening delimiter
	let new_span = Span::new(
	attr.span.lo() + BytePos(3 + lo),
	attr.span.lo() + BytePos(3 + hi),
	attr.span.ctxt(),
	attr.span.parent(),
	);
	span_lint_and_sugg(
	cx,
	TABS_IN_DOC_COMMENTS,
	new_span,
	"using tabs in doc comments is not recommended",
	"consider using four spaces per tab",
	" ".repeat((hi - lo) as usize),
	Applicability::MaybeIncorrect,
	);
	}
	}
	}
	}

	impl EarlyLintPass for TabsInDocComments {
	fn check_attribute(&mut self, cx: &EarlyContext<'_>, attribute: &ast::Attribute) {
	Self::warn_if_tabs_in_doc(cx, attribute);
	}
	}

	///
	/// scans the string for groups of tabs and returns the start(inclusive) and end positions
	/// (exclusive) of all groups
	/// e.g. "sd\tasd\t\taa" will be converted to [(2, 3), (6, 8)] as
	/// 012 3456 7 89
	/// ^-^ ^---^
	fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> {
	let line_length_way_to_long = "doc comment longer than 2^32 chars";
	let mut spans: Vec<(u32, u32)> = vec![];
	let mut current_start: u32 = 0;

	// tracker to decide if the last group of tabs is not closed by a non-tab character
	let mut is_active = false;

	// Note that we specifically need the char _byte_ indices here, not the positional indexes
	// within the char array to deal with multi-byte characters properly. `char_indices` does
	// exactly that. It provides an iterator over tuples of the form `(byte position, char)`.
	let char_indices: Vec<_> = the_str.char_indices().collect();

	if let [(_, '\t')] = char_indices.as_slice() {
	return vec![(0, 1)];
	}

	for entry in char_indices.windows(2) {
	match entry {
	[(_, '\t'), (_, '\t')] => {
	// either string starts with double tab, then we have to set it active,
	// otherwise is_active is true anyway
	is_active = true;
	},
	[(_, _), (index_b, '\t')] => {
	// as ['\t', '\t'] is excluded, this has to be a start of a tab group,
	// set indices accordingly
	is_active = true;
	current_start = u32::try_from(*index_b).unwrap();
	},
	[(_, '\t'), (index_b, _)] => {
	// this now has to be an end of the group, hence we have to push a new tuple
	is_active = false;
	spans.push((current_start, u32::try_from(*index_b).unwrap()));
	},
	_ => {},
	}
	}

	// only possible when tabs are at the end, insert last group
	if is_active {
	spans.push((
	current_start,
	u32::try_from(char_indices.last().unwrap().0 + 1).expect(line_length_way_to_long),
	));
	}

	spans
	}

	#[cfg(test)]
	mod tests_for_get_chunks_of_tabs {
	use super::get_chunks_of_tabs;

	#[test]
	fn test_unicode_han_string() {
	let res = get_chunks_of_tabs(" \u{4f4d}\t");

	assert_eq!(res, vec![(4, 5)]);
	}

	#[test]
	fn test_empty_string() {
	let res = get_chunks_of_tabs("");

	assert_eq!(res, vec![]);
	}

	#[test]
	fn test_simple() {
	let res = get_chunks_of_tabs("sd\t\t\taa");

	assert_eq!(res, vec![(2, 5)]);
	}

	#[test]
	fn test_only_t() {
	let res = get_chunks_of_tabs("\t\t");

	assert_eq!(res, vec![(0, 2)]);
	}

	#[test]
	fn test_only_one_t() {
	let res = get_chunks_of_tabs("\t");

	assert_eq!(res, vec![(0, 1)]);
	}

	#[test]
	fn test_double() {
	let res = get_chunks_of_tabs("sd\tasd\t\taa");

	assert_eq!(res, vec![(2, 3), (6, 8)]);
	}

	#[test]
	fn test_start() {
	let res = get_chunks_of_tabs("\t\taa");

	assert_eq!(res, vec![(0, 2)]);
	}

	#[test]
	fn test_end() {
	let res = get_chunks_of_tabs("aa\t\t");

	assert_eq!(res, vec![(2, 4)]);
	}

	#[test]
	fn test_start_single() {
	let res = get_chunks_of_tabs("\taa");

	assert_eq!(res, vec![(0, 1)]);
	}

	#[test]
	fn test_end_single() {
	let res = get_chunks_of_tabs("aa\t");

	assert_eq!(res, vec![(2, 3)]);
	}

	#[test]
	fn test_no_tabs() {
	let res = get_chunks_of_tabs("dsfs");

	assert_eq!(res, vec![]);
	}
	}