Skip to content

Commit

Permalink
🎨 add typos checking and fix errors
Browse files Browse the repository at this point in the history
  • Loading branch information
mishamsk committed Nov 5, 2024
1 parent 6d5422c commit 66df1af
Show file tree
Hide file tree
Showing 11 changed files with 66 additions and 58 deletions.
7 changes: 7 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,10 @@ inherits = "release"
debug = true
incremental = true
codegen-units = 3

[workspace.metadata.typos]
default.extend-ignore-identifiers-re = ["(?i)KWM[a-z0-9_]+"]
# One comment for TokenType
default.extend-ignore-words-re = ["LOWCASE"]
# One test with nOt in that specific case
default.extend-words = { "Ot" = "Ot" }
3 changes: 2 additions & 1 deletion Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ tasks:
cmds:
- |
if test -d .venv; then
echo Found existing virutal env at .venv. Removing...
echo Found existing virtual env at .venv. Removing...
rm -fR .venv
else
echo No venv exists. Creating anew...
Expand Down Expand Up @@ -54,6 +54,7 @@ tasks:
desc: "Run linters"
cmds:
- uv run pre-commit run --all-files
- typos

# Cleanup tasks
clean-pyc:
Expand Down
2 changes: 1 addition & 1 deletion crates/sas-lexer-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ enum Commands {
/// Run lexing over samples and generate various statistics used for
/// debugging and optimization.
Stats {
/// Path to put the resulting stat tabls to. If not provided
/// Path to put the resulting stat tables to. If not provided
/// only the summary report on console is produced
#[cfg(feature = "polars")]
#[arg(short, long)]
Expand Down
14 changes: 7 additions & 7 deletions crates/sas-lexer/src/lexer/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ const TOKEN_INFO_CAPACITY_DIVISOR: usize = 3;
/// Heuristic for determining an optimal initial capacity for unescaped string literals vector
/// I didn't do a scientific test of the frequency of quote usage, but between
/// %nrstr, %str, 'string with '' quote', "string with "" quote" and the fact
/// that one occurence of smth. like %% inside %nrstr will put the whole contents
/// that one occurrence of smth. like %% inside %nrstr will put the whole contents
/// into our buffer - thought we may afford overallocating. Let it be 5%
const STR_LIT_CAPACITY_DIVISOR: usize = 20;

Expand All @@ -136,7 +136,7 @@ const STR_LIT_CAPACITY_DIVISOR: usize = 20;
pub(super) struct WorkTokenizedBuffer {
line_infos: Vec<LineInfo>,
token_infos: Vec<TokenInfo>,
/// Stores unescaped string literals as a single continous string
/// Stores unescaped string literals as a single continuous string
/// Payloads of tokens that represent strings with escaped characters
/// store the range of the literal within this string.
string_literals_buffer: String,
Expand Down Expand Up @@ -243,7 +243,7 @@ impl WorkTokenizedBuffer {
"Token byte offset before previous token byte offset"
);
} else {
// It may be poosible for the first token to start at offset > 0
// It may be possible for the first token to start at offset > 0
// e.g. due to BOM
}

Expand Down Expand Up @@ -338,7 +338,7 @@ impl WorkTokenizedBuffer {
"Token byte offset before previous token byte offset"
);
} else {
// It may be poosible for the first token to start at offset > 0
// It may be possible for the first token to start at offset > 0
// e.g. due to BOM
}

Expand Down Expand Up @@ -528,7 +528,7 @@ pub struct ResolvedTokenInfo {
pub start: u32,

/// Zero-based char index of the token end in the source string. Will
/// point to the character immediatelly after the token.
/// point to the character immediately after the token.
/// Char here means a Unicode code point, not graphemes. This is
/// what Python uses to index strings, and IDEs show for cursor position.
/// u32 as we only support 4gb files
Expand All @@ -544,7 +544,7 @@ pub struct ResolvedTokenInfo {
pub end_line: u32,

/// Zero-based column of the token end on the end line.
/// This is the column of the character immediatelly after the token.
/// This is the column of the character immediately after the token.
pub end_column: u32,

/// Extra data associated with the token.
Expand Down Expand Up @@ -809,7 +809,7 @@ impl TokenizedBuffer {
.map_or(Err(ErrorKind::TokenIdxOutOfBounds), |t| Ok(t.channel))
}

/// Retruns the text slice from the source using the token range.
/// Returns the text slice from the source using the token range.
/// If the range is empty, returns `None`, not an empty string!
///
/// # Errors
Expand Down
2 changes: 1 addition & 1 deletion crates/sas-lexer/src/lexer/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ pub enum ErrorKind {
// chances of false-positives
// Codes 4001-4999. Make sure to preserve
// the existing codes & the range. The latter is used in classification impl
// CURENTLY UNUSED
// CURRENTLY UNUSED
// Internal errors. Codes 9001-9999. Make sure to preserve
// the existing codes & the range. The latter is used in classification impl
#[strum(message = "No checkpoint to rollback")]
Expand Down
4 changes: 2 additions & 2 deletions crates/sas-lexer/src/lexer/lexer_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ pub(crate) enum LexerMode {
///
/// If `check_macro_label` is true, it will also check if the next
/// non-ws or cstyle follower is `:`, which is a macro label. In this case
/// it will chang the preceeding `MacroIdentifier` token type to `MacroLabel`
/// it will change the preceding `MacroIdentifier` token type to `MacroLabel`
/// in addition to lexing `;` on hidden channel.
///
/// Note - it should always be preceded by the `WsOrCStyleCommentOnly` mode
Expand All @@ -257,7 +257,7 @@ pub(crate) enum LexerMode {
///
/// Then always adds necessary mode stack to parse the macro call arg value.
///
/// Note - it should alwys be preceded by the `WsOrCStyleCommentOnly` mode
/// Note - it should always be preceded by the `WsOrCStyleCommentOnly` mode
/// and a checkpoint created!
MaybeMacroCallArgAssign {
/// The packed flags for macro argument name or value. See `MacroArgNameValueFlags`
Expand Down
10 changes: 5 additions & 5 deletions crates/sas-lexer/src/lexer/macro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub(super) fn is_macro_amp<I: Iterator<Item = char>>(mut chars: I) -> (bool, u32

#[inline]
pub(super) fn is_macro_eval_quotable_op(c: char) -> bool {
// Expermientally shown to work! (ignores the %)
// Experimentally shown to work! (ignores the %)
// e.g. `%^ 0` returned 1 (true)
['~', '^', '='].contains(&c)
}
Expand Down Expand Up @@ -99,7 +99,7 @@ pub(super) const fn needs_macro_sep(
tok_type: TokenType,
) -> bool {
// Not following a proper statement delimiter
// And preceeds a standalone macro statement
// And precedes a standalone macro statement
!matches!(
prev_token_type,
None | Some(
Expand Down Expand Up @@ -173,7 +173,7 @@ pub(super) fn lex_macro_call_stat_or_label(
// we know it is not a keyword and can skip the test right away
let mut is_ascii = true;

// Eat the identifier. We can safely use `is_xid_continue` becase the caller
// Eat the identifier. We can safely use `is_xid_continue` because the caller
// already checked that the first character is a valid start of an identifier
cursor.eat_while(|c| {
if c.is_ascii() {
Expand Down Expand Up @@ -222,7 +222,7 @@ pub(super) fn lex_macro_call_stat_or_label(
.map_err(|()| ErrorKind::InternalErrorOutOfBounds)
}

/// Predicate to check if the following chracters are one of macro logical
/// Predicate to check if the following characters are one of macro logical
/// expression mnemonics (eq, ne, lt, le, gt, ge, and, or, not, in).
///
/// Must be passed an iterator that starts with the first character
Expand Down Expand Up @@ -290,7 +290,7 @@ pub(super) fn is_macro_eval_mnemonic<I: Iterator<Item = char>>(
pub(super) fn is_macro_stat(input: &str) -> bool {
debug_assert!(input.as_bytes().iter().next().map_or(false, |&c| c == b'%'));

// Unfortunatelly this one needs a very inefficient lookahead
// Unfortunately this one needs a very inefficient lookahead
// to check if we have any statement upfront.
let mut is_ascii = true;

Expand Down
Loading

0 comments on commit 66df1af

Please sign in to comment.