Skip to content

Commit

Permalink
🎨 add typos checking and fix errors
Browse files Browse the repository at this point in the history
  • Loading branch information
mishamsk committed Nov 5, 2024
1 parent 6d5422c commit 66df1af
Show file tree
Hide file tree
Showing 11 changed files with 66 additions and 58 deletions.
7 changes: 7 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,10 @@ inherits = "release"
debug = true
incremental = true
codegen-units = 3

[workspace.metadata.typos]
default.extend-ignore-identifiers-re = ["(?i)KWM[a-z0-9_]+"]
# One comment for TokenType
default.extend-ignore-words-re = ["LOWCASE"]
# One test with nOt in that specific case
default.extend-words = { "Ot" = "Ot" }
3 changes: 2 additions & 1 deletion Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ tasks:
cmds:
- |
if test -d .venv; then
echo Found existing virutal env at .venv. Removing...
echo Found existing virtual env at .venv. Removing...
rm -fR .venv
else
echo No venv exists. Creating anew...
Expand Down Expand Up @@ -54,6 +54,7 @@ tasks:
desc: "Run linters"
cmds:
- uv run pre-commit run --all-files
- typos

# Cleanup tasks
clean-pyc:
Expand Down
2 changes: 1 addition & 1 deletion crates/sas-lexer-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ enum Commands {
/// Run lexing over samples and generate various statistics used for
/// debugging and optimization.
Stats {
/// Path to put the resulting stat tabls to. If not provided
/// Path to put the resulting stat tables to. If not provided
/// only the summary report on console is produced
#[cfg(feature = "polars")]
#[arg(short, long)]
Expand Down
14 changes: 7 additions & 7 deletions crates/sas-lexer/src/lexer/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ const TOKEN_INFO_CAPACITY_DIVISOR: usize = 3;
/// Heuristic for determining an optimal initial capacity for unescaped string literals vector
/// I didn't do a scientific test of the frequency of quote usage, but between
/// %nrstr, %str, 'string with '' quote', "string with "" quote" and the fact
/// that one occurence of smth. like %% inside %nrstr will put the whole contents
/// that one occurrence of smth. like %% inside %nrstr will put the whole contents
/// into our buffer - thought we may afford overallocating. Let it be 5%
const STR_LIT_CAPACITY_DIVISOR: usize = 20;

Expand All @@ -136,7 +136,7 @@ const STR_LIT_CAPACITY_DIVISOR: usize = 20;
pub(super) struct WorkTokenizedBuffer {
line_infos: Vec<LineInfo>,
token_infos: Vec<TokenInfo>,
/// Stores unescaped string literals as a single continous string
/// Stores unescaped string literals as a single continuous string
/// Payloads of tokens that represent strings with escaped characters
/// store the range of the literal within this string.
string_literals_buffer: String,
Expand Down Expand Up @@ -243,7 +243,7 @@ impl WorkTokenizedBuffer {
"Token byte offset before previous token byte offset"
);
} else {
// It may be poosible for the first token to start at offset > 0
// It may be possible for the first token to start at offset > 0
// e.g. due to BOM
}

Expand Down Expand Up @@ -338,7 +338,7 @@ impl WorkTokenizedBuffer {
"Token byte offset before previous token byte offset"
);
} else {
// It may be poosible for the first token to start at offset > 0
// It may be possible for the first token to start at offset > 0
// e.g. due to BOM
}

Expand Down Expand Up @@ -528,7 +528,7 @@ pub struct ResolvedTokenInfo {
pub start: u32,

/// Zero-based char index of the token end in the source string. Will
/// point to the character immediatelly after the token.
/// point to the character immediately after the token.
/// Char here means a Unicode code point, not graphemes. This is
/// what Python uses to index strings, and IDEs show for cursor position.
/// u32 as we only support 4gb files
Expand All @@ -544,7 +544,7 @@ pub struct ResolvedTokenInfo {
pub end_line: u32,

/// Zero-based column of the token end on the end line.
/// This is the column of the character immediatelly after the token.
/// This is the column of the character immediately after the token.
pub end_column: u32,

/// Extra data associated with the token.
Expand Down Expand Up @@ -809,7 +809,7 @@ impl TokenizedBuffer {
.map_or(Err(ErrorKind::TokenIdxOutOfBounds), |t| Ok(t.channel))
}

/// Retruns the text slice from the source using the token range.
/// Returns the text slice from the source using the token range.
/// If the range is empty, returns `None`, not an empty string!
///
/// # Errors
Expand Down
2 changes: 1 addition & 1 deletion crates/sas-lexer/src/lexer/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ pub enum ErrorKind {
// chances of false-positives
// Codes 4001-4999. Make sure to preserve
// the existing codes & the range. The latter is used in classification impl
// CURENTLY UNUSED
// CURRENTLY UNUSED
// Internal errors. Codes 9001-9999. Make sure to preserve
// the existing codes & the range. The latter is used in classification impl
#[strum(message = "No checkpoint to rollback")]
Expand Down
4 changes: 2 additions & 2 deletions crates/sas-lexer/src/lexer/lexer_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ pub(crate) enum LexerMode {
///
/// If `check_macro_label` is true, it will also check if the next
/// non-ws or cstyle follower is `:`, which is a macro label. In this case
/// it will chang the preceeding `MacroIdentifier` token type to `MacroLabel`
/// it will change the preceding `MacroIdentifier` token type to `MacroLabel`
/// in addition to lexing `;` on hidden channel.
///
/// Note - it should always be preceded by the `WsOrCStyleCommentOnly` mode
Expand All @@ -257,7 +257,7 @@ pub(crate) enum LexerMode {
///
/// Then always adds necessary mode stack to parse the macro call arg value.
///
/// Note - it should alwys be preceded by the `WsOrCStyleCommentOnly` mode
/// Note - it should always be preceded by the `WsOrCStyleCommentOnly` mode
/// and a checkpoint created!
MaybeMacroCallArgAssign {
/// The packed flags for macro argument name or value. See `MacroArgNameValueFlags`
Expand Down
10 changes: 5 additions & 5 deletions crates/sas-lexer/src/lexer/macro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub(super) fn is_macro_amp<I: Iterator<Item = char>>(mut chars: I) -> (bool, u32

#[inline]
pub(super) fn is_macro_eval_quotable_op(c: char) -> bool {
// Expermientally shown to work! (ignores the %)
// Experimentally shown to work! (ignores the %)
// e.g. `%^ 0` returned 1 (true)
['~', '^', '='].contains(&c)
}
Expand Down Expand Up @@ -99,7 +99,7 @@ pub(super) const fn needs_macro_sep(
tok_type: TokenType,
) -> bool {
// Not following a proper statement delimiter
// And preceeds a standalone macro statement
// And precedes a standalone macro statement
!matches!(
prev_token_type,
None | Some(
Expand Down Expand Up @@ -173,7 +173,7 @@ pub(super) fn lex_macro_call_stat_or_label(
// we know it is not a keyword and can skip the test right away
let mut is_ascii = true;

// Eat the identifier. We can safely use `is_xid_continue` becase the caller
// Eat the identifier. We can safely use `is_xid_continue` because the caller
// already checked that the first character is a valid start of an identifier
cursor.eat_while(|c| {
if c.is_ascii() {
Expand Down Expand Up @@ -222,7 +222,7 @@ pub(super) fn lex_macro_call_stat_or_label(
.map_err(|()| ErrorKind::InternalErrorOutOfBounds)
}

/// Predicate to check if the following chracters are one of macro logical
/// Predicate to check if the following characters are one of macro logical
/// expression mnemonics (eq, ne, lt, le, gt, ge, and, or, not, in).
///
/// Must be passed an iterator that starts with the first character
Expand Down Expand Up @@ -290,7 +290,7 @@ pub(super) fn is_macro_eval_mnemonic<I: Iterator<Item = char>>(
pub(super) fn is_macro_stat(input: &str) -> bool {
debug_assert!(input.as_bytes().iter().next().map_or(false, |&c| c == b'%'));

// Unfortunatelly this one needs a very inefficient lookahead
// Unfortunately this one needs a very inefficient lookahead
// to check if we have any statement upfront.
let mut is_ascii = true;

Expand Down
Loading

0 comments on commit 66df1af

Please sign in to comment.