From 29fef1e7aa47216d3b9fe4a9416f6021c14b4561 Mon Sep 17 00:00:00 2001 From: Arthur <48595927+ArthurZucker@users.noreply.github.com> Date: Tue, 12 Mar 2024 21:24:21 +1100 Subject: [PATCH] [`remove black`] And use ruff (#1436) * nits * Fixing deps. * Ruff update. * Import order matters. * Fix. * Revert ruff fix. * Visualizer. * Putting back the imports. --------- Co-authored-by: Nicolas Patry --- bindings/python/Makefile | 6 +- bindings/python/examples/example.py | 11 +-- .../python/examples/train_with_datasets.py | 2 +- .../python/py_src/tokenizers/__init__.pyi | 89 +++++++++++++++++-- .../py_src/tokenizers/decoders/__init__.pyi | 21 +++-- .../py_src/tokenizers/models/__init__.pyi | 35 ++++++-- .../tokenizers/normalizers/__init__.pyi | 40 ++++++--- .../tokenizers/pre_tokenizers/__init__.pyi | 35 +++++--- .../py_src/tokenizers/processors/__init__.pyi | 17 ++-- .../py_src/tokenizers/tools/visualizer.py | 6 +- .../py_src/tokenizers/trainers/__init__.pyi | 2 - bindings/python/pyproject.toml | 20 ++++- bindings/python/scripts/convert.py | 24 ++--- .../python/scripts/sentencepiece_extractor.py | 9 +- bindings/python/scripts/spm_parity_check.py | 39 +++----- bindings/python/stub.py | 31 +++---- bindings/python/tests/bindings/test_models.py | 1 - .../python/tests/bindings/test_normalizers.py | 3 +- .../python/tests/bindings/test_processors.py | 8 +- .../python/tests/bindings/test_tokenizer.py | 5 +- .../tests/documentation/test_pipeline.py | 1 - .../tests/documentation/test_quicktour.py | 4 - .../test_tutorial_train_from_iterators.py | 1 + .../implementations/test_base_tokenizer.py | 2 - .../implementations/test_bert_wordpiece.py | 2 - .../implementations/test_byte_level_bpe.py | 2 - .../tests/implementations/test_char_bpe.py | 2 - .../implementations/test_sentencepiece.py | 2 - bindings/python/tests/test_serialization.py | 7 +- 29 files changed, 258 insertions(+), 169 deletions(-) diff --git a/bindings/python/Makefile b/bindings/python/Makefile index f7a05dac7..8eeaf83a1 100644 --- a/bindings/python/Makefile +++ b/bindings/python/Makefile @@ -8,12 +8,14 @@ check_dirs := examples py_src/tokenizers tests # Format source code automatically style: python stub.py - black --line-length 119 --target-version py35 $(check_dirs) + ruff check $(check_dirs) --fix + ruff format $(check_dirs)t # Check the source code is formatted correctly check-style: python stub.py --check - black --check --line-length 119 --target-version py35 examples py_src/tokenizers tests + ruff check examples py_src/tokenizers tests + ruff format --check examples py_src/tokenizers tests TESTS_RESOURCES = $(DATA_DIR)/small.txt $(DATA_DIR)/roberta.json diff --git a/bindings/python/examples/example.py b/bindings/python/examples/example.py index 97b903401..d62390644 100644 --- a/bindings/python/examples/example.py +++ b/bindings/python/examples/example.py @@ -4,16 +4,15 @@ from tqdm import tqdm - -logging.getLogger("transformers").disabled = True -logging.getLogger("transformers.tokenization_utils").disabled = True - from tokenizers import Tokenizer, decoders, pre_tokenizers from tokenizers.models import BPE, WordPiece from tokenizers.normalizers import BertNormalizer from tokenizers.processors import BertProcessing from transformers import BertTokenizer, GPT2Tokenizer +logging.getLogger("transformers").disabled = True +logging.getLogger("transformers.tokenization_utils").disabled = True + parser = argparse.ArgumentParser() parser.add_argument("--type", default="gpt2", type=str, help="The type of tokenizer (bert|gpt2)") @@ -51,9 +50,7 @@ 
If the implementation is hard to explain, it's a bad idea. If the implementation is easy to explain, it may be a good idea. Namespaces are one honking great idea -- let's do more of those! -""".split( - "\n" - ) +""".split("\n") if args.type == "gpt2": print("Running GPT-2 tokenizer") diff --git a/bindings/python/examples/train_with_datasets.py b/bindings/python/examples/train_with_datasets.py index 7c3168342..b54376a2a 100644 --- a/bindings/python/examples/train_with_datasets.py +++ b/bindings/python/examples/train_with_datasets.py @@ -1,6 +1,6 @@ import datasets -from tokenizers import Tokenizer, models, normalizers, pre_tokenizers, trainers +from tokenizers import Tokenizer, models, normalizers, pre_tokenizers # Build a tokenizer diff --git a/bindings/python/py_src/tokenizers/__init__.pyi b/bindings/python/py_src/tokenizers/__init__.pyi index 7c21c5b56..5dbc665dc 100644 --- a/bindings/python/py_src/tokenizers/__init__.pyi +++ b/bindings/python/py_src/tokenizers/__init__.pyi @@ -34,39 +34,44 @@ class AddedToken: Defines whether this token should be skipped when decoding. """ - def __init__(self, content, single_word=False, lstrip=False, rstrip=False, normalized=True, special=False): pass + @property def content(self): """ Get the content of this :obj:`AddedToken` """ pass + @property def lstrip(self): """ Get the value of the :obj:`lstrip` option """ pass + @property def normalized(self): """ Get the value of the :obj:`normalized` option """ pass + @property def rstrip(self): """ Get the value of the :obj:`rstrip` option """ pass + @property def single_word(self): """ Get the value of the :obj:`single_word` option """ pass + @property def special(self): """ @@ -78,7 +83,6 @@ class Encoding: """ The :class:`~tokenizers.Encoding` represents the output of a :class:`~tokenizers.Tokenizer`. """ - @property def attention_mask(self): """ @@ -92,6 +96,7 @@ class Encoding: :obj:`List[int]`: The attention mask """ pass + def char_to_token(self, char_pos, sequence_index=0): """ Get the token that contains the char at the given position in the input sequence. @@ -106,6 +111,7 @@ class Encoding: :obj:`int`: The index of the token that contains this char in the encoded sequence """ pass + def char_to_word(self, char_pos, sequence_index=0): """ Get the word that contains the char at the given position in the input sequence. @@ -120,6 +126,7 @@ class Encoding: :obj:`int`: The index of the word that contains this char in the input sequence """ pass + @property def ids(self): """ @@ -132,6 +139,7 @@ class Encoding: :obj:`List[int]`: The list of IDs """ pass + @staticmethod def merge(encodings, growing_offsets=True): """ @@ -148,6 +156,7 @@ class Encoding: :class:`~tokenizers.Encoding`: The resulting Encoding """ pass + @property def n_sequences(self): """ @@ -157,6 +166,7 @@ class Encoding: :obj:`int`: The number of sequences in this :class:`~tokenizers.Encoding` """ pass + @property def offsets(self): """ @@ -169,6 +179,7 @@ class Encoding: A :obj:`List` of :obj:`Tuple[int, int]`: The list of offsets """ pass + @property def overflowing(self): """ @@ -183,6 +194,7 @@ class Encoding: maximum length. """ pass + def pad(self, length, direction="right", pad_id=0, pad_type_id=0, pad_token="[PAD]"): """ Pad the :class:`~tokenizers.Encoding` at the given length @@ -204,6 +216,7 @@ class Encoding: The pad token to use """ pass + @property def sequence_ids(self): """ @@ -217,6 +230,7 @@ class Encoding: A :obj:`List` of :obj:`Optional[int]`: A list of optional sequence index. 
""" pass + def set_sequence_id(self, sequence_id): """ Set the given sequence index @@ -225,6 +239,7 @@ class Encoding: :class:`~tokenizers.Encoding`. """ pass + @property def special_tokens_mask(self): """ @@ -236,6 +251,7 @@ class Encoding: :obj:`List[int]`: The special tokens mask """ pass + def token_to_chars(self, token_index): """ Get the offsets of the token at the given index. @@ -252,6 +268,7 @@ class Encoding: :obj:`Tuple[int, int]`: The token offsets :obj:`(first, last + 1)` """ pass + def token_to_sequence(self, token_index): """ Get the index of the sequence represented by the given token. @@ -267,6 +284,7 @@ class Encoding: :obj:`int`: The sequence id of the given token """ pass + def token_to_word(self, token_index): """ Get the index of the word that contains the token in one of the input sequences. @@ -283,6 +301,7 @@ class Encoding: :obj:`int`: The index of the word in the relevant input sequence. """ pass + @property def tokens(self): """ @@ -294,6 +313,7 @@ class Encoding: :obj:`List[str]`: The list of tokens """ pass + def truncate(self, max_length, stride=0, direction="right"): """ Truncate the :class:`~tokenizers.Encoding` at the given length @@ -312,6 +332,7 @@ class Encoding: Truncate direction """ pass + @property def type_ids(self): """ @@ -324,6 +345,7 @@ class Encoding: :obj:`List[int]`: The list of type ids """ pass + @property def word_ids(self): """ @@ -341,6 +363,7 @@ class Encoding: A :obj:`List` of :obj:`Optional[int]`: A list of optional word index. """ pass + def word_to_chars(self, word_index, sequence_index=0): """ Get the offsets of the word at the given index in one of the input sequences. @@ -355,6 +378,7 @@ class Encoding: :obj:`Tuple[int, int]`: The range of characters (span) :obj:`(first, last + 1)` """ pass + def word_to_tokens(self, word_index, sequence_index=0): """ Get the encoded tokens corresponding to the word at the given index @@ -370,6 +394,7 @@ class Encoding: :obj:`Tuple[int, int]`: The range of tokens: :obj:`(first, last + 1)` """ pass + @property def words(self): """ @@ -404,37 +429,42 @@ class NormalizedString: sequence: str: The string sequence used to initialize this NormalizedString """ - def append(self, s): """ Append the given sequence to the string """ pass + def clear(self): """ Clears the string """ pass + def filter(self, func): """ Filter each character of the string using the given func """ pass + def for_each(self, func): """ Calls the given function for each character of the string """ pass + def lowercase(self): """ Lowercase the string """ pass + def lstrip(self): """ Strip the left of the string """ pass + def map(self, func): """ Calls the given function for each character of the string @@ -443,37 +473,44 @@ class NormalizedString: returned value **must** be a str of length 1 (ie a character). 
""" pass + def nfc(self): """ Runs the NFC normalization """ pass + def nfd(self): """ Runs the NFD normalization """ pass + def nfkc(self): """ Runs the NFKC normalization """ pass + def nfkd(self): """ Runs the NFKD normalization """ pass + @property def normalized(self): """ The normalized part of the string """ pass + def prepend(self, s): """ Prepend the given sequence to the string """ pass + def replace(self, pattern, content): """ Replace the content of the given pattern with the provided content @@ -486,16 +523,19 @@ class NormalizedString: The content to be used as replacement """ pass + def rstrip(self): """ Strip the right of the string """ pass + def slice(self, range): """ Slice the string using the given range """ pass + def split(self, pattern, behavior): """ Split the NormalizedString using the given pattern and the specified behavior @@ -513,11 +553,13 @@ class NormalizedString: A list of NormalizedString, representing each split """ pass + def strip(self): """ Strip both ends of the string """ pass + def uppercase(self): """ Uppercase the string @@ -542,9 +584,9 @@ class PreTokenizedString: sequence: str: The string sequence used to initialize this PreTokenizedString """ - def __init__(self, sequence): pass + def get_splits(self, offset_referential="original", offset_type="char"): """ Get the splits currently managed by the PreTokenizedString @@ -565,6 +607,7 @@ class PreTokenizedString: A list of splits """ pass + def normalize(self, func): """ Normalize each split of the `PreTokenizedString` using the given `func` @@ -576,6 +619,7 @@ class PreTokenizedString: NormalizedString allow its modification. """ pass + def split(self, func): """ Split the PreTokenizedString using the given `func` @@ -590,6 +634,7 @@ class PreTokenizedString: should come from calling either `.split` or `.slice` on the received one. """ pass + def to_encoding(self, type_id=0, word_idx=None): """ Return an Encoding generated from this PreTokenizedString @@ -607,6 +652,7 @@ class PreTokenizedString: An Encoding """ pass + def tokenize(self, func): """ Tokenize each split of the `PreTokenizedString` using the given `func` @@ -622,7 +668,6 @@ class Regex: """ Instantiate a new Regex with the given pattern """ - def __init__(self, pattern): pass @@ -639,9 +684,9 @@ class Tokenizer: The core algorithm that this :obj:`Tokenizer` should be using. """ - def __init__(self, model): pass + def add_special_tokens(self, tokens): """ Add the given special tokens to the Tokenizer. @@ -662,6 +707,7 @@ class Tokenizer: :obj:`int`: The number of tokens that were created in the vocabulary """ pass + def add_tokens(self, tokens): """ Add the given tokens to the vocabulary @@ -678,6 +724,7 @@ class Tokenizer: :obj:`int`: The number of tokens that were created in the vocabulary """ pass + def decode(self, ids, skip_special_tokens=True): """ Decode the given list of ids back to a string @@ -695,6 +742,7 @@ class Tokenizer: :obj:`str`: The decoded string """ pass + def decode_batch(self, sequences, skip_special_tokens=True): """ Decode a batch of ids back to their corresponding string @@ -710,12 +758,14 @@ class Tokenizer: :obj:`List[str]`: A list of decoded strings """ pass + @property def decoder(self): """ The `optional` :class:`~tokenizers.decoders.Decoder` in use by the Tokenizer """ pass + def enable_padding( self, direction="right", pad_id=0, pad_type_id=0, pad_token="[PAD]", length=None, pad_to_multiple_of=None ): @@ -745,6 +795,7 @@ class Tokenizer: the longest sequence in a batch. 
""" pass + def enable_truncation(self, max_length, stride=0, strategy="longest_first", direction="right"): """ Enable truncation @@ -765,6 +816,7 @@ class Tokenizer: Truncate direction """ pass + def encode(self, sequence, pair=None, is_pretokenized=False, add_special_tokens=True): """ Encode the given sequence and pair. This method can process raw text sequences @@ -803,6 +855,7 @@ class Tokenizer: """ pass + def encode_batch(self, input, is_pretokenized=False, add_special_tokens=True): """ Encode the given batch of inputs. This method accept both raw text sequences @@ -838,6 +891,7 @@ class Tokenizer: """ pass + @property def encode_special_tokens(self): """ @@ -850,6 +904,7 @@ class Tokenizer: """ pass + @staticmethod def from_buffer(buffer): """ @@ -863,6 +918,7 @@ class Tokenizer: :class:`~tokenizers.Tokenizer`: The new tokenizer """ pass + @staticmethod def from_file(path): """ @@ -877,6 +933,7 @@ class Tokenizer: :class:`~tokenizers.Tokenizer`: The new tokenizer """ pass + @staticmethod def from_pretrained(identifier, revision="main", auth_token=None): """ @@ -897,6 +954,7 @@ class Tokenizer: :class:`~tokenizers.Tokenizer`: The new tokenizer """ pass + @staticmethod def from_str(json): """ @@ -911,6 +969,7 @@ class Tokenizer: :class:`~tokenizers.Tokenizer`: The new tokenizer """ pass + def get_added_tokens_decoder(self): """ Get the underlying vocabulary @@ -919,6 +978,7 @@ class Tokenizer: :obj:`Dict[int, AddedToken]`: The vocabulary """ pass + def get_vocab(self, with_added_tokens=True): """ Get the underlying vocabulary @@ -931,6 +991,7 @@ class Tokenizer: :obj:`Dict[str, int]`: The vocabulary """ pass + def get_vocab_size(self, with_added_tokens=True): """ Get the size of the underlying vocabulary @@ -943,6 +1004,7 @@ class Tokenizer: :obj:`int`: The size of the vocabulary """ pass + def id_to_token(self, id): """ Convert the given id to its corresponding token if it exists @@ -955,28 +1017,33 @@ class Tokenizer: :obj:`Optional[str]`: An optional token, :obj:`None` if out of vocabulary """ pass + @property def model(self): """ The :class:`~tokenizers.models.Model` in use by the Tokenizer """ pass + def no_padding(self): """ Disable padding """ pass + def no_truncation(self): """ Disable truncation """ pass + @property def normalizer(self): """ The `optional` :class:`~tokenizers.normalizers.Normalizer` in use by the Tokenizer """ pass + def num_special_tokens_to_add(self, is_pair): """ Return the number of special tokens that would be added for single/pair sentences. @@ -984,6 +1051,7 @@ class Tokenizer: :return: """ pass + @property def padding(self): """ @@ -996,6 +1064,7 @@ class Tokenizer: A dict with the current padding parameters if padding is enabled """ pass + def post_process(self, encoding, pair=None, add_special_tokens=True): """ Apply all the post-processing steps to the given encodings. @@ -1022,18 +1091,21 @@ class Tokenizer: :class:`~tokenizers.Encoding`: The final post-processed encoding """ pass + @property def post_processor(self): """ The `optional` :class:`~tokenizers.processors.PostProcessor` in use by the Tokenizer """ pass + @property def pre_tokenizer(self): """ The `optional` :class:`~tokenizers.pre_tokenizers.PreTokenizer` in use by the Tokenizer """ pass + def save(self, path, pretty=True): """ Save the :class:`~tokenizers.Tokenizer` to the file at the given path. @@ -1046,6 +1118,7 @@ class Tokenizer: Whether the JSON file should be pretty formatted. 
""" pass + def to_str(self, pretty=False): """ Gets a serialized string representing this :class:`~tokenizers.Tokenizer`. @@ -1058,6 +1131,7 @@ class Tokenizer: :obj:`str`: A string representing the serialized Tokenizer """ pass + def token_to_id(self, token): """ Convert the given token to its corresponding id if it exists @@ -1070,6 +1144,7 @@ class Tokenizer: :obj:`Optional[int]`: An optional id, :obj:`None` if out of vocabulary """ pass + def train(self, files, trainer=None): """ Train the Tokenizer using the given files. @@ -1086,6 +1161,7 @@ class Tokenizer: An optional trainer that should be used to train our Model """ pass + def train_from_iterator(self, iterator, trainer=None, length=None): """ Train the Tokenizer using the provided iterator. @@ -1109,6 +1185,7 @@ class Tokenizer: provide meaningful progress tracking """ pass + @property def truncation(self): """ diff --git a/bindings/python/py_src/tokenizers/decoders/__init__.pyi b/bindings/python/py_src/tokenizers/decoders/__init__.pyi index 83a0e827d..94dda2354 100644 --- a/bindings/python/py_src/tokenizers/decoders/__init__.pyi +++ b/bindings/python/py_src/tokenizers/decoders/__init__.pyi @@ -6,7 +6,6 @@ class Decoder: This class is not supposed to be instantiated directly. Instead, any implementation of a Decoder will return an instance of this class when instantiated. """ - def decode(self, tokens): """ Decode the given list of tokens to a final string @@ -29,9 +28,9 @@ class BPEDecoder(Decoder): The suffix that was used to caracterize an end-of-word. This suffix will be replaced by whitespaces during the decoding """ - def __init__(self, suffix=""): pass + def decode(self, tokens): """ Decode the given list of tokens to a final string @@ -53,9 +52,9 @@ class ByteFallback(Decoder): cannot be decoded you will get � instead for each inconvertable byte token """ - def __init__(self): pass + def decode(self, tokens): """ Decode the given list of tokens to a final string @@ -76,9 +75,9 @@ class ByteLevel(Decoder): This decoder is to be used in tandem with the :class:`~tokenizers.pre_tokenizers.ByteLevel` :class:`~tokenizers.pre_tokenizers.PreTokenizer`. """ - def __init__(self): pass + def decode(self, tokens): """ Decode the given list of tokens to a final string @@ -105,9 +104,9 @@ class CTC(Decoder): Whether to cleanup some tokenization artifacts. Mainly spaces before punctuation, and some abbreviated english forms. """ - def __init__(self, pad_token="", word_delimiter_token="|", cleanup=True): pass + def decode(self, tokens): """ Decode the given list of tokens to a final string @@ -128,9 +127,9 @@ class Fuse(Decoder): This is the last step of decoding, this decoder exists only if there is need to add other decoders *after* the fusion """ - def __init__(self): pass + def decode(self, tokens): """ Decode the given list of tokens to a final string @@ -157,9 +156,9 @@ class Metaspace(Decoder): Whether to add a space to the first word if there isn't already one. This lets us treat `hello` exactly like `say hello`. """ - def __init__(self, replacement="▁", add_prefix_space=True): pass + def decode(self, tokens): """ Decode the given list of tokens to a final string @@ -180,9 +179,9 @@ class Replace(Decoder): This decoder is to be used in tandem with the :class:`~tokenizers.pre_tokenizers.Replace` :class:`~tokenizers.pre_tokenizers.PreTokenizer`. 
""" - def __init__(self, pattern, content): pass + def decode(self, tokens): """ Decode the given list of tokens to a final string @@ -204,9 +203,9 @@ class Sequence(Decoder): decoders (:obj:`List[Decoder]`) The decoders that need to be chained """ - def __init__(self, decoders): pass + def decode(self, tokens): """ Decode the given list of tokens to a final string @@ -225,9 +224,9 @@ class Strip(Decoder): Strip normalizer Strips n left characters of each token, or n right characters of each token """ - def __init__(self, content, left=0, right=0): pass + def decode(self, tokens): """ Decode the given list of tokens to a final string @@ -253,9 +252,9 @@ class WordPiece(Decoder): Whether to cleanup some tokenization artifacts. Mainly spaces before punctuation, and some abbreviated english forms. """ - def __init__(self, prefix="##", cleanup=True): pass + def decode(self, tokens): """ Decode the given list of tokens to a final string diff --git a/bindings/python/py_src/tokenizers/models/__init__.pyi b/bindings/python/py_src/tokenizers/models/__init__.pyi index 0218f8e56..b46f32f25 100644 --- a/bindings/python/py_src/tokenizers/models/__init__.pyi +++ b/bindings/python/py_src/tokenizers/models/__init__.pyi @@ -8,7 +8,6 @@ class Model: This class cannot be constructed directly. Please use one of the concrete models. """ - def get_trainer(self): """ Get the associated :class:`~tokenizers.trainers.Trainer` @@ -20,6 +19,7 @@ class Model: :class:`~tokenizers.trainers.Trainer`: The Trainer used to train this model """ pass + def id_to_token(self, id): """ Get the token associated to an ID @@ -32,6 +32,7 @@ class Model: :obj:`str`: The token associated to the ID """ pass + def save(self, folder, prefix): """ Save the current model @@ -51,6 +52,7 @@ class Model: :obj:`List[str]`: The list of saved files """ pass + def token_to_id(self, tokens): """ Get the ID associated to a token @@ -63,6 +65,7 @@ class Model: :obj:`int`: The ID associated to the token """ pass + def tokenize(self, sequence): """ Tokenize a sequence @@ -110,7 +113,6 @@ class BPE(Model): byte_fallback (:obj:`bool`, `optional`): Whether to use spm byte-fallback trick (defaults to False) """ - def __init__( self, vocab=None, @@ -124,6 +126,7 @@ class BPE(Model): byte_fallback=False, ): pass + @staticmethod def from_file(cls, vocab, merge, **kwargs): """ @@ -149,6 +152,7 @@ class BPE(Model): :class:`~tokenizers.models.BPE`: An instance of BPE loaded from these files """ pass + def get_trainer(self): """ Get the associated :class:`~tokenizers.trainers.Trainer` @@ -160,6 +164,7 @@ class BPE(Model): :class:`~tokenizers.trainers.Trainer`: The Trainer used to train this model """ pass + def id_to_token(self, id): """ Get the token associated to an ID @@ -172,6 +177,7 @@ class BPE(Model): :obj:`str`: The token associated to the ID """ pass + @staticmethod def read_file(self, vocab, merges): """ @@ -193,6 +199,7 @@ class BPE(Model): The vocabulary and merges loaded into memory """ pass + def save(self, folder, prefix): """ Save the current model @@ -212,6 +219,7 @@ class BPE(Model): :obj:`List[str]`: The list of saved files """ pass + def token_to_id(self, tokens): """ Get the ID associated to a token @@ -224,6 +232,7 @@ class BPE(Model): :obj:`int`: The ID associated to the token """ pass + def tokenize(self, sequence): """ Tokenize a sequence @@ -245,9 +254,9 @@ class Unigram(Model): vocab (:obj:`List[Tuple[str, float]]`, `optional`, `optional`): A list of vocabulary items and their relative score [("am", -0.2442),...] 
""" - def __init__(self, vocab, unk_id, byte_fallback): pass + def get_trainer(self): """ Get the associated :class:`~tokenizers.trainers.Trainer` @@ -259,6 +268,7 @@ class Unigram(Model): :class:`~tokenizers.trainers.Trainer`: The Trainer used to train this model """ pass + def id_to_token(self, id): """ Get the token associated to an ID @@ -271,6 +281,7 @@ class Unigram(Model): :obj:`str`: The token associated to the ID """ pass + def save(self, folder, prefix): """ Save the current model @@ -290,6 +301,7 @@ class Unigram(Model): :obj:`List[str]`: The list of saved files """ pass + def token_to_id(self, tokens): """ Get the ID associated to a token @@ -302,6 +314,7 @@ class Unigram(Model): :obj:`int`: The ID associated to the token """ pass + def tokenize(self, sequence): """ Tokenize a sequence @@ -328,9 +341,9 @@ class WordLevel(Model): unk_token (:obj:`str`, `optional`): The unknown token to be used by the model. """ - def __init__(self, vocab, unk_token): pass + @staticmethod def from_file(vocab, unk_token): """ @@ -353,6 +366,7 @@ class WordLevel(Model): :class:`~tokenizers.models.WordLevel`: An instance of WordLevel loaded from file """ pass + def get_trainer(self): """ Get the associated :class:`~tokenizers.trainers.Trainer` @@ -364,6 +378,7 @@ class WordLevel(Model): :class:`~tokenizers.trainers.Trainer`: The Trainer used to train this model """ pass + def id_to_token(self, id): """ Get the token associated to an ID @@ -376,6 +391,7 @@ class WordLevel(Model): :obj:`str`: The token associated to the ID """ pass + @staticmethod def read_file(vocab): """ @@ -393,6 +409,7 @@ class WordLevel(Model): :obj:`Dict[str, int]`: The vocabulary as a :obj:`dict` """ pass + def save(self, folder, prefix): """ Save the current model @@ -412,6 +429,7 @@ class WordLevel(Model): :obj:`List[str]`: The list of saved files """ pass + def token_to_id(self, tokens): """ Get the ID associated to a token @@ -424,6 +442,7 @@ class WordLevel(Model): :obj:`int`: The ID associated to the token """ pass + def tokenize(self, sequence): """ Tokenize a sequence @@ -451,9 +470,9 @@ class WordPiece(Model): max_input_chars_per_word (:obj:`int`, `optional`): The maximum number of characters to authorize in a single word. 
""" - def __init__(self, vocab, unk_token, max_input_chars_per_word): pass + @staticmethod def from_file(vocab, **kwargs): """ @@ -476,6 +495,7 @@ class WordPiece(Model): :class:`~tokenizers.models.WordPiece`: An instance of WordPiece loaded from file """ pass + def get_trainer(self): """ Get the associated :class:`~tokenizers.trainers.Trainer` @@ -487,6 +507,7 @@ class WordPiece(Model): :class:`~tokenizers.trainers.Trainer`: The Trainer used to train this model """ pass + def id_to_token(self, id): """ Get the token associated to an ID @@ -499,6 +520,7 @@ class WordPiece(Model): :obj:`str`: The token associated to the ID """ pass + @staticmethod def read_file(vocab): """ @@ -517,6 +539,7 @@ class WordPiece(Model): :obj:`Dict[str, int]`: The vocabulary as a :obj:`dict` """ pass + def save(self, folder, prefix): """ Save the current model @@ -536,6 +559,7 @@ class WordPiece(Model): :obj:`List[str]`: The list of saved files """ pass + def token_to_id(self, tokens): """ Get the ID associated to a token @@ -548,6 +572,7 @@ class WordPiece(Model): :obj:`int`: The ID associated to the token """ pass + def tokenize(self, sequence): """ Tokenize a sequence diff --git a/bindings/python/py_src/tokenizers/normalizers/__init__.pyi b/bindings/python/py_src/tokenizers/normalizers/__init__.pyi index 09c2d8397..507d44731 100644 --- a/bindings/python/py_src/tokenizers/normalizers/__init__.pyi +++ b/bindings/python/py_src/tokenizers/normalizers/__init__.pyi @@ -6,7 +6,6 @@ class Normalizer: This class is not supposed to be instantiated directly. Instead, any implementation of a Normalizer will return an instance of this class when instantiated. """ - def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -22,6 +21,7 @@ class Normalizer: :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -62,9 +62,9 @@ class BertNormalizer(Normalizer): lowercase (:obj:`bool`, `optional`, defaults to :obj:`True`): Whether to lowercase. 
""" - def __init__(self, clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True): pass + def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -80,6 +80,7 @@ class BertNormalizer(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -102,9 +103,9 @@ class Lowercase(Normalizer): """ Lowercase Normalizer """ - def __init__(self): pass + def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -120,6 +121,7 @@ class Lowercase(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -142,9 +144,9 @@ class NFC(Normalizer): """ NFC Unicode Normalizer """ - def __init__(self): pass + def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -160,6 +162,7 @@ class NFC(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -182,9 +185,9 @@ class NFD(Normalizer): """ NFD Unicode Normalizer """ - def __init__(self): pass + def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -200,6 +203,7 @@ class NFD(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -222,9 +226,9 @@ class NFKC(Normalizer): """ NFKC Unicode Normalizer """ - def __init__(self): pass + def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -240,6 +244,7 @@ class NFKC(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -262,9 +267,9 @@ class NFKD(Normalizer): """ NFKD Unicode Normalizer """ - def __init__(self): pass + def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -280,6 +285,7 @@ class NFKD(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -302,9 +308,9 @@ class Nmt(Normalizer): """ Nmt normalizer """ - def __init__(self): pass + def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -320,6 +326,7 @@ class Nmt(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -343,9 +350,9 @@ class Precompiled(Normalizer): Precompiled normalizer Don't use manually it is used for compatiblity for SentencePiece. 
""" - def __init__(self, precompiled_charsmap): pass + def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -361,6 +368,7 @@ class Precompiled(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -383,9 +391,9 @@ class Prepend(Normalizer): """ Prepend normalizer """ - def __init__(self, prepend): pass + def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -401,6 +409,7 @@ class Prepend(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -423,9 +432,9 @@ class Replace(Normalizer): """ Replace normalizer """ - def __init__(self, pattern, content): pass + def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -441,6 +450,7 @@ class Replace(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -468,7 +478,6 @@ class Sequence(Normalizer): normalizers (:obj:`List[Normalizer]`): A list of Normalizer to be run as a sequence """ - def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -484,6 +493,7 @@ class Sequence(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -506,9 +516,9 @@ class Strip(Normalizer): """ Strip normalizer """ - def __init__(self, left=True, right=True): pass + def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -524,6 +534,7 @@ class Strip(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string @@ -546,9 +557,9 @@ class StripAccents(Normalizer): """ StripAccents normalizer """ - def __init__(self): pass + def normalize(self, normalized): """ Normalize a :class:`~tokenizers.NormalizedString` in-place @@ -564,6 +575,7 @@ class StripAccents(Normalizer): :class:`~tokenizers.normalizers.Normalizer` """ pass + def normalize_str(self, sequence): """ Normalize the given string diff --git a/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi b/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi index e3cb84dd2..9e975326f 100644 --- a/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi +++ b/bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi @@ -6,7 +6,6 @@ class PreTokenizer: This class is not supposed to be instantiated directly. Instead, any implementation of a PreTokenizer will return an instance of this class when instantiated. """ - def pre_tokenize(self, pretok): """ Pre-tokenize a :class:`~tokenizers.PyPreTokenizedString` in-place @@ -23,6 +22,7 @@ class PreTokenizer: :class:`~tokenizers.pre_tokenizers.PreTokenizer` """ pass + def pre_tokenize_str(self, sequence): """ Pre tokenize the given string @@ -50,9 +50,9 @@ class BertPreTokenizer(PreTokenizer): This pre-tokenizer splits tokens on spaces, and also on punctuation. Each occurence of a punctuation character will be treated separately. 
""" - def __init__(self): pass + def pre_tokenize(self, pretok): """ Pre-tokenize a :class:`~tokenizers.PyPreTokenizedString` in-place @@ -69,6 +69,7 @@ class BertPreTokenizer(PreTokenizer): :class:`~tokenizers.pre_tokenizers.PreTokenizer` """ pass + def pre_tokenize_str(self, sequence): """ Pre tokenize the given string @@ -104,9 +105,9 @@ class ByteLevel(PreTokenizer): Set this to :obj:`False` to prevent this `pre_tokenizer` from using the GPT2 specific regexp for spliting on whitespace. """ - def __init__(self, add_prefix_space=True, use_regex=True): pass + @staticmethod def alphabet(): """ @@ -120,6 +121,7 @@ class ByteLevel(PreTokenizer): :obj:`List[str]`: A list of characters that compose the alphabet """ pass + def pre_tokenize(self, pretok): """ Pre-tokenize a :class:`~tokenizers.PyPreTokenizedString` in-place @@ -136,6 +138,7 @@ class ByteLevel(PreTokenizer): :class:`~tokenizers.pre_tokenizers.PreTokenizer` """ pass + def pre_tokenize_str(self, sequence): """ Pre tokenize the given string @@ -164,7 +167,6 @@ class CharDelimiterSplit(PreTokenizer): delimiter: str: The delimiter char that will be used to split input """ - def pre_tokenize(self, pretok): """ Pre-tokenize a :class:`~tokenizers.PyPreTokenizedString` in-place @@ -181,6 +183,7 @@ class CharDelimiterSplit(PreTokenizer): :class:`~tokenizers.pre_tokenizers.PreTokenizer` """ pass + def pre_tokenize_str(self, sequence): """ Pre tokenize the given string @@ -215,9 +218,9 @@ class Digits(PreTokenizer): "Call 123 please" -> "Call ", "123", " please" """ - def __init__(self, individual_digits=False): pass + def pre_tokenize(self, pretok): """ Pre-tokenize a :class:`~tokenizers.PyPreTokenizedString` in-place @@ -234,6 +237,7 @@ class Digits(PreTokenizer): :class:`~tokenizers.pre_tokenizers.PreTokenizer` """ pass + def pre_tokenize_str(self, sequence): """ Pre tokenize the given string @@ -270,9 +274,9 @@ class Metaspace(PreTokenizer): Whether to add a space to the first word if there isn't already one. This lets us treat `hello` exactly like `say hello`. """ - def __init__(self, replacement="_", add_prefix_space=True): pass + def pre_tokenize(self, pretok): """ Pre-tokenize a :class:`~tokenizers.PyPreTokenizedString` in-place @@ -289,6 +293,7 @@ class Metaspace(PreTokenizer): :class:`~tokenizers.pre_tokenizers.PreTokenizer` """ pass + def pre_tokenize_str(self, sequence): """ Pre tokenize the given string @@ -319,9 +324,9 @@ class Punctuation(PreTokenizer): Choices: "removed", "isolated" (default), "merged_with_previous", "merged_with_next", "contiguous" """ - def __init__(self, behavior="isolated"): pass + def pre_tokenize(self, pretok): """ Pre-tokenize a :class:`~tokenizers.PyPreTokenizedString` in-place @@ -338,6 +343,7 @@ class Punctuation(PreTokenizer): :class:`~tokenizers.pre_tokenizers.PreTokenizer` """ pass + def pre_tokenize_str(self, sequence): """ Pre tokenize the given string @@ -362,9 +368,9 @@ class Sequence(PreTokenizer): """ This pre-tokenizer composes other pre_tokenizers and applies them in sequence """ - def __init__(self, pretokenizers): pass + def pre_tokenize(self, pretok): """ Pre-tokenize a :class:`~tokenizers.PyPreTokenizedString` in-place @@ -381,6 +387,7 @@ class Sequence(PreTokenizer): :class:`~tokenizers.pre_tokenizers.PreTokenizer` """ pass + def pre_tokenize_str(self, sequence): """ Pre tokenize the given string @@ -421,9 +428,9 @@ class Split(PreTokenizer): invert (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether to invert the pattern. 
""" - def __init__(self, pattern, behavior, invert=False): pass + def pre_tokenize(self, pretok): """ Pre-tokenize a :class:`~tokenizers.PyPreTokenizedString` in-place @@ -440,6 +447,7 @@ class Split(PreTokenizer): :class:`~tokenizers.pre_tokenizers.PreTokenizer` """ pass + def pre_tokenize_str(self, sequence): """ Pre tokenize the given string @@ -467,9 +475,9 @@ class UnicodeScripts(PreTokenizer): Actually Hiragana and Katakana are fused with Han, and 0x30FC is Han too. This mimicks SentencePiece Unigram implementation. """ - def __init__(self): pass + def pre_tokenize(self, pretok): """ Pre-tokenize a :class:`~tokenizers.PyPreTokenizedString` in-place @@ -486,6 +494,7 @@ class UnicodeScripts(PreTokenizer): :class:`~tokenizers.pre_tokenizers.PreTokenizer` """ pass + def pre_tokenize_str(self, sequence): """ Pre tokenize the given string @@ -510,9 +519,9 @@ class Whitespace(PreTokenizer): """ This pre-tokenizer simply splits using the following regex: `\w+|[^\w\s]+` """ - def __init__(self): pass + def pre_tokenize(self, pretok): """ Pre-tokenize a :class:`~tokenizers.PyPreTokenizedString` in-place @@ -529,6 +538,7 @@ class Whitespace(PreTokenizer): :class:`~tokenizers.pre_tokenizers.PreTokenizer` """ pass + def pre_tokenize_str(self, sequence): """ Pre tokenize the given string @@ -553,9 +563,9 @@ class WhitespaceSplit(PreTokenizer): """ This pre-tokenizer simply splits on the whitespace. Works like `.split()` """ - def __init__(self): pass + def pre_tokenize(self, pretok): """ Pre-tokenize a :class:`~tokenizers.PyPreTokenizedString` in-place @@ -572,6 +582,7 @@ class WhitespaceSplit(PreTokenizer): :class:`~tokenizers.pre_tokenizers.PreTokenizer` """ pass + def pre_tokenize_str(self, sequence): """ Pre tokenize the given string diff --git a/bindings/python/py_src/tokenizers/processors/__init__.pyi b/bindings/python/py_src/tokenizers/processors/__init__.pyi index ab73a337c..5136d02bb 100644 --- a/bindings/python/py_src/tokenizers/processors/__init__.pyi +++ b/bindings/python/py_src/tokenizers/processors/__init__.pyi @@ -6,7 +6,6 @@ class PostProcessor: This class is not supposed to be instantiated directly. Instead, any implementation of a PostProcessor will return an instance of this class when instantiated. """ - def num_special_tokens_to_add(self, is_pair): """ Return the number of special tokens that would be added for single/pair sentences. @@ -19,6 +18,7 @@ class PostProcessor: :obj:`int`: The number of tokens to add """ pass + def process(self, encoding, pair=None, add_special_tokens=True): """ Post-process the given encodings, generating the final one @@ -53,9 +53,9 @@ class BertProcessing(PostProcessor): cls (:obj:`Tuple[str, int]`): A tuple with the string representation of the CLS token, and its id """ - def __init__(self, sep, cls): pass + def num_special_tokens_to_add(self, is_pair): """ Return the number of special tokens that would be added for single/pair sentences. @@ -68,6 +68,7 @@ class BertProcessing(PostProcessor): :obj:`int`: The number of tokens to add """ pass + def process(self, encoding, pair=None, add_special_tokens=True): """ Post-process the given encodings, generating the final one @@ -98,9 +99,9 @@ class ByteLevel(PostProcessor): trim_offsets (:obj:`bool`): Whether to trim the whitespaces from the produced offsets. """ - def __init__(self, trim_offsets=True): pass + def num_special_tokens_to_add(self, is_pair): """ Return the number of special tokens that would be added for single/pair sentences. 
@@ -113,6 +114,7 @@ class ByteLevel(PostProcessor): :obj:`int`: The number of tokens to add """ pass + def process(self, encoding, pair=None, add_special_tokens=True): """ Post-process the given encodings, generating the final one @@ -159,9 +161,9 @@ class RobertaProcessing(PostProcessor): Whether the add_prefix_space option was enabled during pre-tokenization. This is relevant because it defines the way the offsets are trimmed out. """ - def __init__(self, sep, cls, trim_offsets=True, add_prefix_space=True): pass + def num_special_tokens_to_add(self, is_pair): """ Return the number of special tokens that would be added for single/pair sentences. @@ -174,6 +176,7 @@ class RobertaProcessing(PostProcessor): :obj:`int`: The number of tokens to add """ pass + def process(self, encoding, pair=None, add_special_tokens=True): """ Post-process the given encodings, generating the final one @@ -201,9 +204,9 @@ class Sequence(PostProcessor): processors (:obj:`List[PostProcessor]`) The processors that need to be chained """ - def __init__(self, processors): pass + def num_special_tokens_to_add(self, is_pair): """ Return the number of special tokens that would be added for single/pair sentences. @@ -216,6 +219,7 @@ class Sequence(PostProcessor): :obj:`int`: The number of tokens to add """ pass + def process(self, encoding, pair=None, add_special_tokens=True): """ Post-process the given encodings, generating the final one @@ -302,9 +306,9 @@ class TemplateProcessing(PostProcessor): The given dict expects the provided :obj:`ids` and :obj:`tokens` lists to have the same length. """ - def __init__(self, single, pair, special_tokens): pass + def num_special_tokens_to_add(self, is_pair): """ Return the number of special tokens that would be added for single/pair sentences. @@ -317,6 +321,7 @@ class TemplateProcessing(PostProcessor): :obj:`int`: The number of tokens to add """ pass + def process(self, encoding, pair=None, add_special_tokens=True): """ Post-process the given encodings, generating the final one diff --git a/bindings/python/py_src/tokenizers/tools/visualizer.py b/bindings/python/py_src/tokenizers/tools/visualizer.py index da368054c..c988a6481 100644 --- a/bindings/python/py_src/tokenizers/tools/visualizer.py +++ b/bindings/python/py_src/tokenizers/tools/visualizer.py @@ -92,7 +92,7 @@ def __init__( if default_to_notebook: try: from IPython.core.display import HTML, display - except ImportError as e: + except ImportError: raise Exception( """We couldn't import IPython utils for html display. Are you running in a notebook? @@ -136,7 +136,7 @@ def __call__( if final_default_to_notebook: try: from IPython.core.display import HTML, display - except ImportError as e: + except ImportError: raise Exception( """We couldn't import IPython utils for html display. Are you running in a notebook?""" @@ -170,7 +170,7 @@ def calculate_label_colors(annotations: AnnotationList) -> Dict[str, str]: if h_step < 20: h_step = 20 s = 32 - l = 64 + l = 64 # noqa: E741 h = 10 colors = {} diff --git a/bindings/python/py_src/tokenizers/trainers/__init__.pyi b/bindings/python/py_src/tokenizers/trainers/__init__.pyi index 911fdeb29..d6c525718 100644 --- a/bindings/python/py_src/tokenizers/trainers/__init__.pyi +++ b/bindings/python/py_src/tokenizers/trainers/__init__.pyi @@ -80,7 +80,6 @@ class UnigramTrainer(Trainer): The number of iterations of the EM algorithm to perform before pruning the vocabulary. 
""" - def __init__( self, vocab_size=8000, @@ -143,7 +142,6 @@ class WordPieceTrainer(Trainer): end_of_word_suffix (:obj:`str`, `optional`): A suffix to be used for every subword that is a end-of-word. """ - def __init__( self, vocab_size=30000, diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index 81f70af3d..5cdf090fa 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -34,7 +34,7 @@ Source = 'https://github.com/huggingface/tokenizers' [project.optional-dependencies] -testing = ["pytest", "requests", "numpy", "datasets", "black==22.3"] +testing = ["pytest", "requests", "numpy", "datasets", "black==22.3", "ruff"] docs = ["sphinx", "sphinx_rtd_theme", "setuptools_rust"] dev = ["tokenizers[testing]"] @@ -52,3 +52,21 @@ features = ["pyo3/extension-module"] [tool.black] line-length = 119 target-version = ['py35'] + +[tool.ruff] +line-length = 119 +target-version = "py311" +lint.ignore = [ + # a == None in tests vs is None. + "E711", + # a == False in tests vs is False. + "E712", + # try.. import except.. pattern without using the lib. + "F401", + # Raw type equality is required in asserts + "E721", + # Import order + "E402", + # Fixtures unused import + "F811", +] diff --git a/bindings/python/scripts/convert.py b/bindings/python/scripts/convert.py index 6c812f8c7..e6df5ad8a 100644 --- a/bindings/python/scripts/convert.py +++ b/bindings/python/scripts/convert.py @@ -80,9 +80,7 @@ def tokenizer(self, proto): tokenizer = Tokenizer(Unigram(vocab, unk_id)) elif model_type == 2: vocab, merges = SentencePieceExtractor(self.original_tokenizer.vocab_file).extract() - tokenizer = Tokenizer( - BPE(vocab, merges, unk_token=proto.trainer_spec.unk_piece, fuse_unk=True) - ) + tokenizer = Tokenizer(BPE(vocab, merges, unk_token=proto.trainer_spec.unk_piece, fuse_unk=True)) else: raise Exception( "You're trying to run a `Unigram` model but you're file was trained with a different algorithm" @@ -105,12 +103,8 @@ def converted(self): replacement = "▁" add_prefix_space = True - tokenizer.pre_tokenizer = Metaspace( - replacement=replacement, add_prefix_space=add_prefix_space - ) - tokenizer.decoder = decoders.Metaspace( - replacement=replacement, add_prefix_space=add_prefix_space - ) + tokenizer.pre_tokenizer = Metaspace(replacement=replacement, add_prefix_space=add_prefix_space) + tokenizer.decoder = decoders.Metaspace(replacement=replacement, add_prefix_space=add_prefix_space) post_processor = self.post_processor(tokenizer) if post_processor: tokenizer.post_processor = post_processor @@ -124,9 +118,7 @@ def converted(self): class AlbertConverter(SpmConverter): def vocab(self, proto): return [ - (piece.piece, piece.score) - if check_number_comma(piece.piece) - else (piece.piece, piece.score - 100) + (piece.piece, piece.score) if check_number_comma(piece.piece) else (piece.piece, piece.score - 100) for piece in proto.pieces ] @@ -261,9 +253,7 @@ def post_processor(self, tokenizer): class XLNetConverter(SpmConverter): def vocab(self, proto): return [ - (piece.piece, piece.score) - if check_number_comma(piece.piece) - else (piece.piece, piece.score - 100) + (piece.piece, piece.score) if check_number_comma(piece.piece) else (piece.piece, piece.score - 100) for piece in proto.pieces ] @@ -420,9 +410,7 @@ def main(): print(f"|{'-'*model_len}|{'-'*status_len}|{'-'*speedup_len}|") for pretrained in args.models: status, speedup = check(pretrained, args.filename) - print( - f"|{pretrained:<{model_len}}|{status:^{status_len}}|{speedup:^{speedup_len - 1}.2f}x|" - ) 
+ print(f"|{pretrained:<{model_len}}|{status:^{status_len}}|{speedup:^{speedup_len - 1}.2f}x|") if __name__ == "__main__": diff --git a/bindings/python/scripts/sentencepiece_extractor.py b/bindings/python/scripts/sentencepiece_extractor.py index fba05d8f4..a7bce9b49 100644 --- a/bindings/python/scripts/sentencepiece_extractor.py +++ b/bindings/python/scripts/sentencepiece_extractor.py @@ -59,7 +59,6 @@ def __init__(self, model: str): def extract(self) -> Tuple[Dict[str, int], List[Tuple]]: with open(self._model, "r") as model_f: - # Retrieve information nb_pieces, nb_merges = map(int, model_f.readline().split()) vocab, merges = {}, [] @@ -97,9 +96,7 @@ def extract(self) -> Tuple[Dict[str, int], List[Tuple]]: choices=["sentencepiece", "youtokentome"], help="Indicate the format of the file.", ) - parser.add_argument( - "--model", type=str, required=True, help="SentencePiece model to extract vocab from." - ) + parser.add_argument("--model", type=str, required=True, help="SentencePiece model to extract vocab from.") parser.add_argument( "--vocab-output-path", type=str, @@ -128,9 +125,7 @@ def extract(self) -> Tuple[Dict[str, int], List[Tuple]]: args.model = f.name # Allocate extractor - extractor = ( - SentencePieceExtractor if args.provider == "sentencepiece" else YouTokenToMeExtractor - ) + extractor = SentencePieceExtractor if args.provider == "sentencepiece" else YouTokenToMeExtractor extractor = extractor(args.model) logger.info(f"Using {type(extractor).__name__}") diff --git a/bindings/python/scripts/spm_parity_check.py b/bindings/python/scripts/spm_parity_check.py index 09e5b9475..33cfff4fa 100644 --- a/bindings/python/scripts/spm_parity_check.py +++ b/bindings/python/scripts/spm_parity_check.py @@ -121,9 +121,7 @@ def check_train(args): break print(f"Tokenizer used {tokenizer_tokens}, where spm used {spm_tokens}") - assert ( - tokenizer_tokens < spm_tokens - ), "Our trainer should be at least more efficient than the SPM one" + assert tokenizer_tokens < spm_tokens, "Our trainer should be at least more efficient than the SPM one" print("Ok our trainer is at least more efficient than the SPM one") @@ -131,9 +129,7 @@ def check_diff(spm_diff, tok_diff, sp, tok): if spm_diff == list(reversed(tok_diff)): # AAA -> AA+A vs A+AA case. 
return True - elif len(spm_diff) == len(tok_diff) and tok.decode(spm_diff) == tok.decode( - tok_diff - ): + elif len(spm_diff) == len(tok_diff) and tok.decode(spm_diff) == tok.decode(tok_diff): # Second order OK # Barrich -> Barr + ich vs Bar + rich return True @@ -173,24 +169,17 @@ def check_details(line, spm_ids, tok_ids, sp, tok): spms = Counter(spm_ids[first:last]) toks = Counter(tok_ids[first:last]) - removable_tokens = { - spm_ for (spm_, si) in spms.items() if toks.get(spm_, 0) == si - } + removable_tokens = {spm_ for (spm_, si) in spms.items() if toks.get(spm_, 0) == si} min_width = 3 for i in range(last - first - min_width): - if all( - spm_ids[first + i + j] in removable_tokens for j in range(min_width) - ): + if all(spm_ids[first + i + j] in removable_tokens for j in range(min_width)): possible_matches = [ k for k in range(last - first - min_width) - if tok_ids[first + k : first + k + min_width] - == spm_ids[first + i : first + i + min_width] + if tok_ids[first + k : first + k + min_width] == spm_ids[first + i : first + i + min_width] ] for j in possible_matches: - if check_diff( - spm_ids[first : first + i], tok_ids[first : first + j], sp, tok - ) and check_details( + if check_diff(spm_ids[first : first + i], tok_ids[first : first + j], sp, tok) and check_details( line, spm_ids[first + i : last], tok_ids[first + j : last], @@ -210,9 +199,7 @@ def check_details(line, spm_ids, tok_ids, sp, tok): wrong = tok.decode(spm_ids[first:last]) print() if has_color: - print( - f"{colored(ok_start, 'grey')}{colored(wrong, 'red')}{colored(ok_end, 'grey')}" - ) + print(f"{colored(ok_start, 'grey')}{colored(wrong, 'red')}{colored(ok_end, 'grey')}") else: print(wrong) return False @@ -251,9 +238,7 @@ def check_encode(args): if args.verbose: if i % 10000 == 0: - print( - f"({perfect} / {imperfect} / {wrong} ----- {perfect + imperfect + wrong})" - ) + print(f"({perfect} / {imperfect} / {wrong} ----- {perfect + imperfect + wrong})") print(f"SPM: {spm_total_time} - TOK: {tok_total_time}") if ids != encoded.ids: @@ -265,13 +250,13 @@ def check_encode(args): else: perfect += 1 - assert ids == encoded.ids, f"line {i}: {line} : \n\n{ids}\n{encoded.ids}\n{list(zip(encoded.ids, encoded.tokens))}" + assert ( + ids == encoded.ids + ), f"line {i}: {line} : \n\n{ids}\n{encoded.ids}\n{list(zip(encoded.ids, encoded.tokens))}" print(f"({perfect} / {imperfect} / {wrong} ----- {perfect + imperfect + wrong})") total = perfect + imperfect + wrong - print( - f"Accuracy {perfect * 100 / total:.2f} Slowdown : {tok_total_time/ spm_total_time:.2f}" - ) + print(f"Accuracy {perfect * 100 / total:.2f} Slowdown : {tok_total_time/ spm_total_time:.2f}") if __name__ == "__main__": diff --git a/bindings/python/stub.py b/bindings/python/stub.py index c9a20237c..41ef2d6ec 100644 --- a/bindings/python/stub.py +++ b/bindings/python/stub.py @@ -3,8 +3,6 @@ import os from pathlib import Path -import black - INDENT = " " * 4 GENERATED_COMMENT = "# Generated content DO NOT EDIT\n" @@ -85,7 +83,7 @@ def pyi_file(obj, indent=""): body += f"{indent+INDENT}pass\n" body += "\n" - for (name, fn) in fns: + for name, fn in fns: body += pyi_file(fn, indent=indent) if not body: @@ -122,18 +120,17 @@ def py_file(module, origin): return string -def do_black(content, is_pyi): - mode = black.Mode( - target_versions={black.TargetVersion.PY35}, - line_length=119, - is_pyi=is_pyi, - string_normalization=True, - experimental_string_processing=False, - ) - try: - return black.format_file_contents(content, fast=True, mode=mode) - except 
black.NothingChanged: - return content +import subprocess +from typing import List, Optional, Tuple + + +def do_ruff(code, is_pyi: bool): + command = ["ruff", "format", "--config", "pyproject.toml", "--silent", "-"] + if is_pyi: + command.extend(["--stdin-filename", "test.pyi"]) + process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + stdout, _ = process.communicate(input=code.encode("utf-8")) + return stdout.decode("utf-8") def write(module, directory, origin, check=False): @@ -141,7 +138,7 @@ def write(module, directory, origin, check=False): filename = os.path.join(directory, "__init__.pyi") pyi_content = pyi_file(module) - pyi_content = do_black(pyi_content, is_pyi=True) + pyi_content = do_ruff(pyi_content, is_pyi=True) os.makedirs(directory, exist_ok=True) if check: with open(filename, "r") as f: @@ -153,7 +150,7 @@ def write(module, directory, origin, check=False): filename = os.path.join(directory, "__init__.py") py_content = py_file(module, origin) - py_content = do_black(py_content, is_pyi=False) + py_content = do_ruff(py_content, is_pyi=False) os.makedirs(directory, exist_ok=True) is_auto = False diff --git a/bindings/python/tests/bindings/test_models.py b/bindings/python/tests/bindings/test_models.py index b4b29682d..c6a50ce86 100644 --- a/bindings/python/tests/bindings/test_models.py +++ b/bindings/python/tests/bindings/test_models.py @@ -3,7 +3,6 @@ import pytest from tokenizers.models import BPE, Model, WordLevel, WordPiece - from ..utils import bert_files, data_dir, roberta_files diff --git a/bindings/python/tests/bindings/test_normalizers.py b/bindings/python/tests/bindings/test_normalizers.py index cf9f3d1a4..3fafd60d1 100644 --- a/bindings/python/tests/bindings/test_normalizers.py +++ b/bindings/python/tests/bindings/test_normalizers.py @@ -2,8 +2,7 @@ import pytest -from tokenizers import NormalizedString, Tokenizer -from tokenizers.models import BPE +from tokenizers import NormalizedString from tokenizers.normalizers import BertNormalizer, Lowercase, Normalizer, Sequence, Strip, Prepend diff --git a/bindings/python/tests/bindings/test_processors.py b/bindings/python/tests/bindings/test_processors.py index 14af9fbe4..842754a69 100644 --- a/bindings/python/tests/bindings/test_processors.py +++ b/bindings/python/tests/bindings/test_processors.py @@ -146,18 +146,18 @@ def test_instantiate(self): assert isinstance(pickle.loads(pickle.dumps(bert)), TemplateProcessing) # It is absolutely legal to have tokens with spaces in the name: - processor = TemplateProcessing( + TemplateProcessing( single=["[ C L S ]", "Token with space"], special_tokens=[("[ C L S ]", 0), ("Token with space", 1)], ) # Sequence identifiers must be well formed: with pytest.raises(Exception, match="Cannot build Piece"): - processor = TemplateProcessing(single="[CLS] $$ [SEP]") + TemplateProcessing(single="[CLS] $$ [SEP]") with pytest.raises(Exception, match="Cannot build Piece"): - processor = TemplateProcessing(single="[CLS] $A: [SEP]") + TemplateProcessing(single="[CLS] $A: [SEP]") # Special tokens must be provided when used in template: with pytest.raises(Exception, match="Missing SpecialToken\\(s\\) with id\\(s\\)"): - processor = TemplateProcessing(single=["[CLS]"]) + TemplateProcessing(single=["[CLS]"]) def test_bert_parity(self): tokenizer = Tokenizer(BPE()) diff --git a/bindings/python/tests/bindings/test_tokenizer.py b/bindings/python/tests/bindings/test_tokenizer.py index 2eb5ce59c..01deb7a85 100644 --- 
a/bindings/python/tests/bindings/test_tokenizer.py +++ b/bindings/python/tests/bindings/test_tokenizer.py @@ -5,10 +5,9 @@ from tokenizers import AddedToken, Encoding, Tokenizer from tokenizers.implementations import BertWordPieceTokenizer -from tokenizers.models import BPE, Model, WordPiece, Unigram -from tokenizers.normalizers import Lowercase +from tokenizers.models import BPE, Model, Unigram from tokenizers.pre_tokenizers import ByteLevel -from tokenizers.processors import BertProcessing, RobertaProcessing +from tokenizers.processors import RobertaProcessing from ..utils import bert_files, data_dir, multiprocessing_with_parallelism, roberta_files diff --git a/bindings/python/tests/documentation/test_pipeline.py b/bindings/python/tests/documentation/test_pipeline.py index 90117f075..25300ff64 100644 --- a/bindings/python/tests/documentation/test_pipeline.py +++ b/bindings/python/tests/documentation/test_pipeline.py @@ -2,7 +2,6 @@ from ..utils import data_dir, doc_pipeline_bert_tokenizer, doc_wiki_tokenizer - disable_printing = True original_print = print diff --git a/bindings/python/tests/documentation/test_quicktour.py b/bindings/python/tests/documentation/test_quicktour.py index 866a6f99d..a98b0c12e 100644 --- a/bindings/python/tests/documentation/test_quicktour.py +++ b/bindings/python/tests/documentation/test_quicktour.py @@ -1,8 +1,4 @@ from tokenizers import Tokenizer -from tokenizers.models import BPE -from tokenizers.pre_tokenizers import Whitespace -from tokenizers.trainers import BpeTrainer - from ..utils import data_dir, doc_wiki_tokenizer diff --git a/bindings/python/tests/documentation/test_tutorial_train_from_iterators.py b/bindings/python/tests/documentation/test_tutorial_train_from_iterators.py index 58d93351d..fc9ffce19 100644 --- a/bindings/python/tests/documentation/test_tutorial_train_from_iterators.py +++ b/bindings/python/tests/documentation/test_tutorial_train_from_iterators.py @@ -1,3 +1,4 @@ +# flake8: noqa import gzip import os diff --git a/bindings/python/tests/implementations/test_base_tokenizer.py b/bindings/python/tests/implementations/test_base_tokenizer.py index 5b4c45160..535964656 100644 --- a/bindings/python/tests/implementations/test_base_tokenizer.py +++ b/bindings/python/tests/implementations/test_base_tokenizer.py @@ -1,5 +1,3 @@ -import pytest - from tokenizers import Tokenizer, decoders, models, normalizers, pre_tokenizers, processors from tokenizers.implementations import BaseTokenizer diff --git a/bindings/python/tests/implementations/test_bert_wordpiece.py b/bindings/python/tests/implementations/test_bert_wordpiece.py index a05d98c94..4e7c29cf5 100644 --- a/bindings/python/tests/implementations/test_bert_wordpiece.py +++ b/bindings/python/tests/implementations/test_bert_wordpiece.py @@ -1,5 +1,3 @@ -import pytest - from tokenizers import BertWordPieceTokenizer from ..utils import bert_files, data_dir, multiprocessing_with_parallelism diff --git a/bindings/python/tests/implementations/test_byte_level_bpe.py b/bindings/python/tests/implementations/test_byte_level_bpe.py index 579575d3f..441aded7a 100644 --- a/bindings/python/tests/implementations/test_byte_level_bpe.py +++ b/bindings/python/tests/implementations/test_byte_level_bpe.py @@ -1,5 +1,3 @@ -import pytest - from tokenizers import ByteLevelBPETokenizer from ..utils import data_dir, multiprocessing_with_parallelism, roberta_files diff --git a/bindings/python/tests/implementations/test_char_bpe.py b/bindings/python/tests/implementations/test_char_bpe.py index 09b2fc6e1..3ce5cf9a3 100644 
--- a/bindings/python/tests/implementations/test_char_bpe.py +++ b/bindings/python/tests/implementations/test_char_bpe.py @@ -1,5 +1,3 @@ -import pytest - from tokenizers import CharBPETokenizer from ..utils import data_dir, multiprocessing_with_parallelism, openai_files diff --git a/bindings/python/tests/implementations/test_sentencepiece.py b/bindings/python/tests/implementations/test_sentencepiece.py index d9fade774..1da41fec0 100644 --- a/bindings/python/tests/implementations/test_sentencepiece.py +++ b/bindings/python/tests/implementations/test_sentencepiece.py @@ -1,5 +1,3 @@ -import os - import pytest from tokenizers import SentencePieceBPETokenizer, SentencePieceUnigramTokenizer diff --git a/bindings/python/tests/test_serialization.py b/bindings/python/tests/test_serialization.py index 2057d763e..a56c6bb33 100644 --- a/bindings/python/tests/test_serialization.py +++ b/bindings/python/tests/test_serialization.py @@ -6,7 +6,6 @@ from huggingface_hub import HfApi, cached_download, hf_hub_url from tokenizers import Tokenizer - from .utils import albert_base, data_dir @@ -15,7 +14,7 @@ def test_full_serialization_albert(self, albert_base): # Check we can read this file. # This used to fail because of BufReader that would fail because the # file exceeds the buffer capacity - tokenizer = Tokenizer.from_file(albert_base) + Tokenizer.from_file(albert_base) def check(tokenizer_file) -> bool: @@ -51,8 +50,6 @@ def test_full_deserialization_hub(self): # Check we can read this file. # This used to fail because of BufReader that would fail because the # file exceeds the buffer capacity - api = HfApi() - not_loadable = [] invalid_pre_tokenizer = [] @@ -77,7 +74,7 @@ def test_full_deserialization_hub(self): except Exception as e: print(f"{model_id} is not loadable: {e}") not_loadable.append(model_id) - except: + except: # noqa: E722 print(f"{model_id} is not loadable: Rust error") not_loadable.append(model_id)
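
For reference, the `do_ruff` helper introduced in `stub.py` above shells out to `ruff format` over stdin instead of calling `black` as a library. Below is a minimal standalone sketch of that flow; it assumes `ruff` is installed and that it is run from `bindings/python` so that `--config pyproject.toml` resolves. The `format_with_ruff` name and the sample snippet are illustrative only and are not part of the patch.

import subprocess


def format_with_ruff(code: str, is_pyi: bool = False) -> str:
    # Same invocation as the do_ruff() helper added to stub.py: pipe the source
    # through `ruff format` and read the formatted result back from stdout.
    command = ["ruff", "format", "--config", "pyproject.toml", "--silent", "-"]
    if is_pyi:
        # Tell ruff the stdin payload is a .pyi stub so stub formatting rules apply.
        command.extend(["--stdin-filename", "test.pyi"])
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    stdout, _ = process.communicate(input=code.encode("utf-8"))
    return stdout.decode("utf-8")


if __name__ == "__main__":
    # Deliberately badly formatted input, purely for illustration.
    messy = "def add( a,b ):\n        return a+b\n"
    print(format_with_ruff(messy), end="")

Unlike the previous black-based hook, formatting problems now surface only through ruff's exit status and stderr, which this sketch ignores in the same way stub.py does.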
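
The updated `check-style` Makefile target reduces to three commands. A throwaway wrapper that mirrors it from Python could look like the sketch below; it is hypothetical (not in the repo), and assumes it is executed from `bindings/python` with `ruff` on the PATH.

import subprocess
import sys

# Mirrors the updated `check-style` target: regenerate stubs in check mode,
# then run ruff's linter and formatter in check-only mode over the same dirs.
CHECK_DIRS = ["examples", "py_src/tokenizers", "tests"]

commands = [
    [sys.executable, "stub.py", "--check"],
    ["ruff", "check", *CHECK_DIRS],
    ["ruff", "format", "--check", *CHECK_DIRS],
]

failed = False
for command in commands:
    # Let each command print its own diagnostics; only track the aggregate status.
    if subprocess.run(command).returncode != 0:
        failed = True

sys.exit(1 if failed else 0)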