[remove black] And use ruff (#1436)
* nits

* Fixing deps.

* Ruff update.

* Import order matters.

* Fix.

* Revert ruff fix.

* Visualizer.

* Putting back the imports.

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
ArthurZucker and Narsil authored Mar 12, 2024
1 parent 72a1973 commit 29fef1e
Showing 29 changed files with 258 additions and 169 deletions.
6 changes: 4 additions & 2 deletions bindings/python/Makefile
@@ -8,12 +8,14 @@ check_dirs := examples py_src/tokenizers tests
# Format source code automatically
style:
	python stub.py
-	black --line-length 119 --target-version py35 $(check_dirs)
+	ruff check $(check_dirs) --fix
+	ruff format $(check_dirs)

# Check the source code is formatted correctly
check-style:
	python stub.py --check
-	black --check --line-length 119 --target-version py35 examples py_src/tokenizers tests
+	ruff check examples py_src/tokenizers tests
+	ruff format --check examples py_src/tokenizers tests

TESTS_RESOURCES = $(DATA_DIR)/small.txt $(DATA_DIR)/roberta.json
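
With this change the two Make targets rely entirely on ruff: "ruff check --fix" applies lint autofixes, and "ruff format" takes over the formatting job black used to do. As a minimal sketch, the same commands could be driven from Python like this (assuming ruff is installed in the environment; the directory list mirrors the check_dirs variable above):

import subprocess

# Same directories as the Makefile's check_dirs variable.
dirs = ["examples", "py_src/tokenizers", "tests"]

# Mirror the "style" target: lint with autofixes, then reformat in place.
subprocess.run(["ruff", "check", *dirs, "--fix"], check=True)
subprocess.run(["ruff", "format", *dirs], check=True)

# Mirror the "check-style" target: exit non-zero if anything is off.
subprocess.run(["ruff", "check", *dirs], check=True)
subprocess.run(["ruff", "format", "--check", *dirs], check=True)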

11 changes: 4 additions & 7 deletions bindings/python/examples/example.py
@@ -4,16 +4,15 @@

from tqdm import tqdm

-
-logging.getLogger("transformers").disabled = True
-logging.getLogger("transformers.tokenization_utils").disabled = True
-
from tokenizers import Tokenizer, decoders, pre_tokenizers
from tokenizers.models import BPE, WordPiece
from tokenizers.normalizers import BertNormalizer
from tokenizers.processors import BertProcessing
from transformers import BertTokenizer, GPT2Tokenizer
+
+logging.getLogger("transformers").disabled = True
+logging.getLogger("transformers.tokenization_utils").disabled = True


parser = argparse.ArgumentParser()
parser.add_argument("--type", default="gpt2", type=str, help="The type of tokenizer (bert|gpt2)")
@@ -51,9 +50,7 @@
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!
-""".split(
-    "\n"
-)
+""".split("\n")

if args.type == "gpt2":
    print("Running GPT-2 tokenizer")
2 changes: 1 addition & 1 deletion bindings/python/examples/train_with_datasets.py
@@ -1,6 +1,6 @@
import datasets

-from tokenizers import Tokenizer, models, normalizers, pre_tokenizers, trainers
+from tokenizers import Tokenizer, models, normalizers, pre_tokenizers


# Build a tokenizer
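
Dropping the unused trainers name is the kind of edit ruff's F401 rule ("imported but unused") produces, and "ruff check --fix" deletes such imports automatically. A hypothetical illustration of the rule, not taken from this repository:

import json  # used below, so ruff keeps it
import os  # F401: imported but unused; "ruff check --fix" would delete this line

print(json.dumps({"status": "ok"}))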