Apply suggestions from code review

Co-authored-by: Arthur <[email protected]>
huggingface · Aug 8, 2024 · b617b18 · b617b18
1 parent 74e8af1
commit b617b18
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 2 deletions.
diff --git a/tokenizers/src/tokenizer/mod.rs b/tokenizers/src/tokenizer/mod.rs
@@ -761,7 +761,7 @@ where
 
     /// Encode the given input. This method accepts both single sequences, as well as pair
     /// sequences. Also, a sequence can be a string, or already pre-tokenized input directly:
-    ///
+    /// Contrary to `encode`, it does not compute offsets.
     /// ```
     /// # use tokenizers::Tokenizer;
     /// # use tokenizers::models::bpe::BPE;

diff --git a/tokenizers/src/tokenizer/pre_tokenizer.rs b/tokenizers/src/tokenizer/pre_tokenizer.rs
@@ -154,7 +154,7 @@ impl PreTokenizedString {
                         .flat_map(|split| {
                             split.tokens.unwrap().into_iter().map(|token| {
                                 // Replace this with the actual fields you need for the Encoding type
-                                (token.id, String::new(), (0, 0), None, 0)
+                                (token.id,String::with_capacity(0), (0, 0), None, 0)
                             })
                         })
                         .collect();