🐛 Describe the bug

The error occurs when I try to generate text with Qwen2-VL while the qwen2-vl Liger kernel is applied.

The following code produces the error. If I apply the qwen2 Liger kernel to the same code instead of the qwen2-vl one, it runs correctly.
I then added cache_position to the lce_forward function, and it produced the following error instead.
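For reference, this is roughly what that change looked like. The signature is abridged here; the patched lce_forward for qwen2_vl in liger_kernel has more parameters, and cache_position is the only one I added:

```python
from typing import Optional
import torch

# Abridged sketch of the local patch (not the full liger_kernel signature):
# forward cache_position through lce_forward so it matches the keyword
# argument that transformers passes into model.forward during generate().
def lce_forward(
    self,
    input_ids: torch.LongTensor = None,
    attention_mask: Optional[torch.Tensor] = None,
    labels: Optional[torch.LongTensor] = None,
    pixel_values: Optional[torch.FloatTensor] = None,
    image_grid_thw: Optional[torch.LongTensor] = None,
    cache_position: Optional[torch.LongTensor] = None,  # <- the parameter I added
    **kwargs,
):
    ...
```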
Here is how I applied the qwen2 Liger kernel:
```python
from liger_kernel.transformers import apply_liger_kernel_to_qwen2

apply_liger_kernel_to_qwen2()
```
Here is how I applied the qwen2-vl Liger kernel:
```python
from liger_kernel.transformers import apply_liger_kernel_to_qwen2_vl

apply_liger_kernel_to_qwen2_vl()
```
Reproduce

```python
# Reference: https://internvl.readthedocs.io/en/latest/internvl2.0/quick_start.html
import json
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List

import torch
from PIL import Image, ImageDraw, ImageFont
from qwen_vl_utils import process_vision_info
from transformers import AutoProcessor, AutoTokenizer, Qwen2VLForConditionalGeneration

from mmdoc.data.xdataset_base import PageRecord
from mmdoc.data.xdataset_entity import normalize_bbox
from mmdoc.data.xdataset_gpt_entity import GPTXDatasetBuilder
from mmdoc.paths import DATA_DIR
from mmdoc.trainers.eval.gpt_evaluation import get_content_dict, get_value_with_grounding, load_prediction
from peft import PeftConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from peft import LoraConfig, PeftModel
from liger_kernel.transformers import apply_liger_kernel_to_qwen2_vl
from liger_kernel.transformers import apply_liger_kernel_to_qwen2
from liger_kernel.transformers import monkey_patch
import transformers

# apply_liger_kernel_to_qwen2()
print("Applying Liger Kernel to Qwen2-VL model")
# monkey_patch.apply_liger_kernel_to_qwen2_vl(
#     # These args can be used to override the default Liger settings
#     # cross_entropy=True,
#     # fused_linear_cross_entropy=False,
# )
apply_liger_kernel_to_qwen2_vl(
    rope=True,
    cross_entropy=False,
    fused_linear_cross_entropy=True,
    rms_norm=True,
    layer_norm=True,
    swiglu=True,
)


def get_data(
    dataset_name,
    doc_id,
    page_id,
    header_only: bool = False,
    line_only: bool = False,
    filter_labels: List[str] = None,
    end_to_end: bool = False,
    num_pages: int = None,
    text_grounding: bool = False,  # Add text grounding
    require_grounding: bool = False,  # Ignore typed texts without bounding box
):
    dataset_dir = f"{DATA_DIR}/{dataset_name}"
    iocr_dir = f"{dataset_dir}/iocr_json"
    field_data_dir = f"{dataset_dir}/field_data"
    page = PageRecord.load_from_disk(doc_id, page_id, iocr_dir, field_data_dir, load_json=True)
    add_page_promt = False if num_pages is None else True
    builder = GPTXDatasetBuilder(
        name=dataset_name,
        data_dir=f"{DATA_DIR}",
        header_only=header_only,
        line_only=line_only,
        filter_labels=filter_labels,
        add_page_promt=add_page_promt,
        text_grounding=text_grounding,
        end_to_end=end_to_end,
        require_grounding=require_grounding,
    )
    doc_id_pg_str = str(doc_id) + "_" + str(page_id)
    data_json_dir = f"{dataset_dir}/dataset.json"
    with Path(data_json_dir).open(encoding="utf-8") as f:
        dataset_info = json.load(f)
    field_configs = builder._get_field_configs(dataset_info)
    annos = builder._get_annotations(
        page=page,
        field_configs=field_configs,
        page_idx=page_id,
        num_pages=num_pages,
    )
    prompt = annos["prompt"]
    json_string = annos["response_json"]
    image_path = f"{dataset_dir}/images/{doc_id_pg_str}.jpg"
    return prompt, json_string, image_path


def draw_box(
    pil_img, draw: ImageDraw, bbox, text: str, color: str, font: ImageFont, text_above: bool = True, y_shift: int = 10
):
    x0, y0, x1, y1 = normalize_bbox(bbox=bbox, from_size=(1000, 1000), to_size=pil_img.size)
    draw.rectangle(((x0, y0), (x1, y1)), outline=color, width=2)
    if text_above:
        # Draw text above
        draw.text((x0 + 10, y0 - y_shift), text=text, font=font, fill=color)
    else:
        # Draw text below
        draw.text((x0 + 10, y0 + y_shift), text=text, font=font, fill=color)


def _draw_lines(
    lines: List[Dict],
    color: str,
    pil_img,
    draw: ImageDraw,
    font: ImageFont,
    text_above: bool = True,
    y_shift: int = 10,
    delta: int = 0,
):
    for _row in lines:
        cnt = 0
        for field_name, _value in _row.items():
            _txt_value, _coord = get_value_with_grounding(_value)
            if _coord:
                txt_value = field_name + ": " + _txt_value
                y_shift_final = y_shift + cnt * delta  # Shift different field differently
                draw_box(pil_img, draw, _coord, txt_value, color, font, text_above, y_shift_final)
                cnt += 1


peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.05,
    r=8,
    bias="none",
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)

# If you have an 80G A100 GPU, you can put the entire model on a single GPU.
# Otherwise, you need to load a model using multiple GPUs, please refer to the `Multiple GPUs` section.
path = "Qwen/Qwen2-VL-2B-Instruct"
# device_map = split_model('InternVL2-1B')
model_path = (
    # Header only
    "/home/paperspace/mmdoc/outputs/internvl/Mainclass40GlobalShop_SPpromptTGPr/checkpoint-14800"
)
header_only = True
end_to_end = False
filter_labels = None  # "".split(",")
num_pages = 1
text_grounding = True
doc_id = 366023271
page_id = 1
max_seq_length = 1024
require_grounding = True
prompt, json_string, image_path = get_data(
    "GlobalShopWF976934_USER_50USER_20241002",
    doc_id,
    page_id,
    header_only=header_only,
    filter_labels=filter_labels,
    end_to_end=end_to_end,
    num_pages=num_pages,
    text_grounding=text_grounding,
    require_grounding=require_grounding,
)

# model_cls = InternVLChatModel
# model = model_cls.from_pretrained(
#     # model_path,
#     path,
#     torch_dtype=torch.bfloat16,
#     low_cpu_mem_usage=True,
#     trust_remote_code=True,
# ).eval()
model = Qwen2VLForConditionalGeneration.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)
model = get_peft_model(model, peft_config)
model.to("cuda")
processor = transformers.AutoProcessor.from_pretrained(path)
processor.tokenizer.pad_token = processor.tokenizer.eos_token
# processor = AutoProcessor.from_pretrained(path)
# tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

start_time = time.time()
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "image": image_path,
            },
            {"type": "text", "text": prompt},
        ],
    },
    {"role": "assistant", "content": [{"type": "text", "text": json_string}]},
]

text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
)
inputs = inputs.to("cuda")

# Inference: Generation of the output
generated_ids = model.generate(**inputs.data, max_new_tokens=128)
generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
output_text = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)
print(output_text)
```
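In case it helps triage, here is a stripped-down sketch of what should be the same failing path, with the private mmdoc data loading, PEFT wrapping, and flash attention removed. `demo.jpg` and the prompt text are placeholders; the only parts kept are applying the qwen2-vl Liger kernel and then calling generate():

```python
import torch
from qwen_vl_utils import process_vision_info
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from liger_kernel.transformers import apply_liger_kernel_to_qwen2_vl

# Apply the qwen2-vl Liger kernel with the same loss settings as above
apply_liger_kernel_to_qwen2_vl(cross_entropy=False, fused_linear_cross_entropy=True)

path = "Qwen/Qwen2-VL-2B-Instruct"
model = Qwen2VLForConditionalGeneration.from_pretrained(
    path, torch_dtype=torch.bfloat16, device_map="auto"
)
processor = AutoProcessor.from_pretrained(path)

# "demo.jpg" is a placeholder for any local image
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "demo.jpg"},
            {"type": "text", "text": "Describe this image."},
        ],
    }
]
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[text], images=image_inputs, videos=video_inputs, return_tensors="pt"
).to("cuda")

# generate() is where the error surfaces once the qwen2-vl kernel is applied
generated_ids = model.generate(**inputs, max_new_tokens=32)
print(processor.batch_decode(generated_ids, skip_special_tokens=True))
```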
Versions

transformers=4.47.1
liger_kernel=0.5.2