diff --git a/modules/config.py b/modules/config.py
index d7e0efe..c3aa4e6 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -45,7 +45,6 @@
 
 parser.add_argument("--xformers", action="store_true")
 parser.add_argument("--tensorrt", action="store_true")
-parser.add_argument("--deepfloyd_if", action="store_true")
 
 cmd_opts, _ = parser.parse_known_args(
     os.environ["COMMANDLINE_ARGS"].split(" ")
diff --git a/modules/diffusion/pipelines/deepfloyd_if.py b/modules/diffusion/pipelines/deepfloyd_if.py
deleted file mode 100644
index 46f99dc..0000000
--- a/modules/diffusion/pipelines/deepfloyd_if.py
+++ /dev/null
@@ -1,255 +0,0 @@
-import gc
-from typing import *
-
-import psutil
-import torch
-from diffusers import DiffusionPipeline
-from diffusers.utils import pt_to_pil
-from transformers import T5EncoderModel
-
-from modules import config
-from modules.shared import get_device, hf_diffusers_cache_dir, hf_transformers_cache_dir
-
-
-class IFDiffusionPipeline:
-    @classmethod
-    def from_pretrained(
-        cls, model_id_I: str, model_id_II: str, model_id_III: str, mode: str = "auto"
-    ):
-        device = get_device()
-        if device.type == "cuda":
-            vram = torch.cuda.get_device_properties(device).total_memory / (1024**3)
-        else:
-            vram = 0
-        ram = psutil.virtual_memory().total / (1024**3)
-        if mode == "auto":
-            if vram <= 6:
-                if ram > 32:
-                    mode = "sequential_off_load"
-            elif vram <= 16:
-                if ram > 16:
-                    mode = "off_load"
-                else:
-                    mode = "lowvram"
-            elif vram <= 24:
-                mode = "medvram"
-                if ram > 16:
-                    mode = "off_load"
-            else:
-                mode = "normal"
-
-        return cls(
-            mode=mode,
-            IF_I_id=model_id_I,
-            IF_II_id=model_id_II,
-            IF_III_id=model_id_III,
-            torch_dtype=torch.float16 if config.get("fp16") else torch.float32,
-        )
-
-    def __init__(
-        self,
-        mode: Literal[
-            "lowvram", "sequential_off_load", "off_load", "medvram", "normal"
-        ] = "normal",
-        IF_I_id: str = None,
-        IF_II_id: str = None,
-        IF_III_id: str = None,
-        torch_dtype: torch.dtype = torch.float32,
-    ):
-        self.mode = mode
-        self.IF_I_id = IF_I_id
-        self.IF_II_id = IF_II_id
-        self.IF_III_id = IF_III_id
-
-        self.torch_dtype = torch_dtype
-        self.variant = "fp16" if torch_dtype == torch.float16 else None
-
-        device_str = config.get("device")
-        if len(device_str.split(",")) == 4:
-            self.device = [torch.device(d) for d in device_str.split(",")]
-        else:
-            self.device = [torch.device(device_str)] * 4
-
-        self.t5 = None
-        self.IF_I = None
-        self.IF_II = None
-        self.IF_III = None
-        self.previous = {
-            "prompt_embeds": None,
-            "negative_prompt_embeds": None,
-            "images_I": None,
-            "images_II": None,
-        }
-
-    def _flush(self):
-        gc.collect()
-        torch.cuda.empty_cache()
-
-    def load_pipeline(
-        self,
-        stage: Literal["I", "II", "III"],
-        pipe_type: Literal["t5", "IF_I", "IF_II", "IF_III"],
-        **kwargs
-    ):
-        if stage == "I":
-            if self.mode == "lowvram" or self.mode == "medvram":
-                self.IF_II = None
-                self.IF_III = None
-        elif stage == "II":
-            if self.mode == "lowvram" or self.mode == "medvram":
-                self.t5 = None
-                self.IF_I = None
-                self.IF_III = None
-        elif stage == "III":
-            if self.mode == "lowvram" or self.mode == "medvram":
-                self.t5 = None
-                self.IF_I = None
-                self.IF_II = None
-
-        self._flush()
-
-        if pipe_type == "t5":
-            if self.t5 is None:
-                self.t5 = T5EncoderModel.from_pretrained(
-                    "DeepFloyd/IF-I-XL-v1.0",
-                    subfolder="text_encoder",
-                    device_map="auto",
-                    torch_dtype=self.torch_dtype,
-                    variant=self.variant,
-                    cache_dir=hf_transformers_cache_dir(),
-                    **kwargs,
-                ).to(self.device[0])
-        elif pipe_type == "IF_I":
-            if self.IF_I is None:
-                self.IF_I = DiffusionPipeline.from_pretrained(
-                    self.IF_I_id,
-                    device_map="auto",
-                    torch_dtype=self.torch_dtype,
-                    variant=self.variant,
-                    cache_dir=hf_diffusers_cache_dir(),
-                    **kwargs,
-                ).to(self.device[1])
-                if self.mode == "off_load":
-                    self.IF_I.enable_model_cpu_offload()
-                elif self.mode == "sequential_off_load":
-                    self.IF_I.enable_sequential_cpu_offload()
-        elif pipe_type == "IF_II":
-            if self.IF_II is None:
-                self.IF_II = DiffusionPipeline.from_pretrained(
-                    self.IF_II_id,
-                    device_map="auto",
-                    torch_dtype=self.torch_dtype,
-                    variant=self.variant,
-                    cache_dir=hf_diffusers_cache_dir(),
-                    **kwargs,
-                ).to(self.device[2])
-                if self.mode == "off_load":
-                    self.IF_II.enable_model_cpu_offload()
-                elif self.mode == "sequential_off_load":
-                    self.IF_II.enable_sequential_cpu_offload()
-        elif pipe_type == "IF_III":
-            if self.IF_III is None:
-                self.IF_III = DiffusionPipeline.from_pretrained(
-                    self.IF_III_id,
-                    device_map="auto",
-                    torch_dtype=self.torch_dtype,
-                    variant=self.variant,
-                    cache_dir=hf_diffusers_cache_dir(),
-                    **kwargs,
-                ).to(self.device[3])
-                if self.mode == "off_load":
-                    self.IF_III.enable_model_cpu_offload()
-                elif self.mode == "sequential_off_load":
-                    self.IF_III.enable_sequential_cpu_offload()
-
-    def _encode_prompt(self, prompt: str, negative_prompt: str):
-        self.load_pipeline("I", "t5")
-        if self.mode == "lowvram":
-            self.load_pipeline("I", "IF_I", text_encoder=self.t5, unet=None)
-        else:
-            self.load_pipeline("I", "IF_I", text_encoder=self.t5)
-        prompt_embeds, negative_embeds = self.IF_I.encode_prompt(
-            prompt=prompt, negative_prompt=negative_prompt
-        )
-        if self.mode == "lowvram":
-            self.t5 = None
-            self._flush()
-
-        return prompt_embeds, negative_embeds
-
-    def stage_I(
-        self,
-        prompt: str,
-        negative_prompt: str,
-        guidance_scale: float,
-        num_inference_steps: int,
-    ):
-        self.previous = {
-            "prompt_embeds": None,
-            "negative_prompt_embeds": None,
-            "images_I": None,
-            "images_II": None,
-        }
-        prompt_embeds, negative_prompt_embeds = self._encode_prompt(
-            prompt, negative_prompt
-        )
-
-        self.load_pipeline("I", "IF_I", text_encoder=None)
-        images = self.IF_I(
-            prompt_embeds=prompt_embeds,
-            negative_prompt_embeds=negative_prompt_embeds,
-            guidance_scale=guidance_scale,
-            num_inference_steps=num_inference_steps,
-            output_type="pt",
-        ).images
-
-        self.previous["prompt_embeds"] = prompt_embeds
-        self.previous["negative_prompt_embeds"] = negative_prompt_embeds
-        self.previous["images_I"] = images
-
-        yield [(pt_to_pil(images), {})]
-
-    def stage_II(
-        self,
-        prompt: str,
-        negative_prompt: str,
-        guidance_scale: float,
-        num_inference_steps: int,
-    ):
-        self.load_pipeline("II", "IF_II", text_encoder=None)
-
-        images = self.IF_II(
-            image=self.previous["images_I"],
-            prompt_embeds=self.previous["prompt_embeds"],
-            negative_prompt_embeds=self.previous["negative_prompt_embeds"],
-            guidance_scale=guidance_scale,
-            num_inference_steps=num_inference_steps,
-            output_type="pt",
-        ).images
-
-        self.previous["images_II"] = images
-
-        yield [(pt_to_pil(self.previous["images_I"]), {}), (pt_to_pil(images), {})]
-
-    def stage_III(
-        self,
-        prompt: str,
-        negative_prompt: str,
-        guidance_scale: float,
-        num_inference_steps: int,
-    ):
-        self.load_pipeline("II", "IF_III")
-
-        images = self.IF_III(
-            image=self.previous["images_II"],
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            guidance_scale=guidance_scale,
-            num_inference_steps=num_inference_steps,
-        ).images
-
-        yield [
-            (pt_to_pil(self.previous["images_I"]), {}),
-            (pt_to_pil(self.previous["images_II"]), {}),
-            (images, {}),
-        ]
diff --git a/modules/model_manager.py b/modules/model_manager.py
index 6551c35..50d4c2f 100644
--- a/modules/model_manager.py
+++ b/modules/model_manager.py
@@ -12,9 +12,7 @@
 sd_models: List[DiffusersModel] = []
 sd_model: Optional[DiffusersModel] = None
 
-mode: Literal["stable-diffusion", "deepfloyd_if"] = (
-    "deepfloyd_if" if config.get("deepfloyd_if") else "stable-diffusion"
-)
+mode: Literal["stable-diffusion"] = "stable-diffusion"
 
 
 def get_model(model_id: str):
diff --git a/modules/tabs/deepfloyd_if.py b/modules/tabs/deepfloyd_if.py
deleted file mode 100644
index 75d9b85..0000000
--- a/modules/tabs/deepfloyd_if.py
+++ /dev/null
@@ -1,181 +0,0 @@
-import gc
-from typing import *
-
-import gradio as gr
-import torch
-
-from modules import model_manager
-from modules.components import gallery, image_generation_options
-from modules.diffusion.pipelines.deepfloyd_if import IFDiffusionPipeline
-from modules.ui import Tab
-
-
-class DeepFloydIF(Tab):
-    def __init__(self, filepath: str):
-        super().__init__(filepath)
-
-        self.pipe = IFDiffusionPipeline.from_pretrained(
-            "DeepFloyd/IF-I-L-v1.0",
-            "DeepFloyd/IF-II-L-v1.0",
-            "stabilityai/stable-diffusion-x4-upscaler",
-        )
-
-    def title(self):
-        return "Deepfloyd IF"
-
-    def sort(self):
-        return 1
-
-    def visible(self):
-        return model_manager.mode == "deepfloyd_if"
-
-    def swap_model(
-        self, IF_I_id: str, IF_II_id: str, IF_III_id: str, mode: str = "auto"
-    ):
-        self.pipe = None
-        gc.collect()
-        torch.cuda.empty_cache()
-        self.pipe = IFDiffusionPipeline.from_pretrained(
-            IF_I_id, IF_II_id, IF_III_id, mode
-        )
-
-        return (
-            self.pipe.IF_I_id,
-            self.pipe.IF_II_id,
-            self.pipe.IF_III_id,
-            self.pipe.mode,
-        )
-
-    def create_generate_fn(self, stage: Literal["I", "II", "III"]):
-        def generate_image(
-            prompt, negative_prompt, guidance_scale, num_inference_steps
-        ):
-            yield [], "Generating...", gr.Button.update(
-                value="Generating...", variant="secondary", interactive=False
-            )
-
-            count = 0
-
-            fn = getattr(self.pipe, f"stage_{stage}")
-
-            for data in fn(
-                prompt=prompt,
-                negative_prompt=negative_prompt,
-                guidance_scale=guidance_scale,
-                num_inference_steps=num_inference_steps,
-            ):
-                if type(data) == tuple:
-                    step, preview = data
-                    progress = step / (1 * num_inference_steps)
-                    previews = []
-                    for images, opts in preview:
-                        previews.extend(images)
-
-                    if len(previews) == count:
-                        update = gr.Gallery.update()
-                    else:
-                        update = gr.Gallery.update(value=previews)
-                    count = len(previews)
-                    yield update, f"Progress: {progress * 100:.2f}%, Step: {step}", gr.Button.update(
-                        value="Generating...", variant="secondary", interactive=False
-                    )
-                else:
-                    image = data
-
-            results = []
-            for images, _ in image:
-                results.extend(images)
-
-            yield results, "Finished", gr.Button.update(
-                value=f"Stage {stage}", variant="primary", interactive=True
-            )
-
-        return generate_image
-
-    def ui(self, outlet):
-        with gr.Row():
-            IF_I_model_id = gr.Textbox(label="model stage I", value=self.pipe.IF_I_id)
-            IF_II_model_id = gr.Textbox(
-                label="model stage II", value=self.pipe.IF_II_id
-            )
-            IF_III_model_if = gr.Textbox(
-                label="model stage III", value=self.pipe.IF_III_id
-            )
-            mode = gr.Radio(
-                choices=[
-                    "auto",
-                    "lowvram",
-                    "medvram",
-                    "off_load",
-                    "sequential_off_load",
-                    "normal",
-                ],
-                label="mode",
-                value="auto",
-            )
-            apply_model_button = gr.Button("💾", elem_classes=["tool-button"])
-            apply_model_button.click(
-                fn=self.swap_model,
-                inputs=[
-                    IF_I_model_id,
-                    IF_II_model_id,
-                    IF_III_model_if,
-                    mode,
-                ],
-                outputs=[IF_I_model_id, IF_II_model_id, IF_III_model_if, mode],
-            )
-
-        with gr.Column():
-            with gr.Row():
-                with gr.Column(scale=3):
-                    prompts = image_generation_options.prompt_ui()
-                with gr.Row():
-                    stage_1_button = gr.Button("Stage I", variant="primary")
-                    stage_2_button = gr.Button("Stage II", variant="primary")
-                    stage_3_button = gr.Button("Stage III", variant="primary")
-
-            with gr.Row():
-                with gr.Column(scale=1.25):
-                    with gr.Row():
-                        guidance_scale = gr.Slider(
-                            minimum=0,
-                            maximum=20,
-                            step=0.5,
-                            value=7.5,
-                            label="CFG Scale",
-                        )
-                        num_inference_steps = gr.Slider(
-                            minimum=1, maximum=100, step=1, value=50, label="Steps"
-                        )
-
-                outputs = gallery.outputs_gallery_ui()
-
-            stage_1_button.click(
-                fn=self.create_generate_fn("I"),
-                inputs=[
-                    *prompts,
-                    guidance_scale,
-                    num_inference_steps,
-                ],
-                outputs=[*outputs, stage_1_button],
-            )
-
-            stage_2_button.click(
-                fn=self.create_generate_fn("II"),
-                inputs=[
-                    *prompts,
-                    guidance_scale,
-                    num_inference_steps,
-                ],
-                outputs=[*outputs, stage_2_button],
-            )
-
-            stage_3_button.click(
-                fn=self.create_generate_fn("III"),
-                inputs=[
-                    *prompts,
-                    guidance_scale,
-                    num_inference_steps,
-                ],
-                outputs=[*outputs, stage_3_button],
-            )
diff --git a/readme.md b/readme.md
index 87cb1f6..5e18efc 100644
--- a/readme.md
+++ b/readme.md
@@ -10,10 +10,6 @@
 
 ![](./docs/images/readme-screenshots-01.png)
 
-- Deepfloyd IF
-
-![](./docs/images/readme-screenshots-02.png)
-
 - TensorRT
 
 ![](./docs/images/readme-screenshots-03.png)
@@ -24,15 +20,11 @@
 
 # Features
 
-- [x] Stable diffusion
-- [x] DeepFloyd IF -> with `--deepfloyd_if`
+- [x] Stable Diffusion
+- [x] Stable Diffusion XL
 - [x] Acceleration using TensorRT -> with `--tensorrt`
-- [ ] ~Acceleration using AITemplate~ -> Use [`VoltaML fast stable diffusion webui`](https://github.com/VoltaML/voltaML-fast-stable-diffusion)
 - [x] ControlNet -> [plugin](https://github.com/ddPn08/radiata-controlnet-plugin)
 - [x] Lora & Lycoris
-- [x] Hypernetwork
-- [ ] Composable lora
-- [ ] Latent couple
 
 # Installation