diff --git a/Disco_Diffusion.ipynb b/HighDruidMotas_Custom_Disco_Diffusion_v5_4__!_?.ipynb similarity index 87% rename from Disco_Diffusion.ipynb rename to HighDruidMotas_Custom_Disco_Diffusion_v5_4__!_?.ipynb index 34e89d89..560e253a 100644 --- a/Disco_Diffusion.ipynb +++ b/HighDruidMotas_Custom_Disco_Diffusion_v5_4__!_?.ipynb @@ -1,22 +1,12 @@ { "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, { "cell_type": "markdown", "metadata": { "id": "TitleTop" }, "source": [ - "# Disco Diffusion v5.4 - Now with Warp\n", + "# Disco Diffusion v5.4.?! - Now with Warp and Customizable Symmetry Functions\n", "\n", "Disco Diffusion - http://discodiffusion.com/ , https://github.com/alembics/disco-diffusion\n", "\n", @@ -31,7 +21,7 @@ "id": "CreditsChTop" }, "source": [ - "### Credits & Changelog \u2b07\ufe0f" + "### Credits & Changelog ⬇️" ] }, { @@ -68,7 +58,9 @@ "\n", "VR Mode by Tom Mason (https://twitter.com/nin_artificial)\n", "\n", - "Horizontal and Vertical symmetry functionality by nshepperd. Symmetry transformation_steps by huemin (https://twitter.com/huemin_art). Symmetry integration into Disco Diffusion by Dmitrii Tochilkin (https://twitter.com/cut_pow).\n", + "Horizontal and Vertical Transform Symmetry by nshepperd. Symmetry transformation_steps by huemin (https://twitter.com/huemin_art). Symmetry integration into Disco Diffusion by Dmitrii Tochilkin (https://twitter.com/cut_pow).\n", + "\n", + "Symmetry Loss Function(s) by PsiloCyborg (https://twitter.com/psilocyborg_nft), Tomas Gravenmier (https://twitter.com/HighDruidMotas), and Carson Bentley (https://twitter.com/Aztecman_Dnb)\n", "\n", "Warp and custom model support by Alex Spirin (https://twitter.com/devdef)." 
] @@ -302,7 +294,7 @@ "\n", " Addition of ViT-L/14@336px model (requires high VRAM)\n", "\n", - " v5.4 Update: Jun 14th 2022 - devdef / Alex Spirin, Alex's Warp changes integrated into DD main by gandamu / Adam Letts\n", + " v5.3 Update: Jun 14th 2022 - devdef / Alex Spirin, Alex's Warp changes integrated into DD main by gandamu / Adam Letts\n", "\n", " Warp mode - for smooth/continuous video input results leveraging optical flow estimation and frame blending\n", "\n", @@ -422,8 +414,8 @@ { "cell_type": "code", "metadata": { - "cellView": "form", - "id": "PrepFolders" + "id": "PrepFolders", + "cellView": "form" }, "source": [ "#@title 1.2 Prepare Folders\n", @@ -455,7 +447,7 @@ " #@markdown If you connect your Google Drive, you can save the final image of each run on your drive.\n", " google_drive = True #@param {type:\"boolean\"}\n", " #@markdown Click here if you'd like to save the diffusion model checkpoint file to (and/or load from) your Google Drive:\n", - " save_models_to_google_drive = True #@param {type:\"boolean\"}\n", + " save_models_to_google_drive = False #@param {type:\"boolean\"}\n", "except:\n", " is_colab = False\n", " google_drive = False\n", @@ -1069,6 +1061,62 @@ "def range_loss(input):\n", " return (input - input.clamp(-1, 1)).pow(2).mean([1, 2, 3])\n", "\n", + "def h_symm_loss(im, lpm, x, twist):\n", + " \"\"\"\n", + " im : input image\n", + " lpm : image comparison metric (such as lpips metric)\n", + " x: horizontal offset\n", + " \"\"\"\n", + " width = im.shape[3]\n", + " half_width = int(width / 2)\n", + " \n", + " if abs(x) >= half_width:\n", + " raise ValueError(\"|Offset x| must be less than half the image width\")\n", + " \n", + " slice_width = half_width - abs(x)\n", + " left_offset = max(2*x, 0)\n", + " h1, h2 = im[:, :, :, left_offset:left_offset+slice_width], im[:, :, :, left_offset+slice_width:left_offset+slice_width*2]\n", + "\n", + " if twist:\n", + " h2 = TF.vflip(h2)\n", + "\n", + " h2 = TF.hflip(h2)\n", + " return lpm(h1, 
h2)\n", + "\n", + "def v_symm_loss(im, lpm, y, twist):\n", + " \"\"\"\n", + " im : input image\n", + " lpm : image comparison metric (such as lpips metric)\n", + " y: vertical offset\n", + " \"\"\"\n", + " height = im.shape[2]\n", + " half_height = int(height / 2)\n", + "\n", + " if abs(y) >= half_height:\n", + " raise ValueError(\"|Offset y| must be less than half the image height\")\n", + " \n", + " slice_height = half_height - abs(y)\n", + " top_offset = max(2*y, 0)\n", + " h1, h2 = im[:, :, top_offset:top_offset+slice_height, :], im[:, :, top_offset+slice_height:top_offset+slice_height*2, :]\n", + "\n", + " if twist:\n", + " h2 = TF.hflip(h2)\n", + " \n", + " h2 = TF.vflip(h2)\n", + " return lpm(h1, h2)\n", + "\n", + "def pinwheel_symm_loss(im, lpm):\n", + " \"\"\"\n", + " im : input image\n", + " lpm : image comparison metric (such as lpips metric)\n", + " \"\"\"\n", + " height, width = im.shape[2], im.shape[3]\n", + " if height != width:\n", + " raise ValueError(\"pinwheel only works on square images (for now)\")\n", + "\n", + " im_rot = torch.rot90(im, 1, [2, 3])\n", + " return lpm(im, im_rot)\n", + "\n", "stop_on_next_loop = False # Make sure GPU memory doesn't get corrupted from cancelling the run mid-way through, allow a full frame to complete\n", "TRANSLATION_SCALE = 1.0/200.0\n", "\n", @@ -1375,7 +1423,7 @@ "\n", " cuts = MakeCutoutsDango(input_resolution,\n", " Overview= args.cut_overview[1000-t_int], \n", - " InnerCrop = args.cut_innercut[1000-t_int], IC_Size_Pow=args.cut_ic_pow, IC_Grey_P = args.cut_icgray_p[1000-t_int]\n", + " InnerCrop = args.cut_innercut[1000-t_int], IC_Size_Pow=args.cut_ic_pow[1000 - t_int], IC_Grey_P = args.cut_icgray_p[1000-t_int]\n", " )\n", " clip_in = normalize(cuts(x_in.add(1).div(2)))\n", " image_embeds = model_stat[\"clip_model\"].encode_image(clip_in).float()\n", @@ -1394,6 +1442,23 @@ " if init is not None and init_scale:\n", " init_losses = lpips_model(x_in, init)\n", " loss = loss + init_losses.sum() * init_scale\n", 
+ "\n", + " if args.h_symmetry_loss and np.array(t.cpu())[0] > 10 * args.h_symmetry_switch: # switch is read from args, like the other symmetry settings\n", + " sloss = h_symm_loss(x_in, lpips_model, \n", + " args.h_symmetry_offset, \n", + " args.h_symmetry_twist)\n", + " loss = loss + sloss.sum() * args.h_symmetry_loss_scale\n", + " \n", + " if args.v_symmetry_loss and np.array(t.cpu())[0] > 10 * args.v_symmetry_switch:\n", + " sloss = v_symm_loss(x_in, lpips_model, \n", + " args.v_symmetry_offset, \n", + " args.v_symmetry_twist)\n", + " loss = loss + sloss.sum() * args.v_symmetry_loss_scale\n", + " \n", + " if args.pinwheel_symmetry_loss and np.array(t.cpu())[0] > 10 * args.pinwheel_symmetry_switch:\n", + " sloss = pinwheel_symm_loss(x_in, lpips_model)\n", + " loss = loss + sloss.sum() * args.pinwheel_symmetry_loss_scale\n", + "\n", " x_in_grad += torch.autograd.grad(loss, x_in)[0]\n", " if torch.isnan(x_in_grad).any()==False:\n", " grad = -torch.autograd.grad(x_in, x, x_in_grad)[0]\n", @@ -1530,6 +1595,8 @@ " \n", " plt.plot(np.array(loss_values), 'r')\n", "\n", + "\n", + "\n", "def generate_eye_views(trans_scale,batchFolder,filename,frame_num,midas_model, midas_transform):\n", " for i in range(2):\n", " theta = vr_eye_angle * (math.pi/180)\n", @@ -1553,6 +1620,19 @@ " 'tv_scale': tv_scale,\n", " 'range_scale': range_scale,\n", " 'sat_scale': sat_scale,\n", + " 'h_symmetry_loss': h_symmetry_loss,\n", + " 'h_symmetry_loss_scale': h_symmetry_loss_scale,\n", + " 'h_symmetry_switch': h_symmetry_switch,\n", + " 'h_symmetry_offset' : h_symmetry_offset,\n", + " 'h_symmetry_twist': h_symmetry_twist,\n", + " 'v_symmetry_loss': v_symmetry_loss,\n", + " 'v_symmetry_loss_scale': v_symmetry_loss_scale,\n", + " 'v_symmetry_switch': v_symmetry_switch,\n", + " 'v_symmetry_offset' : v_symmetry_offset,\n", + " 'v_symmetry_twist': v_symmetry_twist,\n", + " 'pinwheel_symmetry_loss': pinwheel_symmetry_loss,\n", + " 'pinwheel_symmetry_loss_scale':pinwheel_symmetry_loss_scale,\n", + " 'pinwheel_symmetry_switch': pinwheel_symmetry_switch,\n", + " # 'cutn': 
cutn,\n", " 'cutn_batches': cutn_batches,\n", " 'max_frames': max_frames,\n", @@ -1830,26 +1910,28 @@ { "cell_type": "code", "metadata": { - "id": "ModelSettings" + "id": "ModelSettings", + "cellView": "form" }, "source": [ "#@markdown ####**Models Settings:**\n", "diffusion_model = \"512x512_diffusion_uncond_finetune_008100\" #@param [\"256x256_diffusion_uncond\", \"512x512_diffusion_uncond_finetune_008100\", \"custom\"]\n", "use_secondary_model = True #@param {type: 'boolean'}\n", "diffusion_sampling_mode = 'ddim' #@param ['plms','ddim']\n", + "\n", "#@markdown #####**Custom model:**\n", - "custom_path = '/content/drive/MyDrive/deep_learning/ddpm/ema_0.9999_058000.pt'#@param {type: 'string'}\n", + "custom_path = ''#@param {type: 'string'}\n", "\n", "#@markdown #####**CLIP settings:**\n", "use_checkpoint = True #@param {type: 'boolean'}\n", "ViTB32 = True #@param{type:\"boolean\"}\n", "ViTB16 = True #@param{type:\"boolean\"}\n", "ViTL14 = False #@param{type:\"boolean\"}\n", - "ViTL14_336px = False #@param{type:\"boolean\"}\n", - "RN101 = False #@param{type:\"boolean\"}\n", - "RN50 = True #@param{type:\"boolean\"}\n", + "ViTL14_336px = True #@param{type:\"boolean\"}\n", + "RN101 = True #@param{type:\"boolean\"}\n", + "RN50 = False #@param{type:\"boolean\"}\n", "RN50x4 = False #@param{type:\"boolean\"}\n", - "RN50x16 = False #@param{type:\"boolean\"}\n", + "RN50x16 = True #@param{type:\"boolean\"}\n", "RN50x64 = False #@param{type:\"boolean\"}\n", "\n", "#@markdown If you're having issues with model downloads, check this to compare SHA's:\n", @@ -2075,11 +2157,12 @@ { "cell_type": "code", "metadata": { - "id": "BasicSettings" + "id": "BasicSettings", + "cellView": "form" }, "source": [ "#@markdown ####**Basic Settings:**\n", - "batch_name = 'TimeToDisco' #@param{type: 'string'}\n", + "batch_name = 'DruidDiffusion' #@param{type: 'string'}\n", "steps = 250 #@param [25,50,100,150,250,500,1000]{type: 'raw', allow-input: true}\n", "width_height = [1280, 768] 
#@param{type: 'raw'}\n", "clip_guidance_scale = 5000 #@param{type: 'number'}\n", @@ -2089,7 +2172,39 @@ "cutn_batches = 4#@param{type: 'number'}\n", "skip_augs = False#@param{type: 'boolean'}\n", "\n", - "#@markdown ####**Video Init Basic Settings:**\n", + "#@markdown ####**SYMMETRY:**\n", + "#@markdown ####**Transformation Settings:**\n", + "use_vertical_symmetry = False #@param {type:\"boolean\"}\n", + "use_horizontal_symmetry = False #@param {type:\"boolean\"}\n", + "transformation_percent = [0.09] #@param\n", + "#@markdown ####**Loss Settings:**\n", + "#@markdown 🔽 Horizontal Symmetry Settings \n", + "h_symmetry_loss = False #@param {type:\"boolean\"}\n", + "h_symmetry_loss_scale = 2400 #@param {type:\"number\"}\n", + "h_symmetry_switch = 70 #@param {type:\"number\"} \n", + "h_symmetry_offset = 0 #@param {type:\"number\"}\n", + "h_symmetry_twist = False #@param {type:\"boolean\"}\n", + "\n", + "#@markdown 🔽 Vertical Symmetry Settings \n", + "v_symmetry_loss = False #@param {type:\"boolean\"}\n", + "v_symmetry_loss_scale = 2400 #@param {type:\"number\"}\n", + "v_symmetry_switch = 70 #@param {type:\"number\"}\n", + "v_symmetry_offset = 0 #@param {type:\"number\"}\n", + "v_symmetry_twist = False #@param {type:\"boolean\"}\n", + "\n", + "#@markdown 🔽 Pinwheel Symmetry Settings \n", + "pinwheel_symmetry_loss = False #@param {type:\"boolean\"}\n", + "pinwheel_symmetry_loss_scale = 2400 #@param {type:\"number\"}\n", + "pinwheel_symmetry_switch = 80 #@param {type:\"number\"} \n", + "#@markdown ---\n", + "\n", + "#@markdown ####**Image Init Settings:**\n", + "init_image = \"\" #@param{type: 'string'}\n", + "init_scale = 0#@param{type: 'integer'}\n", + "skip_steps = 0#@param{type: 'integer'}\n", + "#@markdown *Make sure you set skip_steps to ~50% of your steps if you want to use an init image.*\n", + "\n", + "#@markdown ####**Video Init Settings:**\n", "video_init_steps = 100 #@param [25,50,100,150,250,500,1000]{type: 'raw', allow-input: true}\n", 
"video_init_clip_guidance_scale = 1000 #@param{type: 'number'}\n", "video_init_tv_scale = 0.1#@param{type: 'number'}\n", @@ -2098,23 +2213,23 @@ "video_init_cutn_batches = 4#@param{type: 'number'}\n", "video_init_skip_steps = 50 #@param{type: 'integer'}\n", "\n", - "#@markdown ---\n", - "\n", - "#@markdown ####**Init Image Settings:**\n", - "init_image = None #@param{type: 'string'}\n", - "init_scale = 1000 #@param{type: 'integer'}\n", - "skip_steps = 10 #@param{type: 'integer'}\n", - "#@markdown *Make sure you set skip_steps to ~50% of your steps if you want to use an init image.*\n", - "\n", "#Get corrected sizes\n", "side_x = (width_height[0]//64)*64;\n", "side_y = (width_height[1]//64)*64;\n", "if side_x != width_height[0] or side_y != width_height[1]:\n", - " print(f'Changing output size to {side_x}x{side_y}. Dimensions must by multiples of 64.')\n", + " print(f'Changing output size to {side_x}x{side_y}. Dimensions must by multiples of 64.')\n", + "\n", + "#Update Model Settings\n", + "timestep_respacing = f'ddim{steps}'\n", + "diffusion_steps = (1000//steps)*steps if steps < 1000 else steps\n", + "model_config.update({\n", + " 'timestep_respacing': timestep_respacing,\n", + " 'diffusion_steps': diffusion_steps,\n", + "})\n", "\n", "#Make folder for batch\n", "batchFolder = f'{outDirPath}/{batch_name}'\n", - "createPath(batchFolder)" + "createPath(batchFolder)\n" ], "outputs": [], "execution_count": null @@ -2131,7 +2246,8 @@ { "cell_type": "code", "metadata": { - "id": "AnimSettings" + "id": "AnimSettings", + "cellView": "form" }, "source": [ "#@markdown ####**Animation Mode:**\n", @@ -2143,14 +2259,14 @@ "\n", "#@markdown ####**Video Input Settings:**\n", "if is_colab:\n", - " video_init_path = \"/content/drive/MyDrive/AI/Disco_Diffusion/init.mp4\" #@param {type: 'string'}\n", + " video_init_path = \"\" #@param {type: 'string'}\n", "else:\n", - " video_init_path = \"init.mp4\" #@param {type: 'string'}\n", + " video_init_path = \"\" #@param {type: 
'string'}\n", "extract_nth_frame = 2 #@param {type: 'number'}\n", "persistent_frame_output_in_batch_folder = True #@param {type: 'boolean'}\n", "video_init_seed_continuity = False #@param {type: 'boolean'}\n", "#@markdown #####**Video Optical Flow Settings:**\n", - "video_init_flow_warp = True #@param {type: 'boolean'}\n", + "video_init_flow_warp = False #@param {type: 'boolean'}\n", "# Call optical flow from video frames and warp prev frame with flow\n", "video_init_flow_blend = 0.999#@param {type: 'number'} #0 - take next frame, 1 - take prev warped frame\n", "video_init_check_consistency = False #Insert param here when ready\n", @@ -2509,7 +2625,8 @@ { "cell_type": "code", "metadata": { - "id": "InstallRAFT" + "id": "InstallRAFT", + "cellView": "form" }, "source": [ "#@title Install RAFT for Video input animation mode only\n", @@ -2517,6 +2634,7 @@ "#@markdown Use force download to reload raft models if needed\n", "force_download = False #@param {type:'boolean'}\n", "if animation_mode == 'Video Input':\n", + " os.chdir(f'{PROJECT_DIR}')\n", " try:\n", " from raft import RAFT\n", " except:\n", @@ -2524,11 +2642,10 @@ " gitclone('https://github.com/princeton-vl/RAFT', os.path.join(PROJECT_DIR, 'RAFT'))\n", " sys.path.append(f'{PROJECT_DIR}/RAFT')\n", "\n", - " if (not (os.path.exists(f'{root_path}/RAFT/models'))) or force_download:\n", - " os.chdir(f'{root_path}/RAFT/')\n", + " os.chdir(PROJECT_DIR)\n", + " if (not (os.path.exists(f'/{root_path}/RAFT/models'))) or force_download:\n", - " sub_p_res = subprocess.run(['bash', './download_models.sh'], stdout=subprocess.PIPE).stdout.decode('utf-8')\n", + " sub_p_res = subprocess.run(['bash', './download_models.sh'], cwd=f'{root_path}/RAFT', stdout=subprocess.PIPE).stdout.decode('utf-8') # run from the RAFT folder so ./download_models.sh resolves\n", - " print(sub_p_res)\n", - " os.chdir(PROJECT_DIR)" + " print(sub_p_res)" ], "outputs": [], "execution_count": null @@ -2536,7 +2653,8 @@ { "cell_type": "code", "metadata": { - "id": "FlowFns1" + "id": "FlowFns1", + "cellView": "form" }, "source": [ "#@title Define optical flow functions for Video input animation mode only\n", @@ -2682,7 +2800,8 @@ { "cell_type": "code", 
"metadata": { - "id": "FlowFns2" + "id": "FlowFns2", + "cellView": "form" }, "source": [ "#@title Generate optical flow and consistency maps\n", @@ -2710,7 +2829,7 @@ " if len(frames)>=2:\n", " \n", " raft_model = torch.nn.DataParallel(RAFT(args2))\n", - " raft_model.load_state_dict(torch.load(f'{root_path}/RAFT/models/raft-things.pth'))\n", + " raft_model.load_state_dict(torch.load(f'/{root_path}/RAFT/models/raft-things.pth'))\n", " raft_model = raft_model.module.cuda().eval()\n", " \n", " for f in pathlib.Path(f'{flo_fwd_folder}').glob('*.*'):\n", @@ -2758,7 +2877,8 @@ { "cell_type": "code", "metadata": { - "id": "ExtraSettings" + "id": "ExtraSettings", + "cellView": "form" }, "source": [ "#@markdown ####**Saving:**\n", @@ -2817,15 +2937,8 @@ "\n", "cut_overview = \"[12]*400+[4]*600\" #@param {type: 'string'} \n", "cut_innercut =\"[4]*400+[12]*600\"#@param {type: 'string'} \n", - "cut_ic_pow = 1#@param {type: 'number'} \n", - "cut_icgray_p = \"[0.2]*400+[0]*600\"#@param {type: 'string'}\n", - "\n", - "#@markdown ---\n", - "\n", - "#@markdown ####**Transformation Settings:**\n", - "use_vertical_symmetry = False #@param {type:\"boolean\"}\n", - "use_horizontal_symmetry = False #@param {type:\"boolean\"}\n", - "transformation_percent = [0.09] #@param" + "cut_ic_pow = \"[1]*400+[100]*300+[50]*300\"#@param {type: 'string'} \n", + "cut_icgray_p = \"[0.2]*400+[0]*600\"#@param {type: 'string'}" ], "outputs": [], "execution_count": null @@ -2833,27 +2946,34 @@ { "cell_type": "markdown", "metadata": { - "id": "PromptsTop" + "id": "ZZvPptqzoQdP" }, "source": [ "### Prompts\n", - "`animation_mode: None` will only use the first set. `animation_mode: 2D / Video` will run through them per the set frames and hold on the last one." + "`animation_mode: None` will only use the first set. 
`animation_mode: 2D / Video` will run through them per the set frames and hold on the last one.\n", + "#### Pixel Art Diffusion Prompts\n", + "Although the model is fine-tuned on pixel art, it needs reminders that you actually want pixel art. A good rule of thumb for prompt structuring is to follow a format like: \n", + "\n", + "```\n", + "[\"A cyberpunk city at sunset, #pixelart by van gogh\",\"#pixelart\"]\n", + "```\n", + "With a multi-part prompt, try playing around with how many times you remind it to generate images in a pixel art style. " ] }, { "cell_type": "code", "metadata": { - "id": "Prompts" + "id": "Z14m2HhdoQdQ" }, "source": [ "text_prompts = {\n", - " 0: [\"A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation.\", \"yellow color scheme\"],\n", - " 100: [\"This set of prompts start at frame 100\",\"This prompt has weight five:5\"],\n", + " 0: [\"\"],\n", + " \n", "}\n", "\n", "image_prompts = {\n", " # 0:['ImagePromptsWorkButArentVeryGood.png:2',],\n", - "}" + "}\n" ], "outputs": [], "execution_count": null @@ -2869,13 +2989,26 @@ }, { "cell_type": "code", + "source": [ + "torch.cuda.empty_cache()\n", + "gc.collect()" + ], "metadata": { - "id": "DoTheRun" + "id": "k7IM6-jjhfp4" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "DoTheRun", + "cellView": "form" }, "source": [ "#@title Do the Run!\n", "#@markdown `n_batches` ignored with animation modes.\n", - "display_rate = 20 #@param{type: 'number'}\n", + "display_rate = 1 #@param{type: 'number'}\n", "n_batches = 50 #@param{type: 'number'}\n", "\n", "if animation_mode == 'Video Input':\n", @@ -2976,9 +3109,26 @@ " 'range_scale': range_scale,\n", " 'sat_scale': sat_scale,\n", " 'cutn_batches': cutn_batches,\n", + " 'h_symmetry_loss': h_symmetry_loss,\n", + " 'h_symmetry_loss_scale': h_symmetry_loss_scale,\n", + " 'h_symmetry_switch': 
h_symmetry_switch,\n", + " 'h_symmetry_offset' : h_symmetry_offset,\n", + " 'h_symmetry_twist': h_symmetry_twist,\n", + " 'v_symmetry_loss': v_symmetry_loss,\n", + " 'v_symmetry_loss_scale': v_symmetry_loss_scale,\n", + " 'v_symmetry_switch': v_symmetry_switch,\n", + " 'v_symmetry_offset' : v_symmetry_offset,\n", + " 'v_symmetry_twist': v_symmetry_twist,\n", + " 'pinwheel_symmetry_loss': pinwheel_symmetry_loss,\n", + " 'pinwheel_symmetry_loss_scale':pinwheel_symmetry_loss_scale,\n", + " 'pinwheel_symmetry_switch': pinwheel_symmetry_switch,\n", + " #'temp_ic_pow':temp_ic_pow,\n", " 'init_image': init_image,\n", " 'init_scale': init_scale,\n", " 'skip_steps': skip_steps,\n", + " #'skip_end_steps': skip_end_steps,\n", + " #'sharpen_preset': sharpen_preset,\n", + " #'keep_unsharp': keep_unsharp,\n", " 'side_x': side_x,\n", " 'side_y': side_y,\n", " 'timestep_respacing': timestep_respacing,\n", @@ -3015,13 +3165,15 @@ " 'rotation_3d_y_series':rotation_3d_y_series,\n", " 'rotation_3d_z_series':rotation_3d_z_series,\n", " 'frames_scale': frames_scale,\n", + " 'calc_frames_skip_steps': calc_frames_skip_steps,\n", " 'skip_step_ratio': skip_step_ratio,\n", " 'calc_frames_skip_steps': calc_frames_skip_steps,\n", " 'text_prompts': text_prompts,\n", " 'image_prompts': image_prompts,\n", " 'cut_overview': eval(cut_overview),\n", " 'cut_innercut': eval(cut_innercut),\n", - " 'cut_ic_pow': cut_ic_pow,\n", + " 'cut_ic_pow': eval(cut_ic_pow),\n", + " #'cut_ic_pow_final': cut_ic_pow_final,\n", " 'cut_icgray_p': eval(cut_icgray_p),\n", " 'intermediate_saves': intermediate_saves,\n", " 'intermediates_in_subfolder': intermediates_in_subfolder,\n", @@ -3121,7 +3273,7 @@ "# @title ### **Create video**\n", "#@markdown Video file will save in the same folder as your images.\n", "from tqdm.notebook import trange\n", - "skip_video_for_run_all = False #@param {type: 'boolean'}\n", + "skip_video_for_run_all = True #@param {type: 'boolean'}\n", "\n", "if animation_mode == 'Video Input':\n", " 
frames = sorted(glob(in_path+'/*.*'));\n", @@ -3248,6 +3400,338 @@ ], "outputs": [], "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "5V2vjMN8pHWt" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xc1W6_SAAvJU" + }, + "source": [ + "# Prompt Engineering (Outdated but still useful Documentation)\n", + "\n", + "Collapse this section if it makes you feel cramped." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u4Ut78Lol7EN" + }, + "source": [ + "## Performance Settings (VRAM Troubleshooting)\n", + "\n", + "- `cutn`: Controls image quality\n", + "- `cutn_batches` : Lessens the amount of cutn required by doing bursts of cutn. If you want to use a cutn of 60, but don’t have enough VRAM, set `cutn` to 30 and `cutn_batches` to 2.\n", + "- `esrgan_tilesize`: If you need to squeeze a little more VRAM to complete the upscale, this is the setting.\n", + "---\n", + "- **Defaults (P100)**: `cutn=30`, `cutn_batches = 2 or 4`, `esrgan_tilesize = 512 or 1024`\n", + "\n", + "---\n", + "\n", + "## Image Quality Settings\n", + "- `clip_guidance_scale` controls how much the image should look like the prompt\n", + "- `tv_scale` controls smoothing\n", + " - set to 0 for crispy, otherwise 50-150 works well. 
Can go up to 10,000.\n", + "- `range_scale` Controls how far out of range RGB values are allowed to be.\n", + "- `scale_multiplier` multiplies clip_guidance_scale,tv_scale,range_scale\n", + " - On init images, 50 might be a good value; otherwise 150.\n", + "- `skip_timesteps` Controls the starting point along the diffusion timesteps.\n", + " - If you set `timestep_respacing` to ddim50 and `skip_timesteps` to `10`, your image will do 40 iterations.\n", + "- `clip_denoised` tries to filter out noise from generation.\n", + "- `fuzzy_prompt` attempts to include noise in generation\n", + "---\n", + "- **Defaults for non-init images**: `clip_guidance_scale=5000`,`tv_scale=100`,`range_scale=150`\n", + "- **Defaults for init images**: `clip_guidance_scale=2000`,`tv_scale=100`,`range_scale=50`\n", + "---\n", + "## Init Settings\n", + "- Leave `_init_image` blank if you want to generate from scratch.\n", + "- Set `_noise_amount` to 0 (Perlin noise not currently compatible)\n", + "- Set `skip_timesteps` to `30-40%` of your `timestep_respacing` value when using an init image. This setting also controls blur to a certain extent. \n", + "A possible good value for `_init_scale` is 1000. If set to `0` it will default back to 1000 when an init image is present.\n", + "- `_noise_amount` settings should be deactivated if an `init_image` is detected.\n", + "- `init_scale` controls how strictly the image adheres to the init image. `1000` is a good value, but for lower `skip_timesteps` you can set `init_scale` to `5000`. \n", + "\n", + "---\n", + "## A100 settings\n", + "The A100 does much better with `cutn_batches` set to 1. 
You can set `cutn` up to `128` to replicate results.\n", + "\n", + "---\n", + "**Examples of diffusion prompts:**\n", + "\n", + "- `Representation of chronic anxiety:2, illustrated by Luigi Serafini:1, inspired by the Codex Seraphinianus:1`\n", + "\n", + "- `Sad panda vector art made in Blender 3d:4, nature background:2, 4k parallax vibrant colorful panda bear trending on artstation:1` with init scale of 5, init image of https://www.pngitem.com/pimgs/m/16-161339_giant-panda-bear-silhouette-drawing-clip-art-silhouette.png\n", + "\n", + "- `Representation of depression | isometric vector art titled 'Depression'`\n", + "\n", + "- `Liminal space, liminal hotel hallway rendered in unreal engine, top post on r/liminalspaces`\n", + "\n", + "- `art piece titled 'The meaning of life is to find meaning IN life', existential clipart featuring your friend Dave, vector clipart anime faces`\n", + "\n", + "- `Vector art named 'Chronic procrastination equals suicidal ideation', vector HD clipart featuring suicide`\n", + "\n", + "---\n", + "\n", + "\n", + "*If you use `:` (weights), you must give everything a weight or you'll receive an error. For example, if you have three things and one has a weight of `:2`, the other two things need to have a weight of `:1`.*\n", + "\n", + "*I am currently unsure whether `|` (pipes) do the same thing as commas. Also unsure whether having spaces between commas changes the outcome. Weights work for both.*\n", + "\n", + "---\n", + "\n", + "Diffusion prompting might seem trickier to master than VQGAN models, but it still allows for some level of control. Sometimes less is better.\n", + "\n", + "It doesn't always need the `trending on artstation` type of lingo; sometimes it will benefit more from something like `photorealistic 4k nature replication`.\n", + "\n", + "Diffusion is kind of unique in how it often doesn't duplicate the desired subject. 
For example, `cat photo` will usually only give you 1 cat.\n", + "\n", + "------------------------------------------------\n", + "\n", + "### A Giant List of Terms to Try\n", + "\n", + "Credit for this list goes to @Atman on Discord and/or whoever else contributed to the Pastebin.\n", + "\n", + "*'8k resolution'\n", + ",'pencil sketch'\n", + ",'8K 3D'\n", + ",'creative commons attribution'\n", + ",'deviantart'\n", + ",'CryEngine'\n", + ",'Unreal Engine'\n", + ",'concept art'\n", + ",'photoillustration'\n", + ",'pixiv'\n", + ",'Flickr'\n", + ",'ArtStation HD'\n", + ",'Behance HD'\n", + ",'HDR'\n", + ",'anime'\n", + ",'filmic'\n", + ",'Stock photo'\n", + ",'Ambient occlusion'\n", + ",'Global illumination'\n", + ",'Chalk art'\n", + ",'Low poly'\n", + ",'Booru'\n", + ",'Polycount'\n", + ",'Acrylic art'\n", + ",'Hyperrealism'\n", + ",'Zbrush Central'\n", + ",'Rendered in Cinema4D'\n", + ",'Rendered in Maya'\n", + ",'Photo taken with Nikon D750'\n", + ",'Tilt shift'\n", + ",'Mixed media'\n", + ",'Depth of field'\n", + ",'DSLR'\n", + ",'Detailed painting'\n", + ",'Volumetric lighting'\n", + ",'Storybook illustration'\n", + ",'Unsplash contest winner'\n", + ",'#vfxfriday'\n", + ",'Ultrafine detail'\n", + ",'20 megapixels'\n", + ",'Photo taken with Fujifilm Superia'\n", + ",'Photo taken with Ektachrome'\n", + ",'matte painting'\n", + ",'reimagined by industrial light and magic'\n", + ",'Watercolor'\n", + ",'CGSociety'\n", + ",'childs drawing'\n", + ",'marble sculpture'\n", + ",'airbrush art'\n", + ",'renaissance painting'\n", + ",'Velvia'\n", + ",'Provia'\n", + ",'photo taken with Provia'\n", + ",'prerendered graphics'\n", + ",'criterion collection'\n", + ",'dye-transfer'\n", + ",'stipple'\n", + ",'Parallax'\n", + ",'Bryce 3D'\n", + ",'Terragen'\n", + ",'(2013) directed by cinematography by'\n", + ",'Bokeh'\n", + ",'1990s 1995'\n", + ",'1970s 1975'\n", + ",'1920s 1925'\n", + ",'charcoal drawing'\n", + ",'commission for'\n", + ",'furaffinity'\n", + ",'flat shading'\n", + 
",'ink drawing'\n", + ",'artwork'\n", + ",'oil on canvas'\n", + ",'macro photography'\n", + ",'hall of mirrors'\n", + ",'polished'\n", + ",'sunrays shine upon it'\n", + ",'aftereffects'\n", + ",'iridescent'\n", + ",'#film'\n", + ",'datamosh'\n", + ",'(1962) directed by cinematography'\n", + ",'holographic'\n", + ",'dutch golden age'\n", + ",'digitally enhanced'\n", + ",'National Geographic photo'\n", + ",'Associated Press photo'\n", + ",'matte background'\n", + ",'Art on Instagram'\n", + ",'#myportfolio'\n", + ",'digital illustration'\n", + ",'stock photo'\n", + ",'aftereffects'\n", + ",'speedpainting'\n", + ",'colorized'\n", + ",'detailed'\n", + ",'psychedelic'\n", + ",'wavy'\n", + ",'groovy'\n", + ",'movie poster'\n", + ",'pop art'\n", + ",'made of beads and yarn'\n", + ",'made of feathers'\n", + ",'made of crystals'\n", + ",'made of liquid metal'\n", + ",'made of glass'\n", + ",'made of cardboard'\n", + ",'made of vines'\n", + ",'made of cheese'\n", + ",'made of flowers'\n", + ",'made of insects'\n", + ",'made of mist'\n", + ",'made of paperclips'\n", + ",'made of rubber'\n", + ",'made of plastic'\n", + ",'made of wire'\n", + ",'made of trash'\n", + ",'made of wrought iron'\n", + ",'made of all of the above'\n", + ",'tattoo'\n", + ",'woodcut'\n", + ",'American propaganda'\n", + ",'Soviet propaganda'\n", + ",'PS1 graphics'\n", + ",'Fine art'\n", + ",'HD mod'\n", + ",'Photorealistic'\n", + ",'Poster art'\n", + ",'Constructivism'\n", + ",'pre-Raphaelite'\n", + ",'Impressionism'\n", + ",'Lowbrow'\n", + ",'RTX on'\n", + ",'chiaroscuro'\n", + ",'Egyptian art'\n", + ",'Fauvism'\n", + ",'shot on 70mm'\n", + ",'Art Deco'\n", + ",'Picasso'\n", + ",'Da Vinci'\n", + ",'Academic art'\n", + ",'3840x2160'\n", + ",'Photocollage'\n", + ",'Cubism'\n", + ",'Surrealist'\n", + ",'THX Sound'\n", + ",'ZBrush'\n", + ",'Panorama'\n", + ",'smooth'\n", + ",'DC Comics'\n", + ",'Marvel Comics'\n", + ",'Ukiyo-e'\n", + ",'Flemish Baroque'\n", + ",'vray tracing'\n", + ",'pixel perfect'\n", + 
",'quantum wavetracing'\n", + ",'Zbrush central contest winner'\n", + ",'ISO 200'\n", + ",'Bob Ross'\n", + ",'32k HUHD'\n", + ",'Photocopy'\n", + ",'DeviantArt HD'\n", + ",'infrared'\n", + ",'Angelic photograph'\n", + ",'Demonic photograph'\n", + ",'Biomorphic'\n", + ",'Windows Vista'\n", + ",'Skeuomorphic'\n", + ",'Physically based rendering'\n", + ",'Trance compilation CD'\n", + ",'Concert poster'\n", + ",'Steampunk'\n", + ",'Sketchfab'\n", + ",'Goth'\n", + ",'Wiccan'\n", + ",'trending on artstation'\n", + ",'featured on artstation'\n", + ",'artstation HQ'\n", + ",'artstation contest winner'\n", + ",'ultra HD'\n", + ",'high quality photo'\n", + ",'instax'\n", + ",'ilford HP5'\n", + ",'infrared'\n", + ",'Lomo'\n", + ",'Matte drawing'\n", + ",'matte photo'\n", + ",'glowing neon'\n", + ",'Xbox 360 graphics'\n", + ",'flickering light'\n", + ",'Playstation 5 screenshot'\n", + ",'Kodak Gold 200'\n", + ",'by Edward Hopper'\n", + ",'rough'\n", + ",'maximalist'\n", + ",'minimalist'\n", + ",'Kodak Ektar'\n", + ",'Kodak Portra'\n", + ",'geometric'\n", + ",'cluttered'\n", + ",'Rococo'\n", + ",'destructive'\n", + ",'by James Gurney'\n", + ",'by Thomas Kinkade'\n", + ",'by Vincent Di Fate'\n", + ",'by Jim Burns'\n", + ",'androgynous'\n", + ",'masculine'\n", + ",'genderless'\n", + ",'feminine'\n", + ",'extremely gendered, masculine and feminine'\n", + ",'4k result'\n", + ",'#pixelart'\n", + ",'voxel art'\n", + ",'wimmelbilder'\n", + ",'dystopian art'\n", + ",'apocalypse art'\n", + ",'apocalypse landscape'\n", + ",'2D game art'\n", + ",'Windows XP'\n", + ",'y2k aesthetic'\n", + ",'#screenshotsaturday'\n", + ",'seapunk'\n", + ",'vaporwave'\n", + ",'Ilya Kuvshinov'\n", + ",'Paul Cezanne'\n", + ",'Henry Moore'\n", + ",'phallic'\n", + ",'creepypasta'\n", + ",'retrowave'\n", + ",'synthwave'\n", + ",'outrun'*" + ] } ], "metadata": { @@ -3257,24 +3741,12 @@ "collapsed_sections": [ "CreditsChTop", "TutorialTop", - "CheckGPU", - "InstallDeps", - "DefMidasFns", - "DefFns", - 
"DefSecModel", - "DefSuperRes", - "AnimSetTop", - "ExtraSetTop", - "InstallRAFT", - "CustModel", - "FlowFns1", - "FlowFns2" + "AnimSetTop" ], - "machine_shape": "hm", - "name": "Disco Diffusion v5.4 [Now with Warp]", + "name": "HighDruidMotas Custom Disco Diffusion v5.4.?!", "private_outputs": true, "provenance": [], - "include_colab_link": true + "machine_shape": "hm" }, "kernelspec": { "display_name": "Python 3", @@ -3292,8 +3764,9 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" - } + }, + "gpuClass": "standard" }, "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file + "nbformat_minor": 0 +} diff --git a/README.md b/README.md index 34d5a6bf..3990a73b 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# Disco Diffusion +# Druid Diffusion -Open in Colab +Open in Colab -A frankensteinian amalgamation of notebooks, models and techniques for the generation of AI Art and Animations. +A Druid's re-amalgamation of an already frankensteinian amalgamation of notebooks, models and techniques for the generation of AI Art and Animations. [to be updated with further info soon] @@ -87,6 +87,9 @@ the tool being used is called [Colab-Convert](https://github.com/MSFTserver/cola * Warp mode - for smooth/continuous video input results leveraging optical flow estimation and frame blending * Custom models support +#### v5.4 Update: Jun 16th 2022 - Symmetry Loss Function(s) by PsiloCyborg (https://twitter.com/psilocyborg_nft), Tomas Gravenmier (https://twitter.com/HighDruidMotas), and Carson Bentley (https://twitter.com/Aztecman_Dnb) + + ## Notebook Provenance Original notebook by Katherine Crowson (https://github.com/crowsonkb, https://twitter.com/RiversHaveWings). It uses either OpenAI's 256x256 unconditional ImageNet or Katherine Crowson's fine-tuned 512x512 diffusion model (https://github.com/openai/guided-diffusion), together with CLIP (https://github.com/openai/CLIP) to connect text prompts with images. 
@@ -115,4 +118,6 @@ VR Mode by Tom Mason (https://twitter.com/nin_artificial) Horizontal and Vertical symmetry functionality by nshepperd. Symmetry transformation_steps by huemin (https://twitter.com/huemin_art). Symmetry integration into Disco Diffusion by Dmitrii Tochilkin (https://twitter.com/cut_pow). +Symmetry Loss Function(s) by PsiloCyborg (https://twitter.com/psilocyborg_nft), Tomas Gravenmier (https://twitter.com/HighDruidMotas), and Carson Bentley (https://twitter.com/Aztecman_Dnb) + Warp and custom model support by Alex Spirin (https://twitter.com/devdef).