Instructions to use Sanster/PowerPaint_v2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use Sanster/PowerPaint_v2 with Diffusers:
```bash
pip install -U diffusers transformers accelerate
```
```python
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("Sanster/PowerPaint_v2", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
```
- Notebooks
- Google Colab
- Kaggle
# Standard library
import sys

# Third-party
import cv2
import numpy as np
import torch
from diffusers import DPMSolverMultistepScheduler
from diffusers.utils import load_image
from PIL import Image, ImageOps
from transformers import CLIPTextModel, CLIPTokenizer

# Project-local PowerPaint v2 modules
from powerpaint_v2.BrushNet_CA import BrushNetModel
from powerpaint_v2.pipeline_PowerPaint_Brushnet_CA import (
    StableDiffusionPowerPaintBrushNetPipeline,
)
from powerpaint_v2.power_paint_tokenizer import PowerPaintTokenizer
from powerpaint_v2.unet_2d_condition import UNet2DConditionModel
def task_to_prompt(control_type):
    """Return the four task-prompt strings used for a given task name.

    Args:
        control_type: Task name. "object-removal", "context-aware",
            "shape-guided" and "image-outpainting" have dedicated entries;
            any other value (for example "inpaint") falls back to the
            object prompts.

    Returns:
        A tuple ``(promptA, promptB, negative_promptA, negative_promptB)``
        of strings.
    """
    task_prompts = {
        "object-removal": ("P_ctxt", "P_ctxt", "P_obj", "P_obj"),
        "context-aware": ("P_ctxt", "P_ctxt", "", ""),
        "shape-guided": ("P_shape", "P_ctxt", "P_shape", "P_ctxt"),
        "image-outpainting": ("P_ctxt", "P_ctxt", "P_obj", "P_obj"),
    }
    # Unknown task names are treated as plain (object) inpainting.
    fallback = ("P_obj", "P_obj", "P_obj", "P_obj")
    return task_prompts.get(control_type, fallback)
def predict(
    pipe,
    input_image,
    prompt,
    fitting_degree,
    ddim_steps,
    scale,
    negative_prompt,
    task,
):
    """Run one task through the pipeline and return the generated image.

    Args:
        pipe: Callable pipeline. It is invoked with the keyword arguments
            built below and must return an object exposing ``images``.
        input_image: Dict holding PIL images under the keys "image" and
            "mask". The dict and its images are left untouched.
        prompt: Text passed to the pipeline as ``promptU``.
        fitting_degree: Value passed as both ``tradoff`` and ``tradoff_nag``.
        ddim_steps: Value passed as ``num_inference_steps``.
        scale: Value passed as ``guidance_scale``.
        negative_prompt: Text passed to the pipeline as ``negative_promptU``.
        task: Task name, resolved to task prompts via ``task_to_prompt``.

    Returns:
        The first entry of the pipeline output's ``images``.
    """
    promptA, promptB, negative_promptA, negative_promptB = task_to_prompt(task)
    print(task, promptA, promptB, negative_promptA, negative_promptB)

    # Work on local RGB copies: the caller's dict must not be overwritten,
    # otherwise the masked/resized image leaks into later calls that reuse it.
    # Converting both to RGB also guarantees matching array shapes below.
    image = input_image["image"].convert("RGB")
    mask = input_image["mask"].convert("RGB")

    # Round both sides down to a multiple of 8.
    # NOTE(review): presumably required by the pipeline's latent
    # downsampling factor -- confirm against the pipeline implementation.
    width, height = image.size
    width -= width % 8
    height -= height % 8
    image = image.resize((width, height))
    mask = mask.resize((width, height))

    # Black out the region selected by the mask (mask value 255 -> pixel 0).
    keep = 1 - np.array(mask) / 255.0
    masked_image = Image.fromarray((np.array(image) * keep).astype(np.uint8))

    result = pipe(
        promptA=promptA,
        promptB=promptB,
        promptU=prompt,
        tradoff=fitting_degree,
        tradoff_nag=fitting_degree,
        image=masked_image,
        mask=mask,
        num_inference_steps=ddim_steps,
        brushnet_conditioning_scale=1.0,
        negative_promptA=negative_promptA,
        negative_promptB=negative_promptB,
        negative_promptU=negative_prompt,
        guidance_scale=scale,
        width=width,
        height=height,
    ).images[0]
    return result
# Base Stable Diffusion checkpoint, given as the first command-line argument
# (for example "runwayml/stable-diffusion-v1-5").
if len(sys.argv) < 2:
    sys.exit("usage: python <this_script> <base_model_name>")
base_model_name = sys.argv[1]

# The three component paths below are resolved relative to the current
# working directory, so run the script from the directory that holds the
# downloaded PowerPaint v2 weights.
text_encoder_brushnet = CLIPTextModel.from_pretrained(
    "text_encoder_brushnet",
    variant="fp16",
    torch_dtype=torch.float16,
)
unet = UNet2DConditionModel.from_pretrained(
    base_model_name,
    subfolder="unet",
    variant="fp16",
    torch_dtype=torch.float16,
)
brushnet = BrushNetModel.from_pretrained(
    "./PowerPaint_Brushnet",
    variant="fp16",
    torch_dtype=torch.float16,
)

# Assemble the pipeline from the base checkpoint, overriding the UNet and
# adding the BrushNet branch and its dedicated text encoder.
pipe = StableDiffusionPowerPaintBrushNetPipeline.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    safety_checker=None,
    unet=unet,
    brushnet=brushnet,
    text_encoder_brushnet=text_encoder_brushnet,
    variant="fp16",
)
pipe.tokenizer = PowerPaintTokenizer(CLIPTokenizer.from_pretrained("./tokenizer"))
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# Prefer Apple MPS (the device this script originally hard-coded), then CUDA.
# NOTE(review): the CPU fallback keeps float16 weights, which may be slow or
# unsupported for some ops -- confirm before relying on it.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
pipe = pipe.to(device)
# Example image and its mask, downloaded at run time.
img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
image = load_image(img_url).convert("RGB").resize((512, 512))
mask = load_image(mask_url).convert("RGB").resize((512, 512))
input_image = {"image": image, "mask": mask}

# Default prompts shared by the tasks that do not define their own.
prompt = "Face of a fox sitting on a bench"
negative_prompt = "out of frame, lowres, error, cropped, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, out of frame, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, disfigured, gross proportions, malformed limbs, watermark, signature"

# Settings applied to every task.
fitting_degree = 1
steps = 30

# One entry per run: the task name selects the task prompts, the remaining
# keys are forwarded to predict().
tasks = [
    {
        "task": "object-removal",
        "guidance_scale": 12,
        "prompt": "empty scene blur",
        "negative_prompt": "",
    },
    {
        "task": "shape-guided",
        "guidance_scale": 7.5,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
    },
    {
        "task": "context-aware",
        "guidance_scale": 7.5,
        # Was misspelled "empty secne", which the text encoder would see.
        "prompt": "empty scene",
        "negative_prompt": negative_prompt,
    },
    {
        "task": "inpaint",
        "guidance_scale": 7.5,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
    },
    {
        "task": "image-outpainting",
        "guidance_scale": 7.5,
        "prompt": "",
        "negative_prompt": negative_prompt,
    },
]
# Run every configured task and save each result as "<task>_result.png".
for task in tasks:
    # Give each task its own shallow copy of the inputs so nothing a task
    # does to them (the border expansion below, or any in-place edit made
    # while predicting) carries over into the tasks that follow.
    task_input = dict(input_image)

    if task["task"] == "image-outpainting":
        # Pad the picture with a gray frame and build a mask that is white
        # on the frame (area to generate) and black over the original image.
        margin = 128
        task_input["image"] = ImageOps.expand(
            task_input["image"],
            border=(margin, margin, margin, margin),
            fill=(127, 127, 127),
        )
        outpaint_mask = np.zeros_like(np.asarray(task_input["mask"]))
        task_input["mask"] = Image.fromarray(
            cv2.copyMakeBorder(
                outpaint_mask,
                margin,
                margin,
                margin,
                margin,
                cv2.BORDER_CONSTANT,
                value=(255, 255, 255),
            )
        )

    result_image = predict(
        pipe,
        task_input,
        task["prompt"],
        fitting_degree,
        steps,
        task["guidance_scale"],
        task["negative_prompt"],
        task["task"],
    )
    result_image.save(f"{task['task']}_result.png")