Some utility functions I frequently use with 🤗 diffusers.
Project description
cjm-diffusers-utils
Install
pip install cjm_diffusers_utils
How to use
pil_to_latent
from cjm_diffusers_utils.core import pil_to_latent
from PIL import Image # For working with images
from torchvision import transforms # PyTorch module for image transformations
# Import diffusers AutoencoderKL
from diffusers import AutoencoderKL
model_name = "stabilityai/stable-diffusion-2-1"
vae = AutoencoderKL.from_pretrained(model_name, subfolder="vae")
img_path = '../images/cat.jpg'
src_img = Image.open(img_path).convert('RGB')
print(f"Source Image Size: {src_img.size}")
img_latents = pil_to_latent(src_img, vae)
print(f"Latent Dimensions: {img_latents.shape}")
Source Image Size: (768, 512)
Latent Dimensions: torch.Size([1, 4, 64, 96])
latent_to_pil
from cjm_diffusers_utils.core import latent_to_pil
decoded_img = latent_to_pil(img_latents, vae)
print(f"Decoded Image Size: {decoded_img.size}")
Decoded Image Size: (768, 512)
text_to_emb
from cjm_diffusers_utils.core import text_to_emb
# Import the `CLIPTextModel` and `CLIPTokenizer` classes
from transformers import CLIPTextModel, CLIPTokenizer
# Load the tokenizer for the specified model
tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder="tokenizer")
# Load the text encoder for the specified model
text_encoder = CLIPTextModel.from_pretrained(model_name, subfolder="text_encoder")
prompt = "A cat sitting on the floor."
text_emb = text_to_emb(prompt, tokenizer, text_encoder)
text_emb.shape
torch.Size([2, 77, 1024])
prepare_noise_scheduler
from cjm_diffusers_utils.core import prepare_noise_scheduler
from diffusers import DDIMScheduler
noise_scheduler = DDIMScheduler.from_pretrained(model_name, subfolder='scheduler')
print(f"Number of timesteps: {len(noise_scheduler.timesteps)}")
print(noise_scheduler.timesteps[:10])
noise_scheduler = prepare_noise_scheduler(noise_scheduler, 70, 1.0)
print(f"Number of timesteps: {len(noise_scheduler.timesteps)}")
print(noise_scheduler.timesteps[:10])
Number of timesteps: 1000
tensor([999, 998, 997, 996, 995, 994, 993, 992, 991, 990])
Number of timesteps: 70
tensor([967, 953, 939, 925, 911, 897, 883, 869, 855, 841])
prepare_depth_mask
from cjm_diffusers_utils.core import prepare_depth_mask
depth_map_path = '../images/depth-cat.png'
depth_map = Image.open(depth_map_path)
print(f"Depth map size: {depth_map.size}")
depth_mask = prepare_depth_mask(depth_map)
depth_mask.shape, depth_mask.min(), depth_mask.max()
Depth map size: (768, 512)
(torch.Size([1, 1, 64, 96]), tensor(-1.), tensor(1.))
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Close
Hashes for cjm-diffusers-utils-0.0.1.tar.gz
Algorithm | Hash digest
---|---
SHA256 | 17a5dc87a57d204be6b8cc1201994452b191994f9f482054f4872db80dfb4bbc
MD5 | d970d75fc3135132bf63129f4aba0176
BLAKE2b-256 | 71a94a829206ae5d2441815b6219abb6621108e29b82a594e99427e4feb0e32e
Close
Hashes for cjm_diffusers_utils-0.0.1-py3-none-any.whl
Algorithm | Hash digest
---|---
SHA256 | 7e0e40ba414c2452ae6b776b733000e33a1a0c4438027fc9a3e67bd3c7be0529
MD5 | 597b0705dfb1fdf894c61f312f877f06
BLAKE2b-256 | 8ead05996b116d86f7f4c6b1594af69cf240f83d6968b001d147a1318e73d248