インストールコマンド

!pip install -q transformers==4.31.0 accelerate==0.21.0 diffusers==0.20.0 huggingface_hub==0.16.4 omegaconf==2.3.0
# Beautiful Realistic Asians V7をダウンロードして、diffusersで使える形式に変換する
!wget https://civitai.com/api/download/models/177164 --content-disposition
!wget https://raw.githubusercontent.com/huggingface/diffusers/v0.20.0/scripts/convert_original_stable_diffusion_to_diffusers.py
!python convert_original_stable_diffusion_to_diffusers.py \
--checkpoint_path /content/beautifulRealistic_v7.safetensors \
--dump_path /content/beautifulRealistic_v7/ \
--from_safetensors

!rm -rf beautifulRealistic_v7.safetensors
!rm -rf convert_original_stable_diffusion_to_diffusers.py
!rm -rf sample_data

設定ファイル

config.json.zip

{
  "prompt": "1girl,portrait, medium long shot, bokeh, natural light,looking at the camera, masterpiece, accurate, anatomically correct, textured skin, front lighting, using reflectors, f/1.8, 90mm, Fujifilm GFX 50R, regina display, super detail, high details, high quality, best quality, highres, UHD, 1080P, HD, 4K, 8K",
  "negative_prompt": "bad anatomy, bad proportions, blurry, cloned face, cropped, deformed, dehydrated, disfigured, duplicate, error, extra arms, extra fingers, extra legs, extra limbs, fused fingers, gross proportions, jpeg artifacts, long neck, low quality, lowres, malformed limbs, missing arms, missing legs, morbid, mutated hands, mutation, mutilated, out of frame, poorly drawn face, poorly drawn hands, signature, text, too many fingers, ugly, username, watermark, worst quality, overexposed, underexposed, noise, unbalanced composition, incorrect color balance, oversaturation, lack of focus, bad lighting, distorted perspective, nude, NSFW,monochrome, grayscale, black and white, b&w, sepia, oversaturated, Infrared, duotone, desaturated, low contrast,anime, manga, 2d, illustration, cartoon, paint, oil painting, drawing, paintings, sketches,digital art, watercolor, printmaking, pastel, graffiti, cubism, surrealism, impressionism, pointillism, realism",
  "clip_skip": 2,
  "model_path": "/content/beautifulRealistic_v7",
  "use_prompt_embeddings": true,
  "start_idx": 0,
  "batch_size": 10,
  "num_inference_steps": 40,
  "guidance_scale": 7,
  "width": 768,
  "height": 512
}

実行コマンド

# 標準ライブラリ
import sys
import os
import shutil
import time
import json
import zipfile

# サードパーティのライブラリ
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# Google Colab 関連
from google.colab import files as colab_files

# Hugging Face Transformers & Diffusers
import diffusers
import transformers

# Device and dtype selection
def get_device_and_dtype():
    """Choose the torch device and tensor dtype to run with.

    Returns:
        A ``(device, dtype)`` tuple: the CUDA device with ``torch.float16``
        when CUDA is available, otherwise the CPU device with
        ``torch.float32``.
    """
    has_cuda = torch.cuda.is_available()
    device_type = "cuda" if has_cuda else "cpu"
    dtype = torch.float16 if has_cuda else torch.float32
    return torch.device(device_type), dtype

# Pipeline construction
def setup_pipeline(model_path, clip_skip, torch_dtype):
    """Load the diffusion pipeline stored at ``model_path``.

    Args:
        model_path: Location of the diffusers-format pipeline to load.
        clip_skip: When greater than 1, the text encoder is rebuilt with
            ``12 - (clip_skip - 1)`` hidden layers and handed to the
            pipeline; otherwise the pipeline loads its default text encoder.
        torch_dtype: dtype forwarded to every ``from_pretrained`` call.

    Returns:
        The loaded pipeline, with the safety checker disabled.
    """
    pipeline_kwargs = {"torch_dtype": torch_dtype, "safety_checker": None}

    if clip_skip > 1:
        # Load the text encoder that belongs to this checkpoint rather than
        # an unrelated base repository: this keeps the checkpoint's own
        # text-encoder weights and avoids an extra network download.
        # NOTE(review): the literal 12 assumes a 12-layer CLIP text encoder
        # -- confirm before using with other model families.
        pipeline_kwargs["text_encoder"] = transformers.CLIPTextModel.from_pretrained(
            model_path,
            subfolder="text_encoder",
            num_hidden_layers=12 - (clip_skip - 1),
            torch_dtype=torch_dtype,
        )

    return diffusers.DiffusionPipeline.from_pretrained(model_path, **pipeline_kwargs)

# Prompt embedding computation
def get_embeddings(pipe, prompt, negative_prompt, split_character = ",", device_name = torch.device("cpu")):
    """Encode a prompt pair into equally sized embedding tensors.

    Both prompts are tokenized without truncation and padded to the token
    length of the longer one, so the two results always share the same
    sequence length. The token ids are then fed to ``pipe.text_encoder`` in
    windows of ``pipe.tokenizer.model_max_length`` tokens and the per-window
    outputs are concatenated along dimension 1.

    Args:
        pipe: Object exposing ``tokenizer`` and ``text_encoder``.
        prompt: Positive prompt text.
        negative_prompt: Negative prompt text.
        split_character: Unused; kept so existing callers keep working.
            Lengths are compared in tokens, not in delimiter-separated
            segments, because segment counts do not predict token counts.
        device_name: Device the token ids are moved to before encoding.

    Returns:
        A ``(prompt_embeds, negative_prompt_embeds)`` tuple.
    """
    max_length = pipe.tokenizer.model_max_length

    # Tokenizing both prompts as one batch with padding="longest" pads the
    # shorter prompt to the token length of the longer one.
    token_ids = pipe.tokenizer(
        [prompt, negative_prompt],
        padding="longest",
        truncation=False,
        return_tensors="pt",
    ).input_ids.to(device_name)
    input_ids = token_ids[0:1]
    negative_ids = token_ids[1:2]
    shape_max_length = token_ids.shape[-1]

    concat_embeds = []
    neg_embeds = []
    # Embeddings are only used for inference, so skip gradient tracking.
    with torch.no_grad():
        for start in range(0, shape_max_length, max_length):
            stop = start + max_length
            concat_embeds.append(pipe.text_encoder(input_ids[:, start:stop])[0])
            neg_embeds.append(pipe.text_encoder(negative_ids[:, start:stop])[0])

    return torch.cat(concat_embeds, dim=1), torch.cat(neg_embeds, dim=1)
# Image generation and saving
def generate_and_save_images(pipe, prompt_embeds, negative_prompt_embeds, seeds, use_prompt_embeddings, guidance_scale, num_inference_steps, width, height, device_name):
    """Generate one image per seed and write them under ``images/``.

    Args:
        pipe: Callable pipeline; its result must expose ``.images``.
        prompt_embeds: Passed as ``prompt_embeds=``, or as ``prompt=`` when
            ``use_prompt_embeddings`` is ``False``.
        negative_prompt_embeds: Passed as ``negative_prompt_embeds=``, or as
            ``negative_prompt=`` when ``use_prompt_embeddings`` is ``False``.
        seeds: Iterable of integer seeds; one pipeline call is made per seed.
        use_prompt_embeddings: Only the literal ``False`` selects the
            ``prompt=`` / ``negative_prompt=`` keywords.
        guidance_scale: Forwarded to the pipeline.
        num_inference_steps: Forwarded to the pipeline.
        width: Forwarded to the pipeline.
        height: Forwarded to the pipeline.
        device_name: Unused; kept for interface compatibility.

    Files are written as ``images/image_<i>.png`` where ``<i>`` is the
    position of the image in generation order (not the seed).
    """
    # Decide once which keyword pair carries the prompts.
    if use_prompt_embeddings is False:
        prompt_kwargs = {
            "prompt": prompt_embeds,
            "negative_prompt": negative_prompt_embeds,
        }
    else:
        prompt_kwargs = {
            "prompt_embeds": prompt_embeds,
            "negative_prompt_embeds": negative_prompt_embeds,
        }

    images = []
    for seed in seeds:
        result = pipe(
            **prompt_kwargs,
            width=width,
            height=height,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            num_images_per_prompt=1,
            # Re-seed right before each call so every image is reproducible
            # from its seed alone.
            generator=torch.manual_seed(seed),
        )
        images.extend(result.images)

    os.makedirs("images", exist_ok=True)
    for i, img in enumerate(images):
        img.save(f"images/image_{i}.png")

# ZIP archive creation and download
def create_and_download_zip():
    """Pack the ``images`` directory into ``images.zip`` and download it.

    Every file found under ``images`` is stored in the archive under a path
    relative to the parent of ``images``. After the download is requested
    through the Colab helper, the ``images`` directory is removed.
    """
    archive_root = os.path.join("images", '..')
    with zipfile.ZipFile("images.zip", "w") as archive:
        for folder, _, filenames in os.walk("images"):
            for filename in filenames:
                source_path = os.path.join(folder, filename)
                archive_name = os.path.relpath(source_path, archive_root)
                archive.write(source_path, archive_name)
    colab_files.download("images.zip")
    shutil.rmtree("images")

# Entry point
def main():
    """Run the whole flow: read config, build pipeline, generate, zip.

    Reads ``config.json`` from the current directory, loads the pipeline at
    ``model_path``, swaps its scheduler for an Euler-ancestral one built
    from the existing scheduler config, generates one image for each seed in
    ``range(start_idx, start_idx + batch_size)``, and finally zips and
    downloads the results.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open('config.json', 'r', encoding='utf-8') as f:
        config = json.load(f)

    # Unpack the settings
    prompt = config["prompt"].strip()
    negative_prompt = config["negative_prompt"].strip()
    clip_skip = config["clip_skip"]
    model_path = config["model_path"]
    use_prompt_embeddings = config["use_prompt_embeddings"]
    start_idx = config["start_idx"]
    batch_size = config["batch_size"]
    seeds = list(range(start_idx, start_idx + batch_size))
    num_inference_steps = config["num_inference_steps"]
    guidance_scale = config["guidance_scale"]
    width = config["width"]
    height = config["height"]

    device_name, torch_dtype = get_device_and_dtype()
    pipe = setup_pipeline(model_path, clip_skip, torch_dtype)
    pipe = pipe.to(device_name)
    pipe.scheduler = diffusers.EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

    # generate_and_save_images forwards these as plain prompts when
    # use_prompt_embeddings is False, so only pre-compute embeddings when
    # they will actually be consumed as embeddings.
    if use_prompt_embeddings is False:
        prompt_inputs, negative_prompt_inputs = prompt, negative_prompt
    else:
        prompt_inputs, negative_prompt_inputs = get_embeddings(
            pipe, prompt, negative_prompt, ",", device_name
        )

    generate_and_save_images(
        pipe,
        prompt_inputs,
        negative_prompt_inputs,
        seeds,
        use_prompt_embeddings,
        guidance_scale,
        num_inference_steps,
        width,
        height,
        device_name,
    )

    create_and_download_zip()