An experimental version of IP-Adapter-FaceID: we use face ID embedding from a face recognition model instead of CLIP image embedding; additionally, we use LoRA to improve ID consistency. IP-Adapter-FaceID can generate various style images conditioned on a face with only text prompts. Firstly, you should use insightface to extract face ID embedding: Introduction
Usage
pip install mxnet
pip install insightface>=0.2
pip install ip_adapter
# Example: generate images conditioned on a face ID embedding with
# IP-Adapter-FaceID. The script extracts an embedding from a photo with
# insightface, builds a Stable Diffusion pipeline from ModelScope snapshots,
# and saves the first generated image.
import cv2
from insightface.app import FaceAnalysis
import torch
from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
from ip_adapter.ip_adapter_faceid import IPAdapterFaceID
from PIL import Image
from modelscope import snapshot_download

# --- 1. Extract the face ID embedding -------------------------------------
app = FaceAnalysis(
    name="buffalo_l",
    root="/root/.insightface/models",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
app.prepare(ctx_id=0, det_size=(640, 640))

image_path = "/mnt/workspace/yk_dir/person.jpg"
image = cv2.imread(image_path)
# cv2.imread signals failure by returning None instead of raising.
if image is None:
    raise FileNotFoundError(f"could not read image: {image_path}")

faces = app.get(image)
# Fail with a clear message rather than an IndexError on faces[0].
if not faces:
    raise ValueError(f"no face detected in image: {image_path}")

# Use the first detected face; unsqueeze adds a leading dimension.
faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)

# --- 2. Download model snapshots from ModelScope --------------------------
base_model_path = "AI-ModelScope/Realistic_Vision_V5.1_noVAE"
local_base = snapshot_download(base_model_path, revision="master")
vae_model_path = "zhuzhukeji/sd-vae-ft-mse"
local_vae = snapshot_download(vae_model_path, revision="master")
local_ip = snapshot_download("AI-ModelScope/IP-Adapter-FaceID", revision="master")
ip_ckpt = local_ip + "/" + "ip-adapter-faceid_sd15.bin"
device = "cuda"

# --- 3. Build the Stable Diffusion pipeline -------------------------------
noise_scheduler = DDIMScheduler(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    clip_sample=False,
    set_alpha_to_one=False,
    steps_offset=1,
)
vae = AutoencoderKL.from_pretrained(local_vae).to(dtype=torch.float16)
pipe = StableDiffusionPipeline.from_pretrained(
    local_base,
    torch_dtype=torch.float16,
    scheduler=noise_scheduler,
    vae=vae,
    feature_extractor=None,
    safety_checker=None,
)

# load ip-adapter
ip_model = IPAdapterFaceID(pipe, ip_ckpt, device)

# generate image
prompt = "photo of a woman in red dress in a garden"
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"

images = ip_model.generate(
    prompt=prompt,
    negative_prompt=negative_prompt,
    faceid_embeds=faceid_embeds,
    num_samples=4,
    width=512,
    height=768,
    num_inference_steps=30,
    seed=2023,
)
images[0].save("woman.png")
Limitations and Bias
Non-commercial use
点击空白处退出提示
评论