Paper | 🤗 HF Demo | Demo | Project Page | Github

If you find Emu2 useful for your research and applications, please consider starring this repository and citing it.

Emu2-Gen
Model Weights
Model name
Weight
🤗 HF link
🤗 HF link
🤗 HF link
Inference (Huggingface Version)
Emu2-Gen
import os

import cv2
import numpy as np
import requests
import torch
from diffusers import AutoPipelineForText2Image
from diffusers import DiffusionPipeline
from modelscope import AutoModelForCausalLM, AutoTokenizer
from modelscope import snapshot_download
from PIL import Image
# The first time you use the model, download the ModelScope repo
# "AI-ModelScope/Emu2-Gen" to a local directory.
path = snapshot_download("AI-ModelScope/Emu2-Gen", revision="master")

# Load the multimodal encoder and tokenizer from sub-folders of the
# downloaded snapshot so they can be handed to the pipeline explicitly.
multimodal_encoder = AutoModelForCausalLM.from_pretrained(
    f"{path}/multimodal_encoder",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
    variant="bf16",
)
tokenizer = AutoTokenizer.from_pretrained(f"{path}/tokenizer")

pipe = DiffusionPipeline.from_pretrained(
    path,
    custom_pipeline="pipeline_emu2_gen",
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
    variant="bf16",
    multimodal_encoder=multimodal_encoder,
    tokenizer=tokenizer,
)

# For the non-first time of using, you can init the pipeline directly.
# NOTE: this rebinds `pipe`, replacing the pipeline object created above.
pipe = DiffusionPipeline.from_pretrained(
    path,
    custom_pipeline="pipeline_emu2_gen",
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
    variant="bf16",
)
pipe.to("cuda")

# text-to-image
prompt = "impressionist painting of an astronaut in a jungle"
ret = pipe(prompt)
ret.image.save("astronaut.png")

# image editing: the prompt is a list mixing an image and an instruction
image = Image.open(os.path.join(path, "examples/dog.jpg")).convert("RGB")
prompt = [image, "wearing a red hat on the beach."]
ret = pipe(prompt)
ret.image.save("dog_hat_beach.png")
# grounding generation
def draw_box(left, top, right, bottom):
    """Return a 448x448 mask image containing one white rectangle outline.

    A black ``(448, 448, 3)`` uint8 canvas is created, a white rectangle
    with a line thickness of 3 is drawn between the two corner points, and
    the array is converted to a PIL image.

    Args:
        left: x coordinate of the rectangle's first corner.
        top: y coordinate of the rectangle's first corner.
        right: x coordinate of the opposite corner.
        bottom: y coordinate of the opposite corner.

    Returns:
        The mask as a ``PIL.Image.Image``.
    """
    mask = np.zeros((448, 448, 3), dtype=np.uint8)
    mask = cv2.rectangle(mask, (left, top), (right, bottom), (255, 255, 255), 3)
    mask = Image.fromarray(mask)
    return mask
dog1 = Image.open(os.path.join(path, "examples/dog1.jpg")).convert("RGB")
dog2 = Image.open(os.path.join(path, "examples/dog2.jpg")).convert("RGB")
dog3 = Image.open(os.path.join(path, "examples/dog3.jpg")).convert("RGB")

# One box mask per dog, given as (left, top, right, bottom).
dog1_mask = draw_box(22, 14, 224, 224)
dog2_mask = draw_box(224, 10, 448, 224)
dog3_mask = draw_box(120, 264, 320, 438)

# Each "<phrase>...</phrase>" literal is followed by "<object>" without a
# comma, so Python joins the two adjacent literals into a single string.
prompt = [
    "<grounding>",
    "An oil painting of three dogs,",
    "<phrase>the first dog</phrase>"
    "<object>",
    dog1_mask,
    "</object>",
    dog1,
    "<phrase>the second dog</phrase>"
    "<object>",
    dog2_mask,
    "</object>",
    dog2,
    "<phrase>the third dog</phrase>"
    "<object>",
    dog3_mask,
    "</object>",
    dog3,
]
ret = pipe(prompt)
ret.image.save("three_dogs.png")

# Autoencoding
# To enable the autoencoding mode, you can only input exactly one image as prompt.
# If you want the model to generate an image,
# please input extra empty text "" besides the image, e.g.
#   autoencoding mode: prompt = image or [image]
#   generation mode:   prompt = ["", image] or [image, ""]
prompt = Image.open(os.path.join(path, "examples/doodle.jpg")).convert("RGB")
ret = pipe(prompt)
ret.image.save("doodle_ae.png")
Citation
@article{Emu2,
title={Generative Multimodal Models are In-Context Learners},
author={Quan Sun and Yufeng Cui and Xiaosong Zhang and Fan Zhang and Qiying Yu and Zhengxiong Luo and Yueze Wang and Yongming Rao and Jingjing Liu and Tiejun Huang and Xinlong Wang},
publisher={arXiv preprint arXiv:2312.13286},
year={2023},
}
点击空白处退出提示
评论