Qwen-7B-Embedding
1- Basic usage example
```python
import torch
from torch import nn
from os.path import join as p_join
from modelscope import AutoTokenizer
from modelscope.hub.snapshot_download import snapshot_download

# Download the extracted embedding weights from ModelScope
snapshot_download(model_id="sccHyFuture/Qwen-7B-Embedding", cache_dir="./")

# Tokenize with the original Qwen-7B tokenizer
tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B", trust_remote_code=True)
ipts = tokenizer('一只猫', return_tensors='pt')['input_ids']

# Rebuild an nn.Embedding layer from the saved state dict
embed_dict = torch.load('./sccHyFuture/Qwen-7B-Embedding/qwen_7b_embed.pth')
vocab_size, embd_dim = embed_dict['weight'].size()
embed = nn.Embedding(vocab_size, embd_dim)
embed(ipts)                        # random initialization
embed.load_state_dict(embed_dict)
embed(ipts)                        # Qwen-7B embedding weights
```
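`embed(ipts)` returns one vector per token, shaped `(1, seq_len, embd_dim)`. To compare whole sentences you still need to pool those token vectors; the sketch below uses mean pooling and cosine similarity (both are illustrative choices layered on the snippet above, not something shipped with the weights):

```python
import torch
import torch.nn.functional as F

# Reuses `tokenizer` and `embed` built in the snippet above.
@torch.no_grad()
def sentence_vec(text: str) -> torch.Tensor:
    ids = tokenizer(text, return_tensors='pt')['input_ids']
    return embed(ids).mean(dim=1).squeeze(0)   # mean-pool over tokens

# Higher score = more similar under this (rough) representation
sim = F.cosine_similarity(sentence_vec('一只猫'), sentence_vec('小猫'), dim=0)
print(f'similarity: {sim.item():.4f}')
```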
2- chromadb Embedding replacement example

Wrap the embedding layer as a custom chromadb `EmbeddingFunction`:

```python
import os
from typing import cast

import torch
from chromadb import Documents, EmbeddingFunction, Embeddings
from modelscope import AutoTokenizer
from modelscope.hub.snapshot_download import snapshot_download


class QwenEmbeddingFunction(EmbeddingFunction):
    def __init__(self,
                 model_name: str = "qwen-7B",
                 device: str = "cpu",
                 normalize_embeddings: bool = False,
                 max_length: int = 128):
        self.model_name = model_name
        self.device = device
        self.normalize_embeddings = normalize_embeddings
        self.max_length = max_length
        self.tokenizer = AutoTokenizer.from_pretrained("qwen/Qwen-7B", trust_remote_code=True, pad_token='<|endoftext|>')
        self.emb = self._load_embd().to(self.device)

    @torch.no_grad()
    def __call__(self, input: Documents) -> Embeddings:
        # padding=True pads to the longest document in the batch; use
        # padding='max_length' if you need a fixed vector size across calls
        tk = self.tokenizer(input, return_tensors='pt', padding=True, truncation=True, max_length=self.max_length)['input_ids'].to(self.device)
        emb_out = self.emb(tk).detach()
        if self.normalize_embeddings:
            emb_out = torch.nn.functional.normalize(emb_out, p=2, dim=1)
        # Flatten each document's token embeddings into one vector
        return cast(
            Embeddings,
            [emb.numpy().flatten().tolist() for emb in emb_out]
        )

    def _load_embd(self):
        sep = os.path.sep
        embed_name_id = 'sccHyFuture/Qwen-7B-Embedding'
        local_embed_file = f'.{sep}{embed_name_id.replace("/", sep)}{sep}qwen_7b_embed.pth'
        # Download the weights from ModelScope if they are not cached locally
        if not os.path.exists(local_embed_file):
            snapshot_download(model_id=embed_name_id, cache_dir="./")
        embed_dict = torch.load(local_embed_file, map_location=self.device)
        vocab_size, embd_dim = embed_dict['weight'].size()
        embed = torch.nn.Embedding(vocab_size, embd_dim)
        embed.load_state_dict(embed_dict)
        return embed
```
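A quick usage sketch with the standard chromadb client API (the collection name and documents are placeholders; since `__call__` flattens per-token embeddings, this assumes you switch `padding=True` to `padding='max_length'` so every text maps to a vector of the same length):

```python
import chromadb

client = chromadb.Client()  # in-memory; use chromadb.PersistentClient(path=...) to persist
collection = client.create_collection(
    name="qwen_demo",
    embedding_function=QwenEmbeddingFunction(device="cpu"),
)
collection.add(documents=["一只猫", "一只狗", "一辆车"], ids=["d1", "d2", "d3"])
print(collection.query(query_texts=["小猫"], n_results=2))
```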