NL2SQL-StarCoder-15B 是在基础模型 StarCoder 上通过 QLoRA 对自然语言生成SQL任务进行微调的 15B Code-LLM。
推理数据为模型在训练数据格式下拼接的字符串形式,它也是推理时输入prompt拼接的方式:
```sql
{输出SQL}
```
Model Card for NL2SQL-StarCoder-15B
模型介绍
Requirements
推理数据格式
"""
<|user|>
/* Give the following database schema: */
CREATE TABLE "table_name" (
"col1" int,
...
...
)
/* Write a sql to answer the following question: {问题} */
<|assistant|>
"""
快速开始
# Quick-start: load NL2SQL-StarCoder-15B from ModelScope and generate SQL
# for one example question against an example schema.
import torch
from modelscope import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    snapshot_download,
)

# Download (or reuse the cached copy of) the model repository.
model_dir = snapshot_download("iic/NL2SQL-StarCoder-15B")

tokenizer = AutoTokenizer.from_pretrained(
    model_dir,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
# Pad on the left so the generated tokens directly follow the prompt.
tokenizer.padding_side = "left"
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids("<fim_pad>")
tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("<|endoftext|>")
tokenizer.pad_token = "<fim_pad>"
tokenizer.eos_token = "<|endoftext|>"

model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
model.eval()

# Prompt: schema DDL followed by the natural-language question.
# Adjacent literals are concatenated into one string; each "\n" is a real
# newline in the prompt.
text = (
    '<|user|>\n'
    '/* Give the following database schema: */\n'
    'CREATE TABLE "singer" (\n'
    '"Singer_ID" int,\n'
    '"Name" text,\n'
    '"Country" text,\n'
    '"Song_Name" text,\n'
    '"Song_release_year" text,\n'
    '"Age" int,\n'
    '"Is_male" bool,\n'
    'PRIMARY KEY ("Singer_ID")\n'
    ')\n'
    '\n'
    '/* Write a sql to answer the following question: '
    'Show countries where a singer above age 40 and a singer below 30 '
    'are from. */<|end|>\n'
)

# The prompt already contains its special tokens, so none are added here.
inputs = tokenizer(
    text,
    return_tensors="pt",
    padding=True,
    add_special_tokens=False,
).to("cuda")

# NOTE(review): do_sample=False selects greedy decoding; top_p and
# temperature are kept from the original card but should only take effect
# when sampling is enabled -- confirm against the transformers docs.
outputs = model.generate(
    inputs=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=512,
    top_p=0.95,
    temperature=0.1,
    do_sample=False,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

# Drop the prompt tokens and decode only the newly generated continuation.
prompt_length = inputs["input_ids"].shape[1]
gen_text = tokenizer.batch_decode(
    outputs[:, prompt_length:],
    skip_special_tokens=True,
)
print(gen_text)
点击空白处退出提示
评论