Training procedure
Framework versions
- SWIFT 1.5.1
Base model information
情感分类模型
使用默认notebook中的环境与swift-llm进行训练。数据集使用:jd-sentiment-zh
#
训练参数设置: output_dir:"/mnt/workspace/output/qwen-1_8b/v0-20240122-133753" overwrite_output_dir:false do_train:false do_eval:true do_predict:false evaluation_strategy:"steps" prediction_loss_only:false per_device_train_batch_size:1 per_device_eval_batch_size:1 per_gpu_train_batch_size:null per_gpu_eval_batch_size:null gradient_accumulation_steps:16 eval_accumulation_steps:null eval_delay:0 learning_rate:0.0001 weight_decay:0.01 adam_beta1:0.9 adam_beta2:0.999 adam_epsilon:1e-8 max_grad_norm:0.5 num_train_epochs:1 max_steps:-1 lr_scheduler_type:"linear" lr_scheduler_kwargs: warmup_ratio:0.05 warmup_steps:0 log_level:"passive" log_level_replica:"warning" log_on_each_node:true logging_dir:"/mnt/workspace/output/qwen-1_8b/v0-20240122-133753/runs" logging_strategy:"steps" logging_first_step:false logging_steps:5 logging_nan_inf_filter:true save_strategy:"steps" save_steps:50 save_total_limit:2 save_safetensors:true save_on_each_node:true save_only_model:false no_cuda:false use_cpu:false use_mps_device:false seed:42 data_seed:null jit_mode_eval:false use_ipex:false bf16:true fp16:false fp16_opt_level:"O1" half_precision_backend:"auto" bf16_full_eval:false fp16_full_eval:false tf32:null local_rank:0 ddp_backend:"nccl" tpu_num_cores:null tpu_metrics_debug:false debug: dataloader_drop_last:false eval_steps:50 dataloader_num_workers:1 past_index:-1 run_name:"/mnt/workspace/output/qwen-1_8b/v0-20240122-133753" disable_tqdm:false remove_unused_columns:false label_names:null load_best_model_at_end:true metric_for_best_model:"loss" greater_is_better:false ignore_data_skip:false fsdp: fsdp_min_num_params:0 min_num_params:0 xla:false xla_fsdp_grad_ckpt:false fsdp_transformer_layer_cls_to_wrap:null deepspeed:null label_smoothing_factor:0 optim:"adamw_torch" optim_args:null adafactor:false group_by_length:false length_column_name:"length" ddp_find_unused_parameters:null ddp_bucket_cap_mb:null ddp_broadcast_buffers:null dataloader_pin_memory:true dataloader_persistent_workers:false skip_memory_metrics:true use_legacy_prediction_loop:false push_to_hub:false resume_from_checkpoint:null hub_model_id:"qwen-1_8b-lora" hub_strategy:"every_save" 
hub_token:null hub_private_repo:true hub_always_push:false gradient_checkpointing:true gradient_checkpointing_kwargs:null include_inputs_for_metrics:false fp16_backend:"auto" push_to_hub_model_id:null push_to_hub_organization:null push_to_hub_token:null mp_parameters:"" auto_find_batch_size:false full_determinism:false torchdynamo:null ray_scope:"last" ddp_timeout:1800 torch_compile:false torch_compile_backend:null torch_compile_mode:null dispatch_batches:null split_batches:false include_tokens_per_second:false include_num_input_tokens_seen:false neftune_noise_alpha:null sortish_sampler:true predict_with_generate:false generation_max_length:null generation_num_beams:null generation_config:"GenerationConfig { "chat_format": "raw", "do_sample": true, "eos_token_id": 151643, "max_new_tokens": 2048, "pad_token_id": 151643, "repetition_penalty": 1.05, "stop_words_ids": [ [ 151643 ] ], "temperature": 0.3, "top_k": 20, "top_p": 0.7 } " train_sampler_random:true push_hub_strategy:"push_best" acc_strategy:"token" additional_saved_files: distributed_state:"Distributed environment: NO Num processes: 1 Process index: 0 Local process index: 0 Device: cuda " _n_gpu:1 cached_setup_devices:"device(type='cuda', index=0)" deepspeed_plugin:null
#
训练日志: {"loss": 3.6920166, "acc": 0.00625, "learning_rate": 7.143e-05, "epoch": 0.04, "global_step": 5} {"loss": 0.97254028, "acc": 0.64375, "learning_rate": 9.746e-05, "epoch": 0.08, "global_step": 10} {"loss": 0.20227733, "acc": 0.925, "learning_rate": 9.322e-05, "epoch": 0.12, "global_step": 15} {"loss": 0.13709036, "acc": 0.95, "learning_rate": 8.898e-05, "epoch": 0.16, "global_step": 20} {"loss": 0.20261045, "acc": 0.93125, "learning_rate": 8.475e-05, "epoch": 0.2, "global_step": 25} {"loss": 0.21394541, "acc": 0.95, "learning_rate": 8.051e-05, "epoch": 0.24, "global_step": 30} {"loss": 0.078833, "acc": 0.975, "learning_rate": 7.627e-05, "epoch": 0.28, "global_step": 35} {"loss": 0.19900661, "acc": 0.94375, "learning_rate": 7.203e-05, "epoch": 0.32, "global_step": 40} {"loss": 0.15380244, "acc": 0.9375, "learning_rate": 6.78e-05, "epoch": 0.36, "global_step": 45} {"loss": 0.16312559, "acc": 0.91875, "learning_rate": 6.356e-05, "epoch": 0.4, "global_step": 50} {"eval_loss": 0.09244571, "eval_acc": 0.95, "eval_runtime": 0.6682, "eval_samples_per_second": 29.929, "eval_steps_per_second": 29.929, "epoch": 0.4, "global_step": 50} {"loss": 0.13774208, "acc": 0.95, "learning_rate": 5.932e-05, "epoch": 0.44, "global_step": 55} {"loss": 0.08817277, "acc": 0.98125, "learning_rate": 5.508e-05, "epoch": 0.48, "global_step": 60} {"loss": 0.10711486, "acc": 0.95625, "learning_rate": 5.085e-05, "epoch": 0.52, "global_step": 65} {"loss": 0.21884944, "acc": 0.94375, "learning_rate": 4.661e-05, "epoch": 0.56, "global_step": 70} {"loss": 0.12763548, "acc": 0.95625, "learning_rate": 4.237e-05, "epoch": 0.6, "global_step": 75} {"loss": 0.1417256, "acc": 0.9375, "learning_rate": 3.814e-05, "epoch": 0.64, "global_step": 80} {"loss": 0.16512451, "acc": 0.95, "learning_rate": 3.39e-05, "epoch": 0.68, "global_step": 85} {"loss": 0.16839138, "acc": 0.94375, "learning_rate": 2.966e-05, "epoch": 0.72, "global_step": 90} {"loss": 0.11108458, "acc": 0.9375, "learning_rate": 2.542e-05, "epoch": 0.76, "global_step": 95} {"loss": 0.13994989, 
"acc": 0.91875, "learning_rate": 2.119e-05, "epoch": 0.8, "global_step": 100} {"eval_loss": 0.05509659, "eval_acc": 0.975, "eval_runtime": 0.6701, "eval_samples_per_second": 29.847, "eval_steps_per_second": 29.847, "epoch": 0.8, "global_step": 100} {"loss": 0.12323895, "acc": 0.9625, "learning_rate": 1.695e-05, "epoch": 0.84, "global_step": 105} {"loss": 0.15396147, "acc": 0.94375, "learning_rate": 1.271e-05, "epoch": 0.88, "global_step": 110} {"loss": 0.12936571, "acc": 0.95, "learning_rate": 8.47e-06, "epoch": 0.92, "global_step": 115} {"loss": 0.10359797, "acc": 0.9625, "learning_rate": 4.24e-06, "epoch": 0.96, "global_step": 120} {"loss": 0.17067654, "acc": 0.9375, "learning_rate": 0.0, "epoch": 1.0, "global_step": 125} {"eval_loss": 0.06561256, "eval_acc": 0.975, "eval_runtime": 0.6797, "eval_samples_per_second": 29.426, "eval_steps_per_second": 29.426, "epoch": 1.0, "global_step": 125} {"train_runtime": 263.571, "train_samples_per_second": 7.588, "train_steps_per_second": 0.474, "total_flos": 898775689334784.0, "train_loss": 0.32407517, "epoch": 1.0, "global_step": 125}
#
#
# Example code (示例代码): LoRA fine-tune Qwen-1.8B on jd-sentiment-zh, then run inference.
# Hyperparameter reference (超参数):
# https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.md
import os

# Pin the run to GPU 0; must be set before torch initializes CUDA.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import torch
from swift.llm import (
    DatasetName, InferArguments, ModelType, SftArguments,
    infer_main, sft_main, app_ui_main, merge_lora_main
)

# Base model: Qwen-1.8B (identifier per swift's ModelType — confirm against installed swift version).
model_type = ModelType.qwen_1_8b

# LoRA SFT on 2000 samples of the JD sentiment (Chinese) dataset.
sft_args = SftArguments(
    model_type=model_type,
    sft_type='lora',
    train_dataset_sample=2000,
    dataset=[DatasetName.jd_sentiment_zh],
    output_dir='output',
)
result = sft_main(sft_args)
best_model_checkpoint = result['best_model_checkpoint']
print(f'best_model_checkpoint: {best_model_checkpoint}')

# Release cached GPU memory from training before loading for inference.
torch.cuda.empty_cache()

# Inference with the best checkpoint; do_sample=False -> greedy decoding.
infer_args = InferArguments(
    ckpt_dir=best_model_checkpoint,
    load_dataset_config=True,
    do_sample=False,
)
result = infer_main(infer_args)
#
#
推理效果 [INPUT] Task: Sentiment Classification Sentence: 不好!!不清楚呢!!感觉盗版的!! Category: negative, positive Output: negative<|endoftext|>
评论