@@ -46,7 +46,7 @@ pipeline {
         agent {
             docker {
                 image 'fnlp:torch-1.6'
-                args '-u root:root -v ${JENKINS_HOME}/html/docs:/docs -v ${JENKINS_HOME}/html/_ci:/ci --gpus all'
+                args '-u root:root -v ${JENKINS_HOME}/html/docs:/docs -v ${JENKINS_HOME}/html/_ci:/ci --gpus all --shm-size 1G'
             }
         }
         steps {
@@ -62,7 +62,7 @@ pipeline {
         agent {
             docker {
                 image 'fnlp:paddle'
-                args '-u root:root -v ${JENKINS_HOME}/html/docs:/docs -v ${JENKINS_HOME}/html/_ci:/ci --gpus all'
+                args '-u root:root -v ${JENKINS_HOME}/html/docs:/docs -v ${JENKINS_HOME}/html/_ci:/ci --gpus all --shm-size 1G'
             }
         }
         steps {
@@ -82,7 +82,7 @@ pipeline {
         // agent {
         //     docker {
         //         image 'fnlp:jittor'
-        //         args '-u root:root -v ${JENKINS_HOME}/html/docs:/docs -v ${JENKINS_HOME}/html/_ci:/ci --gpus all'
+        //         args '-u root:root -v ${JENKINS_HOME}/html/docs:/docs -v ${JENKINS_HOME}/html/_ci:/ci --gpus all --shm-size 1G'
         //     }
         // }
         // steps {
@@ -25,11 +25,11 @@ def prepare_env():
 def train_model(model, src_words_idx, tgt_words_idx, tgt_seq_len, src_seq_len):
-    optimizer = optim.Adam(model.parameters(), lr=1e-2)
+    optimizer = optim.Adam(model.parameters(), lr=5e-3)
     mask = seq_len_to_mask(tgt_seq_len).eq(0)
     target = tgt_words_idx.masked_fill(mask, -100)
-    for i in range(100):
+    for i in range(50):
         optimizer.zero_grad()
         pred = model(src_words_idx, tgt_words_idx, src_seq_len)['pred']  # bsz x max_len x vocab_size
         loss = F.cross_entropy(pred.transpose(1, 2), target)