From 1ecf588c862b6a19242b88ae41868eb9fbc5a118 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=A1=8C=E5=97=94?=
Date: Mon, 24 Oct 2022 20:56:58 +0800
Subject: [PATCH] update finetune

---
 modelscope/metrics/ciderD/__init__.py      |   1 +
 modelscope/metrics/ciderD/ciderD.py        |  57 +++++
 modelscope/metrics/ciderD/ciderD_scorer.py | 233 ++++++++++++++++++
 .../multi_modal/ofa/ofa_trainer_utils.py   |   4 +-
 4 files changed, 293 insertions(+), 2 deletions(-)
 create mode 100755 modelscope/metrics/ciderD/__init__.py
 create mode 100755 modelscope/metrics/ciderD/ciderD.py
 create mode 100755 modelscope/metrics/ciderD/ciderD_scorer.py

diff --git a/modelscope/metrics/ciderD/__init__.py b/modelscope/metrics/ciderD/__init__.py
new file mode 100755
index 00000000..3f7d85bb
--- /dev/null
+++ b/modelscope/metrics/ciderD/__init__.py
@@ -0,0 +1 @@
+__author__ = 'tylin'
diff --git a/modelscope/metrics/ciderD/ciderD.py b/modelscope/metrics/ciderD/ciderD.py
new file mode 100755
index 00000000..05c7eb23
--- /dev/null
+++ b/modelscope/metrics/ciderD/ciderD.py
@@ -0,0 +1,57 @@
+# Filename: ciderD.py
+#
+# Description: Describes the class to compute the CIDEr-D (Consensus-based Image Description Evaluation) metric
+#              by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726)
+#
+# Creation Date: Sun Feb 8 14:16:54 2015
+#
+# Authors: Ramakrishna Vedantam and Tsung-Yi Lin
+from __future__ import absolute_import, division, print_function
+
+from .ciderD_scorer import CiderScorer
+
+
+class CiderD:
+    """
+    Main class to compute the CIDEr-D metric
+
+    """
+
+    def __init__(self, n=4, sigma=6.0, df='corpus'):
+        # set cider to sum over 1- to 4-grams
+        self._n = n
+        # set the standard deviation parameter for the gaussian length penalty
+        self._sigma = sigma
+        # set where to compute document frequencies from
+        self._df = df
+        self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df)
+
+    def compute_score(self, gts, res):
+        """
+        Main function to compute the CIDEr-D score
+        :param gts (dict): maps each image id to a list of reference captions
+        :param res (list of dict): items of {'image_id': id, 'caption': [candidate]}
+        :return: cider (float): computed CIDEr-D score for the corpus
+        """
+
+        # clear all the previous hypos and refs
+        tmp_cider_scorer = self.cider_scorer.copy_empty()
+        tmp_cider_scorer.clear()
+        for res_id in res:
+
+            hypo = res_id['caption']
+            ref = gts[res_id['image_id']]
+
+            # Sanity check.
+            assert (type(hypo) is list)
+            assert (len(hypo) == 1)
+            assert (type(ref) is list)
+            assert (len(ref) > 0)
+            tmp_cider_scorer += (hypo[0], ref)
+
+        (score, scores) = tmp_cider_scorer.compute_score()
+
+        return score, scores
+
+    def method(self):
+        return 'CIDEr-D'
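[Editor's note] For orientation, a minimal usage sketch of the class above. The image
ids and captions are invented for illustration; df='corpus' means document frequencies
are computed from the supplied references themselves:

    from modelscope.metrics.ciderD.ciderD import CiderD

    scorer = CiderD(n=4, sigma=6.0, df='corpus')

    # gts: image id -> list of tokenized reference captions
    gts = {
        0: ['a man is riding a horse', 'a person rides a horse'],
        1: ['a cat sleeps on a sofa'],
    }
    # res: one dict per image; 'caption' holds a single candidate in a list
    res = [
        {'image_id': 0, 'caption': ['a man riding a horse']},
        {'image_id': 1, 'caption': ['a cat sleeping on a sofa']},
    ]

    corpus_score, per_image_scores = scorer.compute_score(gts, res)
    print(corpus_score, per_image_scores)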
diff --git a/modelscope/metrics/ciderD/ciderD_scorer.py b/modelscope/metrics/ciderD/ciderD_scorer.py
new file mode 100755
index 00000000..4157ec11
--- /dev/null
+++ b/modelscope/metrics/ciderD/ciderD_scorer.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python
+# Tsung-Yi Lin
+# Ramakrishna Vedantam
+from __future__ import absolute_import, division, print_function
+import copy
+import math
+import os
+import pdb
+from collections import defaultdict
+
+import numpy as np
+import six
+from six.moves import cPickle
+
+
+def precook(s, n=4, out=False):
+    """
+    Takes a string as input and returns an object that can be given to
+    either cook_refs or cook_test. This is optional: cook_refs and cook_test
+    can take string arguments as well.
+    :param s: string : sentence to be converted into ngrams
+    :param n: int : number of ngrams for which representation is calculated
+    :return: term frequency vector for occurring ngrams
+    """
+    words = s.split()
+    counts = defaultdict(int)
+    for k in range(1, n + 1):
+        for i in range(len(words) - k + 1):
+            ngram = tuple(words[i:i + k])
+            counts[ngram] += 1
+    return counts
+
+
+def cook_refs(refs, n=4):  # lhuang: oracle will call with "average"
+    '''Takes a list of reference sentences for a single segment
+    and returns an object that encapsulates everything that BLEU
+    needs to know about them.
+    :param refs: list of string : reference sentences for some image
+    :param n: int : number of ngrams for which (ngram) representation is calculated
+    :return: result (list of dict)
+    '''
+    return [precook(ref, n) for ref in refs]
+
+
+def cook_test(test, n=4):
+    '''Takes a test sentence and returns an object that
+    encapsulates everything that BLEU needs to know about it.
+    :param test: string : hypothesis sentence for some image
+    :param n: int : number of ngrams for which (ngram) representation is calculated
+    :return: result (dict)
+    '''
+    return precook(test, n, True)
+
+
+class CiderScorer(object):
+    """CIDEr scorer.
+    """
+
+    def copy(self):
+        ''' copy the refs.'''
+        new = CiderScorer(n=self.n)
+        new.ctest = copy.copy(self.ctest)
+        new.crefs = copy.copy(self.crefs)
+        return new
+
+    def copy_empty(self):
+        new = CiderScorer(df_mode='corpus', n=self.n, sigma=self.sigma)
+        new.df_mode = self.df_mode
+        new.ref_len = self.ref_len
+        new.document_frequency = self.document_frequency
+        return new
+
+    def __init__(self, df_mode='corpus', test=None, refs=None, n=4, sigma=6.0):
+        ''' singular instance '''
+        self.n = n
+        self.sigma = sigma
+        self.crefs = []
+        self.ctest = []
+        self.df_mode = df_mode
+        self.ref_len = None
+        if self.df_mode != 'corpus':
+            pkl_file = cPickle.load(
+                open(df_mode, 'rb'),
+                **(dict(encoding='latin1') if six.PY3 else {}))
+            self.ref_len = np.log(float(pkl_file['ref_len']))
+            self.document_frequency = pkl_file['document_frequency']
+        else:
+            self.document_frequency = None
+        self.cook_append(test, refs)
+
+    def clear(self):
+        self.crefs = []
+        self.ctest = []
+
+    def cook_append(self, test, refs):
+        '''called by constructor and __iadd__ to avoid creating new instances.'''
+
+        if refs is not None:
+            self.crefs.append(cook_refs(refs))
+        if test is not None:
+            self.ctest.append(cook_test(test))  # N.B.: -1
+        else:
+            self.ctest.append(
+                None)  # lens of crefs and ctest have to match
+
+    def size(self):
+        assert len(self.crefs) == len(
+            self.ctest), 'refs/test mismatch! %d<>%d' % (len(
+                self.crefs), len(self.ctest))
+        return len(self.crefs)
+
+    def __iadd__(self, other):
+        '''add an instance (e.g., from another sentence).'''
+
+        if type(other) is tuple:
+            # avoid creating new CiderScorer instances
+            self.cook_append(other[0], other[1])
+        else:
+            self.ctest.extend(other.ctest)
+            self.crefs.extend(other.crefs)
+
+        return self
+
+    def compute_doc_freq(self):
+        """
+        Compute the document frequency of each n-gram in the reference data.
+        This will be used to compute idf (inverse document frequency) later.
+        The document frequencies are stored in the object.
+        :return: None
+        """
+        for refs in self.crefs:
+            # refs, k ref captions of one image
+            for ngram in set([
+                    ngram for ref in refs for (ngram, count) in ref.items()
+            ]):  # noqa
+                self.document_frequency[ngram] += 1
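[Editor's note] To make the n-gram bookkeeping concrete, a standalone toy run of the
same counting logic (the sentence is made up; precook is reproduced here only so the
snippet runs on its own):

    from collections import defaultdict

    def precook(s, n=4, out=False):
        # same counting logic as the patched function above
        words = s.split()
        counts = defaultdict(int)
        for k in range(1, n + 1):
            for i in range(len(words) - k + 1):
                counts[tuple(words[i:i + k])] += 1
        return counts

    counts = precook('a man on a horse', n=2)
    assert counts[('a',)] == 2        # the unigram 'a' occurs twice
    assert counts[('a', 'man')] == 1  # the bigram ('a', 'man') occurs once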
+
+    def compute_cider(self):
+
+        def counts2vec(cnts):
+            """
+            Maps the n-gram counts of a sentence to a vector of tf-idf weights.
+            Returns vec, an array of dictionaries mapping each n-gram to its tf-idf weight;
+            the n-th entry of the array corresponds to (n+1)-grams.
+            :param cnts:
+            :return: vec (array of dict), norm (array of float), length (int)
+            """
+            vec = [defaultdict(float) for _ in range(self.n)]
+            length = 0
+            norm = [0.0 for _ in range(self.n)]
+            for (ngram, term_freq) in cnts.items():
+                # treat ngrams unseen in the reference corpus as having document frequency 1
+                df = np.log(max(1.0, self.document_frequency[ngram]))
+                # ngram index
+                n = len(ngram) - 1
+                # tf (term_freq) * idf (precomputed idf) for n-grams
+                vec[n][ngram] = float(term_freq) * (self.ref_len - df)
+                # compute norm for the vector. the norm will be used for computing similarity
+                norm[n] += pow(vec[n][ngram], 2)
+
+                if n == 1:
+                    length += term_freq
+            norm = [np.sqrt(n) for n in norm]
+            return vec, norm, length
+
+        def sim(vec_hyp, vec_ref, norm_hyp, norm_ref, length_hyp, length_ref):
+            '''
+            Compute the cosine similarity of two vectors.
+            :param vec_hyp: array of dictionary for vector corresponding to hypothesis
+            :param vec_ref: array of dictionary for vector corresponding to reference
+            :param norm_hyp: array of float for vector corresponding to hypothesis
+            :param norm_ref: array of float for vector corresponding to reference
+            :param length_hyp: int containing length of hypothesis
+            :param length_ref: int containing length of reference
+            :return: array of score for each n-gram's cosine similarity
+            '''
+            delta = float(length_hyp - length_ref)
+            # measure cosine similarity
+            val = np.array([0.0 for _ in range(self.n)])
+            for n in range(self.n):
+                # ngram
+                for (ngram, count) in vec_hyp[n].items():
+                    # vrama91 : added clipping
+                    val[n] += min(vec_hyp[n][ngram],
+                                  vec_ref[n][ngram]) * vec_ref[n][ngram]
+
+                if (norm_hyp[n] != 0) and (norm_ref[n] != 0):
+                    val[n] /= (norm_hyp[n] * norm_ref[n])
+
+                assert (not math.isnan(val[n]))
+                # vrama91: added a length based gaussian penalty
+                val[n] *= np.e**(-(delta**2) / (2 * self.sigma**2))
+            return val
+
+        # compute log reference length
+        if self.df_mode == 'corpus':
+            self.ref_len = np.log(float(len(self.crefs)))
+        # elif self.df_mode == "coco-val-df":
+        #     # if coco option selected, use length of coco-val set
+        #     self.ref_len = np.log(float(40504))
+
+        scores = []
+        for test, refs in zip(self.ctest, self.crefs):
+            # compute vector for test captions
+            vec, norm, length = counts2vec(test)
+            # compute vector for ref captions
+            score = np.array([0.0 for _ in range(self.n)])
+            for ref in refs:
+                vec_ref, norm_ref, length_ref = counts2vec(ref)
+                score += sim(vec, vec_ref, norm, norm_ref, length, length_ref)
+            # change by vrama91 - mean of ngram scores, instead of sum
+            score_avg = np.mean(score)
+            # divide by the number of references
+            score_avg /= len(refs)
+            # multiply score by 10
+            score_avg *= 10.0
+            # append score of an image to the score list
+            scores.append(score_avg)
+        return scores
+
+    def compute_score(self, option=None, verbose=0):
+        # compute idf
+        if self.df_mode == 'corpus':
+            self.document_frequency = defaultdict(float)
+            self.compute_doc_freq()
+            # assert to check document frequency
+            assert (len(self.ctest) >= max(self.document_frequency.values()))
+            # import json for now and write the corresponding files
+        # compute cider score
+        score = self.compute_cider()
+        # debug
+        # print score
+        return np.mean(np.array(score)), np.array(score)
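[Editor's note] compute_cider follows the CIDEr-D definition from Vedantam et al.
(arXiv:1411.5726). With candidate c_i, references S_i = {s_i1, ..., s_im}, clipped
tf-idf n-gram vectors g^n(.) and sentence lengths l(.), the per-n score computed by
sim() and the surrounding loop is, in LaTeX:

    \mathrm{CIDEr\text{-}D}_n(c_i, S_i)
        = \frac{10}{m} \sum_{j=1}^{m}
          e^{-\frac{(l(c_i) - l(s_{ij}))^2}{2\sigma^2}}
          \cdot
          \frac{\min\big(g^n(c_i),\, g^n(s_{ij})\big) \cdot g^n(s_{ij})}
               {\lVert g^n(c_i) \rVert \, \lVert g^n(s_{ij}) \rVert}

and the final score averages over n = 1..N (N = 4 here). The min(...) is the clipping
marked "vrama91: added clipping", and the exponential is the gaussian length penalty.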
diff --git a/modelscope/trainers/multi_modal/ofa/ofa_trainer_utils.py b/modelscope/trainers/multi_modal/ofa/ofa_trainer_utils.py
index b2e54ec6..2189a5db 100644
--- a/modelscope/trainers/multi_modal/ofa/ofa_trainer_utils.py
+++ b/modelscope/trainers/multi_modal/ofa/ofa_trainer_utils.py
@@ -83,8 +83,8 @@ def label_smoothed_nll_loss(lprobs,
             lprobs = lprobs[indices]
 
     ntokens = loss.numel()
-    nll_loss = nll_loss.sum()
-    loss = loss.sum()
+    nll_loss = nll_loss.sum() / ntokens  # handled later in the grads
+    loss = loss.sum() / ntokens  # handled later in the grads
     if use_rdrop:
         true_batch_size = lprobs.size(0) // 2
         p = lprobs[:true_batch_size]
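[Editor's note] The hunk above switches the label-smoothed loss from a sum over tokens
to a per-token mean, so the loss scale no longer grows with the number of target
tokens. A self-contained sketch of the same normalization; the smoothing formula below
is the common fairseq-style recipe, and the shapes and epsilon are illustrative, not
taken from the OFA trainer:

    import torch
    import torch.nn.functional as F

    def label_smoothed_nll(lprobs, target, epsilon=0.1):
        # lprobs: (ntokens, vocab) log-probabilities; target: (ntokens,) indices
        nll_loss = -lprobs.gather(dim=-1, index=target.unsqueeze(-1)).squeeze(-1)
        smooth_loss = -lprobs.sum(dim=-1)
        eps_i = epsilon / (lprobs.size(-1) - 1)
        loss = (1.0 - epsilon - eps_i) * nll_loss + eps_i * smooth_loss
        ntokens = loss.numel()
        # patched behavior: normalize by token count instead of returning raw sums
        return loss.sum() / ntokens, nll_loss.sum() / ntokens

    lprobs = F.log_softmax(torch.randn(6, 12), dim=-1)
    target = torch.randint(0, 12, (6,))
    loss, nll = label_smoothed_nll(lprobs, target)
    print(loss.item(), nll.item())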