From f63c37fc1fa5b403b42c69ad6521916bda1fc111 Mon Sep 17 00:00:00 2001 From: ly119399 Date: Thu, 30 Jun 2022 14:09:29 +0800 Subject: [PATCH] add test case --- .../preprocessors/space/dst_processors.py | 74 +-------------- .../nlp/test_dialog_state_tracking.py | 90 ++++++++++++++----- 2 files changed, 68 insertions(+), 96 deletions(-) diff --git a/modelscope/preprocessors/space/dst_processors.py b/modelscope/preprocessors/space/dst_processors.py index 12f7f1f8..1f9920a9 100644 --- a/modelscope/preprocessors/space/dst_processors.py +++ b/modelscope/preprocessors/space/dst_processors.py @@ -462,39 +462,6 @@ class multiwoz22Processor(DSTProcessor): utt_tok_list.append(self.tokenize( utt['text'])) # normalize utterances - # modified_slots = {} - - # If sys utt, extract metadata (identify and collect modified slots) - # if is_sys_utt: - # for d in utt['metadata']: - # booked = utt['metadata'][d]['book']['booked'] - # booked_slots = {} - # # Check the booked section - # if booked != []: - # for s in booked[0]: - # booked_slots[s] = self.normalize_label( - # '%s-%s' % (d, s), - # booked[0][s]) # normalize labels - # # Check the semi and the inform slots - # for category in ['book', 'semi']: - # for s in utt['metadata'][d][category]: - # cs = '%s-book_%s' % ( - # d, s) if category == 'book' else '%s-%s' % (d, - # s) - # value_label = self.normalize_label( - # cs, utt['metadata'][d][category] - # [s]) # normalize labels - # # Prefer the slot value as stored in the booked section - # if s in booked_slots: - # value_label = booked_slots[s] - # # Remember modified slots and entire dialog state - # if cs in slot_list and cumulative_labels[ - # cs] != value_label: - # modified_slots[cs] = value_label - # cumulative_labels[cs] = value_label - # - # mod_slots_list.append(modified_slots.copy()) - # Form proper (usr, sys) turns turn_itr = 0 diag_seen_slots_dict = {} @@ -938,8 +905,8 @@ def convert_examples_to_features(examples, # Account for [CLS], [SEP], [SEP], [SEP] with "- 4" (BERT) if len(tokens_a) + len(tokens_b) + len( history) > max_seq_length - model_specs['TOKEN_CORRECTION']: - logger.info('Truncate Example %s. Total len=%d.' % - (guid, len(tokens_a) + len(tokens_b) + len(history))) + # logger.info('Truncate Example %s. Total len=%d.' % + # (guid, len(tokens_a) + len(tokens_b) + len(history))) input_text_too_long = True else: input_text_too_long = False @@ -968,7 +935,6 @@ def convert_examples_to_features(examples, def _get_start_end_pos(class_type, token_label_ids, max_seq_length): if class_type == 'copy_value' and 1 not in token_label_ids: - # logger.warn("copy_value label, but token_label not detected. Setting label to 'none'.") class_type = 'none' start_pos = 0 end_pos = 0 @@ -1045,9 +1011,6 @@ def convert_examples_to_features(examples, features = [] # Convert single example for (example_index, example) in enumerate(examples): - if example_index % 1000 == 0: - logger.info('Writing example %d of %d' % - (example_index, len(examples))) total_cnt += 1 @@ -1075,17 +1038,6 @@ def convert_examples_to_features(examples, model_specs, example.guid) if input_text_too_long: - if example_index < 10: - if len(token_labels_a) > len(tokens_a): - logger.info(' tokens_a truncated labels: %s' - % str(token_labels_a[len(tokens_a):])) - if len(token_labels_b) > len(tokens_b): - logger.info(' tokens_b truncated labels: %s' - % str(token_labels_b[len(tokens_b):])) - if len(token_labels_history) > len(tokens_history): - logger.info( - ' tokens_history truncated labels: %s' - % str(token_labels_history[len(tokens_history):])) token_labels_a = token_labels_a[:len(tokens_a)] token_labels_b = token_labels_b[:len(tokens_b)] @@ -1136,25 +1088,6 @@ def convert_examples_to_features(examples, assert (len(input_ids) == len(input_ids_unmasked)) - # if example_index < 10: - # logger.info('*** Example ***') - # logger.info('guid: %s' % (example.guid)) - # logger.info('tokens: %s' % ' '.join(tokens)) - # logger.info('input_ids: %s' % ' '.join([str(x) - # for x in input_ids])) - # logger.info('input_mask: %s' - # % ' '.join([str(x) for x in input_mask])) - # logger.info('segment_ids: %s' - # % ' '.join([str(x) for x in segment_ids])) - # logger.info('start_pos: %s' % str(start_pos_dict)) - # logger.info('end_pos: %s' % str(end_pos_dict)) - # logger.info('values: %s' % str(value_dict)) - # logger.info('inform: %s' % str(inform_dict)) - # logger.info('inform_slot: %s' % str(inform_slot_dict)) - # logger.info('refer_id: %s' % str(refer_id_dict)) - # logger.info('diag_state: %s' % str(diag_state_dict)) - # logger.info('class_label_id: %s' % str(class_label_id_dict)) - features.append( InputFeatures( guid=example.guid, @@ -1171,9 +1104,6 @@ def convert_examples_to_features(examples, diag_state=diag_state_dict, class_label_id=class_label_id_dict)) - logger.info('========== %d out of %d examples have text too long' % - (too_long_cnt, total_cnt)) - return features diff --git a/tests/pipelines/nlp/test_dialog_state_tracking.py b/tests/pipelines/nlp/test_dialog_state_tracking.py index 9c0e344f..fbb87b42 100644 --- a/tests/pipelines/nlp/test_dialog_state_tracking.py +++ b/tests/pipelines/nlp/test_dialog_state_tracking.py @@ -15,25 +15,66 @@ class DialogStateTrackingTest(unittest.TestCase): model_id = 'damo/nlp_space_dialog-state-tracking' test_case = [{ 'User-1': - 'am looking for a place to to stay that has cheap price range it should be in a type of hotel' + 'Hi, I\'m looking for a train that is going to cambridge and arriving there by 20:45, ' + 'is there anything like that?' }, { 'System-1': - 'Okay, do you have a specific area you want to stay in?', + 'There are over 1,000 trains like that. Where will you be departing from?', 'Dialog_Act-1': { - 'Hotel-Request': [['Area', '?']] + 'Train-Inform': [['Choice', 'over 1'], ['Choice', '000']], + 'Train-Request': [['Depart', '?']] }, - 'User-2': - "no, i just need to make sure it's cheap. oh, and i need parking" + 'User-2': 'I am departing from birmingham new street.' }, { - 'System-2': - 'I found 1 cheap hotel for you that includes parking. Do you like me to book it?', + 'System-2': 'Can you confirm your desired travel day?', 'Dialog_Act-2': { - 'Booking-Inform': [['none', 'none']], - 'Hotel-Inform': [['Price', 'cheap'], ['Choice', '1'], - ['Parking', 'none']] + 'Train-Request': [['Day', '?']] }, - 'User-3': - 'Yes, please. 6 people 3 nights starting on tuesday.' + 'User-3': 'I would like to leave on wednesday' + }, { + 'System-3': + 'I show a train leaving birmingham new street at 17:40 and arriving at 20:23 on Wednesday. ' + 'Will this work for you?', + 'Dialog_Act-3': { + 'Train-Inform': [['Arrive', '20:23'], ['Leave', '17:40'], + ['Day', 'Wednesday'], + ['Depart', 'birmingham new street']] + }, + 'User-4': + 'That will, yes. Please make a booking for 5 people please.', + }, { + 'System-4': + 'I\'ve booked your train tickets, and your reference number is A9NHSO9Y.', + 'Dialog_Act-4': { + 'Train-OfferBooked': [['Ref', 'A9NHSO9Y']] + }, + 'User-5': + 'Thanks so much. I would also need a place to say. ' + 'I am looking for something with 4 stars and has free wifi.' + }, { + 'System-5': + 'How about the cambridge belfry? ' + 'It has all the attributes you requested and a great name! ' + 'Maybe even a real belfry?', + 'Dialog_Act-5': { + 'Hotel-Recommend': [['Name', 'the cambridge belfry']] + }, + 'User-6': + 'That sounds great, could you make a booking for me please?', + }, { + 'System-6': + 'What day would you like your booking for?', + 'Dialog_Act-6': { + 'Booking-Request': [['Day', '?']] + }, + 'User-7': + 'Please book it for Wednesday for 5 people and 5 nights, please.', + }, { + 'System-7': 'Booking was successful. Reference number is : 5NAWGJDC.', + 'Dialog_Act-7': { + 'Booking-Book': [['Ref', '5NAWGJDC']] + }, + 'User-8': 'Thank you, goodbye', }] def test_run(self): @@ -51,21 +92,22 @@ class DialogStateTrackingTest(unittest.TestCase): preprocessor=preprocessor) ] - history_states = [{}] - utter = {} pipelines_len = len(pipelines) import json - for step, item in enumerate(self.test_case): - utter.update(item) - result = pipelines[step % pipelines_len]({ - 'utter': - utter, - 'history_states': - history_states - }) - print(json.dumps(result)) + for _test_case in self.test_case: + history_states = [{}] + utter = {} + for step, item in enumerate(_test_case): + utter.update(item) + result = pipelines[step % pipelines_len]({ + 'utter': + utter, + 'history_states': + history_states + }) + print(json.dumps(result)) - history_states.extend([result['dialog_states'], {}]) + history_states.extend([result['dialog_states'], {}]) @unittest.skip('test with snapshot_download') def test_run_with_model_from_modelhub(self):