From f63c37fc1fa5b403b42c69ad6521916bda1fc111 Mon Sep 17 00:00:00 2001
From: ly119399 <ly119399@alibaba-inc.com>
Date: Thu, 30 Jun 2022 14:09:29 +0800
Subject: [PATCH] add multi-turn DST test case and drop debug logging

---
 .../preprocessors/space/dst_processors.py     | 74 +--------------
 .../nlp/test_dialog_state_tracking.py         | 90 ++++++++++++++-----
 2 files changed, 68 insertions(+), 96 deletions(-)

diff --git a/modelscope/preprocessors/space/dst_processors.py b/modelscope/preprocessors/space/dst_processors.py
index 12f7f1f8..1f9920a9 100644
--- a/modelscope/preprocessors/space/dst_processors.py
+++ b/modelscope/preprocessors/space/dst_processors.py
@@ -462,39 +462,6 @@ class multiwoz22Processor(DSTProcessor):
                 utt_tok_list.append(self.tokenize(
                     utt['text']))  # normalize utterances
 
-            # modified_slots = {}
-
-            # If sys utt, extract metadata (identify and collect modified slots)
-            # if is_sys_utt:
-            #     for d in utt['metadata']:
-            #         booked = utt['metadata'][d]['book']['booked']
-            #         booked_slots = {}
-            #         # Check the booked section
-            #         if booked != []:
-            #             for s in booked[0]:
-            #                 booked_slots[s] = self.normalize_label(
-            #                     '%s-%s' % (d, s),
-            #                     booked[0][s])  # normalize labels
-            #         # Check the semi and the inform slots
-            #         for category in ['book', 'semi']:
-            #             for s in utt['metadata'][d][category]:
-            #                 cs = '%s-book_%s' % (
-            #                     d, s) if category == 'book' else '%s-%s' % (d,
-            #                                                                 s)
-            #                 value_label = self.normalize_label(
-            #                     cs, utt['metadata'][d][category]
-            #                     [s])  # normalize labels
-            #                 # Prefer the slot value as stored in the booked section
-            #                 if s in booked_slots:
-            #                     value_label = booked_slots[s]
-            #                 # Remember modified slots and entire dialog state
-            #                 if cs in slot_list and cumulative_labels[
-            #                         cs] != value_label:
-            #                     modified_slots[cs] = value_label
-            #                     cumulative_labels[cs] = value_label
-            #
-            # mod_slots_list.append(modified_slots.copy())
-
         # Form proper (usr, sys) turns
         turn_itr = 0
         diag_seen_slots_dict = {}
@@ -938,8 +905,8 @@ def convert_examples_to_features(examples,
         # Account for [CLS], [SEP], [SEP], [SEP] with "- 4" (BERT)
         if len(tokens_a) + len(tokens_b) + len(
                 history) > max_seq_length - model_specs['TOKEN_CORRECTION']:
-            logger.info('Truncate Example %s. Total len=%d.' %
-                        (guid, len(tokens_a) + len(tokens_b) + len(history)))
+            # logger.info('Truncate Example %s. Total len=%d.' %
+            #             (guid, len(tokens_a) + len(tokens_b) + len(history)))
             input_text_too_long = True
         else:
             input_text_too_long = False
@@ -968,7 +935,6 @@ def convert_examples_to_features(examples,
 
     def _get_start_end_pos(class_type, token_label_ids, max_seq_length):
         if class_type == 'copy_value' and 1 not in token_label_ids:
-            # logger.warn("copy_value label, but token_label not detected. Setting label to 'none'.")
             class_type = 'none'
         start_pos = 0
         end_pos = 0
@@ -1045,9 +1011,6 @@ def convert_examples_to_features(examples,
     features = []
     # Convert single example
     for (example_index, example) in enumerate(examples):
-        if example_index % 1000 == 0:
-            logger.info('Writing example %d of %d' %
-                        (example_index, len(examples)))
 
         total_cnt += 1
 
@@ -1075,17 +1038,6 @@ def convert_examples_to_features(examples,
                 model_specs, example.guid)
 
             if input_text_too_long:
-                if example_index < 10:
-                    if len(token_labels_a) > len(tokens_a):
-                        logger.info('    tokens_a truncated labels: %s'
-                                    % str(token_labels_a[len(tokens_a):]))
-                    if len(token_labels_b) > len(tokens_b):
-                        logger.info('    tokens_b truncated labels: %s'
-                                    % str(token_labels_b[len(tokens_b):]))
-                    if len(token_labels_history) > len(tokens_history):
-                        logger.info(
-                            '    tokens_history truncated labels: %s'
-                            % str(token_labels_history[len(tokens_history):]))
 
                 token_labels_a = token_labels_a[:len(tokens_a)]
                 token_labels_b = token_labels_b[:len(tokens_b)]
@@ -1136,25 +1088,6 @@ def convert_examples_to_features(examples,
 
         assert (len(input_ids) == len(input_ids_unmasked))
 
-        # if example_index < 10:
-        #     logger.info('*** Example ***')
-        #     logger.info('guid: %s' % (example.guid))
-        #     logger.info('tokens: %s' % ' '.join(tokens))
-        #     logger.info('input_ids: %s' % ' '.join([str(x)
-        #                                             for x in input_ids]))
-        #     logger.info('input_mask: %s'
-        #                 % ' '.join([str(x) for x in input_mask]))
-        #     logger.info('segment_ids: %s'
-        #                 % ' '.join([str(x) for x in segment_ids]))
-        #     logger.info('start_pos: %s' % str(start_pos_dict))
-        #     logger.info('end_pos: %s' % str(end_pos_dict))
-        #     logger.info('values: %s' % str(value_dict))
-        #     logger.info('inform: %s' % str(inform_dict))
-        #     logger.info('inform_slot: %s' % str(inform_slot_dict))
-        #     logger.info('refer_id: %s' % str(refer_id_dict))
-        #     logger.info('diag_state: %s' % str(diag_state_dict))
-        #     logger.info('class_label_id: %s' % str(class_label_id_dict))
-
         features.append(
             InputFeatures(
                 guid=example.guid,
@@ -1171,9 +1104,6 @@ def convert_examples_to_features(examples,
                 diag_state=diag_state_dict,
                 class_label_id=class_label_id_dict))
 
-    logger.info('========== %d out of %d examples have text too long' %
-                (too_long_cnt, total_cnt))
-
     return features
 
 
diff --git a/tests/pipelines/nlp/test_dialog_state_tracking.py b/tests/pipelines/nlp/test_dialog_state_tracking.py
index 9c0e344f..fbb87b42 100644
--- a/tests/pipelines/nlp/test_dialog_state_tracking.py
+++ b/tests/pipelines/nlp/test_dialog_state_tracking.py
@@ -15,25 +15,66 @@ class DialogStateTrackingTest(unittest.TestCase):
     model_id = 'damo/nlp_space_dialog-state-tracking'
     test_case = [{
         'User-1':
-        'am looking for a place to to stay that has cheap price range it should be in a type of hotel'
+        'Hi, I\'m looking for a train that is going to cambridge and arriving there by 20:45, '
+        'is there anything like that?'
     }, {
         'System-1':
-        'Okay, do you have a specific area you want to stay in?',
+        'There are over 1,000 trains like that.  Where will you be departing from?',
         'Dialog_Act-1': {
-            'Hotel-Request': [['Area', '?']]
+            'Train-Inform': [['Choice', 'over 1'], ['Choice', '000']],
+            'Train-Request': [['Depart', '?']]
         },
-        'User-2':
-        "no, i just need to make sure it's cheap. oh, and i need parking"
+        'User-2': 'I am departing from birmingham new street.'
     }, {
-        'System-2':
-        'I found 1 cheap hotel for you that includes parking. Do you like me to book it?',
+        'System-2': 'Can you confirm your desired travel day?',
         'Dialog_Act-2': {
-            'Booking-Inform': [['none', 'none']],
-            'Hotel-Inform': [['Price', 'cheap'], ['Choice', '1'],
-                             ['Parking', 'none']]
+            'Train-Request': [['Day', '?']]
         },
-        'User-3':
-        'Yes, please. 6 people 3 nights starting on tuesday.'
+        'User-3': 'I would like to leave on wednesday'
+    }, {
+        'System-3':
+        'I show a train leaving birmingham new street at 17:40 and arriving at 20:23 on Wednesday.  '
+        'Will this work for you?',
+        'Dialog_Act-3': {
+            'Train-Inform': [['Arrive', '20:23'], ['Leave', '17:40'],
+                             ['Day', 'Wednesday'],
+                             ['Depart', 'birmingham new street']]
+        },
+        'User-4':
+        'That will, yes. Please make a booking for 5 people please.',
+    }, {
+        'System-4':
+        'I\'ve booked your train tickets, and your reference number is A9NHSO9Y.',
+        'Dialog_Act-4': {
+            'Train-OfferBooked': [['Ref', 'A9NHSO9Y']]
+        },
+        'User-5':
+        'Thanks so much. I would also need a place to say. '
+        'I am looking for something with 4 stars and has free wifi.'
+    }, {
+        'System-5':
+        'How about the cambridge belfry?  '
+        'It has all the attributes you requested and a great name!  '
+        'Maybe even a real belfry?',
+        'Dialog_Act-5': {
+            'Hotel-Recommend': [['Name', 'the cambridge belfry']]
+        },
+        'User-6':
+        'That sounds great, could you make a booking for me please?',
+    }, {
+        'System-6':
+        'What day would you like your booking for?',
+        'Dialog_Act-6': {
+            'Booking-Request': [['Day', '?']]
+        },
+        'User-7':
+        'Please book it for Wednesday for 5 people and 5 nights, please.',
+    }, {
+        'System-7': 'Booking was successful. Reference number is : 5NAWGJDC.',
+        'Dialog_Act-7': {
+            'Booking-Book': [['Ref', '5NAWGJDC']]
+        },
+        'User-8': 'Thank you, goodbye',
     }]
 
     def test_run(self):
@@ -51,21 +92,22 @@ class DialogStateTrackingTest(unittest.TestCase):
                 preprocessor=preprocessor)
         ]
 
-        history_states = [{}]
-        utter = {}
         pipelines_len = len(pipelines)
         import json
-        for step, item in enumerate(self.test_case):
-            utter.update(item)
-            result = pipelines[step % pipelines_len]({
-                'utter':
-                utter,
-                'history_states':
-                history_states
-            })
-            print(json.dumps(result))
+        for _test_case in [self.test_case]:  # test_case is one dialog (list of turn dicts)
+            history_states = [{}]
+            utter = {}
+            for step, item in enumerate(_test_case):
+                utter.update(item)
+                result = pipelines[step % pipelines_len]({
+                    'utter':
+                    utter,
+                    'history_states':
+                    history_states
+                })
+                print(json.dumps(result))
 
-            history_states.extend([result['dialog_states'], {}])
+                history_states.extend([result['dialog_states'], {}])
 
     @unittest.skip('test with snapshot_download')
     def test_run_with_model_from_modelhub(self):