Browse Source

Fix flake8 warnings in dst_processors.py (use raw strings for regex patterns) and stop excluding it from the flake8 hook

master
ly119399 3 years ago
parent
commit
ec64d14446
2 changed files with 17 additions and 17 deletions
  1. +1
    -1
      .pre-commit-config.yaml
  2. +16
    -16
      modelscope/preprocessors/space/fields/dst_processors.py

+ 1
- 1
.pre-commit-config.yaml View File

@@ -3,7 +3,7 @@ repos:
rev: 3.8.3 rev: 3.8.3
hooks: hooks:
- id: flake8 - id: flake8
exclude: thirdparty/|examples/|modelscope/preprocessors/space/fields/dst_processors.py
exclude: thirdparty/|examples/
- repo: https://github.com/timothycrosley/isort - repo: https://github.com/timothycrosley/isort
rev: 4.3.21 rev: 4.3.21
hooks: hooks:


+ 16
- 16
modelscope/preprocessors/space/fields/dst_processors.py View File

@@ -456,26 +456,26 @@ class multiwoz22Processor(DSTProcessor):
super().__init__() super().__init__()


def normalize_time(self, text): def normalize_time(self, text):
text = re.sub('(\d{1})(a\.?m\.?|p\.?m\.?)', r'\1 \2',
text = re.sub(r'(\d{1})(a\.?m\.?|p\.?m\.?)', r'\1 \2',
text) # am/pm without space text) # am/pm without space
text = re.sub('(^| )(\d{1,2}) (a\.?m\.?|p\.?m\.?)', r'\1\2:00 \3',
text = re.sub(r'(^| )(\d{1,2}) (a\.?m\.?|p\.?m\.?)', r'\1\2:00 \3',
text) # am/pm short to long form text) # am/pm short to long form
text = re.sub( text = re.sub(
'(^| )(at|from|by|until|after) ?(\d{1,2}) ?(\d{2})([^0-9]|$)',
r'(^| )(at|from|by|until|after) ?(\d{1,2}) ?(\d{2})([^0-9]|$)',
r'\1\2 \3:\4\5', text) # Missing separator r'\1\2 \3:\4\5', text) # Missing separator
text = re.sub('(^| )(\d{2})[;.,](\d{2})', r'\1\2:\3',
text = re.sub(r'(^| )(\d{2})[;.,](\d{2})', r'\1\2:\3',
text) # Wrong separator text) # Wrong separator
text = re.sub('(^| )(at|from|by|until|after) ?(\d{1,2})([;., ]|$)',
text = re.sub(r'(^| )(at|from|by|until|after) ?(\d{1,2})([;., ]|$)',
r'\1\2 \3:00\4', text) # normalize simple full hour time r'\1\2 \3:00\4', text) # normalize simple full hour time
text = re.sub('(^| )(\d{1}:\d{2})', r'\g<1>0\2',
text = re.sub(r'(^| )(\d{1}:\d{2})', r'\g<1>0\2',
text) # Add missing leading 0 text) # Add missing leading 0
# Map 12 hour times to 24 hour times # Map 12 hour times to 24 hour times
text = re.sub(
'(\d{2})(:\d{2}) ?p\.?m\.?', lambda x: str(
int(x.groups()[0]) + 12
if int(x.groups()[0]) < 12 else int(x.groups()[0])) + x.groups(
)[1], text)
text = re.sub('(^| )24:(\d{2})', r'\g<1>00:\2',
text = \
re.sub(
r'(\d{2})(:\d{2}) ?p\.?m\.?',
lambda x: str(int(x.groups()[0]) + 12
if int(x.groups()[0]) < 12 else int(x.groups()[0])) + x.groups()[1], text)
text = re.sub(r'(^| )24:(\d{2})', r'\g<1>00:\2',
text) # Correct times that use 24 as hour text) # Correct times that use 24 as hour
return text return text


@@ -562,7 +562,7 @@ class multiwoz22Processor(DSTProcessor):
utt_lower = convert_to_unicode(utt).lower() utt_lower = convert_to_unicode(utt).lower()
utt_lower = self.normalize_text(utt_lower) utt_lower = self.normalize_text(utt_lower)
utt_tok = [ utt_tok = [
tok for tok in map(str.strip, re.split('(\W+)', utt_lower))
tok for tok in map(str.strip, re.split(r'(\W+)', utt_lower))
if len(tok) > 0 if len(tok) > 0
] ]
return utt_tok return utt_tok
@@ -584,7 +584,7 @@ class multiwoz22Processor(DSTProcessor):
find_pos = [] find_pos = []
found = False found = False
label_list = [ label_list = [
item for item in map(str.strip, re.split('(\W+)', value_label))
item for item in map(str.strip, re.split(r'(\W+)', value_label))
if len(item) > 0 if len(item) > 0
] ]
len_label = len(label_list) len_label = len(label_list)
@@ -635,11 +635,11 @@ class multiwoz22Processor(DSTProcessor):
def is_in_list(self, tok, value): def is_in_list(self, tok, value):
found = False found = False
tok_list = [ tok_list = [
item for item in map(str.strip, re.split('(\W+)', tok))
item for item in map(str.strip, re.split(r'(\W+)', tok))
if len(item) > 0 if len(item) > 0
] ]
value_list = [ value_list = [
item for item in map(str.strip, re.split('(\W+)', value))
item for item in map(str.strip, re.split(r'(\W+)', value))
if len(item) > 0 if len(item) > 0
] ]
tok_len = len(tok_list) tok_len = len(tok_list)


Loading…
Cancel
Save