fix: time window regex misses fuzzy quantifiers like 'last few hours'
The relative-time regex only matched digits between 'last/past' and the unit, so 'last few hours' fell through to dateparser, which then found the bare word 'hours' and resolved it as midnight local time. Extended the regex to capture 'a few', 'few', 'couple' / 'couple of', and 'several' as approximate quantifiers, each mapped to 3 units (so 'last few hours' becomes a 3-hour window). Numeric expressions and bare 'last hour' still work as before.
This commit is contained in:
parent
0b3d95cd26
commit
3501240231
1 changed file with 7 additions and 3 deletions
|
|
@@ -25,16 +25,20 @@ except ImportError:
|
||||||
|
|
||||||
|
|
||||||
_RELATIVE_RE = re.compile(
|
_RELATIVE_RE = re.compile(
|
||||||
r"\b(?:last|past)\s+(\d+)?\s*(minute|hour|day|week)s?\b",
|
r"\b(?:last|past)\s+(?:(?P<n>\d+)|(?P<approx>a\s+few|few|couple(?:\s+of)?|several))?\s*(?P<unit>minute|hour|day|week)s?\b",
|
||||||
re.IGNORECASE,
|
re.IGNORECASE,
|
||||||
)
|
)
|
||||||
_RELATIVE_UNITS = {"minute": 1, "hour": 60, "day": 1440, "week": 10080}
|
_RELATIVE_UNITS = {"minute": 1, "hour": 60, "day": 1440, "week": 10080}
|
||||||
|
# Fuzzy quantifiers map to a reasonable span so "last few hours" → 3h window
|
||||||
|
_APPROX_N = 3
|
||||||
|
|
||||||
|
|
||||||
def _relative_window(match: re.Match) -> tuple[str, str]:
|
def _relative_window(match: re.Match) -> tuple[str, str]:
|
||||||
"""Convert a relative time match to (since_iso, until_iso)."""
|
"""Convert a relative time match to (since_iso, until_iso)."""
|
||||||
n = int(match.group(1) or 1)
|
n_str = match.group("n")
|
||||||
unit = match.group(2).lower()
|
approx = match.group("approx")
|
||||||
|
unit = match.group("unit").lower()
|
||||||
|
n = int(n_str) if n_str else (_APPROX_N if approx else 1)
|
||||||
minutes = n * _RELATIVE_UNITS[unit]
|
minutes = n * _RELATIVE_UNITS[unit]
|
||||||
return _last_n_minutes(minutes), _now_iso()
|
return _last_n_minutes(minutes), _now_iso()
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue