fix: Payment inference returns only exact-name matches when present
Some checks failed
Deploy to K8s / deploy (push) Successful in 11s
Build and Push / build (push) Successful in 7s
Build and Push / build-go (push) Failing after 5s

match_members() now short-circuits on whole-word full-name hits and
uses word-boundary regex everywhere else, so a nickname that is a
substring of another member's surname (e.g. "tov" inside "ottova")
no longer produces false positives. Adds tests/test_match_members.py.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-04 23:08:59 +02:00
parent 97f568f49f
commit 81b36878b3
3 changed files with 161 additions and 14 deletions

View File

@@ -48,6 +48,11 @@ def _build_name_variants(name: str) -> list[str]:
return [v for v in variants if len(v) >= 3]
def _word_in(needle: str, haystack: str) -> bool:
"""Return True if needle appears as a whole word in haystack."""
return bool(re.search(rf"\b{re.escape(needle)}\b", haystack))
def match_members(
text: str, member_names: list[str]
) -> list[tuple[str, str]]:
@@ -56,13 +61,26 @@ def match_members(
Returns list of (member_name, confidence) where confidence is 'auto' or 'review'.
"""
normalized_text = normalize(text)
# Short-circuit: if any member's full canonical name appears verbatim (whole words),
# return only those matches and skip all fuzzy/nickname checks. This prevents a
# nickname that is a substring of another member's surname from producing false hits.
exact_matches = []
for name in member_names:
variants = _build_name_variants(name)
full_name = variants[0] if variants else ""
if full_name and _word_in(full_name, normalized_text):
exact_matches.append((name, "auto"))
if exact_matches:
return exact_matches
matches = []
for name in member_names:
variants = _build_name_variants(name)
full_name = variants[0] if variants else ""
parts = full_name.split()
# 1. Full name match (exact sequence) = high confidence
if full_name and full_name in normalized_text:
matches.append((name, "auto"))
@@ -70,17 +88,16 @@ def match_members(
# 2. Both first and last name present (any order) = high confidence
if len(parts) >= 2:
if parts[0] in normalized_text and parts[-1] in normalized_text:
if _word_in(parts[0], normalized_text) and _word_in(parts[-1], normalized_text):
matches.append((name, "auto"))
continue
# 3. Nickname + one part of the name = high confidence
# 3. Nickname present = high confidence
nickname = ""
nickname_match = re.search(r"\(([^)]+)\)", name)
if nickname_match:
nickname = normalize(nickname_match.group(1))
if nickname and nickname in normalized_text:
# Nickname alone is often enough, but let's check if it's combined with a name part
if nickname and _word_in(nickname, normalized_text):
matches.append((name, "auto"))
continue
@@ -89,19 +106,16 @@ def match_members(
first_name = parts[0]
last_name = parts[-1]
_COMMON_SURNAMES = {"novak", "novakova", "prach"}
# Match last name
if len(last_name) >= 4 and last_name not in _COMMON_SURNAMES and last_name in normalized_text:
if len(last_name) >= 4 and last_name not in _COMMON_SURNAMES and _word_in(last_name, normalized_text):
matches.append((name, "review"))
continue
# Match first name (if not too short)
if len(first_name) >= 3 and first_name in normalized_text:
if len(first_name) >= 3 and _word_in(first_name, normalized_text):
matches.append((name, "review"))
continue
elif len(parts) == 1:
# Single name member
if len(parts[0]) >= 4 and parts[0] in normalized_text:
if len(parts[0]) >= 4 and _word_in(parts[0], normalized_text):
matches.append((name, "review"))
continue
@@ -109,7 +123,6 @@ def match_members(
# If we have any "auto" matches, discard all "review" matches
auto_matches = [m for m in matches if m[1] == "auto"]
if auto_matches:
# If multiple auto matches, keep them (ambiguous but high priority)
return auto_matches
return matches