fix: Payment inference returns only exact-name matches when present

match_members() now short-circuits on whole-word full-name hits and uses word-boundary regex for the first/last-name, nickname, and single-name checks, so a nickname that is a substring of another member's surname (e.g. "tov" inside "ottova") no longer produces false positives. Adds tests/test_match_members.py.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-04 23:08:59 +02:00
parent 97f568f49f
commit 81b36878b3
3 changed files with 161 additions and 14 deletions
--- a/scripts/match_payments.py
+++ b/scripts/match_payments.py
@@ -48,6 +48,11 @@ def _build_name_variants(name: str) -> list[str]:
    return [v for v in variants if len(v) >= 3]


+def _word_in(needle: str, haystack: str) -> bool:
+    """Return True if needle appears as a whole word in haystack."""
+    return bool(re.search(rf"\b{re.escape(needle)}\b", haystack))
+
+
 def match_members(
    text: str, member_names: list[str]
 ) -> list[tuple[str, str]]:
@@ -56,13 +61,26 @@ def match_members(
    Returns list of (member_name, confidence) where confidence is 'auto' or 'review'.
    """
    normalized_text = normalize(text)
+
+    # Short-circuit: if any member's full canonical name appears verbatim (whole words),
+    # return only those matches and skip all fuzzy/nickname checks. This prevents a
+    # nickname that is a substring of another member's surname from producing false hits.
+    exact_matches = []
+    for name in member_names:
+        variants = _build_name_variants(name)
+        full_name = variants[0] if variants else ""
+        if full_name and _word_in(full_name, normalized_text):
+            exact_matches.append((name, "auto"))
+    if exact_matches:
+        return exact_matches
+
    matches = []

    for name in member_names:
        variants = _build_name_variants(name)
        full_name = variants[0] if variants else ""
        parts = full_name.split()
-        
+
        # 1. Full name match (exact sequence) = high confidence
        if full_name and full_name in normalized_text:
            matches.append((name, "auto"))
@@ -70,17 +88,16 @@ def match_members(

        # 2. Both first and last name present (any order) = high confidence
        if len(parts) >= 2:
-            if parts[0] in normalized_text and parts[-1] in normalized_text:
+            if _word_in(parts[0], normalized_text) and _word_in(parts[-1], normalized_text):
                matches.append((name, "auto"))
                continue

-        # 3. Nickname + one part of the name = high confidence
+        # 3. Nickname present = high confidence
        nickname = ""
        nickname_match = re.search(r"\(([^)]+)\)", name)
        if nickname_match:
            nickname = normalize(nickname_match.group(1))
-            if nickname and nickname in normalized_text:
-                # Nickname alone is often enough, but let's check if it's combined with a name part
+            if nickname and _word_in(nickname, normalized_text):
                matches.append((name, "auto"))
                continue

@@ -89,19 +106,16 @@ def match_members(
            first_name = parts[0]
            last_name = parts[-1]
            _COMMON_SURNAMES = {"novak", "novakova", "prach"}
-            
-            # Match last name
-            if len(last_name) >= 4 and last_name not in _COMMON_SURNAMES and last_name in normalized_text:
+
+            if len(last_name) >= 4 and last_name not in _COMMON_SURNAMES and _word_in(last_name, normalized_text):
                matches.append((name, "review"))
                continue
-            
-            # Match first name (if not too short)
-            if len(first_name) >= 3 and first_name in normalized_text:
+
+            if len(first_name) >= 3 and _word_in(first_name, normalized_text):
                matches.append((name, "review"))
                continue
        elif len(parts) == 1:
-            # Single name member
-            if len(parts[0]) >= 4 and parts[0] in normalized_text:
+            if len(parts[0]) >= 4 and _word_in(parts[0], normalized_text):
                matches.append((name, "review"))
                continue

@@ -109,7 +123,6 @@ def match_members(
    # If we have any "auto" matches, discard all "review" matches
    auto_matches = [m for m in matches if m[1] == "auto"]
    if auto_matches:
-        # If multiple auto matches, keep them (ambiguous but high priority)
        return auto_matches

    return matches