# Source code for mud_server.translation.validator

"""Output validator for the OOC→IC translation layer.

``OutputValidator`` takes raw LLM text from the renderer and decides
whether it is suitable for storage as in-character dialogue.  Unsuitable
output is rejected (returning ``None``) so the caller can fall back to
the original OOC message.

Validation pipeline (applied in order)
---------------------------------------
1. **Empty check** — blank string → ``None``.
2. **PASSTHROUGH sentinel** — the model signals that the OOC input has
   no meaningful IC equivalent (e.g. it was a command or meta-question).
   → ``None``.
3. **Multi-line check** — strict mode rejects immediately; non-strict
   mode takes only the first non-empty line.
4. **Quote stripping** — some models (e.g. gemma2) consistently wrap
   output in ``"..."`` or ``'...'``; these are stripped before the
   forbidden-pattern check so that legitimate dialogue is not rejected
   purely because of quoting style.
5. **Forbidden pattern check** — strict mode only.  Rejects outputs that
   look like emotes, stage directions, or parenthetical narration.  These
   indicate the model has not followed the "one line of raw dialogue"
   constraint.
6. **Max-length enforcement** — strict mode rejects; non-strict truncates.
7. **Final empty check** — returns ``None`` if cleaning left an empty
   string.

Strict vs non-strict
---------------------
``strict_mode=True`` (the default) treats any constraint violation as a
hard rejection and returns ``None``.  This is the recommended setting for
production worlds because it guarantees that only well-formed IC dialogue
is ever stored — at the cost of occasionally falling back to OOC when
the model produces slightly imperfect output.

``strict_mode=False`` makes a best-effort cleanup attempt for minor
violations (multi-line → first line; over-length → truncate).  Useful
for low-stakes worlds or during prompt development.
"""

from __future__ import annotations

import logging
import re

logger = logging.getLogger(__name__)

# Sentinel emitted by the model when the OOC input has no meaningful IC
# equivalent — for example a command, a meta-question, or anything it
# cannot render without breaking the rules.  An explicit PASSTHROUGH is
# preferable to hallucinated dialogue.
PASSTHROUGH_SENTINEL = "PASSTHROUGH"

# Regexes that flag output violating the "single line of raw spoken
# dialogue" constraint.  Every pattern is tested on its own and a single
# hit is enough for strict mode to reject the output.
#
# Note: there is deliberately no ^".*"$ (fully double-quoted line) entry.
# Quote stripping (step 4) runs before this check, so a model output such
# as `"Hello."` has already become `Hello.` by the time it gets here.
_FORBIDDEN_PATTERNS: list[re.Pattern] = [
    re.compile(expression)
    for expression in (
        r"^\*.*\*$",  # emote line wrapped in asterisks: *waves hand*
        r"\[.*\]",  # stage direction in square brackets: [She turns away]
        r"^\(.*\)$",  # parenthetical narration: (Mira looks up)
    )
]



# (Sphinx viewcode "[docs]" link marker — not part of the module source.)
class OutputValidator:
    """Validates and cleans raw LLM output before storage.

    Attributes:
        _strict_mode:     When ``True``, any constraint violation → ``None``.
        _max_output_chars: Hard ceiling on IC output character count.
    """

    def __init__(self, *, strict_mode: bool, max_output_chars: int) -> None:
        """Initialise the validator.

        Args:
            strict_mode:      Reject on first violation vs. best-effort cleanup.
            max_output_chars: Maximum allowed character count in the IC output.
        """
        self._strict_mode = strict_mode
        self._max_output_chars = max_output_chars


[docs]
    def validate(self, ic_raw: str) -> str | None:
        """Validate and clean a raw LLM response string.

        Runs the full validation pipeline and returns either a clean IC
        string or ``None``.  A ``None`` return is always accompanied by a
        WARNING log entry so that rejection reasons are traceable.

        Args:
            ic_raw: Raw text returned by the renderer.

        Returns:
            Cleaned IC string on success, ``None`` if validation fails.
        """
        # ── 1. Empty check ────────────────────────────────────────────────────
        if not ic_raw or not ic_raw.strip():
            return None

        text = ic_raw.strip()

        # ── 2. PASSTHROUGH sentinel ────────────────────────────────────────────
        if text.upper().startswith(PASSTHROUGH_SENTINEL):
            logger.debug("OutputValidator: PASSTHROUGH sentinel returned by model.")
            return None

        # ── 3. Multi-line check ───────────────────────────────────────────────
        if "\n" in text:
            if self._strict_mode:
                logger.warning("OutputValidator: strict_mode rejected multi-line output.")
                return None
            # Non-strict: take only the first non-empty line.
            first_line = next((line.strip() for line in text.splitlines() if line.strip()), "")
            if not first_line:
                return None
            text = first_line

        # ── 4. Quote stripping ────────────────────────────────────────────────
        # Some models (e.g. gemma2) consistently wrap output in quotation
        # marks even when instructed not to.  Strip before the forbidden-
        # pattern check so that `"Hello."` becomes `Hello.` and is not
        # incorrectly rejected as a quoted speech block.
        text = text.strip('"').strip("'").strip()

        # ── 5. Forbidden pattern check (strict mode only) ─────────────────────
        if self._strict_mode:
            for pattern in _FORBIDDEN_PATTERNS:
                if pattern.search(text):
                    logger.warning(
                        "OutputValidator: strict_mode rejected output matching pattern %r: %r",
                        pattern.pattern,
                        text[:60],
                    )
                    return None

        # ── 6. Max-length enforcement ─────────────────────────────────────────
        if len(text) > self._max_output_chars:
            if self._strict_mode:
                logger.warning(
                    "OutputValidator: strict_mode rejected output exceeding "
                    "max_output_chars (%d > %d).",
                    len(text),
                    self._max_output_chars,
                )
                return None
            # Non-strict: truncate at the last word boundary if possible.
            text = text[: self._max_output_chars].rstrip()

        # ── 7. Final empty check ─────────────────────────────────────────────
        return text if text else None