# math-tutor/tutor/wiki.py

"""
小学数学苏格拉底导师 — LLM Wiki 学生知识追踪模块
参照 Karpathy llm-wiki 三层架构：Raw sources → Wiki → Schema
"""

import json
import os
import re
from datetime import datetime
from pathlib import Path

# Root directory under which every student's wiki folder lives. The
# MATH_TUTOR_DATA environment variable, when set, overrides the default
# location inside the current user's home directory.
WIKI_DIR = Path(
    os.getenv(
        "MATH_TUTOR_DATA",
        os.path.expanduser("~/.hermes/projects/math-tutor/data"),
    )
)


class StudentWiki:
    """Persistent, file-backed knowledge wiki for a single student.

    Everything is stored under ``WIKI_DIR / student_id``:

    * ``sources/``    -- raw session records, one JSON file per session
    * ``knowledge/``  -- knowledge-graph directory (created here, not written
      by the code in this class)
    * ``schema.json`` -- the student profile
    * ``progress.md`` -- path reserved for progress notes (not written by the
      code in this class)
    """

    def __init__(self, student_id: str):
        """Create the on-disk layout for *student_id* if it does not exist.

        Args:
            student_id: Identifier used as the name of the student's folder.
        """
        self.student_id = student_id
        # NOTE(review): student_id is used verbatim as a path segment; confirm
        # callers never pass values such as "../x" or an absolute path.
        self.wiki_dir = WIKI_DIR / student_id
        self.wiki_dir.mkdir(parents=True, exist_ok=True)

        # Three-layer structure: raw sources -> knowledge -> schema.
        self.sources_dir = self.wiki_dir / "sources"      # raw session records
        self.knowledge_dir = self.wiki_dir / "knowledge"   # knowledge graph
        self.schema_path = self.wiki_dir / "schema.json"   # student profile
        self.progress_path = self.wiki_dir / "progress.md"

        # Create the sub-directories up front so later writes into them
        # (e.g. ingest_session) do not fail with FileNotFoundError.
        for directory in (self.sources_dir, self.knowledge_dir):
            directory.mkdir(parents=True, exist_ok=True)

        self._ensure_schema()

    def _write_schema(self, schema: dict) -> None:
        """Serialise *schema* to ``schema.json`` as pretty-printed UTF-8 JSON."""
        self.schema_path.write_text(
            json.dumps(schema, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    def _ensure_schema(self):
        """Write a default student profile if ``schema.json`` is missing."""
        if self.schema_path.exists():
            return
        self._write_schema({
            "student_id": self.student_id,
            "created": datetime.now().isoformat(),
            "grade": None,
            "topics_mastered": [],
            "topics_learning": [],
            "topics_struggling": [],
            "learning_style": {"prefers_visual": True, "prefers_stories": True},
            "mistake_patterns": [],
            "milestones": [],
        })

    def get_schema(self) -> dict:
        """Load and return the student profile stored in ``schema.json``."""
        return json.loads(self.schema_path.read_text(encoding="utf-8"))

    def update_schema(self, patch: dict):
        """Shallow-merge *patch* into the profile and stamp ``modified``.

        Top-level keys in *patch* replace the stored values wholesale (this is
        ``dict.update``, not a deep merge).
        """
        schema = self.get_schema()
        schema.update(patch)
        schema["modified"] = datetime.now().isoformat()
        self._write_schema(schema)

    def ingest_session(self, question: str, answer: str, socratic_steps: list, student_response: str):
        """Persist one tutoring session as a JSON file under ``sources/``.

        The file is named ``session_<timestamp>.json`` where the timestamp is
        the current local time in ISO format with ``:`` replaced by ``-``.

        Args:
            question: The question posed in the session.
            answer: The answer associated with the question.
            socratic_steps: The guiding steps used during the session.
            student_response: What the student replied.
        """
        ts = datetime.now().isoformat()
        # Re-ensure the directory: it may have been removed since __init__,
        # and write_text does not create missing parents.
        self.sources_dir.mkdir(parents=True, exist_ok=True)
        source_file = self.sources_dir / f"session_{ts.replace(':', '-')}.json"
        record = {
            "timestamp": ts,
            "question": question,
            "answer": answer,
            "socratic_steps": socratic_steps,
            "student_response": student_response,
        }
        source_file.write_text(
            json.dumps(record, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    def get_knowledge_summary(self) -> str:
        """Return a Markdown summary of the student profile.

        Missing or ``None`` fields fall back to placeholders instead of
        raising or rendering the literal text ``None``.
        """
        schema = self.get_schema()

        def joined(key: str) -> str:
            # Tolerate a missing key or an explicit null in the stored JSON.
            items = schema.get(key) or []
            return ", ".join(str(item) for item in items) or "无"

        # The default schema stores "grade": None, so dict.get's default would
        # never be used; check for None explicitly.
        grade = schema.get("grade")
        grade_text = "未知" if grade is None else grade

        # Defaults mirror the values written by _ensure_schema.
        style = schema.get("learning_style") or {}
        visual = "视觉" if style.get("prefers_visual", True) else "文字"
        stories = "喜欢故事化" if style.get("prefers_stories", True) else "直接"

        return "\n".join([
            "## 学生画像",
            f"- 年级: {grade_text}",
            f"- 已掌握: {joined('topics_mastered')}",
            f"- 学习中: {joined('topics_learning')}",
            f"- 困难: {joined('topics_struggling')}",
            f"- 学习风格: 偏好{visual}·{stories}",
            f"- 常见错误模式: {joined('mistake_patterns')}",
        ])

    def record_milestone(self, topic: str, description: str):
        """Append a dated milestone entry to the profile and save it.

        Args:
            topic: The topic the milestone belongs to.
            description: Free-text description of the milestone.
        """
        schema = self.get_schema()
        schema.setdefault("milestones", []).append({
            "topic": topic,
            "description": description,
            "date": datetime.now().isoformat(),
        })
        self._write_schema(schema)