math-tutor/tutor/wiki.py

86 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
小学数学苏格拉底导师 — LLM Wiki 学生知识追踪模块
参照 Karpathy llm-wiki 三层架构:Raw sources → Wiki → Schema
"""
import json
import os
import re
from datetime import datetime
from pathlib import Path
# Root directory under which each student gets a sub-directory.
# The MATH_TUTOR_DATA environment variable overrides the default location
# inside the user's home directory.
WIKI_DIR = Path(
    os.getenv(
        "MATH_TUTOR_DATA",
        os.path.expanduser("~/.hermes/projects/math-tutor/data"),
    )
)
class StudentWiki:
    """Persistent, per-student knowledge wiki stored on disk.

    Everything lives under ``WIKI_DIR / student_id``:

    * ``sources/``    -- raw session records, one JSON file per session
    * ``knowledge/``  -- knowledge-graph layer (path is exposed; nothing in
      this class writes to it)
    * ``schema.json`` -- the student profile read and written by this class
    * ``progress.md`` -- path is exposed; not used inside this class
    """

    def __init__(self, student_id: str):
        """Create the on-disk layout for ``student_id`` if it is missing.

        Args:
            student_id: Name of the student's sub-directory under ``WIKI_DIR``.
        """
        # NOTE(review): student_id is joined into a filesystem path without
        # validation; confirm callers never pass separators or "..".
        self.student_id = student_id
        self.wiki_dir = WIKI_DIR / student_id
        self.wiki_dir.mkdir(parents=True, exist_ok=True)

        # Three-layer structure.
        self.sources_dir = self.wiki_dir / "sources"      # raw session records
        self.knowledge_dir = self.wiki_dir / "knowledge"  # knowledge graph
        self.schema_path = self.wiki_dir / "schema.json"  # student profile
        self.progress_path = self.wiki_dir / "progress.md"

        # The layer directories must exist before anything is written into
        # them; without this the first ingest_session() call fails.
        self.sources_dir.mkdir(exist_ok=True)
        self.knowledge_dir.mkdir(exist_ok=True)

        self._ensure_schema()

    def _write_schema(self, schema: dict) -> None:
        """Serialize ``schema`` to ``schema.json`` as pretty-printed UTF-8."""
        self.schema_path.write_text(
            json.dumps(schema, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    def _ensure_schema(self) -> None:
        """Write the default student profile if ``schema.json`` is absent."""
        if self.schema_path.exists():
            return
        self._write_schema({
            "student_id": self.student_id,
            "created": datetime.now().isoformat(),
            "grade": None,
            "topics_mastered": [],
            "topics_learning": [],
            "topics_struggling": [],
            "learning_style": {"prefers_visual": True, "prefers_stories": True},
            "mistake_patterns": [],
            "milestones": [],
        })

    def get_schema(self) -> dict:
        """Return the student profile parsed from ``schema.json``."""
        return json.loads(self.schema_path.read_text(encoding="utf-8"))

    def update_schema(self, patch: dict) -> None:
        """Merge ``patch`` into the stored profile and stamp ``modified``.

        The merge is shallow (``dict.update``): a nested value such as
        ``learning_style`` is replaced as a whole, not merged key by key.
        """
        schema = self.get_schema()
        schema.update(patch)
        schema["modified"] = datetime.now().isoformat()
        self._write_schema(schema)

    def ingest_session(self, question: str, answer: str, socratic_steps: list, student_response: str) -> None:
        """Store one tutoring exchange as a JSON file in ``sources/``.

        This method only persists the raw record; it does not update the
        schema or call an LLM itself.

        Args:
            question: The question that was asked.
            answer: The answer associated with the question.
            socratic_steps: The guiding steps used in the session.
            student_response: What the student replied.
        """
        ts = datetime.now().isoformat()
        # Re-create the directory if needed so the write cannot fail merely
        # because ``sources/`` is missing.
        self.sources_dir.mkdir(parents=True, exist_ok=True)
        # ':' is replaced because it is not allowed in file names on some
        # platforms.
        source_file = self.sources_dir / f"session_{ts.replace(':', '-')}.json"
        record = {
            "timestamp": ts,
            "question": question,
            "answer": answer,
            "socratic_steps": socratic_steps,
            "student_response": student_response,
        }
        source_file.write_text(
            json.dumps(record, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    def get_knowledge_summary(self) -> str:
        """Render the profile as a Markdown snippet for an LLM prompt.

        Missing or null fields fall back to defaults instead of raising:
        an unset grade is shown as '未知', and absent learning-style flags
        use the same defaults as a freshly created schema (both true).
        """
        schema = self.get_schema()

        def joined(key: str) -> str:
            # Tolerate a missing key or an explicit null, and non-string items.
            return ', '.join(str(item) for item in (schema.get(key) or []))

        # "grade" is always present in a fresh schema with the value None, so
        # a dict.get default never applies; check the value instead.
        grade = schema.get('grade')
        if grade is None:
            grade = '未知'

        # update_schema() replaces nested dicts wholesale, so individual
        # flags may be missing after a partial patch.
        style = schema.get('learning_style') or {}
        visual = '视觉' if style.get('prefers_visual', True) else '文字'
        stories = '喜欢故事化' if style.get('prefers_stories', True) else '直接'

        return "\n".join([
            "## 学生画像",
            f"- 年级: {grade}",
            f"- 已掌握: {joined('topics_mastered')}",
            f"- 学习中: {joined('topics_learning')}",
            f"- 困难: {joined('topics_struggling')}",
            f"- 学习风格: 偏好{visual}·{stories}",
            f"- 常见错误模式: {joined('mistake_patterns')}",
        ])

    def record_milestone(self, topic: str, description: str) -> None:
        """Append a dated milestone entry to the profile and save it."""
        schema = self.get_schema()
        schema.setdefault("milestones", []).append({
            "topic": topic,
            "description": description,
            "date": datetime.now().isoformat(),
        })
        self._write_schema(schema)