86 lines
3.6 KiB
Python
86 lines
3.6 KiB
Python
"""
Elementary-school math Socratic tutor -- LLM Wiki student knowledge-tracking module.

Modeled on Karpathy's llm-wiki three-layer architecture: Raw sources → Wiki → Schema.
"""
|
||
|
||
import json
|
||
import os
|
||
import re
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
|
||
# Root directory holding every student's wiki. The MATH_TUTOR_DATA environment
# variable overrides the default location under the user's home directory.
WIKI_DIR = Path(
    os.environ.get(
        "MATH_TUTOR_DATA",
        os.path.expanduser("~/.hermes/projects/math-tutor/data"),
    )
)
|
||
|
||
|
||
class StudentWiki:
    """Persistent per-student knowledge wiki, maintained incrementally by an LLM.

    Paths managed under ``WIKI_DIR / student_id``:

    * ``sources/``    -- raw session records, one JSON file per ingested session
    * ``knowledge/``  -- knowledge-graph directory (created here; nothing in
      this class writes into it)
    * ``schema.json`` -- the student profile read and written by this class
    * ``progress.md`` -- path is defined here; nothing in this class writes it
    """

    def __init__(self, student_id: str):
        """Bind to a student's wiki directory, creating the layout if needed.

        Args:
            student_id: Identifier used as the directory name under ``WIKI_DIR``.
        """
        self.student_id = student_id
        # NOTE(review): student_id is joined into the path without validation;
        # values containing path separators or ".." would resolve outside
        # WIKI_DIR. Confirm callers only pass trusted identifiers.
        self.wiki_dir = WIKI_DIR / student_id

        # Three-layer structure.
        self.sources_dir = self.wiki_dir / "sources"      # raw conversation records
        self.knowledge_dir = self.wiki_dir / "knowledge"  # knowledge graph
        self.schema_path = self.wiki_dir / "schema.json"  # student profile
        self.progress_path = self.wiki_dir / "progress.md"

        # Create every directory layer up front: ingest_session() writes into
        # sources_dir and would fail if only wiki_dir existed.
        for directory in (self.wiki_dir, self.sources_dir, self.knowledge_dir):
            directory.mkdir(parents=True, exist_ok=True)

        self._ensure_schema()

    def _write_schema(self, schema: dict) -> None:
        """Serialize *schema* to ``schema_path`` as indented UTF-8 JSON."""
        self.schema_path.write_text(
            json.dumps(schema, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    def _ensure_schema(self) -> None:
        """Write the default student profile if ``schema.json`` does not exist."""
        if self.schema_path.exists():
            return
        self._write_schema({
            "student_id": self.student_id,
            "created": datetime.now().isoformat(),
            "grade": None,
            "topics_mastered": [],
            "topics_learning": [],
            "topics_struggling": [],
            "learning_style": {"prefers_visual": True, "prefers_stories": True},
            "mistake_patterns": [],
            "milestones": [],
        })

    def get_schema(self) -> dict:
        """Load and return the student profile from ``schema.json``.

        Raises:
            FileNotFoundError: If the schema file has been removed.
            json.JSONDecodeError: If the file does not contain valid JSON.
        """
        return json.loads(self.schema_path.read_text(encoding="utf-8"))

    def update_schema(self, patch: dict) -> None:
        """Merge *patch* into the stored profile and refresh ``modified``.

        The merge is shallow (``dict.update``): a nested value such as
        ``learning_style`` is replaced wholesale, not merged key by key.

        Args:
            patch: Top-level keys and values to overwrite in the profile.
        """
        schema = self.get_schema()
        schema.update(patch)
        schema["modified"] = datetime.now().isoformat()
        self._write_schema(schema)

    def ingest_session(self, question: str, answer: str, socratic_steps: list, student_response: str) -> None:
        """Persist one tutoring session as a JSON file under ``sources_dir``.

        Only the raw record is written here; any follow-up wiki update has to
        be performed by the caller.

        Args:
            question: The question that was asked.
            answer: The answer to the question.
            socratic_steps: The guiding steps used in the session; must be
                JSON-serializable.
            student_response: What the student replied.
        """
        ts = datetime.now().isoformat()
        # Idempotent; guards against the directory having been removed (or
        # never created) since the instance was set up.
        self.sources_dir.mkdir(parents=True, exist_ok=True)
        # ':' is not allowed in file names on some platforms, so swap it out.
        source_file = self.sources_dir / f"session_{ts.replace(':', '-')}.json"
        record = {
            "timestamp": ts,
            "question": question,
            "answer": answer,
            "socratic_steps": socratic_steps,
            "student_response": student_response,
        }
        source_file.write_text(
            json.dumps(record, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    def get_knowledge_summary(self) -> str:
        """Render the current profile as a Markdown summary for an LLM prompt.

        Missing or ``None`` fields fall back to placeholders instead of raising
        or printing ``None``; missing learning-style flags fall back to the
        defaults written by ``_ensure_schema`` (both ``True``).

        Returns:
            A multi-line Markdown string describing the student.
        """
        schema = self.get_schema()

        def joined(key: str) -> str:
            # Tolerate a missing/None list and non-string entries.
            items = schema.get(key) or []
            return ", ".join(map(str, items)) or "无"

        # The default profile stores grade as None, so a plain
        # .get(key, default) would never apply the placeholder.
        grade = schema.get("grade")
        grade_text = "未知" if grade is None else grade

        style = schema.get("learning_style") or {}
        visual_text = "视觉" if style.get("prefers_visual", True) else "文字"
        story_text = "喜欢故事化" if style.get("prefers_stories", True) else "直接"

        return "\n".join([
            "## 学生画像",
            f"- 年级: {grade_text}",
            f"- 已掌握: {joined('topics_mastered')}",
            f"- 学习中: {joined('topics_learning')}",
            f"- 困难: {joined('topics_struggling')}",
            f"- 学习风格: 偏好{visual_text}·{story_text}",
            f"- 常见错误模式: {joined('mistake_patterns')}",
        ])

    def record_milestone(self, topic: str, description: str) -> None:
        """Append a dated milestone to the profile and refresh ``modified``.

        Args:
            topic: The topic the milestone belongs to.
            description: Free-text description of the milestone.
        """
        schema = self.get_schema()
        now = datetime.now().isoformat()
        schema.setdefault("milestones", []).append({
            "topic": topic,
            "description": description,
            "date": now,
        })
        # Keep the modification stamp in step with update_schema().
        schema["modified"] = now
        self._write_schema(schema)
|