101 lines
4.5 KiB
Python

#!/usr/bin/env python3
"""B2: 动态跟踪表 → 中东项目启动跟踪看板 (2026-06-08)"""
import pandas as pd, warnings, json
from pathlib import Path
warnings.filterwarnings('ignore')
import sys
# Reporting date: taken from the first CLI argument when one is given,
# otherwise the built-in default below is used.
DATA_DATE = '2026-06-08'
if len(sys.argv) > 1:
    DATA_DATE = sys.argv[1]

# Per-date folders: raw monthly-report input and cleaned dashboard output.
RAW_DIR = Path(f'/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/raw/月报数据/{DATA_DATE}')
OUT_DIR = Path(f'/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/data/{DATA_DATE}/cleaned')
# Create the output folder (and any missing parents) before anything is read.
OUT_DIR.mkdir(parents=True, exist_ok=True)

INPUT_FILE = RAW_DIR.joinpath('动态跟踪表_REPORT3.xlsx')
# header=None keeps every sheet row as data, so the rest of the script
# addresses cells purely by integer position.
df = pd.read_excel(INPUT_FILE, header=None, sheet_name='REPORT3')
# Sheet layout (0-based rows): row 0 = title, row 1 = overseas office / region,
# row 2 = project names, row 3 onwards = tasks.
# Find the Middle East project columns. The scan visits every third column;
# the task loop reads start_col + 0 / + 1 / + 2 for each entry found here.
REGION_ROW = 1
PROJECT_ROW = 2
COLUMNS_PER_PROJECT = 3

me_cols = []  # (start_col, region_name, project_name)
current_region = None
current_project = None
for c in range(0, df.shape[1], COLUMNS_PER_PROJECT):
    region_cell = df.iloc[REGION_ROW, c]
    project_cell = df.iloc[PROJECT_ROW, c]
    region = str(region_cell) if pd.notna(region_cell) else ''
    project = str(project_cell) if pd.notna(project_cell) else ''

    # A blank header cell inherits the last value seen to its left
    # (presumably because the headers are merged cells -- TODO confirm).
    # Equality, not a substring test: the previous `'nan' not in region`
    # silently skipped any region whose name merely contains "nan".
    if region and region != 'nan':
        current_region = region
    # Cells containing '项目名称' are treated as column labels, not projects.
    if project and project != 'nan' and '项目名称' not in project:
        current_project = project

    if current_region and current_project and '中东' in current_region:
        me_cols.append((c, current_region, current_project))

print(f'Found {len(me_cols)} Middle East project columns')
# Build task rows for each project: one record per non-empty task cell.
all_tasks = []
TASK_ROWS_START = 3  # Row index where task data starts (0-based)
n_rows, n_cols = df.shape
for start_col, region, project in me_cols:
    # A project group is read as three adjacent columns:
    # task name, planned completion date, gap in days.
    # The last group may be truncated by the sheet edge, so check once here.
    has_plan_col = start_col + 1 < n_cols
    has_gap_col = start_col + 2 < n_cols
    for row_idx in range(TASK_ROWS_START, n_rows):
        task_cell = df.iloc[row_idx, start_col]
        if pd.isna(task_cell):
            continue
        task = str(task_cell)
        if task == 'nan' or not task.strip():
            continue
        plan_date = df.iloc[row_idx, start_col + 1] if has_plan_col else None
        gap = df.iloc[row_idx, start_col + 2] if has_gap_col else None
        all_tasks.append({
            '项目名称': project,
            '区域': region,
            '工作任务': task.strip(),
            # Missing cells are stored as '' rather than NaN/None.
            '计划完成日期': plan_date if pd.notna(plan_date) else '',
            '差距天数': gap if pd.notna(gap) else '',
        })

if not all_tasks:
    # An empty list yields a DataFrame without columns, which used to crash
    # just below with an opaque KeyError('项目名称'). Abort with a clear
    # message instead; the exit status is still non-zero.
    sys.exit(f'No Middle East task rows found in {INPUT_FILE}; nothing to export.')

df_tasks = pd.DataFrame(all_tasks)
print(f'Extracted {len(df_tasks)} task rows across {df_tasks["项目名称"].nunique()} projects')
# Analyze completion status.
# Numeric view of the gap column (non-numeric text becomes NaN) and a parsed
# view of the planned date (unparseable values become NaT).
df_tasks['差距数值'] = pd.to_numeric(df_tasks['差距天数'], errors='coerce')
df_tasks['计划日期_parsed'] = pd.to_datetime(df_tasks['计划完成日期'], errors='coerce')

# Per-task status flags. They live in a separate frame so that no helper
# column leaks into df_tasks, which is exported as-is later.
gap_text = df_tasks['差距天数'].astype(str).str.strip()
task_flags = pd.DataFrame({
    '项目名称': df_tasks['项目名称'],
    '总任务数': df_tasks['工作任务'].notna(),
    # Completed = gap of zero. The numeric test also catches zeros read from
    # Excel as floats (0.0 -> '0.0'), which the former text-only comparison
    # with '0' missed; the text test is kept so nothing counted before is lost.
    '已完成': (df_tasks['差距数值'] == 0) | (gap_text == '0'),
    # Overdue = strictly positive numeric gap.
    '逾期': df_tasks['差距数值'] > 0,
    # NOTE(review): '不涉及' is counted both here and in the '不涉及' column
    # below -- confirm that this overlap is intended.
    '未开始': gap_text.isin(['', 'nan', '不涉及', '/', 'None']),
    '不涉及': gap_text.str.contains('不涉及'),
})

# Project-level summary: summing boolean flags gives per-project counts.
proj_summary = task_flags.groupby('项目名称').sum().reset_index()
# Completion rate as a whole-number percentage string, e.g. '75%'.
proj_summary['完成率'] = (proj_summary['已完成'] / proj_summary['总任务数'] * 100).round(0).astype(int).astype(str) + '%'
# Projects with the most overdue tasks come first.
proj_summary = proj_summary.sort_values('逾期', ascending=False)
# Console overview of the project-level summary.
print('\n📊 项目启动跟踪汇总:')
print(f' 项目数: {len(proj_summary)}')
print(f' 总任务数: {proj_summary["总任务数"].sum()}')
print(f' 已完成: {proj_summary["已完成"].sum()}')
print(f' 逾期: {proj_summary["逾期"].sum()}')
print('\n逾期项目:')
print(proj_summary[proj_summary['逾期'] > 0].head(15).to_string())

# Output
# The raw '计划完成日期' / '差距天数' columns hold '' for blanks next to
# numbers or timestamps. pyarrow refuses to serialise object columns with
# mixed Python types, so non-string values are rendered as text on a copy
# used only for the parquet file. All-string columns pass through unchanged,
# and the typed values remain in '差距数值' / '计划日期_parsed'.
parquet_tasks = df_tasks.copy()
for column in parquet_tasks.columns:
    if parquet_tasks[column].dtype == object:
        parquet_tasks[column] = parquet_tasks[column].map(
            lambda v: v if isinstance(v, str) or pd.isna(v) else str(v)
        )
parquet_tasks.to_parquet(OUT_DIR / 'tracking_tasks.parquet', index=False)

# utf-8-sig writes a BOM (commonly needed for Excel to detect UTF-8).
df_tasks.to_csv(OUT_DIR / 'tracking_tasks.csv', index=False, encoding='utf-8-sig')
proj_summary.to_csv(OUT_DIR / 'tracking_project_summary.csv', index=False, encoding='utf-8-sig')

# Small validation record; int() turns numpy integers into JSON-serialisable
# Python ints.
report = {
    'source': str(INPUT_FILE),
    'me_projects': len(proj_summary),
    'total_tasks': int(proj_summary['总任务数'].sum()),
    'completed': int(proj_summary['已完成'].sum()),
    'overdue': int(proj_summary['逾期'].sum()),
}
with open(OUT_DIR / 'tracking_validation.json', 'w', encoding='utf-8') as f:
    json.dump(report, f, ensure_ascii=False, indent=2)

print('\n✅ tracking_tasks.parquet + project_summary')
print('\n完整项目列表:')
print(proj_summary[['项目名称', '总任务数', '已完成', '逾期', '完成率']].to_string())