#!/usr/bin/env python3
"""B2: dynamic tracking sheet -> Middle East project kick-off tracking dashboard (2026-06-08).

Reads sheet ``REPORT3`` of ``动态跟踪表_REPORT3.xlsx`` without a header row,
locates the column groups that belong to a region whose name contains ``中东``,
flattens every project's task rows into one table and writes:

* ``tracking_tasks.parquet`` / ``tracking_tasks.csv`` - one row per task
* ``tracking_project_summary.csv`` - per-project counters
* ``tracking_validation.json`` - headline totals

Usage: ``script.py [DATA_DATE]`` (``DATA_DATE`` defaults to ``2026-06-08``).
"""
import json
import sys
import warnings
from pathlib import Path

import pandas as pd

warnings.filterwarnings('ignore')

DATA_DATE = sys.argv[1] if len(sys.argv) > 1 else '2026-06-08'
RAW_DIR = Path(f'/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/raw/月报数据/{DATA_DATE}')
OUT_DIR = Path(f'/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/data/{DATA_DATE}/cleaned')
INPUT_FILE = RAW_DIR / '动态跟踪表_REPORT3.xlsx'

# Sheet layout (0-based rows): row 0 = title, row 1 = overseas office / region,
# row 2 = project names, row 3+ = tasks.
REGION_ROW = 1
PROJECT_ROW = 2
TASK_ROWS_START = 3  # Row index where task data starts (0-based)
# Each project occupies three adjacent columns: task, planned date, gap in days.
COLS_PER_PROJECT = 3

TASK_COLUMNS = ['项目名称', '区域', '工作任务', '计划完成日期', '差距天数']
# Columns that keep raw Excel values next to '' fillers and can therefore hold
# several Python types at once.
RAW_VALUE_COLUMNS = ('计划完成日期', '差距天数')
NOT_STARTED_MARKERS = ['', 'nan', '不涉及', '/', 'None']


def _cell_text(value):
    """Return ``str(value)``, or ``''`` when the cell is missing (NaN/NaT/None)."""
    return str(value) if pd.notna(value) else ''


def find_region_columns(df, region_keyword='中东'):
    """Locate the first column of every project that belongs to the wanted region.

    The sheet is walked in steps of ``COLS_PER_PROJECT``. A blank region or
    project cell inherits the last non-blank value seen to its left, which is
    how merged header cells appear when the sheet is read with ``header=None``.

    Args:
        df: Raw sheet as read with ``header=None``.
        region_keyword: Substring a region name must contain to be selected.

    Returns:
        List of ``(start_col, region_name, project_name)`` tuples.
    """
    if df.shape[0] <= PROJECT_ROW:
        return []  # sheet too short to contain the header rows

    me_cols = []
    current_region = None
    current_project = None
    for c in range(0, df.shape[1], COLS_PER_PROJECT):
        region = _cell_text(df.iloc[REGION_ROW, c])
        project = _cell_text(df.iloc[PROJECT_ROW, c])
        # Exact comparison: a region whose name merely contains "nan" is valid.
        if region and region != 'nan':
            current_region = region
        # Cells containing '项目名称' are header labels, not project names.
        if project and project != 'nan' and '项目名称' not in project:
            current_project = project
        if current_region and region_keyword in current_region and current_project:
            me_cols.append((c, current_region, current_project))
    return me_cols


def extract_tasks(df, me_cols, first_task_row=TASK_ROWS_START):
    """Flatten the task rows of the selected projects into one table.

    Args:
        df: Raw sheet as read with ``header=None``.
        me_cols: ``(start_col, region, project)`` tuples from
            :func:`find_region_columns`.
        first_task_row: 0-based index of the first task row.

    Returns:
        DataFrame with the columns in ``TASK_COLUMNS``. The columns are present
        even when no task is found, so callers can rely on the schema.
    """
    n_cols = df.shape[1]
    body = df.iloc[first_task_row:]
    blank = [None] * len(body)  # stands in for a column missing at the sheet edge

    rows = []
    for start_col, region, project in me_cols:
        task_cells = body.iloc[:, start_col].tolist()
        plan_cells = body.iloc[:, start_col + 1].tolist() if start_col + 1 < n_cols else blank
        gap_cells = body.iloc[:, start_col + 2].tolist() if start_col + 2 < n_cols else blank
        for raw_task, plan_date, gap in zip(task_cells, plan_cells, gap_cells):
            text = _cell_text(raw_task)
            task = text.strip()
            if not task or text == 'nan':
                continue
            rows.append({
                '项目名称': project,
                '区域': region,
                '工作任务': task,
                '计划完成日期': plan_date if pd.notna(plan_date) else '',
                '差距天数': gap if pd.notna(gap) else '',
            })
    return pd.DataFrame(rows, columns=TASK_COLUMNS)


def summarize_projects(df_tasks):
    """Build the per-project counters from the flattened task table.

    Args:
        df_tasks: Table produced by :func:`extract_tasks`.

    Returns:
        DataFrame with columns 项目名称, 总任务数, 已完成, 逾期, 未开始, 不涉及 and
        完成率, sorted by 逾期 in descending order.
    """
    gap_text = df_tasks['差距天数'].astype(str).str.strip()
    gap_value = pd.to_numeric(df_tasks['差距天数'], errors='coerce')

    flags = pd.DataFrame({
        '项目名称': df_tasks['项目名称'],
        '总任务数': df_tasks['工作任务'].notna(),
        # Compare numerically: an all-numeric gap column is read as float, so a
        # gap of zero arrives as 0.0 and its text form is '0.0', not '0'.
        '已完成': (gap_value == 0) | (gap_text == '0'),
        '逾期': gap_value > 0,
        # NOTE(review): '不涉及' rows are counted here as well as in the
        # '不涉及' column below - confirm whether that overlap is intended.
        '未开始': gap_text.isin(NOT_STARTED_MARKERS),
        '不涉及': gap_text.str.contains('不涉及'),
    })
    counters = ['总任务数', '已完成', '逾期', '未开始', '不涉及']
    proj_summary = flags.groupby('项目名称')[counters].sum().astype(int).reset_index()
    proj_summary['完成率'] = (
        (proj_summary['已完成'] / proj_summary['总任务数'] * 100).round(0).astype(int).astype(str) + '%'
    )
    # Stable sort keeps projects with equal overdue counts in a fixed order.
    return proj_summary.sort_values('逾期', ascending=False, kind='stable')


def tasks_for_parquet(df_tasks):
    """Return a copy of the task table that can be written to Parquet.

    The raw-value columns mix '' fillers with numbers, datetimes and free text.
    Parquet needs one type per column, so object-typed raw columns are
    converted to text. Columns that already have a single dtype are untouched.
    """
    out = df_tasks.copy()
    for col in RAW_VALUE_COLUMNS:
        if col in out.columns and out[col].dtype == object:
            out[col] = out[col].map(lambda v: v if isinstance(v, str) else str(v))
    return out


def main():
    """Run the extraction and write all output files to ``OUT_DIR``."""
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    df = pd.read_excel(INPUT_FILE, sheet_name='REPORT3', header=None)

    # Find Middle East columns
    me_cols = find_region_columns(df)
    print(f'Found {len(me_cols)} Middle East project columns')

    # Build task rows for each project
    df_tasks = extract_tasks(df, me_cols)
    if df_tasks.empty:
        # Stop with a clear message instead of failing later on missing columns.
        sys.exit(f'No Middle East task rows found in {INPUT_FILE}; nothing to export')
    print(f'Extracted {len(df_tasks)} task rows across {df_tasks["项目名称"].nunique()} projects')

    # Analyze completion status
    df_tasks['差距数值'] = pd.to_numeric(df_tasks['差距天数'], errors='coerce')
    df_tasks['计划日期_parsed'] = pd.to_datetime(df_tasks['计划完成日期'], errors='coerce')

    # Project-level summary
    proj_summary = summarize_projects(df_tasks)

    print('\n📊 项目启动跟踪汇总:')
    print(f' 项目数: {len(proj_summary)}')
    print(f' 总任务数: {proj_summary["总任务数"].sum()}')
    print(f' 已完成: {proj_summary["已完成"].sum()}')
    print(f' 逾期: {proj_summary["逾期"].sum()}')
    print('\n逾期项目:')
    print(proj_summary[proj_summary['逾期'] > 0].head(15).to_string())

    # Output
    tasks_for_parquet(df_tasks).to_parquet(OUT_DIR / 'tracking_tasks.parquet', index=False)
    df_tasks.to_csv(OUT_DIR / 'tracking_tasks.csv', index=False, encoding='utf-8-sig')
    proj_summary.to_csv(OUT_DIR / 'tracking_project_summary.csv', index=False, encoding='utf-8-sig')

    report = {
        'source': str(INPUT_FILE),
        'me_projects': len(proj_summary),
        'total_tasks': int(proj_summary['总任务数'].sum()),
        'completed': int(proj_summary['已完成'].sum()),
        'overdue': int(proj_summary['逾期'].sum()),
    }
    with open(OUT_DIR / 'tracking_validation.json', 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print('\n✅ tracking_tasks.parquet + project_summary')
    print('\n完整项目列表:')
    print(proj_summary[['项目名称', '总任务数', '已完成', '逾期', '完成率']].to_string())


if __name__ == '__main__':
    main()