#!/usr/bin/env python3
"""B1: technical-scheme statistics workbook -> review-progress dashboard data.

Pipeline (run as a script):

1. Read ``技术方案统计表.xlsx`` (sheet ``sheet1``, headers on row 2) with openpyxl.
2. Forward-fill the grouping columns and clean / infer the ``产业链`` column.
3. Keep the rows of the Middle East regional company that have a scheme name.
4. Derive approval, validity, schedule and warning columns.
5. Print a summary and write ``methods_cleaned.parquet`` / ``methods_cleaned.csv``,
   ``project_summary.csv`` and ``validation_report.json`` into ``OUT_DIR``.
"""
import json
import warnings
from datetime import date
from pathlib import Path

import pandas as pd

warnings.filterwarnings('ignore')

DATA_DATE = '2026-06-08'
RAW_DIR = Path(f'/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/raw/月报数据/{DATA_DATE}')
OUT_DIR = Path(f'/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/data/{DATA_DATE}/cleaned')
INPUT_FILE = RAW_DIR / '技术方案统计表.xlsx'
# NOTE(review): day counts are relative to the run date, not DATA_DATE, so
# re-running an old snapshot later yields different warnings -- confirm intent.
TODAY = date.today()

# Columns that are forward-filled because only the first row of a group
# carries a value (the scheme name is deliberately NOT forward-filled).
GROUP_COLUMNS = ['所属区域', '所属国别', '项目名称', '是否超一定规模']
# Every column the pipeline indexes unconditionally.
REQUIRED_COLUMNS = GROUP_COLUMNS + ['方案名称', '方案状态', '分部分项工程计划开工日期']

# Industry chain -> keywords looked up in the project name (first chain wins).
CHAIN_KW = {
    '道桥': ['道路', '桥梁', '桥', '公路', '高速', '交通', '立交', '铁路', '轨道'],
    '建筑': ['房建', '建筑', '大楼', '酒店', '办公', '住宅', '别墅', '公寓', '商场'],
    '管网': ['供水', '排水', '污水', '管网', '管线', '基础设施', '雨水'],
    '港口': ['港口', '码头', '水工', '海工', '疏浚', '吹填', '航道', '船'],
    '电力': ['电力', '发电', '光伏', '风电', '变电站'],
    '生态环保': ['环保', '生态', '绿化', '园林'],
}

# Region value that selects the Middle East rows.
ME = '中国港湾中东区域公司'

# Keyword found in the project name -> country name written to 所属国别.
CM = {
    '阿联酋': '阿拉伯联合酋长国',
    '沙特': '沙特阿拉伯',
    '卡塔尔': '卡塔尔',
    '科威特': '科威特',
    '埃及': '埃及',
    '阿曼': '阿曼',
    '巴林': '巴林',
}

# A status containing any of these substrings counts as approved.
APPROVED_KW = ['已审批', '已备案']


def load_scheme_table(path, sheet_name='sheet1', header_row=2):
    """Read the worksheet into a DataFrame keyed by the header-row labels.

    Args:
        path: Workbook location.
        sheet_name: Worksheet to read.
        header_row: 1-based row holding the column labels; data starts on the
            following row.

    Returns:
        One DataFrame row per worksheet row that has at least one non-``None``
        value under a labelled column.
    """
    # Imported here so the pure helpers in this module stay importable
    # on machines without openpyxl.
    import openpyxl

    wb = openpyxl.load_workbook(path, data_only=True)
    try:
        ws = wb[sheet_name]
        headers = {
            cell.column_letter: str(cell.value).strip()
            for cell in ws[header_row]
            if cell.value
        }
        raw_rows = []
        for row in ws.iter_rows(min_row=header_row + 1, max_row=ws.max_row):
            vals = {}
            for cell in row:
                # Cells without a ``column_letter`` attribute are skipped.
                letter = getattr(cell, 'column_letter', None)
                if letter in headers:
                    vals[headers[letter]] = cell.value
            if any(v is not None for v in vals.values()):
                raw_rows.append(vals)
    finally:
        # Close the workbook even when the sheet is missing or parsing fails.
        wb.close()
    return pd.DataFrame(raw_rows)


def _require_columns(df):
    """Raise ``KeyError`` naming every required column absent from *df*."""
    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise KeyError(f'{INPUT_FILE.name} is missing required columns: {missing}')


def _infer_chain(name):
    """Return the first chain in ``CHAIN_KW`` with a keyword in *name*, else ``None``."""
    if pd.isna(name):
        return None
    text = str(name)
    for chain, kws in CHAIN_KW.items():
        if any(kw in text for kw in kws):
            return chain
    return None


def fill_industry_chain(df):
    """Clean and complete the ``产业链`` column of *df* in place.

    Numbers and digit-only strings are blanked, remaining values are
    forward-filled within each ``项目名称`` group, and still-missing values are
    inferred from keywords in the project name. The column is created when
    the workbook does not provide it.

    Returns:
        The same DataFrame, for chaining.
    """
    if '产业链' not in df.columns:
        df['产业链'] = None

    def _is_garbage(value):
        return pd.api.types.is_number(value) or (
            isinstance(value, str) and value.strip().isdigit()
        )

    df['产业链'] = df['产业链'].apply(lambda x: None if _is_garbage(x) else x)
    df['产业链'] = df.groupby('项目名称')['产业链'].transform(lambda s: s.ffill())

    missing = df['产业链'].isna()
    hits = df.loc[missing, '项目名称'].map(_infer_chain).dropna()
    if not hits.empty:
        df.loc[hits.index, '产业链'] = hits
    return df


def select_region(df, region=ME):
    """Return a copy of the rows of *region* whose scheme name is non-blank."""
    names = df['方案名称']
    keep = (
        (df['所属区域'] == region)
        & names.notna()
        & (names.astype(str).str.strip() != '')
    )
    return df[keep].copy()


def _country_for(project, country):
    """Return the corrected country for one row.

    Every ``CM`` keyword found in the project name whose target differs from
    the recorded country overrides it; when several match, the last one in
    ``CM`` order wins.
    """
    text = str(project)
    recorded = str(country)
    result = country
    for kw, target in CM.items():
        if kw in text and recorded != target:
            result = target
    return result


def fix_countries(df_me):
    """Rewrite ``所属国别`` in place from keywords in ``项目名称``; return *df_me*."""
    df_me['所属国别'] = [
        _country_for(project, country)
        for project, country in zip(df_me['项目名称'], df_me['所属国别'])
    ]
    return df_me


def calc_warning(row):
    """Return the warning colour for one scheme row.

    Args:
        row: Mapping (dict or Series) exposing ``.get`` for ``是否有效登记``,
            ``是否完成审批``, ``方案状态_clean`` and ``计划开工日期倒数``.

    Returns:
        ``'none'`` for voided or approved schemes; ``'red'`` when the status
        says the work is under way while unapproved or still in approval;
        ``'orange'`` for 0-30 days until the planned start; ``'yellow'`` for
        31-45 days; ``'none'`` otherwise, including past or unknown dates.
    """
    if not row.get('是否有效登记', True):
        return 'none'
    if row.get('是否完成审批', False):
        return 'none'
    status = str(row.get('方案状态_clean', ''))
    if '在实施' in status and ('未审批' in status or '审批中' in status):
        return 'red'
    days = row.get('计划开工日期倒数', 999)
    if days is None or pd.isna(days):
        return 'none'
    if 0 <= days <= 30:
        return 'orange'
    if 30 < days <= 45:
        return 'yellow'
    return 'none'


def _contains_any(texts, keywords):
    """Return a boolean mask: does each string contain any of *keywords*?"""
    mask = pd.Series(False, index=texts.index)
    for kw in keywords:
        mask = mask | texts.str.contains(kw, regex=False)
    return mask.astype(bool)


def annotate_schemes(df_me, today=TODAY):
    """Add the derived status, schedule and warning columns to *df_me* in place.

    Args:
        df_me: Frame with ``方案名称``, ``方案状态`` and
            ``分部分项工程计划开工日期``.
        today: Reference date for the days-until-start count.

    Returns:
        The same DataFrame, for chaining.
    """
    start = pd.to_datetime(df_me['分部分项工程计划开工日期'], errors='coerce')
    df_me['分部分项工程计划开工日期'] = start

    status = df_me['方案状态'].fillna('未知').astype(str)
    df_me['方案状态_clean'] = status
    df_me['是否完成审批'] = _contains_any(status, APPROVED_KW)

    # Vectorised masks (rather than a row-wise apply) also work on an empty frame.
    voided = _contains_any(status, ['已作废']) | _contains_any(
        df_me['方案名称'].astype(str), ['作废']
    )
    df_me['是否有效登记'] = ~voided

    df_me['开工年份'] = start.dt.year
    df_me['开工月份'] = start.dt.month
    df_me['计划开工日期倒数'] = (start - pd.Timestamp(today)).dt.days

    warning_inputs = ['是否有效登记', '是否完成审批', '方案状态_clean', '计划开工日期倒数']
    df_me['预警信号'] = [
        calc_warning(rec) for rec in df_me[warning_inputs].to_dict('records')
    ]
    df_me['审批进行中'] = _contains_any(status, ['在实施', '审批中'])
    df_me['是否逾期未审批'] = (start < pd.Timestamp(today)) & ~df_me['是否完成审批']
    return df_me


def summarize(df_me):
    """Return the headline counts written to the report.

    Counts are taken over valid (non-voided) schemes, except the country and
    project counts, which cover every row of *df_me*. The approval rate is
    ``'0%'`` when there is no valid scheme.
    """
    valid = df_me[df_me['是否有效登记']]
    total = len(valid)
    approved = int(valid['是否完成审批'].sum())
    rate = f'{approved / total * 100:.0f}%' if total else '0%'
    return {
        '有效方案': total,
        '超规': int((valid['是否超一定规模'].astype(str) == '是').sum()),
        '已审批': approved,
        '审批率': rate,
        '未审批': total - approved,
        '预警': int((valid['预警信号'] != 'none').sum()),
        '红色': int((valid['预警信号'] == 'red').sum()),
        '橙色': int((valid['预警信号'] == 'orange').sum()),
        '国别': int(df_me['所属国别'].nunique()),
        '项目': int(df_me['项目名称'].nunique()),
    }


def stringify_object_columns(frame):
    """Convert the non-null values of object columns to ``str`` in place.

    Mixed-type object columns are made uniform before export. Missing values
    stay missing instead of turning into the text ``'nan'`` / ``'None'``.

    Returns:
        The same DataFrame, for chaining.
    """
    for col in frame.columns:
        if frame[col].dtype == 'object':
            frame[col] = frame[col].where(frame[col].isna(), frame[col].astype(str))
    return frame


def build_project_summary(valid):
    """Return one row per project with scheme, approval and warning counts."""
    ps = valid.groupby('项目名称').agg(
        方案总数=('方案名称', 'count'),
        已审批=('是否完成审批', 'sum'),
        超规=('是否超一定规模', lambda x: (x.astype(str) == '是').sum()),
        红色预警=('预警信号', lambda x: (x == 'red').sum()),
        橙色预警=('预警信号', lambda x: (x == 'orange').sum()),
    ).reset_index()
    ps['审批率'] = (ps['已审批'] / ps['方案总数'] * 100).round(0).astype(int).astype(str) + '%'
    ps['国别'] = ps['项目名称'].map(valid.groupby('项目名称')['所属国别'].first())
    return ps.sort_values('方案总数', ascending=False)


def main():
    """Run the pipeline: read the workbook, print the summary, write the outputs."""
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    df = load_scheme_table(INPUT_FILE)
    _require_columns(df)
    for col in GROUP_COLUMNS:
        df[col] = df[col].ffill()
    fill_industry_chain(df)

    df_me = select_region(df)
    fix_countries(df_me)
    annotate_schemes(df_me)

    # Boolean indexing copies, so ``valid`` keeps the pre-export dtypes.
    valid = df_me[df_me['是否有效登记']]
    summary = summarize(df_me)

    print(f'总原始={len(df)} 中东={len(df_me)} 有效={summary["有效方案"]}')
    print(f'超规={summary["超规"]} 已审批={summary["已审批"]}({summary["审批率"]}) 未审批={summary["未审批"]}')
    print(f'预警={summary["预警"]}(R{summary["红色"]}/O{summary["橙色"]}) 国别={summary["国别"]} 项目={summary["项目"]}')
    print(f'\n方案状态分布:')
    for s, c in df_me['方案状态_clean'].value_counts().head(15).items():
        print(f' {s}: {c}')
    print(f'\n产业链: {df_me["产业链"].value_counts().to_dict()}')

    stringify_object_columns(df_me)
    df_me.to_parquet(OUT_DIR / 'methods_cleaned.parquet', index=False)
    df_me.to_csv(OUT_DIR / 'methods_cleaned.csv', index=False, encoding='utf-8-sig')

    ps = build_project_summary(valid)
    ps.to_csv(OUT_DIR / 'project_summary.csv', index=False, encoding='utf-8-sig')

    report = {
        'date': DATA_DATE,
        'source': str(INPUT_FILE),
        'total_raw': len(df),
        'me_rows': len(df_me),
        'summary': summary,
    }
    with open(OUT_DIR / 'validation_report.json', 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f'\n✅ methods_cleaned.parquet')
    print(ps.head(20).to_string())


if __name__ == '__main__':
    main()