138 lines
7.0 KiB
Python
138 lines
7.0 KiB
Python
#!/usr/bin/env python3
|
|
"""B1: 技术方案统计表 → 危大方案编审进度看板"""
|
|
import pandas as pd, openpyxl, warnings, json
|
|
from datetime import date
|
|
from pathlib import Path
|
|
# Silence every warning category for the rest of the run.
warnings.filterwarnings('ignore')

# Snapshot label: it selects both the raw-input folder and the output folder.
DATA_DATE = '2026-06-08'
_WIKI_ROOT = Path('/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki')
RAW_DIR = _WIKI_ROOT / 'raw' / '月报数据' / DATA_DATE
OUT_DIR = _WIKI_ROOT / 'dashboard' / 'data' / DATA_DATE / 'cleaned'
# Create the output folder (and any missing parents) before anything is written.
OUT_DIR.mkdir(parents=True, exist_ok=True)
INPUT_FILE = RAW_DIR / '技术方案统计表.xlsx'
# NOTE(review): TODAY is the run date, not DATA_DATE, so every value derived
# from it depends on the day the script is executed — confirm this is intended.
TODAY = date.today()
|
|
|
|
# Read worksheet 'sheet1' into a DataFrame: row 2 supplies the column names,
# rows 3..max_row supply the data. data_only=True asks openpyxl for stored
# cell values instead of formula text.
wb = openpyxl.load_workbook(INPUT_FILE, data_only=True)
try:
    ws = wb['sheet1']

    # Column letter -> stripped header text, for header cells with a truthy value.
    headers = {
        cell.column_letter: str(cell.value).strip()
        for cell in ws[2]
        if cell.value
    }

    raw_rows = []
    for row in ws.iter_rows(min_row=3, max_row=ws.max_row, values_only=False):
        vals = {}
        for cell in row:
            # Cells that expose no column_letter are skipped (presumably
            # merged-cell placeholders — confirm against the workbook).
            cl = getattr(cell, 'column_letter', None)
            if cl in headers:
                vals[headers[cl]] = cell.value
        # Rows whose mapped cells are all None are dropped.
        if any(v is not None for v in vals.values()):
            raw_rows.append(vals)
finally:
    # Release the workbook even when the sheet is missing or parsing fails.
    wb.close()

df = pd.DataFrame(raw_rows)
|
|
|
|
# Forward-fill the grouping columns that are present in the sheet.
# 方案名称 is deliberately NOT in this list and is never forward-filled.
_GROUP_COLS = ('所属区域', '所属国别', '项目名称', '是否超一定规模')
for _name in (c for c in _GROUP_COLS if c in df.columns):
    df[_name] = df[_name].ffill()
|
|
|
|
# 产业链 (industry chain): discard numeric junk, forward-fill within each
# project, then fall back to keyword matching on the project name.
CHAIN_KW = {
    '道桥': ['道路', '桥梁', '桥', '公路', '高速', '交通', '立交', '铁路', '轨道'],
    '建筑': ['房建', '建筑', '大楼', '酒店', '办公', '住宅', '别墅', '公寓', '商场'],
    '管网': ['供水', '排水', '污水', '管网', '管线', '基础设施', '雨水'],
    '港口': ['港口', '码头', '水工', '海工', '疏浚', '吹填', '航道', '船'],
    '电力': ['电力', '发电', '光伏', '风电', '变电站'],
    '生态环保': ['环保', '生态', '绿化', '园林'],
}


def _infer_chain(name):
    """Return the first CHAIN_KW key one of whose keywords occurs in *name*.

    Chains are tried in CHAIN_KW insertion order. A missing name, or a name
    that contains none of the keywords, gives None.
    """
    if pd.isna(name):
        return None
    text = str(name)
    for chain, keywords in CHAIN_KW.items():
        if any(kw in text for kw in keywords):
            return chain
    return None


def _drop_numeric(value):
    """Map numbers and digit-only strings to None; pass anything else through."""
    if pd.api.types.is_number(value):
        return None
    if isinstance(value, str) and value.strip().isdigit():
        return None
    return value


# NOTE(review): the scraped source lost its indentation; the fallback loop is
# assumed to belong inside this `if` — confirm against the original file.
if '产业链' in df.columns:
    df['产业链'] = df['产业链'].apply(_drop_numeric)
    df['产业链'] = df.groupby('项目名称')['产业链'].transform(lambda s: s.ffill())

    # Only rows still missing a chain are touched; a failed guess leaves the
    # cell as it was.
    for idx in df.index[df['产业链'].isna().to_numpy()]:
        guess = _infer_chain(df.at[idx, '项目名称'])
        if guess:
            df.at[idx, '产业链'] = guess
|
|
|
|
# Keep the rows whose 所属区域 equals ME and whose 方案名称 is neither missing
# nor blank after stripping whitespace.
ME = '中国港湾中东区域公司'
_in_region = df['所属区域'] == ME
_scheme_name = df['方案名称']
_has_scheme = _scheme_name.notna() & (_scheme_name.astype(str).str.strip() != '')
df_me = df[_in_region & _has_scheme].copy()
|
|
|
|
# Country correction: when the project name contains one of the keys below,
# 所属国别 is overwritten with the mapped value.
CM = {
    '阿联酋': '阿拉伯联合酋长国',
    '沙特': '沙特阿拉伯',
    '卡塔尔': '卡塔尔',
    '科威特': '科威特',
    '埃及': '埃及',
    '阿曼': '阿曼',
    '巴林': '巴林',
}
# Snapshot the rows first so the writes below cannot affect what is read.
_rows = list(zip(df_me.index, df_me['项目名称'], df_me['所属国别']))
for idx, project, country in _rows:
    project_text, country_text = str(project), str(country)
    # Each candidate is compared with the country as it was before any
    # overwrite; when several keys qualify, the last one in CM order wins.
    replacements = [
        full
        for short, full in CM.items()
        if short in project_text and country_text != full
    ]
    if replacements:
        df_me.at[idx, '所属国别'] = replacements[-1]
|
|
|
|
# Planned start date: values that cannot be parsed become NaT.
_START_COL = '分部分项工程计划开工日期'
df_me[_START_COL] = pd.to_datetime(df_me[_START_COL], errors='coerce')
# Status text with missing values replaced by the literal '未知'.
df_me['方案状态_clean'] = df_me['方案状态'].fillna('未知').astype(str)

approved_kw = ['已审批', '已备案']


def _is_approved(status):
    """Return True when the status text contains any keyword in approved_kw."""
    text = str(status)
    return any(kw in text for kw in approved_kw)


def _is_valid_entry(r):
    """Return False for rows whose status or scheme name marks them as voided."""
    status = str(r.get('方案状态_clean', ''))
    title = str(r.get('方案名称', ''))
    return '已作废' not in status and '作废' not in title and '拟作废' not in title


df_me['是否完成审批'] = df_me['方案状态_clean'].apply(_is_approved)
df_me['是否有效登记'] = df_me.apply(_is_valid_entry, axis=1)

_planned = df_me[_START_COL]
df_me['开工年份'] = _planned.dt.year
df_me['开工月份'] = _planned.dt.month
# Whole days from TODAY to the planned start; negative once the date has passed.
df_me['计划开工日期倒数'] = (_planned - pd.Timestamp(TODAY)).dt.days
|
|
|
|
def calc_warning(row):
    """Return the warning colour for one scheme row.

    *row* is any mapping-like object with ``.get`` (a dict or a pandas row).

    Returns:
        'none'   - voided rows, approved rows, rows without a usable day
                   count, and day counts outside 0..45.
        'red'    - status contains '在实施' together with '未审批' or '审批中'.
        'orange' - planned start is 0 to 30 days away.
        'yellow' - planned start is more than 30 and at most 45 days away.
    """
    if not row.get('是否有效登记', True) or row.get('是否完成审批', False):
        return 'none'

    state = str(row.get('方案状态_clean', ''))
    if '在实施' in state and ('未审批' in state or '审批中' in state):
        return 'red'

    remaining = row.get('计划开工日期倒数', 999)
    if remaining is None or pd.isna(remaining):
        return 'none'
    # NOTE(review): a negative count (planned start already passed) yields
    # 'none' here — confirm that overdue rows are meant to carry no colour.
    if remaining < 0 or remaining > 45:
        return 'none'
    return 'orange' if remaining <= 30 else 'yellow'
|
|
|
|
# Row-level flags.
df_me['预警信号'] = df_me.apply(calc_warning, axis=1)


def _in_progress(status):
    """Return True when the status text contains '在实施' or '审批中'."""
    text = str(status)
    return '在实施' in text or '审批中' in text


df_me['审批进行中'] = df_me['方案状态_clean'].apply(_in_progress)
# Planned start is before TODAY and the scheme is not approved.
_past_start = df_me['分部分项工程计划开工日期'] < pd.Timestamp(TODAY)
df_me['是否逾期未审批'] = _past_start & ~df_me['是否完成审批']

# Headline counters. Everything except `countries` and `projects` is computed
# on the valid (non-voided) rows; those two are counted over all of df_me.
valid = df_me[df_me['是否有效登记']]
total = len(valid)
oversized = valid['是否超一定规模'].astype(str).eq('是').sum()
approved = valid['是否完成审批'].sum()
unapproved = total - approved
_signal = valid['预警信号']
warn_count = _signal.ne('none').sum()
red_count = _signal.eq('red').sum()
orange_count = _signal.eq('orange').sum()
countries = df_me['所属国别'].nunique()
projects = df_me['项目名称'].nunique()
|
|
|
|
# Console summary of the run.
# Approval rate in percent; reported as 0 when there are no valid schemes, so
# an empty selection cannot raise ZeroDivisionError or print 'nan%'.
_approval_pct = approved / total * 100 if total else 0.0
print(f'总原始={len(df)} 中东={len(df_me)} 有效={total}')
print(f'超规={oversized} 已审批={approved}({_approval_pct:.0f}%) 未审批={unapproved}')
print(f'预警={warn_count}(R{red_count}/O{orange_count}) 国别={countries} 项目={projects}')

# The 15 most frequent status values among the Middle East rows.
print(f'\n方案状态分布:')
for status, count in df_me['方案状态_clean'].value_counts().head(15).items():
    print(f' {status}: {count}')

print(f'\n产业链: {df_me["产业链"].value_counts().to_dict()}')
|
|
|
|
# Cast object columns to text before writing (presumably so the Parquet writer
# sees one type per column — confirm). Missing values are kept missing: a
# blanket astype(str) would turn them into the literal strings 'None' / 'nan'
# in both output files.
for col in df_me.columns:
    if df_me[col].dtype == 'object':
        _values = df_me[col]
        df_me[col] = _values.astype(str).where(_values.notna())

# Same cleaned table in two formats; the CSV carries a UTF-8 BOM.
df_me.to_parquet(OUT_DIR / 'methods_cleaned.parquet', index=False)
df_me.to_csv(OUT_DIR / 'methods_cleaned.csv', index=False, encoding='utf-8-sig')
|
|
|
|
def _count_yes(flags):
    """Count entries whose string form is exactly '是'."""
    return (flags.astype(str) == '是').sum()


def _count_red(signals):
    """Count 'red' warning signals."""
    return (signals == 'red').sum()


def _count_orange(signals):
    """Count 'orange' warning signals."""
    return (signals == 'orange').sum()


# Per-project roll-up of the valid rows.
_by_project = valid.groupby('项目名称')
ps = _by_project.agg(
    方案总数=('方案名称', 'count'),
    已审批=('是否完成审批', 'sum'),
    超规=('是否超一定规模', _count_yes),
    红色预警=('预警信号', _count_red),
    橙色预警=('预警信号', _count_orange),
).reset_index()

# Approval rate as a whole-number percentage string, e.g. '75%'.
_rate = ps['已审批'] / ps['方案总数'] * 100
ps['审批率'] = _rate.round(0).astype(int).astype(str) + '%'
# Country comes from the first row of each project.
ps['国别'] = ps['项目名称'].map(_by_project['所属国别'].first())
ps = ps.sort_values('方案总数', ascending=False)
ps.to_csv(OUT_DIR / 'project_summary.csv', index=False, encoding='utf-8-sig')
|
|
|
|
# Validation report: the headline counters as plain JSON-serialisable values.
# The approval rate is reported as 0 when there are no valid schemes, so an
# empty selection cannot raise ZeroDivisionError or store 'nan%'.
_rate_pct = approved / total * 100 if total else 0.0
report = {
    'date': DATA_DATE,
    'source': str(INPUT_FILE),
    'total_raw': len(df),
    'me_rows': len(df_me),
    'summary': {
        '有效方案': total,
        '超规': int(oversized),
        '已审批': int(approved),
        '审批率': f'{_rate_pct:.0f}%',
        '未审批': int(unapproved),
        '预警': int(warn_count),
        '红色': int(red_count),
        '橙色': int(orange_count),
        '国别': int(countries),
        '项目': int(projects),
    },
}
with open(OUT_DIR / 'validation_report.json', 'w', encoding='utf-8') as f:
    json.dump(report, f, ensure_ascii=False, indent=2)

print(f'\n✅ methods_cleaned.parquet')
# Top 20 projects by scheme count.
print(ps.head(20).to_string())
|