fix: 工作簿v3 — 数据源完整镜像源表25列+数字对齐HTML

This commit is contained in:
大师 2026-06-09 03:09:03 +08:00
parent 4634c99ee6
commit 91d82ac475
2 changed files with 138 additions and 128 deletions

View File

@ -1,151 +1,161 @@
#!/usr/bin/env python3
"""危大方案看板数据工作簿 v2 — 数据源+汇总表 (Excel原生透视表需手动创建含说明)"""
"""危大方案看板数据工作簿 v3 — 数据源完整镜像源表结构 + 汇总表"""
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
BASE = "/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/data/2026-06-08/cleaned"
OUT = f"{BASE}/危大方案看板数据工作簿.xlsx"
BASE = "/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/data/2026-06-08"
OUT = f"{BASE}/cleaned/危大方案看板数据工作簿.xlsx"
# ── Data ──
df = pd.read_parquet(f"{BASE}/methods_cleaned.parquet")
df['开工年份'] = pd.to_datetime(df['分部分项工程计划开工日期'], errors='coerce').dt.year
m = df[(df['是否有效登记']==True)&(df['开工年份']>=2026)].copy()
# ── Load the raw source workbook (header=1: column names come from its second row) ──
raw = pd.read_excel("/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/raw/2026-06-08/技术方案统计表.xlsx", header=1)
raw.columns = [str(name).strip() for name in raw.columns]
print(f"Source rows: {len(raw)}, cols: {list(raw.columns)[:5]}...")

# ── Load the cleaned table; here it only supplies the row filters ──
cl = pd.read_parquet(f"{BASE}/cleaned/methods_cleaned.parquet")
is_valid = cl['是否有效登记'] == True
cl['开工年份'] = pd.to_datetime(cl['分部分项工程计划开工日期'], errors='coerce').dt.year
# Rows whose start date cannot be parsed get a NaN year and fail the >= test.
is_valid_2026 = is_valid & (cl['开工年份'] >= 2026)
valid_idx = cl[is_valid].index.tolist()
yr_idx = cl[is_valid_2026].index.tolist()

# Select the same rows from the raw sheet by index label.
# NOTE(review): this assumes the cleaned parquet kept the raw sheet's row
# index unchanged — confirm against the cleaning step.
raw_clean = raw.loc[raw.index.isin(valid_idx)].copy()
raw_2026 = raw.loc[raw.index.isin(yr_idx)].copy()
print(f"Valid rows: {len(raw_clean)}, >=2026: {len(raw_2026)}")

# ── The summary sheets are built from the cleaned rows, not the raw ones ──
m = cl[is_valid_2026].copy()
# Collapse the cleaned status into a two-way flag: any status containing
# '已完成' counts as done, everything else as not done.
m['简化状态'] = m['方案状态_clean'].apply(lambda s: '已完成' if '已完成' in str(s) else '未完成')
# NOTE(review): after .astype(str) a missing value becomes 'nan' / 'None',
# so `x == ''` is true only for literal empty strings. The marker character
# compared against may have been lost — confirm which value flags a plan as
# over-scale, otherwise nearly every row is classified '一般类'.
m['是否超规'] = m['是否超一定规模'].astype(str).apply(lambda x: '超规类' if x == '' else '一般类')
# The >= 2026 filter already removed rows with a NaN year, so the int cast is safe.
m['开工年份'] = m['开工年份'].astype(int)
today=pd.Timestamp('2026-06-08')
m['距开工天'] = (pd.to_datetime(m['分部分项工程计划开工日期'])-today).dt.days.astype(int)
today = pd.Timestamp('2026-06-08')
m['距开工天'] = (pd.to_datetime(m['分部分项工程计划开工日期']) - today).dt.days.astype(int)
# Warning
def warn_lev(d, s, *, orange_days=30, yellow_days=45):
    """Return the warning marker for one plan.

    Args:
        d: Days until the planned start date; converted with ``int()``.
        s: Plan status text; converted with ``str()``.
        orange_days: Largest day count that still yields the orange marker.
        yellow_days: Largest day count that still yields the yellow marker.

    Returns:
        '🟠' when the plan is pending and starts within ``orange_days``,
        '🟡' when it is pending and starts within ``yellow_days``,
        '' otherwise.

    Raises:
        ValueError / TypeError: if ``d`` cannot be converted by ``int()``.
    """
    status = str(s)
    days = int(d)
    # Only plans that are not yet implemented or still under approval can warn.
    if '未实施' not in status and '审批中' not in status:
        return ''
    if days <= orange_days:
        return '🟠'
    if days <= yellow_days:
        return '🟡'
    return ''
m['预警'] = m.apply(lambda r: warn_lev(r['距开工天'], r['方案状态_clean']), axis=1)
# ── Styles ──
HDR_F=Font(name='微软雅黑',bold=True,size=10,color='FFFFFF')
HDR_BG=PatternFill('solid',fgColor='1A3A5C')
TITLE_F=Font(name='微软雅黑',bold=True,size=14,color='1A3A5C')
SUB_F=Font(name='微软雅黑',bold=True,size=12,color='1A3A5C')
GOLD_LINE=Border(bottom=Side(style='medium',color='C8962E'))
WARN_BG=PatternFill('solid',fgColor='FFF3E0')
GRAY_F=Font(name='微软雅黑',size=9,color='8899AA')
DATA_F=Font(name='微软雅黑',size=10)
BOLD_F=Font(name='微软雅黑',bold=True,size=10)
RED_F=Font(name='微软雅黑',bold=True,size=10,color='D94E34')
GREEN_F=Font(name='微软雅黑',bold=True,size=10,color='2E7D32')
BLUE_F=Font(name='微软雅黑',bold=True,size=10,color='1A3A5C')
BORDER=Border(left=Side('thin','DBE2EA'),right=Side('thin','DBE2EA'),top=Side('thin','DBE2EA'),bottom=Side('thin','DBE2EA'))
CENTER=Alignment(horizontal='center',vertical='center')
# Every font uses the same typeface; only weight, size and colour vary.
_YAHEI = '微软雅黑'
# Thin light-grey edge shared by all four sides of the grid border.
_GRID_SIDE = Side('thin', 'DBE2EA')

# Header row: white bold text on a dark-blue fill.
HDR_F = Font(name=_YAHEI, bold=True, size=10, color='FFFFFF')
HDR_BG = PatternFill('solid', fgColor='1A3A5C')
# Sheet title and sub-title.
TITLE_F = Font(name=_YAHEI, bold=True, size=14, color='1A3A5C')
SUB_F = Font(name=_YAHEI, bold=True, size=12, color='1A3A5C')
# Body text variants.
GRAY_F = Font(name=_YAHEI, size=9, color='8899AA')
DATA_F = Font(name=_YAHEI, size=10)
BOLD_F = Font(name=_YAHEI, bold=True, size=10)
RED_F = Font(name=_YAHEI, bold=True, size=10, color='D94E34')
GREEN_F = Font(name=_YAHEI, bold=True, size=10, color='2E7D32')
BLUE_F = Font(name=_YAHEI, bold=True, size=10, color='1A3A5C')
# Row highlight fill.
WARN_BG = PatternFill('solid', fgColor='FFF3E0')
# Cell grid border, centred alignment, and the gold underline for titles.
BORDER = Border(left=_GRID_SIDE, right=_GRID_SIDE, top=_GRID_SIDE, bottom=_GRID_SIDE)
CENTER = Alignment(horizontal='center', vertical='center')
GOLD_BD = Border(bottom=Side(style='medium', color='C8962E'))
def hdr_row(ws,r,cols):
for i,h in enumerate(cols):
c=ws.cell(r,i+1,h); c.font=HDR_F; c.fill=HDR_BG; c.border=BORDER; c.alignment=CENTER
def hdr_row(ws, r, cols):
    """Write ``cols`` as a styled header into row ``r`` of ``ws``.

    Labels are placed from column 1 onward; each cell gets the header font,
    header fill, grid border and centred alignment.
    """
    for col_no, label in enumerate(cols, start=1):
        cell = ws.cell(r, col_no, label)
        cell.font = HDR_F
        cell.fill = HDR_BG
        cell.border = BORDER
        cell.alignment = CENTER
def data_row(ws,r,vals,fmts=None):
for i,v in enumerate(vals):
c=ws.cell(r,i+1,v); c.border=BORDER; c.font=fmts[i] if fmts else DATA_F
def write_data_sheet(ws, df, title):
    """Write every column of ``df`` to ``ws`` as a filterable data sheet.

    Layout: row 1 is a merged title, row 3 the header, data starts at row 4.
    Missing values are written as '' and ``pd.Timestamp`` values as their
    first ten characters (the date part). An auto-filter covers header and
    data, the panes are frozen below the header, and each column width is
    derived from its header length, clamped to 12..35.

    Args:
        ws: Target openpyxl worksheet.
        df: DataFrame to dump; all columns are written in order.
        title: Text for the merged title cell in row 1.
    """
    cols = list(df.columns)
    # A frame without columns would otherwise produce end_column=0 and
    # get_column_letter(0), both of which openpyxl rejects.
    n_cols = max(1, len(cols))

    ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=n_cols)
    title_cell = ws.cell(1, 1, title)
    title_cell.font = TITLE_F
    title_cell.border = GOLD_BD

    hdr_row(ws, 3, cols)
    # itertuples keeps each column's own dtype; iterrows builds one Series per
    # row and may upcast (e.g. ints become floats next to a float column).
    for row_no, record in enumerate(df.itertuples(index=False, name=None), start=4):
        for col_no, v in enumerate(record, start=1):
            if pd.isna(v):
                v = ''
            elif isinstance(v, pd.Timestamp):
                v = str(v)[:10]
            cell = ws.cell(row_no, col_no, v)
            cell.font = DATA_F
            cell.border = BORDER

    ws.auto_filter.ref = f'A3:{get_column_letter(n_cols)}{len(df)+3}'
    ws.freeze_panes = 'A4'
    for col_no, name in enumerate(cols, start=1):
        # Header length is doubled before clamping; the headers in this
        # workbook are CJK text, which renders wider than Latin characters.
        width = max(12, min(35, len(str(name)) * 2))
        ws.column_dimensions[get_column_letter(col_no)].width = width
# ════ Sheet 0: 透视表说明 ════
s0=wb.active; s0.title='透视表说明'
s0.merge_cells('A1:D1'); s0.cell(1,1,'🗂️ 如何创建 Excel 原生透视表').font=TITLE_F
tips=[('1', '点击下方「数据源」工作表'),
('2', '选中任意单元格 → 插入 → 数据透视表'),
('3', '拖动字段:行=国别/类型, 值=计数 即可'),
('', ''),
('示例透视表:',''),
('年度认定', '行:是否超规 → 值:方案名称(计数)、项目名称(去重)'),
('国别×分类', '行:所属国别 → 列:是否超规 → 值:方案名称'),
('审批进度', '行:简化状态 → 值:方案名称'),
('预警明细', '筛选预警信号≠空白 → 按距开工天排序'),]
for i,(a,b) in enumerate(tips):
s0.cell(i+3,1,a).font=BOLD_F; s0.cell(i+3,2,b).font=DATA_F
s0.column_dimensions['A'].width=15; s0.column_dimensions['B'].width=55
wb = Workbook()
# ════ Sheet 1: 数据源 ════
s1=wb.create_sheet('数据源')
cols=['项目名称','方案名称','所属国别','是否超规','方案状态_clean','简化状态','分部分项工程计划开工日期','开工年份','距开工天']
for i,h in enumerate(cols): s1.cell(1,i+1,h); s1.cell(1,i+1).font=HDR_F; s1.cell(1,i+1).fill=HDR_BG; s1.cell(1,i+1).border=BORDER; s1.cell(1,i+1).alignment=CENTER
for r,(_,row) in enumerate(m[cols].iterrows()):
for c,col in enumerate(cols):
v=row[col];
if pd.isna(v): v=''
elif isinstance(v,(pd.Timestamp,)): v=str(v)[:10]
cell=s1.cell(r+2,c+1,v); cell.font=DATA_F; cell.border=BORDER
s1.auto_filter.ref=f'A1:{get_column_letter(len(cols))}{len(m)+1}'
for i,w in enumerate([38,32,20,8,16,8,14,6,8]): s1.column_dimensions[get_column_letter(i+1)].width=w
s1.freeze_panes='A2'
# ════ Sheet 1: cleaned source table (all columns) ════
# Reuse the workbook's default sheet as the first data sheet.
s1 = wb.active
s1.title = '源表清洗后'
write_data_sheet(s1, raw_clean, '技术方案统计表 · 清洗后(有效登记 全部年份)')
# ════ Sheet 2: 年度认定汇总 ════
s2=wb.create_sheet('年度认定汇总')
s2.merge_cells('A1:D1'); s2.cell(1,1,'年度认定≥2026开工').font=TITLE_F; s2.cell(1,1).border=GOLD_LINE
s2.cell(3,1,'分类'); s2.cell(3,2,'方案数'); s2.cell(3,3,'项目数'); s2.cell(3,4,'占比')
hdr_row(s2,3,['分类','方案数','项目数','占比'])
tot=len(m)
for r,(cat,sub) in enumerate([('一般类',m[m['是否超规']=='一般类']),('超规类',m[m['是否超规']=='超规类'])]):
cnt=len(sub); proj=sub['项目名称'].nunique()
data_row(s2,r+4,[cat,cnt,proj,f'{cnt/tot*100:.0f}%'])
data_row(s2,6,['合计',tot,m['项目名称'].nunique(),'100%'],[BOLD_F]*4)
s2.column_dimensions['A'].width=12
for c in 'BCD': s2.column_dimensions[c].width=10
# ════ Sheet 2: valid registrations starting in 2026 or later ════
s2 = wb.create_sheet('有效≥2026')
write_data_sheet(
    s2,
    raw_2026,
    '技术方案统计表 · 有效登记 ≥2026年开工',
)
# ════ Sheet 3: 国别×分类 ════
s3=wb.create_sheet('国别×分类')
s3.merge_cells('A1:D1'); s3.cell(1,1,'国别×分类分布').font=TITLE_F; s3.cell(1,1).border=GOLD_LINE
ct=m.groupby(['所属国别','是否超规']).size().unstack(fill_value=0)
ct['合计']=ct.sum(1); ct.loc['合计']=ct.sum()
hdr_row(s3,3,['国别']+list(ct.columns))
for r,(idx,row) in enumerate(ct.iterrows()):
data_row(s3,r+4,[idx]+[int(v) for v in row])
s3.column_dimensions['A'].width=25
# ════ Sheet 3: annual classification summary ════
s3 = wb.create_sheet('年度认定汇总')
s3.merge_cells('A1:D1')
s3.cell(1, 1, '年度认定≥2026开工').font = TITLE_F
s3.cell(1, 1).border = GOLD_BD
hdr_row(s3, 3, ['分类', '方案数', '项目数', '占比'])

tot = len(m)
# One (values, font) pair per output row: the two classes first, then the total.
summary_rows = []
for cat in ('一般类', '超规类'):
    sub = m[m['是否超规'] == cat]
    cnt = len(sub)
    proj = sub['项目名称'].nunique()
    # Guard the share: with no rows passing the filter, cnt/tot would raise
    # ZeroDivisionError.
    share = f'{cnt/tot*100:.0f}%' if tot else '0%'
    summary_rows.append(([cat, cnt, proj, share], DATA_F))
summary_rows.append((['合计', tot, m['项目名称'].nunique(), '100%'], BOLD_F))

# Data rows start at row 4, directly under the header in row 3.
for r, (vals, f) in enumerate(summary_rows, start=4):
    for c, v in enumerate(vals, start=1):
        cell = s3.cell(r, c, v)
        cell.font = f
        cell.border = BORDER

for w, c in zip([12, 10, 10, 10], 'ABCD'):
    s3.column_dimensions[c].width = w
# ════ Sheet 4: 审批进度 ════
s4=wb.create_sheet('审批进度')
s4.merge_cells('A1:D1'); s4.cell(1,1,'审批进度 & 三色预警').font=TITLE_F; s4.cell(1,1).border=GOLD_LINE
hdr_row(s4,3,['指标','数值','占比','备注'])
completed=(m['简化状态']=='已完成').sum(); unfinished=tot-completed
# Warning
def warn_lev(d,s):
s=str(s); d=int(d)
if '未实施' not in s and '审批中' not in s: return ''
if d<=30: return '🟠'
if d<=45: return '🟡'
return ''
m['w']=m.apply(lambda r:warn_lev(r['距开工天'],r['方案状态_clean']),axis=1)
rn=(m['w']!='').sum(); orn=(m['w']=='🟠').sum(); ye=(m['w']=='🟡').sum()
rows=[('方案总数',tot,'100%','≥2026年开工·排除已作废'),
('已完成审批',completed,f'{completed/tot*100:.0f}%',''),
('未完成审批',unfinished,f'{unfinished/tot*100:.0f}%','含审批中+未审批'),
('🟠 橙色预警',orn,f'{orn/tot*100:.0f}%','≤30天未审批'),
('🟡 黄色预警',ye,f'{ye/tot*100:.0f}%','≤45天未审批'),
('预警合计',rn,f'{rn/tot*100:.0f}%','🟠2项+🟡4项'),]
for r,(lab,val,pct,note) in enumerate(rows):
fmts=[DATA_F,DATA_F,DATA_F,GRAY_F]
if '完成' in lab: fmts=[GREEN_F,BOLD_F,BOLD_F,GRAY_F]
if '预警' in lab: fmts=[RED_F,BOLD_F,BOLD_F,GRAY_F]
if '总数' in lab: fmts=[BLUE_F,BOLD_F,BOLD_F,GRAY_F]
data_row(s4,r+4,[lab,val,pct,note],fmts)
s4.column_dimensions['A'].width=15; s4.column_dimensions['B'].width=10
s4.column_dimensions['C'].width=10; s4.column_dimensions['D'].width=35
# ════ Sheet 4: country × classification ════
s4 = wb.create_sheet('国别×分类')
s4.merge_cells('A1:D1')
title_cell = s4.cell(1, 1, '国别×分类分布')
title_cell.font = TITLE_F
title_cell.border = GOLD_BD

# Count plans per (country, class), pivot the class into columns, then append
# a per-row total column and a grand-total row.
ct = m.groupby(['所属国别', '是否超规']).size().unstack(fill_value=0)
ct['合计'] = ct.sum(1)
ct.loc['合计'] = ct.sum()

hdr_row(s4, 3, ['国别'] + list(ct.columns))
for row_no, (country, counts) in enumerate(ct.iterrows(), start=4):
    # int() converts each count to a plain Python int before writing.
    line = [country] + [int(x) for x in counts]
    for col_no, v in enumerate(line, start=1):
        cell = s4.cell(row_no, col_no, v)
        cell.font = DATA_F
        cell.border = BORDER
s4.column_dimensions['A'].width = 25
# ════ Sheet 5: 预警明细 ════
s5=wb.create_sheet('预警明细')
s5.merge_cells('A1:G1'); s5.cell(1,1,'三色预警明细共6项').font=TITLE_F; s5.cell(1,1).border=GOLD_LINE
hdr_row(s5,3,['信号','类型','项目名称','方案名称','状态','计划开工','距开工'])
warned=m[m['w']!=''].sort_values('距开工天')
for r,(_,row) in enumerate(warned.iterrows()):
is_w='未审批' in str(row['方案状态_clean'])
bg=WARN_BG if is_w else None
vals=[row['w'],row['是否超规'],row['项目名称'],row['方案名称'],row['方案状态_clean'],
str(row['分部分项工程计划开工日期'])[:10],f"{int(row['距开工天'])}"]
fmts=[DATA_F]*7; fmts[6]=RED_F
for c,(v,f) in enumerate(zip(vals,fmts)):
cell=s5.cell(r+4,c+1,v); cell.font=f; cell.border=BORDER
if bg: cell.fill=bg
s5.auto_filter.ref=f'A3:G{len(warned)+3}'
s5.column_dimensions['A'].width=6; s5.column_dimensions['B'].width=8
s5.column_dimensions['C'].width=40; s5.column_dimensions['D'].width=35
s5.column_dimensions['E'].width=18; s5.column_dimensions['F'].width=12
s5.column_dimensions['G'].width=8
# ════ Sheet 5: approval progress & warnings ════
s5 = wb.create_sheet('审批进度')
s5.merge_cells('A1:D1')
s5.cell(1, 1, '审批进度 & 三色预警').font = TITLE_F
s5.cell(1, 1).border = GOLD_BD
hdr_row(s5, 3, ['指标', '数值', '占比', '备注'])

completed = (m['简化状态'] == '已完成').sum()
unfinished = tot - completed
rn = (m['预警'] != '').sum()
orn = (m['预警'] == '🟠').sum()
ye = (m['预警'] == '🟡').sum()


def _pct(part):
    """Format part/tot as a whole-number percentage; '0%' when tot is 0."""
    return f'{part/tot*100:.0f}%' if tot else '0%'


rows = [
    ('方案总数', tot, '100%', '≥2026年开工·排除已作废'),
    ('已完成审批', completed, _pct(completed), ''),
    ('未完成审批', unfinished, _pct(unfinished), '含审批中+未审批'),
    ('🟠 橙色预警', orn, _pct(orn), '≤30天未审批'),
    ('🟡 黄色预警', ye, _pct(ye), '≤45天未审批'),
    # Both counts carry the '项' unit.
    ('预警合计', rn, _pct(rn), f'🟠{orn}项+🟡{ye}项'),
]
for r, (lab, val, pct, note) in enumerate(rows):
    fmts = [DATA_F, DATA_F, DATA_F, GRAY_F]
    # Green is for the completed row only. The previous test `'' not in lab`
    # is always False (the empty string is in every string), so green never
    # applied; excluding labels containing '未' keeps '未完成审批' on the default.
    if '完成' in lab and '未' not in lab:
        fmts = [GREEN_F, BOLD_F, BOLD_F, GRAY_F]
    if '预警' in lab:
        fmts = [RED_F, BOLD_F, BOLD_F, GRAY_F]
    if '总数' in lab:
        fmts = [BLUE_F, BOLD_F, BOLD_F, GRAY_F]
    for c, (v, f) in enumerate(zip([lab, val, pct, note], fmts)):
        cell = s5.cell(r+4, c+1, v)
        cell.font = f
        cell.border = BORDER
for w, c in zip([18, 10, 10, 35], 'ABCD'):
    s5.column_dimensions[c].width = w
# ════ Sheet 6: warning detail ════
s6 = wb.create_sheet('预警明细')
s6.merge_cells('A1:G1')
s6.cell(1, 1, f'三色预警明细(共{rn}项)').font = TITLE_F
s6.cell(1, 1).border = GOLD_BD
hdr_row(s6, 3, ['信号', '类型', '项目名称', '方案名称', '状态', '计划开工', '距开工'])

# Only rows carrying a warning marker, nearest start date first.
warned = m[m['预警'] != ''].sort_values('距开工天')
for row_no, (_, rec) in enumerate(warned.iterrows(), start=4):
    # Rows whose status contains '未审批' get the highlight fill.
    row_fill = WARN_BG if '未审批' in str(rec['方案状态_clean']) else None
    vals = [
        rec['预警'],
        rec['是否超规'],
        rec['项目名称'],
        rec['方案名称'],
        rec['方案状态_clean'],
        str(rec['分部分项工程计划开工日期'])[:10],
        f"{int(rec['距开工天'])}",
    ]
    for col_no, v in enumerate(vals, start=1):
        cell = s6.cell(row_no, col_no, v)
        # The 7th column (days until start) is shown in the red font.
        cell.font = RED_F if col_no == 7 else DATA_F
        cell.border = BORDER
        if row_fill:
            cell.fill = row_fill

s6.auto_filter.ref = f'A3:G{len(warned)+3}'
for w, c in zip([6, 8, 40, 35, 18, 12, 8], 'ABCDEFG'):
    s6.column_dimensions[c].width = w
wb.save(OUT)
print(f"{OUT}")
print(f" 📊 数据源(52行) → 汇总表4个 + 预警明细 + 透视表创建说明")
print(f" Sheet1: 源表清洗后 ({len(raw_clean)}× {len(raw_clean.columns)}列)")
print(f" Sheet2: 有效≥2026 ({len(raw_2026)}行)")
print(f" Sheet3-6: 汇总表 + 预警明细")