fix: 工作簿v3 — 数据源完整镜像源表25列+数字对齐HTML

This commit is contained in:
大师 2026-06-09 03:09:03 +08:00
parent 4634c99ee6
commit 91d82ac475
2 changed files with 138 additions and 128 deletions

View File

@@ -1,151 +1,161 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""危大方案看板数据工作簿 v2 — 数据源+汇总表 (Excel原生透视表需手动创建含说明)""" """危大方案看板数据工作簿 v3 — 数据源完整镜像源表结构 + 汇总表"""
import pandas as pd import pandas as pd
from openpyxl import Workbook from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter from openpyxl.utils import get_column_letter
BASE = "/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/data/2026-06-08/cleaned" BASE = "/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/data/2026-06-08"
OUT = f"{BASE}/危大方案看板数据工作簿.xlsx" OUT = f"{BASE}/cleaned/危大方案看板数据工作簿.xlsx"
# ── Data ── # ── Read raw source ──
df = pd.read_parquet(f"{BASE}/methods_cleaned.parquet") raw = pd.read_excel("/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/raw/2026-06-08/技术方案统计表.xlsx", header=1)
df['开工年份'] = pd.to_datetime(df['分部分项工程计划开工日期'], errors='coerce').dt.year raw.columns = [str(c).strip() for c in raw.columns]
m = df[(df['是否有效登记']==True)&(df['开工年份']>=2026)].copy() print(f"Source rows: {len(raw)}, cols: {list(raw.columns)[:5]}...")
# ── Load the cleaned table; it decides which raw rows are kept ──
cl = pd.read_parquet(f"{BASE}/cleaned/methods_cleaned.parquet")
cl['开工年份'] = pd.to_datetime(cl['分部分项工程计划开工日期'], errors='coerce').dt.year

# Row masks over the cleaned table, built once and reused below.
is_valid = cl['是否有效登记'] == True  # noqa: E712 -- elementwise comparison
is_valid_2026 = is_valid & (cl['开工年份'] >= 2026)
valid_idx = cl[is_valid].index.tolist()
yr_idx = cl[is_valid_2026].index.tolist()

# Pick the raw rows whose index label appears in the cleaned selection.
# NOTE(review): this assumes the parquet kept the raw sheet's row index -- confirm.
keep_valid = raw.index.isin(valid_idx)
keep_2026 = raw.index.isin(yr_idx)
raw_clean = raw.loc[keep_valid].copy()
raw_2026 = raw.loc[keep_2026].copy()
print(f"Valid rows: {len(raw_clean)}, >=2026: {len(raw_2026)}")

# ── Summary sheets are built from the cleaned rows (valid, start year >= 2026) ──
m = cl[is_valid_2026].copy()
m['简化状态'] = m['方案状态_clean'].apply(lambda s: '已完成' if '已完成' in str(s) else '未完成') m['简化状态'] = m['方案状态_clean'].apply(lambda s: '已完成' if '已完成' in str(s) else '未完成')
m['是否超规'] = m['是否超一定规模'].astype(str).apply(lambda x: '超规类' if x == '' else '一般类') m['是否超规'] = m['是否超一定规模'].astype(str).apply(lambda x: '超规类' if x == '' else '一般类')
m['开工年份'] = m['开工年份'].astype(int) today = pd.Timestamp('2026-06-08')
today=pd.Timestamp('2026-06-08') m['距开工天'] = (pd.to_datetime(m['分部分项工程计划开工日期']) - today).dt.days.astype(int)
m['距开工天'] = (pd.to_datetime(m['分部分项工程计划开工日期'])-today).dt.days.astype(int)
# Warning
def warn_lev(d, s, orange_days=30, yellow_days=45):
    """Return the warning emoji for a scheme, or '' when no warning applies.

    A scheme is only eligible when its status text contains '未实施' or
    '审批中'. An eligible scheme gets '🟠' when the planned start is at most
    ``orange_days`` away, otherwise '🟡' when it is at most ``yellow_days``
    away.

    Args:
        d: Days until the planned start date; coerced with ``int()``.
        s: Scheme status; coerced with ``str()``.
        orange_days: Inclusive upper bound, in days, for the orange level.
        yellow_days: Inclusive upper bound, in days, for the yellow level.

    Returns:
        '🟠', '🟡', or '' when there is no warning.

    Raises:
        ValueError / TypeError: If ``d`` cannot be converted by ``int()``.
    """
    status = str(s)
    days = int(d)
    # Statuses that mention neither marker never raise a warning.
    if '未实施' not in status and '审批中' not in status:
        return ''
    if days <= orange_days:
        return '🟠'
    if days <= yellow_days:
        return '🟡'
    return ''
m['预警'] = m.apply(lambda r: warn_lev(r['距开工天'], r['方案状态_clean']), axis=1)
# ── Styles ── # ── Styles ──
HDR_F=Font(name='微软雅黑',bold=True,size=10,color='FFFFFF') HDR_F = Font(name='微软雅黑', bold=True, size=10, color='FFFFFF')
HDR_BG=PatternFill('solid',fgColor='1A3A5C') HDR_BG = PatternFill('solid', fgColor='1A3A5C')
TITLE_F=Font(name='微软雅黑',bold=True,size=14,color='1A3A5C') TITLE_F = Font(name='微软雅黑', bold=True, size=14, color='1A3A5C')
SUB_F=Font(name='微软雅黑',bold=True,size=12,color='1A3A5C') SUB_F = Font(name='微软雅黑', bold=True, size=12, color='1A3A5C')
GOLD_LINE=Border(bottom=Side(style='medium',color='C8962E')) GRAY_F = Font(name='微软雅黑', size=9, color='8899AA')
WARN_BG=PatternFill('solid',fgColor='FFF3E0') DATA_F = Font(name='微软雅黑', size=10)
GRAY_F=Font(name='微软雅黑',size=9,color='8899AA') BOLD_F = Font(name='微软雅黑', bold=True, size=10)
DATA_F=Font(name='微软雅黑',size=10) RED_F = Font(name='微软雅黑', bold=True, size=10, color='D94E34')
BOLD_F=Font(name='微软雅黑',bold=True,size=10) GREEN_F = Font(name='微软雅黑', bold=True, size=10, color='2E7D32')
RED_F=Font(name='微软雅黑',bold=True,size=10,color='D94E34') BLUE_F = Font(name='微软雅黑', bold=True, size=10, color='1A3A5C')
GREEN_F=Font(name='微软雅黑',bold=True,size=10,color='2E7D32') WARN_BG = PatternFill('solid', fgColor='FFF3E0')
BLUE_F=Font(name='微软雅黑',bold=True,size=10,color='1A3A5C') BORDER = Border(left=Side('thin', 'DBE2EA'), right=Side('thin', 'DBE2EA'),
BORDER=Border(left=Side('thin','DBE2EA'),right=Side('thin','DBE2EA'),top=Side('thin','DBE2EA'),bottom=Side('thin','DBE2EA')) top=Side('thin', 'DBE2EA'), bottom=Side('thin', 'DBE2EA'))
CENTER=Alignment(horizontal='center',vertical='center') CENTER = Alignment(horizontal='center', vertical='center')
GOLD_BD = Border(bottom=Side(style='medium', color='C8962E'))
def hdr_row(ws, r, cols):
    """Write the labels in ``cols`` as a styled header into row ``r`` of ``ws``.

    Labels are placed left to right starting at column 1. Every cell receives
    the shared header font, header fill, thin border and centered alignment.
    """
    for column_no, label in enumerate(cols, start=1):
        header_cell = ws.cell(row=r, column=column_no, value=label)
        header_cell.font = HDR_F
        header_cell.fill = HDR_BG
        header_cell.border = BORDER
        header_cell.alignment = CENTER
# Helper from the pre-change column of the diff; kept so no definition is lost.
def data_row(ws, r, vals, fmts=None):
    """Write ``vals`` into row ``r`` of ``ws`` starting at column 1.

    Every cell gets the shared border. ``fmts`` optionally supplies one font
    per value; without it each cell uses the default data font.
    """
    for i, v in enumerate(vals):
        c = ws.cell(r, i + 1, v)
        c.border = BORDER
        c.font = fmts[i] if fmts else DATA_F


def write_data_sheet(ws, df, title):
    """Write ``df`` to ``ws`` as a titled, filterable table.

    Layout: the title sits in row 1 (merged across all columns), the header
    in row 3 and the data from row 4 onward. An auto-filter covers the header
    and data rows, and the panes are frozen at ``A4``.

    Missing values are written as empty strings and ``pd.Timestamp`` values
    as the first 10 characters of their string form.

    Args:
        ws: Target worksheet.
        df: DataFrame to dump; its columns become the header row.
        title: Text for the merged title cell.
    """
    cols = list(df.columns)
    n_cols = len(cols)

    ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=n_cols)
    title_cell = ws.cell(1, 1, title)
    title_cell.font = TITLE_F
    title_cell.border = GOLD_BD

    hdr_row(ws, 3, cols)

    # Iterate positionally: itertuples keeps each column's dtype, whereas
    # iterrows() upcasts mixed numeric rows and row[col] returns a Series
    # (breaking pd.isna) when two columns share a label.
    for r, values in enumerate(df.itertuples(index=False, name=None), start=4):
        for c, v in enumerate(values, start=1):
            if pd.isna(v):
                v = ''
            elif isinstance(v, pd.Timestamp):
                v = str(v)[:10]
            cell = ws.cell(r, c, v)
            cell.font = DATA_F
            cell.border = BORDER

    ws.auto_filter.ref = f'A3:{get_column_letter(n_cols)}{len(df) + 3}'
    ws.freeze_panes = 'A4'
    # Width scales with the header length, clamped to the range 12..35.
    for i, name in enumerate(cols, start=1):
        width = max(12, min(35, len(str(name)) * 2))
        ws.column_dimensions[get_column_letter(i)].width = width
# ════ Sheet 0: 透视表说明 ════ wb = Workbook()
s0=wb.active; s0.title='透视表说明'
s0.merge_cells('A1:D1'); s0.cell(1,1,'🗂️ 如何创建 Excel 原生透视表').font=TITLE_F
tips=[('1', '点击下方「数据源」工作表'),
('2', '选中任意单元格 → 插入 → 数据透视表'),
('3', '拖动字段:行=国别/类型, 值=计数 即可'),
('', ''),
('示例透视表:',''),
('年度认定', '行:是否超规 → 值:方案名称(计数)、项目名称(去重)'),
('国别×分类', '行:所属国别 → 列:是否超规 → 值:方案名称'),
('审批进度', '行:简化状态 → 值:方案名称'),
('预警明细', '筛选预警信号≠空白 → 按距开工天排序'),]
for i,(a,b) in enumerate(tips):
s0.cell(i+3,1,a).font=BOLD_F; s0.cell(i+3,2,b).font=DATA_F
s0.column_dimensions['A'].width=15; s0.column_dimensions['B'].width=55
# ════ Sheet 1: 数据源 ════ # ════ Sheet 1: 源表清洗后(完整列) ════
s1=wb.create_sheet('数据源') s1 = wb.active; s1.title = '源表清洗后'
cols=['项目名称','方案名称','所属国别','是否超规','方案状态_clean','简化状态','分部分项工程计划开工日期','开工年份','距开工天'] write_data_sheet(s1, raw_clean, '技术方案统计表 · 清洗后(有效登记 全部年份)')
for i,h in enumerate(cols): s1.cell(1,i+1,h); s1.cell(1,i+1).font=HDR_F; s1.cell(1,i+1).fill=HDR_BG; s1.cell(1,i+1).border=BORDER; s1.cell(1,i+1).alignment=CENTER
for r,(_,row) in enumerate(m[cols].iterrows()):
for c,col in enumerate(cols):
v=row[col];
if pd.isna(v): v=''
elif isinstance(v,(pd.Timestamp,)): v=str(v)[:10]
cell=s1.cell(r+2,c+1,v); cell.font=DATA_F; cell.border=BORDER
s1.auto_filter.ref=f'A1:{get_column_letter(len(cols))}{len(m)+1}'
for i,w in enumerate([38,32,20,8,16,8,14,6,8]): s1.column_dimensions[get_column_letter(i+1)].width=w
s1.freeze_panes='A2'
# ════ Sheet 2: 年度认定汇总 ════ # ════ Sheet 2: 有效2026+ ════
s2=wb.create_sheet('年度认定汇总') s2 = wb.create_sheet('有效≥2026')
s2.merge_cells('A1:D1'); s2.cell(1,1,'年度认定≥2026开工').font=TITLE_F; s2.cell(1,1).border=GOLD_LINE write_data_sheet(s2, raw_2026, '技术方案统计表 · 有效登记 ≥2026年开工')
s2.cell(3,1,'分类'); s2.cell(3,2,'方案数'); s2.cell(3,3,'项目数'); s2.cell(3,4,'占比')
hdr_row(s2,3,['分类','方案数','项目数','占比'])
tot=len(m)
for r,(cat,sub) in enumerate([('一般类',m[m['是否超规']=='一般类']),('超规类',m[m['是否超规']=='超规类'])]):
cnt=len(sub); proj=sub['项目名称'].nunique()
data_row(s2,r+4,[cat,cnt,proj,f'{cnt/tot*100:.0f}%'])
data_row(s2,6,['合计',tot,m['项目名称'].nunique(),'100%'],[BOLD_F]*4)
s2.column_dimensions['A'].width=12
for c in 'BCD': s2.column_dimensions[c].width=10
# ════ Sheet 3: 国别×分类 ════ # ════ Sheet 3: 年度认定汇总 ════
s3=wb.create_sheet('国别×分类') s3 = wb.create_sheet('年度认定汇总')
s3.merge_cells('A1:D1'); s3.cell(1,1,'国别×分类分布').font=TITLE_F; s3.cell(1,1).border=GOLD_LINE s3.merge_cells('A1:D1'); s3.cell(1, 1, '年度认定≥2026开工').font = TITLE_F; s3.cell(1, 1).border = GOLD_BD
ct=m.groupby(['所属国别','是否超规']).size().unstack(fill_value=0) hdr_row(s3, 3, ['分类', '方案数', '项目数', '占比'])
ct['合计']=ct.sum(1); ct.loc['合计']=ct.sum() tot = len(m)
hdr_row(s3,3,['国别']+list(ct.columns)) for r, (cat, sub) in enumerate([('一般类', m[m['是否超规'] == '一般类']), ('超规类', m[m['是否超规'] == '超规类'])]):
for r,(idx,row) in enumerate(ct.iterrows()): cnt = len(sub); proj = sub['项目名称'].nunique()
data_row(s3,r+4,[idx]+[int(v) for v in row]) for c, (v, f) in enumerate(zip([cat, cnt, proj, f'{cnt/tot*100:.0f}%'], [DATA_F, DATA_F, DATA_F, DATA_F])):
s3.column_dimensions['A'].width=25 cell = s3.cell(r+4, c+1, v); cell.font = f; cell.border = BORDER
for c, (v, f) in enumerate(zip(['合计', tot, m['项目名称'].nunique(), '100%'], [BOLD_F]*4)):
cell = s3.cell(6, c+1, v); cell.font = f; cell.border = BORDER
for w, c in zip([12, 10, 10, 10], 'ABCD'): s3.column_dimensions[c].width = w
# ════ Sheet 4: 审批进度 ════ # ════ Sheet 4: 国别×分类 ════
s4=wb.create_sheet('审批进度') s4 = wb.create_sheet('国别×分类')
s4.merge_cells('A1:D1'); s4.cell(1,1,'审批进度 & 三色预警').font=TITLE_F; s4.cell(1,1).border=GOLD_LINE s4.merge_cells('A1:D1'); s4.cell(1, 1, '国别×分类分布').font = TITLE_F; s4.cell(1, 1).border = GOLD_BD
hdr_row(s4,3,['指标','数值','占比','备注']) ct = m.groupby(['所属国别', '是否超规']).size().unstack(fill_value=0)
completed=(m['简化状态']=='已完成').sum(); unfinished=tot-completed ct['合计'] = ct.sum(1); ct.loc['合计'] = ct.sum()
# Warning hdr_row(s4, 3, ['国别'] + list(ct.columns))
def warn_lev(d,s): for r, (idx, row) in enumerate(ct.iterrows()):
s=str(s); d=int(d) for c, v in enumerate([idx] + [int(x) for x in row]):
if '未实施' not in s and '审批中' not in s: return '' cell = s4.cell(r+4, c+1, v); cell.font = DATA_F; cell.border = BORDER
if d<=30: return '🟠' s4.column_dimensions['A'].width = 25
if d<=45: return '🟡'
return ''
m['w']=m.apply(lambda r:warn_lev(r['距开工天'],r['方案状态_clean']),axis=1)
rn=(m['w']!='').sum(); orn=(m['w']=='🟠').sum(); ye=(m['w']=='🟡').sum()
rows=[('方案总数',tot,'100%','≥2026年开工·排除已作废'),
('已完成审批',completed,f'{completed/tot*100:.0f}%',''),
('未完成审批',unfinished,f'{unfinished/tot*100:.0f}%','含审批中+未审批'),
('🟠 橙色预警',orn,f'{orn/tot*100:.0f}%','≤30天未审批'),
('🟡 黄色预警',ye,f'{ye/tot*100:.0f}%','≤45天未审批'),
('预警合计',rn,f'{rn/tot*100:.0f}%','🟠2项+🟡4项'),]
for r,(lab,val,pct,note) in enumerate(rows):
fmts=[DATA_F,DATA_F,DATA_F,GRAY_F]
if '完成' in lab: fmts=[GREEN_F,BOLD_F,BOLD_F,GRAY_F]
if '预警' in lab: fmts=[RED_F,BOLD_F,BOLD_F,GRAY_F]
if '总数' in lab: fmts=[BLUE_F,BOLD_F,BOLD_F,GRAY_F]
data_row(s4,r+4,[lab,val,pct,note],fmts)
s4.column_dimensions['A'].width=15; s4.column_dimensions['B'].width=10
s4.column_dimensions['C'].width=10; s4.column_dimensions['D'].width=35
# ════ Sheet 5: 预警明细 ════ # ════ Sheet 5: 审批进度 & 预警 ════
s5=wb.create_sheet('预警明细') s5 = wb.create_sheet('审批进度')
s5.merge_cells('A1:G1'); s5.cell(1,1,'三色预警明细共6项').font=TITLE_F; s5.cell(1,1).border=GOLD_LINE s5.merge_cells('A1:D1'); s5.cell(1, 1, '审批进度 & 三色预警').font = TITLE_F; s5.cell(1, 1).border = GOLD_BD
hdr_row(s5,3,['信号','类型','项目名称','方案名称','状态','计划开工','距开工']) hdr_row(s5, 3, ['指标', '数值', '占比', '备注'])
warned=m[m['w']!=''].sort_values('距开工天') completed = (m['简化状态'] == '已完成').sum(); unfinished = tot - completed
for r,(_,row) in enumerate(warned.iterrows()): rn = (m['预警'] != '').sum(); orn = (m['预警'] == '🟠').sum(); ye = (m['预警'] == '🟡').sum()
is_w='未审批' in str(row['方案状态_clean']) rows = [
bg=WARN_BG if is_w else None ('方案总数', tot, '100%', '≥2026年开工·排除已作废'),
vals=[row['w'],row['是否超规'],row['项目名称'],row['方案名称'],row['方案状态_clean'], ('已完成审批', completed, f'{completed/tot*100:.0f}%', ''),
str(row['分部分项工程计划开工日期'])[:10],f"{int(row['距开工天'])}"] ('未完成审批', unfinished, f'{unfinished/tot*100:.0f}%', '含审批中+未审批'),
fmts=[DATA_F]*7; fmts[6]=RED_F ('🟠 橙色预警', orn, f'{orn/tot*100:.0f}%', '≤30天未审批'),
for c,(v,f) in enumerate(zip(vals,fmts)): ('🟡 黄色预警', ye, f'{ye/tot*100:.0f}%', '≤45天未审批'),
cell=s5.cell(r+4,c+1,v); cell.font=f; cell.border=BORDER ('预警合计', rn, f'{rn/tot*100:.0f}%', f'🟠{orn}项+🟡{ye}'),
if bg: cell.fill=bg ]
s5.auto_filter.ref=f'A3:G{len(warned)+3}' for r, (lab, val, pct, note) in enumerate(rows):
s5.column_dimensions['A'].width=6; s5.column_dimensions['B'].width=8 fmts = [DATA_F, DATA_F, DATA_F, GRAY_F]
s5.column_dimensions['C'].width=40; s5.column_dimensions['D'].width=35 if '完成' in lab and '' not in lab: fmts = [GREEN_F, BOLD_F, BOLD_F, GRAY_F]
s5.column_dimensions['E'].width=18; s5.column_dimensions['F'].width=12 if '预警' in lab: fmts = [RED_F, BOLD_F, BOLD_F, GRAY_F]
s5.column_dimensions['G'].width=8 if '总数' in lab: fmts = [BLUE_F, BOLD_F, BOLD_F, GRAY_F]
for c, (v, f) in enumerate(zip([lab, val, pct, note], fmts)):
cell = s5.cell(r+4, c+1, v); cell.font = f; cell.border = BORDER
for w, c in zip([18, 10, 10, 35], 'ABCD'): s5.column_dimensions[c].width = w
# ════ Sheet 6: warning details ════
s6 = wb.create_sheet('预警明细')
s6.merge_cells('A1:G1')
s6_title = s6.cell(1, 1, f'三色预警明细(共{rn}项)')
s6_title.font = TITLE_F
s6_title.border = GOLD_BD
hdr_row(s6, 3, ['信号', '类型', '项目名称', '方案名称', '状态', '计划开工', '距开工'])

# Rows carrying a warning signal, ordered by days until the planned start.
warned = m[m['预警'] != ''].sort_values('距开工天')
for row_no, (_, rec) in enumerate(warned.iterrows(), start=4):
    status = rec['方案状态_clean']
    # Rows whose status mentions '未审批' get the warning background.
    row_fill = WARN_BG if '未审批' in str(status) else None
    values = [
        rec['预警'],
        rec['是否超规'],
        rec['项目名称'],
        rec['方案名称'],
        status,
        str(rec['分部分项工程计划开工日期'])[:10],
        f"{int(rec['距开工天'])}",
    ]
    for col_no, value in enumerate(values, start=1):
        cell = s6.cell(row_no, col_no, value)
        # The 7th column (days until start) is set in the red font.
        cell.font = RED_F if col_no == 7 else DATA_F
        cell.border = BORDER
        if row_fill:
            cell.fill = row_fill

s6.auto_filter.ref = f'A3:G{len(warned)+3}'
for col_letter, col_width in zip('ABCDEFG', [6, 8, 40, 35, 18, 12, 8]):
    s6.column_dimensions[col_letter].width = col_width
wb.save(OUT) wb.save(OUT)
print(f"{OUT}") print(f"{OUT}")
print(f" 📊 数据源(52行) → 汇总表4个 + 预警明细 + 透视表创建说明") print(f" Sheet1: 源表清洗后 ({len(raw_clean)}× {len(raw_clean.columns)}列)")
print(f" Sheet2: 有效≥2026 ({len(raw_2026)}行)")
print(f" Sheet3-6: 汇总表 + 预警明细")