fix: 工作簿v3 — 数据源完整镜像源表25列+数字对齐HTML

This commit is contained in:
大师 2026-06-09 03:09:03 +08:00
parent 4634c99ee6
commit 91d82ac475
2 changed files with 138 additions and 128 deletions

View File

@ -1,107 +1,37 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""危大方案看板数据工作簿 v2 — 数据源+汇总表 (Excel原生透视表需手动创建含说明)""" """危大方案看板数据工作簿 v3 — 数据源完整镜像源表结构 + 汇总表"""
import pandas as pd import pandas as pd
from openpyxl import Workbook from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter from openpyxl.utils import get_column_letter
BASE = "/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/data/2026-06-08/cleaned" BASE = "/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/data/2026-06-08"
OUT = f"{BASE}/危大方案看板数据工作簿.xlsx" OUT = f"{BASE}/cleaned/危大方案看板数据工作簿.xlsx"
# ── Data ── # ── Read raw source ──
df = pd.read_parquet(f"{BASE}/methods_cleaned.parquet") raw = pd.read_excel("/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/raw/2026-06-08/技术方案统计表.xlsx", header=1)
df['开工年份'] = pd.to_datetime(df['分部分项工程计划开工日期'], errors='coerce').dt.year raw.columns = [str(c).strip() for c in raw.columns]
m = df[(df['是否有效登记']==True)&(df['开工年份']>=2026)].copy() print(f"Source rows: {len(raw)}, cols: {list(raw.columns)[:5]}...")
# ── Read cleaned data for filtering ──
# The cleaned parquet supplies the validity flag and the planned start date
# that decide which rows of the raw sheet are exported.
cl = pd.read_parquet(f"{BASE}/cleaned/methods_cleaned.parquet")
# Index labels of every row flagged as a valid registration (all years).
valid_idx = cl[cl['是否有效登记'] == True].index.tolist()
# Planned start year. Unparseable dates are coerced to NaT, so their year is
# missing and they never satisfy the ">= 2026" comparison below.
cl['开工年份'] = pd.to_datetime(cl['分部分项工程计划开工日期'], errors='coerce').dt.year
# Index labels of valid rows whose planned start year is 2026 or later.
yr_idx = cl[(cl['是否有效登记'] == True) & (cl['开工年份'] >= 2026)].index.tolist()
# Merge back to raw columns (align by index)
# NOTE(review): this only selects the right rows if `cl` carries the same index
# labels as `raw` (the Excel sheet read with header=1) — i.e. the cleaning step
# must not have reset or reordered the index. TODO confirm against the cleaner.
raw_clean = raw.loc[raw.index.isin(valid_idx)].copy()
raw_2026 = raw.loc[raw.index.isin(yr_idx)].copy()
print(f"Valid rows: {len(raw_clean)}, >=2026: {len(raw_2026)}")
# ── For summary sheets, use cleaned data ──
# Same mask as `yr_idx`, but taken from `cl` so the cleaned columns
# (e.g. 方案状态_clean) are available to the summary sheets.
m = cl[(cl['是否有效登记'] == True) & (cl['开工年份'] >= 2026)].copy()
m['简化状态'] = m['方案状态_clean'].apply(lambda s: '已完成' if '已完成' in str(s) else '未完成') m['简化状态'] = m['方案状态_clean'].apply(lambda s: '已完成' if '已完成' in str(s) else '未完成')
m['是否超规'] = m['是否超一定规模'].astype(str).apply(lambda x: '超规类' if x == '' else '一般类') m['是否超规'] = m['是否超一定规模'].astype(str).apply(lambda x: '超规类' if x == '' else '一般类')
m['开工年份'] = m['开工年份'].astype(int)
today = pd.Timestamp('2026-06-08') today = pd.Timestamp('2026-06-08')
m['距开工天'] = (pd.to_datetime(m['分部分项工程计划开工日期']) - today).dt.days.astype(int) m['距开工天'] = (pd.to_datetime(m['分部分项工程计划开工日期']) - today).dt.days.astype(int)
# ── Styles ──
HDR_F=Font(name='微软雅黑',bold=True,size=10,color='FFFFFF')
HDR_BG=PatternFill('solid',fgColor='1A3A5C')
TITLE_F=Font(name='微软雅黑',bold=True,size=14,color='1A3A5C')
SUB_F=Font(name='微软雅黑',bold=True,size=12,color='1A3A5C')
GOLD_LINE=Border(bottom=Side(style='medium',color='C8962E'))
WARN_BG=PatternFill('solid',fgColor='FFF3E0')
GRAY_F=Font(name='微软雅黑',size=9,color='8899AA')
DATA_F=Font(name='微软雅黑',size=10)
BOLD_F=Font(name='微软雅黑',bold=True,size=10)
RED_F=Font(name='微软雅黑',bold=True,size=10,color='D94E34')
GREEN_F=Font(name='微软雅黑',bold=True,size=10,color='2E7D32')
BLUE_F=Font(name='微软雅黑',bold=True,size=10,color='1A3A5C')
BORDER=Border(left=Side('thin','DBE2EA'),right=Side('thin','DBE2EA'),top=Side('thin','DBE2EA'),bottom=Side('thin','DBE2EA'))
CENTER=Alignment(horizontal='center',vertical='center')
def hdr_row(ws, r, cols):
    """Write ``cols`` as a styled header row on worksheet ``ws``.

    Each label lands in row ``r``, starting at column 1, and receives the
    shared header font, fill, border and centred alignment.
    """
    for col_no, label in enumerate(cols, start=1):
        header = ws.cell(r, col_no, label)
        header.font = HDR_F
        header.fill = HDR_BG
        header.border = BORDER
        header.alignment = CENTER
def data_row(ws, r, vals, fmts=None):
    """Write ``vals`` into row ``r`` of ``ws``, one value per column from column 1.

    Every cell gets the shared border. When ``fmts`` is truthy, the font for
    position ``i`` is ``fmts[i]``; otherwise the default data font is used.
    """
    for offset, value in enumerate(vals):
        target = ws.cell(r, offset + 1, value)
        target.border = BORDER
        if fmts:
            target.font = fmts[offset]
        else:
            target.font = DATA_F
wb=Workbook()
# ════ Sheet 0: 透视表说明 ════
s0=wb.active; s0.title='透视表说明'
s0.merge_cells('A1:D1'); s0.cell(1,1,'🗂️ 如何创建 Excel 原生透视表').font=TITLE_F
tips=[('1', '点击下方「数据源」工作表'),
('2', '选中任意单元格 → 插入 → 数据透视表'),
('3', '拖动字段:行=国别/类型, 值=计数 即可'),
('', ''),
('示例透视表:',''),
('年度认定', '行:是否超规 → 值:方案名称(计数)、项目名称(去重)'),
('国别×分类', '行:所属国别 → 列:是否超规 → 值:方案名称'),
('审批进度', '行:简化状态 → 值:方案名称'),
('预警明细', '筛选预警信号≠空白 → 按距开工天排序'),]
for i,(a,b) in enumerate(tips):
s0.cell(i+3,1,a).font=BOLD_F; s0.cell(i+3,2,b).font=DATA_F
s0.column_dimensions['A'].width=15; s0.column_dimensions['B'].width=55
# ════ Sheet 1: 数据源 ════
s1=wb.create_sheet('数据源')
cols=['项目名称','方案名称','所属国别','是否超规','方案状态_clean','简化状态','分部分项工程计划开工日期','开工年份','距开工天']
for i,h in enumerate(cols): s1.cell(1,i+1,h); s1.cell(1,i+1).font=HDR_F; s1.cell(1,i+1).fill=HDR_BG; s1.cell(1,i+1).border=BORDER; s1.cell(1,i+1).alignment=CENTER
for r,(_,row) in enumerate(m[cols].iterrows()):
for c,col in enumerate(cols):
v=row[col];
if pd.isna(v): v=''
elif isinstance(v,(pd.Timestamp,)): v=str(v)[:10]
cell=s1.cell(r+2,c+1,v); cell.font=DATA_F; cell.border=BORDER
s1.auto_filter.ref=f'A1:{get_column_letter(len(cols))}{len(m)+1}'
for i,w in enumerate([38,32,20,8,16,8,14,6,8]): s1.column_dimensions[get_column_letter(i+1)].width=w
s1.freeze_panes='A2'
# ════ Sheet 2: 年度认定汇总 ════
s2=wb.create_sheet('年度认定汇总')
s2.merge_cells('A1:D1'); s2.cell(1,1,'年度认定≥2026开工').font=TITLE_F; s2.cell(1,1).border=GOLD_LINE
s2.cell(3,1,'分类'); s2.cell(3,2,'方案数'); s2.cell(3,3,'项目数'); s2.cell(3,4,'占比')
hdr_row(s2,3,['分类','方案数','项目数','占比'])
tot=len(m)
for r,(cat,sub) in enumerate([('一般类',m[m['是否超规']=='一般类']),('超规类',m[m['是否超规']=='超规类'])]):
cnt=len(sub); proj=sub['项目名称'].nunique()
data_row(s2,r+4,[cat,cnt,proj,f'{cnt/tot*100:.0f}%'])
data_row(s2,6,['合计',tot,m['项目名称'].nunique(),'100%'],[BOLD_F]*4)
s2.column_dimensions['A'].width=12
for c in 'BCD': s2.column_dimensions[c].width=10
# ════ Sheet 3: 国别×分类 ════
s3=wb.create_sheet('国别×分类')
s3.merge_cells('A1:D1'); s3.cell(1,1,'国别×分类分布').font=TITLE_F; s3.cell(1,1).border=GOLD_LINE
ct=m.groupby(['所属国别','是否超规']).size().unstack(fill_value=0)
ct['合计']=ct.sum(1); ct.loc['合计']=ct.sum()
hdr_row(s3,3,['国别']+list(ct.columns))
for r,(idx,row) in enumerate(ct.iterrows()):
data_row(s3,r+4,[idx]+[int(v) for v in row])
s3.column_dimensions['A'].width=25
# ════ Sheet 4: 审批进度 ════
s4=wb.create_sheet('审批进度')
s4.merge_cells('A1:D1'); s4.cell(1,1,'审批进度 & 三色预警').font=TITLE_F; s4.cell(1,1).border=GOLD_LINE
hdr_row(s4,3,['指标','数值','占比','备注'])
completed=(m['简化状态']=='已完成').sum(); unfinished=tot-completed
# Warning # Warning
def warn_lev(d, s): def warn_lev(d, s):
s = str(s); d = int(d) s = str(s); d = int(d)
@ -109,43 +39,123 @@ def warn_lev(d,s):
if d <= 30: return '🟠' if d <= 30: return '🟠'
if d <= 45: return '🟡' if d <= 45: return '🟡'
return '' return ''
m['w']=m.apply(lambda r:warn_lev(r['距开工天'],r['方案状态_clean']),axis=1) m['预警'] = m.apply(lambda r: warn_lev(r['距开工天'], r['方案状态_clean']), axis=1)
rn=(m['w']!='').sum(); orn=(m['w']=='🟠').sum(); ye=(m['w']=='🟡').sum()
rows=[('方案总数',tot,'100%','≥2026年开工·排除已作废'), # ── Styles ──
HDR_F = Font(name='微软雅黑', bold=True, size=10, color='FFFFFF')
HDR_BG = PatternFill('solid', fgColor='1A3A5C')
TITLE_F = Font(name='微软雅黑', bold=True, size=14, color='1A3A5C')
SUB_F = Font(name='微软雅黑', bold=True, size=12, color='1A3A5C')
GRAY_F = Font(name='微软雅黑', size=9, color='8899AA')
DATA_F = Font(name='微软雅黑', size=10)
BOLD_F = Font(name='微软雅黑', bold=True, size=10)
RED_F = Font(name='微软雅黑', bold=True, size=10, color='D94E34')
GREEN_F = Font(name='微软雅黑', bold=True, size=10, color='2E7D32')
BLUE_F = Font(name='微软雅黑', bold=True, size=10, color='1A3A5C')
WARN_BG = PatternFill('solid', fgColor='FFF3E0')
BORDER = Border(left=Side('thin', 'DBE2EA'), right=Side('thin', 'DBE2EA'),
top=Side('thin', 'DBE2EA'), bottom=Side('thin', 'DBE2EA'))
CENTER = Alignment(horizontal='center', vertical='center')
GOLD_BD = Border(bottom=Side(style='medium', color='C8962E'))
def hdr_row(ws, r, cols):
    """Render the labels in ``cols`` as the header of worksheet ``ws``.

    Labels are placed left to right in row ``r`` beginning at column 1; each
    cell is given the header font, header fill, thin border and centring.
    """
    column = 0
    for heading in cols:
        column += 1
        head_cell = ws.cell(row=r, column=column, value=heading)
        head_cell.font = HDR_F
        head_cell.fill = HDR_BG
        head_cell.border = BORDER
        head_cell.alignment = CENTER
def write_data_sheet(ws, df, title):
    """Dump ``df`` onto worksheet ``ws`` as a titled, filterable table.

    Layout: the title goes in A1 (merged across the table width), row 3 holds
    the column headers, and data starts on row 4. Missing values are written
    as empty strings and ``pd.Timestamp`` values as their first ten characters
    (``YYYY-MM-DD``). An auto-filter covers the header and data rows, panes
    are frozen below the header, and column widths are derived from the
    header text length (clamped to 12..35).

    Args:
        ws: target openpyxl worksheet.
        df: DataFrame to write; all of its columns are exported in order.
        title: text for the title cell.
    """
    cols = list(df.columns)
    n_cols = len(cols)

    # Title banner. A one-cell range has nothing to merge, and a zero-column
    # frame would otherwise produce an invalid range (end_column=0).
    if n_cols > 1:
        ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=n_cols)
    ws.cell(1, 1, title).font = TITLE_F
    ws.cell(1, 1).border = GOLD_BD
    if not cols:
        # Nothing else to lay out; get_column_letter(0) below would raise.
        return

    hdr_row(ws, 3, cols)

    # itertuples (positional, plain tuples) instead of iterrows: iterrows
    # builds a Series per row, which upcasts mixed numeric columns (ints become
    # floats) and returns a Series instead of a scalar for duplicated labels.
    for r, values in enumerate(df.itertuples(index=False, name=None), start=4):
        for c, v in enumerate(values, start=1):
            if pd.isna(v):
                v = ''
            elif isinstance(v, pd.Timestamp):
                v = str(v)[:10]
            cell = ws.cell(r, c, v)
            cell.font = DATA_F
            cell.border = BORDER

    ws.auto_filter.ref = f'A3:{get_column_letter(n_cols)}{len(df)+3}'
    ws.freeze_panes = 'A4'
    for i, col in enumerate(cols, start=1):
        # Two width units per character so CJK headers are not clipped.
        ws.column_dimensions[get_column_letter(i)].width = max(12, min(35, len(str(col)) * 2))
# New workbook; its default first sheet is reused as sheet 1 below.
wb = Workbook()
# ════ Sheet 1: cleaned source table (all columns) ════
# Raw-sheet rows that are valid registrations, regardless of start year.
s1 = wb.active; s1.title = '源表清洗后'
write_data_sheet(s1, raw_clean, '技术方案统计表 · 清洗后(有效登记 全部年份)')
# ════ Sheet 2: valid rows, 2026+ ════
# Raw-sheet rows that are valid registrations with a planned start year >= 2026.
s2 = wb.create_sheet('有效≥2026')
write_data_sheet(s2, raw_2026, '技术方案统计表 · 有效登记 ≥2026年开工')
# ════ Sheet 3: annual classification summary ════
# One row per category of ``m`` (the >=2026 subset): number of plans, number
# of distinct projects, and share of all plans; followed by a totals row.
s3 = wb.create_sheet('年度认定汇总')
s3.merge_cells('A1:D1')
s3.cell(1, 1, '年度认定≥2026开工').font = TITLE_F
s3.cell(1, 1).border = GOLD_BD
hdr_row(s3, 3, ['分类', '方案数', '项目数', '占比'])
tot = len(m)
for r, cat in enumerate(['一般类', '超规类']):
    sub = m[m['是否超规'] == cat]
    cnt = len(sub)
    proj = sub['项目名称'].nunique()
    # Guard the share: with an empty ``m`` the division would raise
    # ZeroDivisionError before any sheet is written.
    pct = f'{cnt/tot*100:.0f}%' if tot else '0%'
    for c, v in enumerate([cat, cnt, proj, pct]):
        cell = s3.cell(r+4, c+1, v)
        cell.font = DATA_F
        cell.border = BORDER
# Totals row sits directly below the two category rows (rows 4 and 5).
for c, v in enumerate(['合计', tot, m['项目名称'].nunique(), '100%']):
    cell = s3.cell(6, c+1, v)
    cell.font = BOLD_F
    cell.border = BORDER
for w, c in zip([12, 10, 10, 10], 'ABCD'):
    s3.column_dimensions[c].width = w
# ════ Sheet 4: country × category ════
s4 = wb.create_sheet('国别×分类')
s4.merge_cells('A1:D1'); s4.cell(1, 1, '国别×分类分布').font = TITLE_F; s4.cell(1, 1).border = GOLD_BD
# Cross-tab of plan counts: one row per country, one column per category;
# combinations with no plans are filled with 0.
ct = m.groupby(['所属国别', '是否超规']).size().unstack(fill_value=0)
# Append a per-country total column, then a per-column total row (whose last
# cell is therefore the grand total).
ct['合计'] = ct.sum(1); ct.loc['合计'] = ct.sum()
hdr_row(s4, 3, ['国别'] + list(ct.columns))
# Data rows start on row 4: the row label first, then the counts cast to int.
for r, (idx, row) in enumerate(ct.iterrows()):
for c, v in enumerate([idx] + [int(x) for x in row]):
cell = s4.cell(r+4, c+1, v); cell.font = DATA_F; cell.border = BORDER
s4.column_dimensions['A'].width = 25
# ════ Sheet 5: 审批进度 & 预警 ════
s5 = wb.create_sheet('审批进度')
s5.merge_cells('A1:D1'); s5.cell(1, 1, '审批进度 & 三色预警').font = TITLE_F; s5.cell(1, 1).border = GOLD_BD
hdr_row(s5, 3, ['指标', '数值', '占比', '备注'])
completed = (m['简化状态'] == '已完成').sum(); unfinished = tot - completed
rn = (m['预警'] != '').sum(); orn = (m['预警'] == '🟠').sum(); ye = (m['预警'] == '🟡').sum()
rows = [
('方案总数', tot, '100%', '≥2026年开工·排除已作废'),
('已完成审批', completed, f'{completed/tot*100:.0f}%', ''), ('已完成审批', completed, f'{completed/tot*100:.0f}%', ''),
('未完成审批', unfinished, f'{unfinished/tot*100:.0f}%', '含审批中+未审批'), ('未完成审批', unfinished, f'{unfinished/tot*100:.0f}%', '含审批中+未审批'),
('🟠 橙色预警', orn, f'{orn/tot*100:.0f}%', '≤30天未审批'), ('🟠 橙色预警', orn, f'{orn/tot*100:.0f}%', '≤30天未审批'),
('🟡 黄色预警', ye, f'{ye/tot*100:.0f}%', '≤45天未审批'), ('🟡 黄色预警', ye, f'{ye/tot*100:.0f}%', '≤45天未审批'),
('预警合计',rn,f'{rn/tot*100:.0f}%','🟠2项+🟡4项'),] ('预警合计', rn, f'{rn/tot*100:.0f}%', f'🟠{orn}项+🟡{ye}'),
]
for r, (lab, val, pct, note) in enumerate(rows): for r, (lab, val, pct, note) in enumerate(rows):
fmts = [DATA_F, DATA_F, DATA_F, GRAY_F] fmts = [DATA_F, DATA_F, DATA_F, GRAY_F]
if '完成' in lab: fmts=[GREEN_F,BOLD_F,BOLD_F,GRAY_F] if '完成' in lab and '' not in lab: fmts = [GREEN_F, BOLD_F, BOLD_F, GRAY_F]
if '预警' in lab: fmts = [RED_F, BOLD_F, BOLD_F, GRAY_F] if '预警' in lab: fmts = [RED_F, BOLD_F, BOLD_F, GRAY_F]
if '总数' in lab: fmts = [BLUE_F, BOLD_F, BOLD_F, GRAY_F] if '总数' in lab: fmts = [BLUE_F, BOLD_F, BOLD_F, GRAY_F]
data_row(s4,r+4,[lab,val,pct,note],fmts) for c, (v, f) in enumerate(zip([lab, val, pct, note], fmts)):
s4.column_dimensions['A'].width=15; s4.column_dimensions['B'].width=10 cell = s5.cell(r+4, c+1, v); cell.font = f; cell.border = BORDER
s4.column_dimensions['C'].width=10; s4.column_dimensions['D'].width=35 for w, c in zip([18, 10, 10, 35], 'ABCD'): s5.column_dimensions[c].width = w
# ════ Sheet 5: 预警明细 ════ # ════ Sheet 6: 预警明细 ════
s5=wb.create_sheet('预警明细') s6 = wb.create_sheet('预警明细')
s5.merge_cells('A1:G1'); s5.cell(1,1,'三色预警明细共6项').font=TITLE_F; s5.cell(1,1).border=GOLD_LINE s6.merge_cells('A1:G1'); s6.cell(1, 1, f'三色预警明细(共{rn}项)').font = TITLE_F; s6.cell(1, 1).border = GOLD_BD
hdr_row(s5,3,['信号','类型','项目名称','方案名称','状态','计划开工','距开工']) hdr_row(s6, 3, ['信号', '类型', '项目名称', '方案名称', '状态', '计划开工', '距开工'])
warned=m[m['w']!=''].sort_values('距开工天') warned = m[m['预警'] != ''].sort_values('距开工天')
for r, (_, row) in enumerate(warned.iterrows()): for r, (_, row) in enumerate(warned.iterrows()):
is_w = '未审批' in str(row['方案状态_clean']) is_w = '未审批' in str(row['方案状态_clean'])
bg = WARN_BG if is_w else None bg = WARN_BG if is_w else None
vals=[row['w'],row['是否超规'],row['项目名称'],row['方案名称'],row['方案状态_clean'], vals = [row['预警'], row['是否超规'], row['项目名称'], row['方案名称'],
str(row['分部分项工程计划开工日期'])[:10],f"{int(row['距开工天'])}"] row['方案状态_clean'], str(row['分部分项工程计划开工日期'])[:10], f"{int(row['距开工天'])}"]
fmts=[DATA_F]*7; fmts[6]=RED_F for c, v in enumerate(vals):
for c,(v,f) in enumerate(zip(vals,fmts)): cell = s6.cell(r+4, c+1, v); cell.font = RED_F if c == 6 else DATA_F; cell.border = BORDER
cell=s5.cell(r+4,c+1,v); cell.font=f; cell.border=BORDER
if bg: cell.fill = bg if bg: cell.fill = bg
s5.auto_filter.ref=f'A3:G{len(warned)+3}' s6.auto_filter.ref = f'A3:G{len(warned)+3}'
s5.column_dimensions['A'].width=6; s5.column_dimensions['B'].width=8 for w, c in zip([6, 8, 40, 35, 18, 12, 8], 'ABCDEFG'): s6.column_dimensions[c].width = w
s5.column_dimensions['C'].width=40; s5.column_dimensions['D'].width=35
s5.column_dimensions['E'].width=18; s5.column_dimensions['F'].width=12
s5.column_dimensions['G'].width=8
wb.save(OUT) wb.save(OUT)
print(f"{OUT}") print(f"{OUT}")
print(f" 📊 数据源(52行) → 汇总表4个 + 预警明细 + 透视表创建说明") print(f" Sheet1: 源表清洗后 ({len(raw_clean)}× {len(raw_clean.columns)}列)")
print(f" Sheet2: 有效≥2026 ({len(raw_2026)}行)")
print(f" Sheet3-6: 汇总表 + 预警明细")