refactor: 工作簿v2 — 数据源+汇总表+透视表创建说明

This commit is contained in:
大师 2026-06-09 03:05:07 +08:00
parent 3ac09fa635
commit 4634c99ee6
2 changed files with 128 additions and 244 deletions

View File

@ -1,267 +1,151 @@
#!/usr/bin/env python3
"""生成危大方案看板数据工作簿 — 清洗数据 + 多数据透视表"""
"""危大方案看板数据工作簿 v2 — 数据源+汇总表 (Excel原生透视表需手动创建含说明)"""
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side, numbers
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from datetime import datetime, date
BASE = "/mnt/y/Openclaw_Hub/03.资源/实施项目 wiki/dashboard/data/2026-06-08/cleaned"
OUT = f"{BASE}/危大方案看板数据工作簿.xlsx"
# ── 读取清洗数据 ──
# ── Data ──
df = pd.read_parquet(f"{BASE}/methods_cleaned.parquet")
df['开工年份'] = pd.to_datetime(df['分部分项工程计划开工日期'], errors='coerce').dt.year
# 过滤 ≥2026
m = df[(df['是否有效登记'] == True) & (df['开工年份'] >= 2026)].copy()
# 简化状态
def simple_status(s):
    """Collapse a raw plan status into '已完成' or '未完成'.

    The value is stringified first, so non-string inputs (None, NaN, ...)
    are classified as '未完成' unless their text contains '已完成'.
    """
    text = str(s)
    return '已完成' if '已完成' in text else '未完成'
m['简化状态'] = m['方案状态_clean'].apply(simple_status)
# Keep only valid registrations whose planned start year is 2026 or later.
# Rows with an unparseable start date have a NaN year and fail the >= test.
m = df[(df['是否有效登记']==True)&(df['开工年份']>=2026)].copy()
# Collapse the raw status: any text containing '已完成' counts as done.
m['简化状态'] = m['方案状态_clean'].apply(lambda s: '已完成' if '已完成' in str(s) else '未完成')
# NOTE(review): the marker compared against is an empty string, so only rows
# whose stringified value is exactly '' become '超规类'; everything else,
# including stringified missing values, falls into '一般类'. Confirm the
# intended marker value — it may have been lost from this literal.
m['是否超规'] = m['是否超一定规模'].astype(str).apply(lambda x: '超规类' if x == '' else '一般类')
# The filter above removed NaN years, so the integer cast cannot fail on them.
m['开工年份'] = m['开工年份'].astype(int)
# Fixed reference date used as "today" for the days-until-start column.
today=pd.Timestamp('2026-06-08')
# Whole days from the reference date to the planned start (negative = past).
m['距开工天'] = (pd.to_datetime(m['分部分项工程计划开工日期'])-today).dt.days.astype(int)
# ── 样式 ──
HEADER_FONT = Font(name='微软雅黑', bold=True, size=11, color='FFFFFF')
HEADER_FILL = PatternFill(start_color='1A3A5C', end_color='1A3A5C', fill_type='solid')
TITLE_FONT = Font(name='微软雅黑', bold=True, size=14, color='1A3A5C')
GOLD_FILL = PatternFill(start_color='FFF3E0', end_color='FFF3E0', fill_type='solid')
THIN_BORDER = Border(
left=Side(style='thin', color='DBE2EA'),
right=Side(style='thin', color='DBE2EA'),
top=Side(style='thin', color='DBE2EA'),
bottom=Side(style='thin', color='DBE2EA'),
)
# ── Styles ──
# Shared openpyxl style objects reused by the sheet-building code below.
# Header cells: font and solid background fill.
HDR_F=Font(name='微软雅黑',bold=True,size=10,color='FFFFFF')
HDR_BG=PatternFill('solid',fgColor='1A3A5C')
# Sheet title and sub-title fonts.
TITLE_F=Font(name='微软雅黑',bold=True,size=14,color='1A3A5C')
SUB_F=Font(name='微软雅黑',bold=True,size=12,color='1A3A5C')
# Medium bottom border applied to the title cell of the summary sheets.
GOLD_LINE=Border(bottom=Side(style='medium',color='C8962E'))
# Background fill used to highlight rows on the warning-detail sheet.
WARN_BG=PatternFill('solid',fgColor='FFF3E0')
# Body fonts: small grey (notes), regular, bold, and coloured bold variants.
GRAY_F=Font(name='微软雅黑',size=9,color='8899AA')
DATA_F=Font(name='微软雅黑',size=10)
BOLD_F=Font(name='微软雅黑',bold=True,size=10)
RED_F=Font(name='微软雅黑',bold=True,size=10,color='D94E34')
GREEN_F=Font(name='微软雅黑',bold=True,size=10,color='2E7D32')
BLUE_F=Font(name='微软雅黑',bold=True,size=10,color='1A3A5C')
# Thin four-sided border and centred alignment shared by table cells.
BORDER=Border(left=Side('thin','DBE2EA'),right=Side('thin','DBE2EA'),top=Side('thin','DBE2EA'),bottom=Side('thin','DBE2EA'))
CENTER=Alignment(horizontal='center',vertical='center')
def style_header(ws, row, ncols):
    """Apply the header look to columns 1..ncols of ``row`` in ``ws``.

    Every cell receives the header font and fill, centred alignment and the
    thin border. Cell values are left untouched.
    """
    centred = Alignment(horizontal='center', vertical='center')
    header_cells = (ws.cell(row=row, column=idx) for idx in range(1, ncols + 1))
    for cell in header_cells:
        cell.font = HEADER_FONT
        cell.fill = HEADER_FILL
        cell.alignment = centred
        cell.border = THIN_BORDER
def hdr_row(ws, r, cols):
    """Write the labels in ``cols`` into row ``r`` of ``ws`` as a styled header.

    Labels are placed from column 1 onwards; each cell gets the header font,
    fill, border and centred alignment.
    """
    for col_idx, label in enumerate(cols, start=1):
        cell = ws.cell(r, col_idx, label)
        cell.font = HDR_F
        cell.fill = HDR_BG
        cell.border = BORDER
        cell.alignment = CENTER
def style_data(ws, start_row, end_row, ncols):
    """Give body cells a thin border and vertical centring.

    Covers rows ``start_row``..``end_row`` (inclusive) and columns 1..ncols
    of ``ws``.
    """
    middle = Alignment(vertical='center')
    for row_idx in range(start_row, end_row + 1):
        for col_idx in range(1, ncols + 1):
            cell = ws.cell(row=row_idx, column=col_idx)
            cell.alignment = middle
            cell.border = THIN_BORDER
def data_row(ws, r, vals, fmts=None):
    """Write ``vals`` into row ``r`` of ``ws`` starting at column 1.

    Each cell gets the shared border. When ``fmts`` is given (and non-empty)
    the font for position ``i`` is ``fmts[i]``; otherwise the default data
    font is used.
    """
    for offset, value in enumerate(vals):
        cell = ws.cell(r, offset + 1, value)
        cell.border = BORDER
        if fmts:
            cell.font = fmts[offset]
        else:
            cell.font = DATA_F
def auto_width(ws, ncols, min_w=10, max_w=40):
    """Set the width of columns 1..ncols in ``ws`` to ``min_w``.

    ``max_w`` is accepted but not used: no content-based sizing happens here.
    """
    for letter in map(get_column_letter, range(1, ncols + 1)):
        ws.column_dimensions[letter].width = min_w
wb=Workbook()
wb = Workbook()
# ════ Sheet 0: pivot-table how-to ════
# The workbook's default sheet becomes a short guide for building native
# Excel pivot tables from the data-source sheet.
s0=wb.active
s0.title='透视表说明'
s0.merge_cells('A1:D1')
s0.cell(1,1,'🗂️ 如何创建 Excel 原生透视表').font=TITLE_F
tips=[
    ('1', '点击下方「数据源」工作表'),
    ('2', '选中任意单元格 → 插入 → 数据透视表'),
    ('3', '拖动字段:行=国别/类型, 值=计数 即可'),
    ('', ''),
    ('示例透视表:',''),
    ('年度认定', '行:是否超规 → 值:方案名称(计数)、项目名称(去重)'),
    ('国别×分类', '行:所属国别 → 列:是否超规 → 值:方案名称'),
    ('审批进度', '行:简化状态 → 值:方案名称'),
    ('预警明细', '筛选预警信号≠空白 → 按距开工天排序'),
]
# Guide rows start at row 3: label in column A, explanation in column B.
for row_no,(label,text) in enumerate(tips,start=3):
    s0.cell(row_no,1,label).font=BOLD_F
    s0.cell(row_no,2,text).font=DATA_F
s0.column_dimensions['A'].width=15
s0.column_dimensions['B'].width=55
# ═══════════════════════════════════════
# Sheet 1: 清洗后数据
# ═══════════════════════════════════════
ws1 = wb.active
ws1.title = "清洗后数据"
# ════ Sheet 1: data source ════
# Flat, filterable table that the native Excel pivot tables are built from.
s1=wb.create_sheet('数据源')
cols=['项目名称','方案名称','所属国别','是否超规','方案状态_clean','简化状态','分部分项工程计划开工日期','开工年份','距开工天']
# Reuse the shared header helper instead of restyling each header cell inline.
hdr_row(s1,1,cols)
# Data starts on row 2, directly below the header.
for r,(_,row) in enumerate(m[cols].iterrows()):
    for c,col in enumerate(cols):
        v=row[col]
        if pd.isna(v):
            v=''  # write a blank cell rather than NaN/NaT
        elif isinstance(v,pd.Timestamp):
            v=str(v)[:10]  # keep only the YYYY-MM-DD part
        cell=s1.cell(r+2,c+1,v); cell.font=DATA_F; cell.border=BORDER
# Filter range spans the header plus every data row.
s1.auto_filter.ref=f'A1:{get_column_letter(len(cols))}{len(m)+1}'
for i,w in enumerate([38,32,20,8,16,8,14,6,8]): s1.column_dimensions[get_column_letter(i+1)].width=w
# Keep the header row visible while scrolling.
s1.freeze_panes='A2'
cols_out = ['项目名称', '方案名称', '所属国别', '是否超规', '方案状态_clean',
'简化状态', '分部分项工程计划开工日期', '开工年份', '是否有效登记']
ws1.append(cols_out)
style_header(ws1, 1, len(cols_out))
# ════ Sheet 2: annual designation summary ════
# Plan and project counts per category, plus a totals row.
s2=wb.create_sheet('年度认定汇总')
s2.merge_cells('A1:D1'); s2.cell(1,1,'年度认定≥2026开工').font=TITLE_F; s2.cell(1,1).border=GOLD_LINE
# hdr_row writes the header labels itself, so no separate value writes are needed.
hdr_row(s2,3,['分类','方案数','项目数','占比'])
tot=len(m)
for r,cat in enumerate(['一般类','超规类']):
    sub=m[m['是否超规']==cat]
    cnt=len(sub); proj=sub['项目名称'].nunique()
    # Guard the share against an empty selection (tot == 0).
    share=f'{cnt/tot*100:.0f}%' if tot else '0%'
    data_row(s2,r+4,[cat,cnt,proj,share])
# Totals row sits directly below the two category rows.
data_row(s2,6,['合计',tot,m['项目名称'].nunique(),'100%'],[BOLD_F]*4)
s2.column_dimensions['A'].width=12
for c in 'BCD': s2.column_dimensions[c].width=10
for _, row in m[cols_out].iterrows():
ws1.append([str(v) if not isinstance(v, (int, float)) or pd.isna(v) else v for v in row])
# ════ Sheet 3: country × category ════
# Cross-tab of plan counts by country and category, with row/column totals.
s3=wb.create_sheet('国别×分类')
s3.merge_cells('A1:D1')
s3.cell(1,1,'国别×分类分布').font=TITLE_F
s3.cell(1,1).border=GOLD_LINE
ct=m.groupby(['所属国别','是否超规']).size().unstack(fill_value=0)
# Per-country total column first, then a grand-total row over all columns.
ct['合计']=ct.sum(axis=1)
ct.loc['合计']=ct.sum(axis=0)
hdr_row(s3,3,['国别',*ct.columns])
for row_no,(country,counts) in enumerate(ct.iterrows(),start=4):
    data_row(s3,row_no,[country,*map(int,counts)])
s3.column_dimensions['A'].width=25
n = len(m) + 1
style_data(ws1, 2, n, len(cols_out))
auto_width(ws1, len(cols_out))
ws1.auto_filter.ref = f"A1:{get_column_letter(len(cols_out))}{n}"
# ════ Sheet 4: approval progress ════
# Creates the sheet, its title and header row, then counts completed plans.
s4=wb.create_sheet('审批进度')
s4.merge_cells('A1:D1'); s4.cell(1,1,'审批进度 & 三色预警').font=TITLE_F; s4.cell(1,1).border=GOLD_LINE
hdr_row(s4,3,['指标','数值','占比','备注'])
# `tot` is the plan count computed for the annual summary sheet; whatever is
# not '已完成' is treated as unfinished.
completed=(m['简化状态']=='已完成').sum(); unfinished=tot-completed
# Warning
def warn_lev(d, s):
    """Return the warning emoji for a plan, or '' when no warning applies.

    ``d`` is the number of days until the planned start (coerced with
    ``int``); ``s`` is the plan status (stringified). Only statuses that
    contain '未实施' or '审批中' can warn: '🟠' when ``d`` <= 30, '🟡' when
    ``d`` <= 45.
    """
    status = str(s)
    days = int(d)
    pending = '未实施' in status or '审批中' in status
    if not pending:
        return ''
    # Thresholds are checked from the tightest to the loosest.
    for limit, emoji in ((30, '🟠'), (45, '🟡')):
        if days <= limit:
            return emoji
    return ''
# Tag every plan with its warning emoji ('' means no warning).
m['w']=m.apply(lambda r:warn_lev(r['距开工天'],r['方案状态_clean']),axis=1)
# rn = all warnings, orn = orange, ye = yellow.
rn=(m['w']!='').sum(); orn=(m['w']=='🟠').sum(); ye=(m['w']=='🟡').sum()
# (label, value, share, note) rows; the last note is derived from the computed
# counts so it cannot drift from the data.
rows=[('方案总数',tot,'100%','≥2026年开工·排除已作废'),
      ('已完成审批',completed,f'{completed/tot*100:.0f}%',''),
      ('未完成审批',unfinished,f'{unfinished/tot*100:.0f}%','含审批中+未审批'),
      ('🟠 橙色预警',orn,f'{orn/tot*100:.0f}%','≤30天未审批'),
      ('🟡 黄色预警',ye,f'{ye/tot*100:.0f}%','≤45天未审批'),
      ('预警合计',rn,f'{rn/tot*100:.0f}%',f'🟠{orn}项+🟡{ye}项'),]
for r,(lab,val,pct,note) in enumerate(rows):
    fmts=[DATA_F,DATA_F,DATA_F,GRAY_F]
    # NOTE(review): '完成' also matches '未完成审批', so the unfinished row is
    # rendered with the green font too — confirm this is intended.
    if '完成' in lab: fmts=[GREEN_F,BOLD_F,BOLD_F,GRAY_F]
    if '预警' in lab: fmts=[RED_F,BOLD_F,BOLD_F,GRAY_F]
    if '总数' in lab: fmts=[BLUE_F,BOLD_F,BOLD_F,GRAY_F]
    data_row(s4,r+4,[lab,val,pct,note],fmts)
s4.column_dimensions['A'].width=15; s4.column_dimensions['B'].width=10
s4.column_dimensions['C'].width=10; s4.column_dimensions['D'].width=35
# ═══════════════════════════════════════
# Sheet 2: 年度认定透视
# ═══════════════════════════════════════
ws2 = wb.create_sheet("年度认定透视")
# ════ Sheet 5: warning details ════
# One row per warned plan, ordered by days until planned start.
s5=wb.create_sheet('预警明细')
# Select the warned plans first so the title reports the real count.
warned=m[m['w']!=''].sort_values('距开工天')
s5.merge_cells('A1:G1'); s5.cell(1,1,f'三色预警明细共{len(warned)}项').font=TITLE_F; s5.cell(1,1).border=GOLD_LINE
hdr_row(s5,3,['信号','类型','项目名称','方案名称','状态','计划开工','距开工'])
for r,(_,row) in enumerate(warned.iterrows()):
    # NOTE(review): warnings are only raised for statuses containing '未实施'
    # or '审批中'; confirm '未审批' can occur in those rows, otherwise this
    # highlight never fires.
    is_w='未审批' in str(row['方案状态_clean'])
    bg=WARN_BG if is_w else None
    vals=[row['w'],row['是否超规'],row['项目名称'],row['方案名称'],row['方案状态_clean'],
          str(row['分部分项工程计划开工日期'])[:10],f"{int(row['距开工天'])}"]
    # The days-until-start column (last one) is emphasised in red.
    fmts=[DATA_F]*7; fmts[6]=RED_F
    for c,(v,f) in enumerate(zip(vals,fmts)):
        cell=s5.cell(r+4,c+1,v); cell.font=f; cell.border=BORDER
        if bg: cell.fill=bg
# Filter range covers the header (row 3) plus all detail rows.
s5.auto_filter.ref=f'A3:G{len(warned)+3}'
s5.column_dimensions['A'].width=6; s5.column_dimensions['B'].width=8
s5.column_dimensions['C'].width=40; s5.column_dimensions['D'].width=35
s5.column_dimensions['E'].width=18; s5.column_dimensions['F'].width=12
s5.column_dimensions['G'].width=8
# 需要读取认定表来对比这里用OA数据近似
# 认定 = 已在OA且被认定覆盖的项目
pvt_type = m.groupby('是否超规').agg(方案数=('方案名称', 'count'), 项目数=('项目名称', 'nunique'))
pvt_type.loc['合计'] = pvt_type.sum()
ws2.append(['年度认定中港科技便20266号'])
ws2.cell(row=1, column=1).font = TITLE_FONT
ws2.merge_cells('A1:C1')
ws2.append([])
ws2.append(['分类', '方案数量', '覆盖项目数'])
style_header(ws2, 3, 3)
for idx, r in pvt_type.iterrows():
ws2.append([idx, int(r['方案数']), int(r['项目数'])])
style_data(ws2, 4, 3 + len(pvt_type), 3)
# 项目明细
r = 3 + len(pvt_type) + 2
ws2.cell(row=r, column=1, value='覆盖项目明细').font = Font(name='微软雅黑', bold=True, size=12, color='1A3A5C')
r += 1
proj_detail = m.groupby('项目名称').agg(一般=('是否超规', lambda x: (x == '一般类').sum()),
超规=('是否超规', lambda x: (x == '超规类').sum()),
合计=('方案名称', 'count'))
ws2.append(['项目名称', '一般类', '超规类', '合计'])
style_header(ws2, r, 4)
r += 1
for idx, row in proj_detail.iterrows():
ws2.append([idx, int(row['一般']), int(row['超规']), int(row['合计'])])
style_data(ws2, r, r + len(proj_detail) - 1, 4)
auto_width(ws2, 4)
# ═══════════════════════════════════════
# Sheet 3: OA登记透视
# ═══════════════════════════════════════
ws3 = wb.create_sheet("OA登记透视")
ws3.append(['OA有效登记排除已作废'])
ws3.cell(row=1, column=1).font = TITLE_FONT
ws3.merge_cells('A1:D1')
ws3.append([])
ws3.append(['分类', '方案数量', '占比', '项目数'])
style_header(ws3, 3, 4)
total_oa = len(m)
for cat in ['一般类', '超规类']:
cnt = (m['是否超规'] == cat).sum()
ws3.append([cat, cnt, f'{cnt/total_oa*100:.1f}%', m[m['是否超规']==cat]['项目名称'].nunique()])
ws3.append(['合计', total_oa, '100%', m['项目名称'].nunique()])
style_data(ws3, 4, 7, 4)
# 按国别+分类
r = 9
ws3.cell(row=r, column=1, value='按国别×分类').font = Font(name='微软雅黑', bold=True, size=12, color='1A3A5C')
r += 1
ws3.append(['国别', '一般类', '超规类', '合计'])
style_header(ws3, r, 4)
r += 1
country_x = m.groupby(['所属国别', '是否超规']).size().unstack(fill_value=0)
country_x['合计'] = country_x.sum(axis=1)
for idx, row in country_x.iterrows():
ws3.append([idx, int(row.get('一般类', 0)), int(row.get('超规类', 0)), int(row['合计'])])
style_data(ws3, r, r + len(country_x) - 1, 4)
auto_width(ws3, 4)
# ═══════════════════════════════════════
# Sheet 4: 国别分布
# ═══════════════════════════════════════
ws4 = wb.create_sheet("国别分布")
ws4.append(['国别分布2026+开工)'])
ws4.cell(row=1, column=1).font = TITLE_FONT
ws4.merge_cells('A1:D1')
ws4.append([])
ws4.append(['国别', '方案总数', '一般类', '超规类', '占比'])
style_header(ws4, 3, 5)
country = m.groupby('所属国别').agg(总数=('方案名称', 'count'),
一般=('是否超规', lambda x: (x == '一般类').sum()),
超规=('是否超规', lambda x: (x == '超规类').sum()))
for idx, row in country.iterrows():
ws4.append([idx, int(row['总数']), int(row['一般']), int(row['超规']),
f'{row["总数"]/total_oa*100:.1f}%'])
style_data(ws4, 4, 3 + len(country), 5)
auto_width(ws4, 5)
# ═══════════════════════════════════════
# Sheet 5: 审批进度
# ═══════════════════════════════════════
ws5 = wb.create_sheet("审批进度")
ws5.append(['审批进度分析'])
ws5.cell(row=1, column=1).font = TITLE_FONT
ws5.merge_cells('A1:C1')
ws5.append([])
completed = (m['简化状态'] == '已完成').sum()
unfinished = (m['简化状态'] == '未完成').sum()
ws5.append(['指标', '数值', '备注'])
style_header(ws5, 3, 3)
ws5.append(['方案总数', total_oa, ''])
ws5.append(['已完成审批', completed, f'完成率 {completed/total_oa*100:.0f}%'])
ws5.append(['未完成审批', unfinished, f'占比 {unfinished/total_oa*100:.0f}%'])
style_data(ws5, 4, 6, 3)
# 按状态细分
r = 8
ws5.cell(row=r, column=1, value='按OA状态细分').font = Font(name='微软雅黑', bold=True, size=12, color='1A3A5C')
r += 1
ws5.append(['OA状态', '数量', '占比'])
style_header(ws5, r, 3)
r += 1
status_detail = m['方案状态_clean'].value_counts()
for s, cnt in status_detail.items():
ws5.append([s, cnt, f'{cnt/total_oa*100:.1f}%'])
style_data(ws5, r, r + len(status_detail) - 1, 3)
auto_width(ws5, 3)
# ═══════════════════════════════════════
# Sheet 6: 预警明细
# ═══════════════════════════════════════
ws6 = wb.create_sheet("预警明细")
ws6.append(['三色预警明细2026+开工)'])
ws6.cell(row=1, column=1).font = TITLE_FONT
ws6.merge_cells('A1:F1')
ws6.append([])
# 计算距开工天数
today = pd.Timestamp('2026-06-08')
m['距开工天数'] = (pd.to_datetime(m['分部分项工程计划开工日期']) - today).dt.days
# 预警逻辑
def warn_level(days, status):
    """Return the warning label for a plan, or None when no warning applies.

    Only statuses (stringified) containing '未实施' or '审批中' can warn:
    '🟠 橙色' when ``days`` <= 30, '🟡 黄色' when ``days`` <= 45. ``days`` is
    compared as given, so a NaN value yields None.
    """
    text = str(status)
    pending = '未实施' in text or '审批中' in text
    if not pending:
        return None
    # Thresholds are checked from the tightest to the loosest.
    for limit, label in ((30, '🟠 橙色'), (45, '🟡 黄色')):
        if days <= limit:
            return label
    return None
m['预警'] = m.apply(lambda r: warn_level(r['距开工天数'], r['方案状态_clean']), axis=1)
warned = m[m['预警'].notna()].sort_values('距开工天数')
ws6.append(['信号', '项目名称', '方案名称', 'OA状态', '计划开工', '距开工天数'])
style_header(ws6, 3, 6)
for _, row in warned.iterrows():
ws6.append([row['预警'], row['项目名称'], row['方案名称'],
row['方案状态_clean'],
str(row['分部分项工程计划开工日期'])[:10],
f"{int(row['距开工天数'])}"])
style_data(ws6, 4, 3 + len(warned), 6)
# 汇总
r = 3 + len(warned) + 2
ws6.cell(row=r, column=1, value='预警汇总').font = Font(name='微软雅黑', bold=True, size=12, color='1A3A5C')
r += 1
ws6.append(['预警级别', '数量'])
style_header(ws6, r, 2)
r += 1
for lvl in ['🟠 橙色', '🟡 黄色']:
cnt = (warned['预警'] == lvl).sum()
ws6.append([lvl, cnt])
style_data(ws6, r, r + 1, 2)
auto_width(ws6, 6)
ws6.column_dimensions['B'].width = 38
ws6.column_dimensions['C'].width = 28
# ── 保存 ──
wb.save(OUT)
print(f"✅ 工作簿已生成: {OUT}")
print(f" Sheet 1: 清洗后数据 ({len(m)} 行)")
print(f" Sheet 2: 年度认定透视")
print(f" Sheet 3: OA登记透视")
print(f" Sheet 4: 国别分布")
print(f" Sheet 5: 审批进度")
print(f" Sheet 6: 预警明细 ({len(warned)} 项)")
# Report the output path and the actual number of data-source rows written
# (derived from the filtered frame instead of a hard-coded figure).
print(f"{OUT}")
print(f" 📊 数据源({len(m)}行) → 汇总表4个 + 预警明细 + 透视表创建说明")