210 lines
8.2 KiB
Python
210 lines
8.2 KiB
Python
#!/usr/bin/env python3
|
|
"""News Minimalist RSS Server — serves RSS/HTML from scraped cache"""
|
|
import http.server
|
|
import socketserver
|
|
import json
|
|
import os
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from collections import defaultdict
|
|
|
|
# TCP port the HTTP server listens on (passed to the TCPServer at startup).
PORT = 1202
|
|
# Path of the JSON cache file: parsed by load_cache() and stat'ed by the
# health endpoint to report the cache age.
CACHE_FILE = '/root/news_cache.json'
|
|
|
|
|
|
def load_cache():
    """Load the scraped news cache from ``CACHE_FILE``.

    Returns:
        The decoded JSON object when the file can be read, parses as JSON and
        holds an object at top level. Otherwise an empty placeholder,
        ``{'news': [], 'date': 'unknown', 'count': 0}``, so callers can always
        use ``.get()`` on the result.
    """
    fallback = {'news': [], 'date': 'unknown', 'count': 0}
    try:
        with open(CACHE_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError, RecursionError):
        # OSError: file missing or unreadable.
        # ValueError: invalid JSON or invalid UTF-8 (JSONDecodeError and
        # UnicodeDecodeError are both ValueError subclasses).
        # RecursionError: pathologically nested JSON.
        return fallback
    # Valid JSON is not necessarily an object; a top-level list or scalar
    # would make every caller fail on ``data.get(...)``.
    if not isinstance(data, dict):
        return fallback
    return data
|
|
|
|
|
|
def generate_rss(data):
    """Build an RSS 2.0 feed (with an Atom self-link) from the news cache.

    Args:
        data: Cache dict. Reads ``news`` (a list of article dicts with
            optional ``title``, ``title_zh``, ``link``, ``score``, ``source``
            and ``summary`` keys), ``category``, ``score_range`` and
            ``updated``. Missing or null values fall back to defaults.

    Returns:
        The feed as a ``str``. At most the first 50 articles are included.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def esc(value):
        # Cache values are scraped text: a bare "&" or "<" would make the
        # whole feed malformed XML, so every interpolated value is escaped.
        return escape(str(value))

    news = data.get('news') or []
    cat = str(data.get('category') or 'all')
    score_range = data.get('score_range') or '0-10'
    # NOTE(review): written verbatim; RSS 2.0 expects an RFC 822 date here.
    # Confirm the format the scraper stores in ``updated``.
    updated = data.get('updated') or ''

    cat_label = 'All Categories' if cat == 'all' else cat.title()

    items = []
    for n in news[:50]:
        title = n.get('title') or ''
        title_zh = n.get('title_zh') or ''
        # ``or`` (not a .get default) so a null/empty link also falls back.
        link = n.get('link') or BASE_URL
        score = n.get('score')
        source = n.get('source') or ''
        summary = n.get('summary') or ''

        prefix = f'[{score}] ' if score is not None else ''

        # The description is an HTML fragment carried in CDATA. Its dynamic
        # parts are HTML-escaped, which also guarantees the payload can never
        # contain the CDATA terminator "]]>" (every ">" becomes "&gt;").
        desc_parts = []
        if score is not None:
            desc_parts.append(
                f'<p><strong>Significance:</strong> {esc(score)}/10</p>')
        if title_zh:
            desc_parts.append(
                f'<p>🇨🇳 <strong>中文:</strong> {esc(title_zh)}</p>')
        if summary:
            desc_parts.append(
                f'<p><strong>AI Analysis:</strong> {esc(summary)}</p>')
        if source:
            desc_parts.append(f'<p><small>Source: {esc(source)}</small></p>')
        desc = ''.join(desc_parts)

        items.append(
            '    <item>\n'
            f'      <title>{esc(prefix + str(title))}</title>\n'
            f'      <link>{esc(link)}</link>\n'
            f'      <guid isPermaLink="true">{esc(link)}</guid>\n'
            f'      <description><![CDATA[{desc}]]></description>\n'
            f"      <author>{esc(source or 'News Minimalist')}</author>\n"
            '    </item>\n'
        )
    items_xml = ''.join(items)

    return (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">\n'
        '  <channel>\n'
        f'    <title>News Minimalist — {esc(cat_label)} [{esc(score_range)}]</title>\n'
        f'    <link>{esc(BASE_URL)}</link>\n'
        f'    <description>AI-curated significant news. Category: {esc(cat_label)}, '
        f'Score: {esc(score_range)}. Scored 0-10 by Gemini.</description>\n'
        '    <language>en</language>\n'
        f'    <lastBuildDate>{esc(updated)}</lastBuildDate>\n'
        f'    <atom:link href="{esc(BASE_URL)}/rss" rel="self" type="application/rss+xml"/>\n'
        f'{items_xml}'
        '  </channel>\n'
        '</rss>'
    )
|
|
|
|
|
|
def generate_html(data):
    """Render the cached articles as a standalone HTML page.

    Articles are grouped into three sections by score: 6.5 and above, 6.0 up
    to (not including) 6.5, and everything else. The last group holds scored
    articles below 6.0 (including a score of 0) followed by articles whose
    score is missing or not a number.

    Args:
        data: Cache dict. Reads ``news`` (a list of article dicts with
            optional ``title``, ``title_zh``, ``link``, ``score``, ``source``
            and ``summary`` keys), ``date``, ``category`` and ``score_range``.

    Returns:
        The page as a ``str``. A short placeholder page is returned when the
        cache holds no articles.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def esc(value):
        # Cache values are scraped text; escape them so they cannot inject
        # markup or break out of attribute values.
        return escape(str(value))

    news = data.get('news') or []
    cache_date = data.get('date') or 'unknown'
    cat = data.get('category') or 'all'
    score_range = data.get('score_range') or '0-10'

    if not news:
        return (
            '<!DOCTYPE html><html><body style="text-align:center;padding:100px;font-family:sans-serif">\n'
            '<h1>📭 No articles cached yet</h1>\n'
            '<p>Cache will be populated on next scrape cycle.</p>\n'
            '</body></html>'
        )

    # Group by score tier. Test the type explicitly rather than truthiness:
    # a legitimate score of 0 is falsy and must not be dropped.
    hot, notable, low, unscored = [], [], [], []
    for n in news:
        score = n.get('score')
        if isinstance(score, bool) or not isinstance(score, (int, float)):
            unscored.append(n)
        elif score >= 6.5:
            hot.append(n)
        elif score >= 6.0:
            notable.append(n)
        else:
            low.append(n)
    rest = low + unscored

    def safe_href(link):
        # Only plain web or site-relative links are emitted; anything else
        # (e.g. a "javascript:" URL from scraped data) becomes an inert "#".
        link = str(link or '')
        if link.startswith(('http://', 'https://', '/')):
            return esc(link)
        return '#'

    def render_items(items, color, badge):
        chunks = []
        for n in items:
            score = n.get('score')
            title = n.get('title') or ''
            title_zh = n.get('title_zh') or ''
            source = n.get('source') or ''
            summary = n.get('summary') or ''

            # Prefer the Chinese title; show the original underneath it.
            display_title = title_zh or title
            badge_text = badge if score is None else f'{badge} {esc(score)}'
            subtitle_html = (
                f'<div class="title-en">{esc(title)}</div>' if title_zh else '')
            summary_html = (
                f'<p class="summary">{esc(summary)}</p>' if summary else '')

            chunks.append(
                '<div class="item">\n'
                f'  <div class="score" style="background:{color}">{badge_text}</div>\n'
                '  <div class="content">\n'
                # rel="noopener noreferrer": a page opened via target=_blank
                # must not get a handle on this window.
                f'    <a class="title" href="{safe_href(n.get("link"))}" target="_blank" '
                f'rel="noopener noreferrer">{esc(display_title)}</a>\n'
                f'    {subtitle_html}\n'
                f'    {summary_html}\n'
                f"    <span class=\"source\">{esc(source or 'newsminimalist.com')}</span>\n"
                '  </div>\n'
                '</div>'
            )
        return ''.join(chunks)

    sections = []
    if hot:
        sections.append('<h2>🔥 Trending (6.5+)</h2>' + render_items(hot, '#ef4444', '🔥'))
    if notable:
        sections.append('<h2>⭐ Notable (6.0-6.4)</h2>' + render_items(notable, '#3b82f6', '⭐'))
    if rest:
        sections.append('<h2>📰 All Articles</h2>' + render_items(rest, '#22c55e', '📰'))
    body = ''.join(sections)

    # Static stylesheet kept out of the f-string so braces need no doubling.
    css = (
        '* { margin: 0; padding: 0; box-sizing: border-box; }\n'
        'body { font-family: -apple-system, BlinkMacSystemFont, sans-serif; background: #f8fafc; color: #1e293b; }\n'
        '.header { background: linear-gradient(135deg, #1e293b, #334155); color: white; padding: 32px 20px; text-align: center; }\n'
        '.header h1 { font-size: 2rem; }\n'
        '.header .meta { margin-top: 12px; opacity: 0.8; font-size: 0.9rem; }\n'
        '.header .rss-btn { display: inline-block; margin-top: 12px; background: #f97316; color: white; padding: 8px 20px; border-radius: 20px; text-decoration: none; font-weight: 600; }\n'
        '.container { max-width: 800px; margin: 0 auto; padding: 24px 16px; }\n'
        'h2 { font-size: 1.3rem; margin: 24px 0 12px 0; color: #475569; }\n'
        '.item { display: flex; gap: 16px; background: white; border-radius: 12px; padding: 16px; margin-bottom: 12px; box-shadow: 0 1px 3px rgba(0,0,0,0.08); }\n'
        '.score { min-width: 60px; height: 60px; display: flex; align-items: center; justify-content: center; border-radius: 10px; color: white; font-weight: bold; font-size: 0.95rem; flex-shrink: 0; }\n'
        '.content { flex: 1; }\n'
        '.title { font-size: 1.05rem; color: #1e293b; text-decoration: none; line-height: 1.5; }\n'
        '.title:hover { color: #3b82f6; }\n'
        '.title-en { font-size: 0.8rem; color: #94a3b8; margin-top: 4px; }\n'
        '.summary { color: #64748b; font-size: 0.85rem; margin-top: 6px; line-height: 1.5; }\n'
        '.source { color: #94a3b8; font-size: 0.75rem; margin-top: 8px; display: block; }\n'
    )

    return (
        '<!DOCTYPE html>\n'
        '<html lang="en">\n'
        '<head>\n'
        '<meta charset="UTF-8">\n'
        '<meta name="viewport" content="width=device-width, initial-scale=1.0">\n'
        '<title>News Minimalist — RSS Feed</title>\n'
        f'<style>\n{css}</style>\n'
        '</head>\n'
        '<body>\n'
        '<div class="header">\n'
        '<h1>🤖 News Minimalist</h1>\n'
        f'<div class="meta">AI-curated news · {esc(cat)} · Score {esc(score_range)} · Cache: {esc(cache_date)}</div>\n'
        f'<div class="meta">{len(news)} articles</div>\n'
        '<a href="/rss" class="rss-btn">📡 RSS Feed</a>\n'
        '</div>\n'
        '<div class="container">\n'
        f'{body}\n'
        '</div>\n'
        '</body>\n'
        '</html>'
    )
|
|
|
|
|
|
# Site URL used by generate_rss() for the channel link, the Atom self-link and
# as the fallback link of items that carry none.
# NOTE(review): defined below generate_rss(); this works only because the name
# is resolved when the function is called, not when it is defined.
BASE_URL = 'https://www.newsminimalist.com'
|
|
|
|
|
|
class Handler(http.server.SimpleHTTPRequestHandler):
    """HTTP handler serving the cached news as JSON health info, RSS or HTML.

    Routes (query string and fragment are ignored when matching):
        ``/health``, ``/ping``        JSON status with cache age and count.
        ``/rss...``, ``/feed``        RSS feed built by ``generate_rss``.
        ``/``, ``/home``, ``/index.html``  HTML page built by ``generate_html``.
        anything else                 404.
    """

    def do_GET(self):
        """Answer a GET request with headers and body."""
        self._respond(include_body=True)

    def do_HEAD(self):
        """Answer a HEAD request with the same headers as GET, without a body.

        Overridden on purpose: the inherited SimpleHTTPRequestHandler.do_HEAD
        would resolve the path against the working directory and reveal
        whether files exist there.
        """
        self._respond(include_body=False)

    def _route(self):
        """Map ``self.path`` to ``(status, content_type, body, extra_headers)``."""
        # Match on the path only so "/?utm=x" or "/health?probe=1" still hit
        # their route instead of falling through to 404.
        path = self.path.split('?', 1)[0].split('#', 1)[0]

        if path in ('/health', '/ping'):
            data = load_cache()
            try:
                age = time.time() - os.path.getmtime(CACHE_FILE)
            except OSError:
                # Missing or unreadable cache file: report a sentinel age.
                # (EAFP also avoids the exists()/getmtime() race.)
                age = 99999
            payload = json.dumps({
                'status': 'ok',
                'cache_age_hours': round(age / 3600, 1),
                'article_count': data.get('count', 0),
                'date': data.get('date', 'unknown'),
            }).encode()
            return 200, 'application/json', payload, ()

        if path.startswith('/rss') or path == '/feed':
            body = generate_rss(load_cache()).encode('utf-8')
            extra = (
                ('Cache-Control', 'public, max-age=14400'),
                ('Access-Control-Allow-Origin', '*'),
            )
            return 200, 'application/rss+xml; charset=utf-8', body, extra

        if path in ('/', '/home', '/index.html'):
            body = generate_html(load_cache()).encode('utf-8')
            return 200, 'text/html; charset=utf-8', body, ()

        return 404, 'text/plain; charset=utf-8', b'Not Found', ()

    def _respond(self, include_body):
        """Resolve the route and write the response to the client."""
        try:
            status, content_type, body, extra = self._route()
        except Exception as exc:
            # Request boundary: a malformed cache entry should produce a 500
            # for this request, not a dropped connection.
            self.log_error('failed to render %s: %r', self.path, exc)
            status = 500
            content_type = 'text/plain; charset=utf-8'
            body = b'Internal Server Error'
            extra = ()

        self.send_response(status)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(body)))
        for name, value in extra:
            self.send_header(name, value)
        self.end_headers()
        if include_body:
            self.wfile.write(body)
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: serve requests on PORT until interrupted.

    class _ReusableTCPServer(socketserver.TCPServer):
        # Set SO_REUSEADDR so a quick restart does not fail with
        # "Address already in use" while the old socket is still closing.
        allow_reuse_address = True

    print(f'News Minimalist RSS on :{PORT} — scraping newsminimalist.com')
    with _ReusableTCPServer(('', PORT), Handler) as httpd:
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop; exit without a traceback.
            pass
|