-
Notifications
You must be signed in to change notification settings - Fork 92
/
publish.py
134 lines (114 loc) · 4.93 KB
/
publish.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import argparse
import logging
import os
import time
from datetime import datetime
from urllib.parse import urljoin
from feedwerk.atom import AtomFeed
from jinja2 import Environment, FileSystemLoader, filters
import config
import db.translation
from db import image
from hacker_news.algolia_api import get_daily_news
from hacker_news.parser import HackerNewsParser
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Command line interface: a single required positional argument selects which
# page set to build. NOTE: the arguments are parsed at module level, so
# sys.argv is read as soon as this module is loaded.
parser = argparse.ArgumentParser(
    description='Generate hacker news static page',
)
parser.add_argument(
    "page",
    choices=['home', 'daily'],
    help="Specify page to generate (home or daily)",
)
args = parser.parse_args()
def translate(text, lang):
    """Return what ``db.translation.get`` yields for ``text`` in language ``lang``.

    Exposed to templates as the ``translate`` filter.
    """
    translated = db.translation.get(text, lang)
    return translated
def truncate(text):
    """Shorten ``text`` with Jinja's built-in truncate filter.

    The length limit is ``config.summary_size`` and ``' ...'`` is passed as the
    end marker. Exposed to templates as the ``truncate`` filter.
    """
    options = {'length': config.summary_size, 'end': ' ...'}
    return filters.do_truncate(environment, text, **options)
# Shared Jinja environment: templates are loaded from the "templates/"
# directory that sits next to this file, with autoescaping enabled.
environment = Environment(
    autoescape=True,
    loader=FileSystemLoader(
        os.path.join(os.path.dirname(__file__), "templates/"),
    ),
)
# Make the translate/truncate helpers available as template filters and the
# config module available as a template global.
environment.filters.update({"translate": translate, "truncate": truncate})
environment.globals["config"] = config
def gen_frontpage():
    """Build the front page in its 'en' and 'zh' variants, then the feed.

    The news list comes from ``HackerNewsParser.parse_news_list``; every entry
    has ``pull_content()`` called on it before anything is rendered.
    """
    stories = HackerNewsParser().parse_news_list()
    for story in stories:
        story.pull_content()

    # Same story list, one output file per language.
    for page, lang in (('index.html', 'en'), ('zh.html', 'zh')):
        gen_page(stories, page, lang)
    gen_feed(stories)
def gen_daily():
    """Regenerate the per-day pages for the last ``config.updatable_within_days`` days.

    Each day's items are ranked by their position in the list returned by
    ``get_daily_news``, have their content pulled, and are rendered to
    ``daily/<YYYY-MM-DD>/index.html``.
    """
    # NOTE: an earlier revision only refreshed when yesterday's page was
    # missing, or otherwise with a random ~30% chance per run. That throttle is
    # disabled; every invocation now refreshes unconditionally.
    logger.info(f'Will refresh daily page for the past {config.updatable_within_days} days')
    news_by_day = get_daily_news(config.updatable_within_days)
    for date, day_items in news_by_day.items():
        for position, entry in enumerate(day_items):
            entry.rank = position
            entry.pull_content()
        gen_page(day_items, f'daily/{date.strftime("%Y-%m-%d")}/index.html')
# Generate GitHub pages
def gen_page(news_list, path, lang='en'):
    """Render ``hackernews.html`` for ``news_list`` and write it below ``config.output_dir``.

    Args:
        news_list: items handed to the template. When it is empty/falsy nothing
            is written, so a previously generated page is left in place.
        path: output file path relative to ``config.output_dir``, e.g.
            ``'index.html'``, ``'zh.html'`` or ``'daily/<date>/index.html'``.
        lang: language code passed through to the template.
    """
    if not news_list:
        return  # no overwrite

    template = environment.get_template('hackernews.html')
    static_page = os.path.join(config.output_dir, path)
    directory = os.path.dirname(static_page)
    if directory:
        # os.makedirs('') raises, so only create a directory when there is one.
        os.makedirs(directory, exist_ok=True)

    # Public URL path of the page: drop a trailing "index.html" file name only.
    # str.rstrip() must not be used here: it strips any trailing characters
    # from the given *set*, which turned 'zh.html' into 'z'.
    index_name = 'index.html'
    if path == index_name or path.endswith('/' + index_name):
        page_path = path[:-len(index_name)]
    else:
        page_path = path

    start = time.time()
    rendered = template.render(news_list=news_list, last_updated=datetime.utcnow(), lang=lang,
                               path=urljoin(config.site + '/', page_path))
    # Explicit encoding so the output (which may contain non-ASCII text, e.g.
    # the 'zh' page) does not depend on the host's default locale.
    with open(static_page, "w", encoding="utf-8") as fp:
        fp.write(rendered)
    cost = (time.time() - start) * 1000
    logger.info(f'Written {len(rendered)} bytes to {static_page}, cost(ms): {cost:.2f}')
def gen_feed(news_list):
    """Build an Atom feed from ``news_list`` and write it to ``<config.output_dir>/feed.xml``.

    Only items whose score is strictly greater than
    ``config.openai_score_threshold`` are added. Each entry's content is an
    optional image tag, the (possibly truncated) summary, a link to the summary
    anchor on the site and, when available, a link to the comments.

    Args:
        news_list: iterable of news items to consider for the feed.
    """
    start = time.time()
    feed = AtomFeed('Hacker News Summary',
                    updated=datetime.utcnow(),
                    feed_url=f'{config.site}/feed.xml',
                    # Pass the URL string itself; ``{config.site}`` was a set
                    # literal and ended up stringified into the feed link.
                    url=config.site,
                    author={
                        'name': 'polyrabbit',
                        'uri': 'https://github.com/polyrabbit/'}
                    )
    for news in news_list:
        if news.get_score() <= config.openai_score_threshold:
            # RSS readers do not refresh entries they have already fetched, so
            # wait until a better summary is available to give users a
            # consistent view.
            continue

        img_tag = ''
        if news.image:
            img_tag = f'<img src="{news.image.url}" style="{news.image.get_size_style(220)}" /><br />'

        # The original comment here said "not None", presumably about the summary.
        summary = truncate(news.summary) if news.summarized_by.can_truncate() else news.summary
        summary_link = ' <a href="%s" target="_blank">[summary]</a>' % f'{config.site}/#{news.slug()}'
        comments_link = (' <a href="%s" target="_blank">[comments]</a>' % news.comment_url
                         if news.comment_url else '')

        feed.add(news.title,
                 content='%s%s%s%s' % (img_tag, summary, summary_link, comments_link),
                 author={
                     'name': news.author,
                     'uri': news.author_link
                 } if news.author_link else (),
                 url=news.url,
                 updated=news.submit_time, )

    rendered = feed.to_string()
    output_path = os.path.join(config.output_dir, "feed.xml")
    # Explicit encoding so the written file does not depend on the host's
    # default locale (feed text may contain non-ASCII characters).
    with open(output_path, "w", encoding="utf-8") as fp:
        fp.write(rendered)
    cost = (time.time() - start) * 1000
    logger.info(f'Written {len(rendered)} bytes to {output_path}, cost(ms): {cost:.2f}')
if __name__ == '__main__':
    # Pick the generator matching the requested page; anything other than
    # 'daily' (i.e. 'home') builds the front page.
    generate = gen_daily if args.page == 'daily' else gen_frontpage
    generate()

    # After generating, call expire() on each db store.
    # NOTE(review): db.summary is not imported explicitly in this file;
    # presumably it is loaded as a side effect of another import - confirm.
    db.translation.expire()
    db.summary.expire()
    db.image.expire()