update run.py for fetchprob
This commit is contained in:
@@ -1 +1,2 @@
|
|||||||
click>=8.1
|
click>=8.1
|
||||||
|
latex2mathml>=3.77.0
|
||||||
577
run.py
577
run.py
@@ -2,14 +2,24 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import pathlib
|
import pathlib
|
||||||
|
import base64
|
||||||
from enum import Enum, unique, auto
|
from enum import Enum, unique, auto
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
from html import escape, unescape
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
|
||||||
|
try:
|
||||||
|
from latex2mathml.converter import convert as latex_to_mathml
|
||||||
|
except ImportError:
|
||||||
|
latex_to_mathml = None
|
||||||
|
|
||||||
CFG_PATH = "./config.yml"
|
CFG_PATH = "./config.yml"
|
||||||
STATE_PATH = "./state.yml"
|
STATE_PATH = "./state.yml"
|
||||||
|
|
||||||
@@ -200,6 +210,502 @@ def parse_range_string_list(str_list) -> list[int]:
|
|||||||
return list(result)
|
return list(result)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_fetchprob_target(target: str) -> tuple[str, str | None]:
|
||||||
|
"""
|
||||||
|
fetchprob target parser.
|
||||||
|
- zeta/<id>: single mode
|
||||||
|
- zeta: batch mode
|
||||||
|
"""
|
||||||
|
parts = target.split("/", 1)
|
||||||
|
location = parts[0].strip()
|
||||||
|
|
||||||
|
if location != "zeta":
|
||||||
|
raise click.UsageError("fetchprob target must start with 'zeta'")
|
||||||
|
|
||||||
|
if len(parts) == 1:
|
||||||
|
return location, None
|
||||||
|
|
||||||
|
problem_id = parts[1].strip()
|
||||||
|
if not problem_id.isdigit():
|
||||||
|
raise click.UsageError("problem id must be numeric (e.g. zeta/2447)")
|
||||||
|
|
||||||
|
return location, problem_id
|
||||||
|
|
||||||
|
|
||||||
|
def extract_problem_id_from_stem(stem: str) -> str | None:
|
||||||
|
"""
|
||||||
|
Extract BOJ numeric id from file stem.
|
||||||
|
Accepted forms: <id>, <id>_<suffix>, <id>-<suffix>
|
||||||
|
"""
|
||||||
|
m = re.match(r"^(\d+)(?:[_-].*)?$", stem)
|
||||||
|
return m.group(1) if m else None
|
||||||
|
|
||||||
|
|
||||||
|
def collect_zeta_problem_ids() -> list[str]:
    """Collect BOJ problem ids found under storage/zeta.

    Scans ``storage/zeta/<lang>/*`` and ``storage/zeta/<lang>/completed/*``,
    skipping underscore-prefixed directories (e.g. ``_static``) which hold
    generated assets rather than solutions.

    Returns:
        Deduplicated problem ids as strings, sorted numerically.

    Raises:
        click.ClickException: if storage/zeta does not exist.
    """

    def ids_in(directory: pathlib.Path) -> set[str]:
        # Shared scan used for both a language dir and its completed/ subdir
        # (the original duplicated this loop verbatim).
        found: set[str] = set()
        for f in directory.iterdir():
            if f.is_file():
                problem_id = extract_problem_id_from_stem(f.stem)
                if problem_id:
                    found.add(problem_id)
        return found

    zeta_dir = pathlib.Path(STORAGE_DIR) / "zeta"
    if not zeta_dir.is_dir():
        raise click.ClickException(f"Storage location '{zeta_dir}' not found")

    ids: set[str] = set()
    for lang_dir in sorted(zeta_dir.iterdir()):
        # Underscore-prefixed dirs (assets, cached HTML) are not solution dirs.
        if not lang_dir.is_dir() or lang_dir.name.startswith("_"):
            continue

        ids |= ids_in(lang_dir)

        completed_dir = lang_dir / "completed"
        if completed_dir.is_dir():
            ids |= ids_in(completed_dir)

    # Numeric sort so "100" comes before "99" never happens.
    return sorted(ids, key=int)
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_boj_problem_html(problem_id: str, timeout: int = 10) -> str:
    """Download the raw HTML of a BOJ problem page.

    Args:
        problem_id: Numeric BOJ problem id as a string.
        timeout: Socket timeout in seconds.

    Returns:
        The response body decoded as UTF-8 (undecodable bytes replaced).

    Raises:
        click.ClickException: on a non-200 response or any network error.
    """
    # A browser-like User-Agent; the bare urllib default tends to be rejected.
    browser_ua = (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/123.0.0.0 Safari/537.36"
    )
    url = f"https://www.acmicpc.net/problem/{problem_id}"
    request = urllib.request.Request(url, headers={"User-Agent": browser_ua})

    try:
        with urllib.request.urlopen(request, timeout=timeout) as resp:
            # Older http.client responses may lack .status; assume OK then.
            status = getattr(resp, "status", 200)
            if status != 200:
                raise click.ClickException(
                    f"failed to fetch problem {problem_id}: HTTP {status}"
                )
            return resp.read().decode("utf-8", errors="replace")
    except urllib.error.HTTPError as e:
        raise click.ClickException(
            f"failed to fetch problem {problem_id}: HTTP {e.code}"
        ) from e
    except urllib.error.URLError as e:
        raise click.ClickException(
            f"network error while fetching problem {problem_id}: {e.reason}"
        ) from e
|
||||||
|
|
||||||
|
|
||||||
|
def _problem_static_html_path(problem_id: str) -> pathlib.Path:
    """Path of the saved offline HTML page for *problem_id*."""
    static_dir = pathlib.Path(STORAGE_DIR) / "zeta" / "_static"
    return static_dir / f"{problem_id}.html"
|
||||||
|
|
||||||
|
|
||||||
|
def _problem_static_assets_dir(problem_id: str) -> pathlib.Path:
    """Per-problem directory for downloaded image assets.

    NOTE(review): no visible caller in this change — images are inlined as
    data: URIs instead; confirm whether this helper is still needed.
    """
    assets_root = pathlib.Path(STORAGE_DIR) / "zeta" / "_static" / "assets"
    return assets_root / problem_id
|
||||||
|
|
||||||
|
|
||||||
|
def _guess_image_mime(src_url: str, content_type: str | None) -> str:
|
||||||
|
if content_type:
|
||||||
|
mime = content_type.split(";", 1)[0].strip().lower()
|
||||||
|
if mime.startswith("image/"):
|
||||||
|
return mime
|
||||||
|
|
||||||
|
parsed = urllib.parse.urlparse(src_url)
|
||||||
|
ext = pathlib.Path(parsed.path).suffix.lower()
|
||||||
|
ext_to_mime = {
|
||||||
|
".jpg": "image/jpeg",
|
||||||
|
".jpeg": "image/jpeg",
|
||||||
|
".png": "image/png",
|
||||||
|
".gif": "image/gif",
|
||||||
|
".webp": "image/webp",
|
||||||
|
".svg": "image/svg+xml",
|
||||||
|
".bmp": "image/bmp",
|
||||||
|
".ico": "image/x-icon",
|
||||||
|
}
|
||||||
|
return ext_to_mime.get(ext, "image/png")
|
||||||
|
|
||||||
|
|
||||||
|
def _download_image_for_offline(problem_id: str, src_url: str, seq: int, force: bool) -> str | None:
    """Download one image and return it inlined as a base64 ``data:`` URI.

    Returns None on any HTTP/network failure so the caller can leave the
    original ``<img src>`` untouched (localization is best-effort).

    NOTE(review): ``seq`` and ``force`` are not used in this body — they look
    intended for writing numbered files into the assets dir; confirm whether
    the data-URI approach supersedes that plan.
    """
    req = urllib.request.Request(
        src_url,
        headers={
            # Browser-like UA plus a problem-page Referer; image hosts used
            # by BOJ may check either.
            "User-Agent": (
                "Mozilla/5.0 (X11; Linux x86_64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/123.0.0.0 Safari/537.36"
            ),
            "Referer": f"https://www.acmicpc.net/problem/{problem_id}",
        },
    )

    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            status = getattr(resp, "status", 200)
            if status != 200:
                return None
            content_type = resp.headers.get("Content-Type")
            image_bytes = resp.read()
    except (urllib.error.HTTPError, urllib.error.URLError):
        # Best-effort: a missing image must not fail the whole page fetch.
        return None

    mime = _guess_image_mime(src_url, content_type)
    encoded = base64.b64encode(image_bytes).decode("ascii")
    return f"data:{mime};base64,{encoded}"
|
||||||
|
|
||||||
|
|
||||||
|
def _localize_images_in_html(problem_id: str, html_fragment: str, force: bool) -> str:
    """Rewrite every ``<img src=...>`` in the fragment to an inlined data: URI.

    Relative src values are resolved against the problem page URL.
    Already-inlined ``data:`` URIs are skipped, failed downloads leave the
    original reference untouched, and downloads are cached per absolute URL
    within this call so repeated images are fetched only once.
    """
    base_url = f"https://www.acmicpc.net/problem/{problem_id}"
    # Dict-wrapped counter so the nested repl() can mutate it (pre-nonlocal style).
    counter = {"i": 0}
    # absolute URL -> data URI; avoids re-downloading repeated images.
    cache: dict[str, str] = {}

    # Groups: (1) "<img ... src=" prefix, (2) opening quote (may be empty),
    # (3) the src value, (4) closing quote (may be empty; consumed and
    # re-emitted by repl).
    pattern = re.compile(
        r'(<img\b[^>]*?\bsrc\s*=\s*)(["\']?)([^"\'>\s]+)(["\']?)',
        flags=re.IGNORECASE,
    )

    def repl(m: re.Match) -> str:
        prefix = m.group(1)
        q1 = m.group(2)
        src = m.group(3)

        # Already self-contained; nothing to do.
        if src.startswith("data:"):
            return m.group(0)

        abs_url = urllib.parse.urljoin(base_url, src)
        if abs_url in cache:
            local_src = cache[abs_url]
            # Re-quote with the original quote char, defaulting to double quotes.
            quote = q1 if q1 else '"'
            return f"{prefix}{quote}{local_src}{quote}"

        counter["i"] += 1
        local_src = _download_image_for_offline(problem_id, abs_url, counter["i"], force=force)
        if not local_src:
            # Download failed: keep the original remote reference.
            return m.group(0)

        cache[abs_url] = local_src
        quote = q1 if q1 else '"'
        return f"{prefix}{quote}{local_src}{quote}"

    return pattern.sub(repl, html_fragment)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_html_by_id(raw_html: str, tag: str, element_id: str) -> str | None:
|
||||||
|
pattern = rf"<{tag}[^>]*id=\"{re.escape(element_id)}\"[^>]*>(.*?)</{tag}>"
|
||||||
|
m = re.search(pattern, raw_html, flags=re.DOTALL | re.IGNORECASE)
|
||||||
|
if not m:
|
||||||
|
return None
|
||||||
|
return m.group(1).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_tags(html_text: str) -> str:
|
||||||
|
text = re.sub(r"<[^>]+>", "", html_text, flags=re.DOTALL)
|
||||||
|
return " ".join(text.split())
|
||||||
|
|
||||||
|
|
||||||
|
def _render_math_expressions(html_fragment: str) -> str:
    """Convert TeX math delimiters to MathML for offline rendering.

    - Block math:  ``$$...$$``  -> ``<div class="math-block">``
    - Inline math: ``$...$``    -> ``<span class="math-inline">``

    ``<pre>``/``<code>`` blocks are protected from substitution. Expressions
    latex2mathml cannot convert are left verbatim. No-op when latex2mathml
    is not installed.
    """
    if latex_to_mathml is None:
        return html_fragment

    # Verbatim spans stashed here and restored after math substitution.
    protected_blocks: list[str] = []

    def protect(m: re.Match) -> str:
        protected_blocks.append(m.group(0))
        return f"@@PROTECTED_{len(protected_blocks) - 1}@@"

    # Do not touch code/pre blocks.
    temp = re.sub(
        r"<(pre|code)\b[^>]*>.*?</\1>",
        protect,
        html_fragment,
        flags=re.DOTALL | re.IGNORECASE,
    )

    def repl_block(m: re.Match) -> str:
        # Unescape first: the fragment is HTML, so TeX like "a&lt;b" needs "a<b".
        expr = unescape(m.group(1).strip())
        if not expr:
            return m.group(0)
        try:
            mathml = latex_to_mathml(expr)
            return f'<div class="math-block">{mathml}</div>'
        except Exception:
            # Conversion failure: keep the original TeX visible.
            return m.group(0)

    def repl_inline(m: re.Match) -> str:
        expr = unescape(m.group(1).strip())
        if not expr:
            return m.group(0)
        try:
            mathml = latex_to_mathml(expr)
            return f'<span class="math-inline">{mathml}</span>'
        except Exception:
            return m.group(0)

    # Block pass first so the inline pass never sees $$ delimiters;
    # the lookarounds keep single-$ matching away from any remaining $$.
    temp = re.sub(r"\$\$(.+?)\$\$", repl_block, temp, flags=re.DOTALL)
    temp = re.sub(r"(?<!\$)\$(?!\$)(.+?)(?<!\$)\$(?!\$)", repl_inline, temp, flags=re.DOTALL)

    # Restore the protected pre/code spans in insertion order.
    for i, block in enumerate(protected_blocks):
        temp = temp.replace(f"@@PROTECTED_{i}@@", block)

    return temp
|
||||||
|
|
||||||
|
|
||||||
|
def make_offline_problem_html(problem_id: str, raw_html: str, force: bool) -> str:
    """Build a self-contained, offline-friendly HTML page from BOJ raw HTML.

    Extracts the core sections (description, input, output, limit, hint)
    and all numbered sample input/output blocks, inlines their images and
    renders TeX math, then wraps everything in a standalone styled page.
    Falls back to embedding the (escaped, truncated) raw HTML when no
    section could be parsed.
    """
    title = _extract_html_by_id(raw_html, "span", "problem_title")
    if not title:
        # Parsing failed; use a generic title rather than aborting.
        title = f"BOJ {problem_id}"

    blocks: list[str] = []
    # (element id on the BOJ page, Korean section heading used offline)
    core_specs = [
        ("problem_description", "문제"),
        ("problem_input", "입력"),
        ("problem_output", "출력"),
        ("problem_limit", "제한"),
        ("problem_hint", "힌트"),
    ]

    for content_id, fallback_label in core_specs:
        content = _extract_html_by_id(raw_html, "div", content_id)
        # Sections absent on a given problem (e.g. hint) are simply skipped.
        if not content or not content.strip():
            continue

        localized_content = _localize_images_in_html(
            problem_id,
            content,
            force=force,
        )
        localized_content = _render_math_expressions(localized_content)

        blocks.append(
            "\n".join(
                [
                    "<article class=\"section\">",
                    f"<h2>{fallback_label}</h2>",
                    f"{localized_content}",
                    "</article>",
                ]
            )
        )

    # Sample sections are numbered sampleinput1, sampleoutput1, ...
    for sample_type, sample_label in (("sampleinput", "예제 입력"), ("sampleoutput", "예제 출력")):
        sample_pattern = rf"<section[^>]*id=\"{sample_type}(\d+)\"[^>]*>(.*?)</section>"
        sample_matches = list(
            re.finditer(sample_pattern, raw_html, flags=re.DOTALL | re.IGNORECASE)
        )
        # Numeric sort so sample 10 follows sample 9, not sample 1.
        sample_matches.sort(key=lambda m: int(m.group(1)))

        for m in sample_matches:
            idx = m.group(1)
            section_html = m.group(2)
            # The actual sample data lives in the first <pre> of the section.
            pre_match = re.search(
                r"(<pre[^>]*>.*?</pre>)",
                section_html,
                flags=re.DOTALL | re.IGNORECASE,
            )
            if not pre_match:
                continue

            pre_html = _localize_images_in_html(
                problem_id,
                pre_match.group(1),
                force=force,
            )

            # Prefer the page's own heading; fall back to a synthesized one.
            h2_match = re.search(
                r"<h2[^>]*>(.*?)</h2>",
                section_html,
                flags=re.DOTALL | re.IGNORECASE,
            )
            if h2_match:
                h2 = _strip_tags(h2_match.group(1))
            else:
                h2 = f"{sample_label} {idx}"

            blocks.append(
                "\n".join(
                    [
                        "<article class=\"section\">",
                        f"<h2>{h2}</h2>",
                        pre_html,
                        "</article>",
                    ]
                )
            )

    if not blocks:
        # Nothing parsed: embed the escaped raw page (truncated) so the
        # user still has something to read offline.
        body_fallback = (
            "<article class=\"section\">"
            "<h2>원본 페이지</h2>"
            "<p>문제 본문 파싱에 실패하여 원본 HTML을 포함합니다.</p>"
            f"<pre>{escape(raw_html[:100000])}</pre>"
            "</article>"
        )
        blocks.append(body_fallback)

    # NOTE(review): source_url is computed but never interpolated into the
    # template below — confirm whether a "view original" link was intended.
    source_url = f"https://www.acmicpc.net/problem/{problem_id}"
    content_html = "\n".join(blocks)

    # Doubled braces ({{ }}) are literal CSS braces inside this f-string.
    return f"""<!DOCTYPE html>
<html lang=\"ko\">
<head>
<meta charset=\"UTF-8\" />
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />
<title>BOJ {problem_id} - Offline</title>
<style>
:root {{
--bg: #fafaf8;
--paper: #ffffff;
--ink: #1e1f24;
--muted: #6a6d75;
--line: #d8dce3;
--accent: #0d6e6e;
--code-bg: #f4f6fb;
}}
* {{ box-sizing: border-box; }}
body {{
margin: 0;
background:
radial-gradient(circle at 15% 0%, #f0efe9 0%, transparent 42%),
radial-gradient(circle at 85% 20%, #e7f1f2 0%, transparent 38%),
var(--bg);
color: var(--ink);
font-family: "Noto Sans KR", "Pretendard", "Apple SD Gothic Neo", sans-serif;
line-height: 1.65;
}}
main {{
max-width: 980px;
margin: 0 auto;
padding: 24px 16px 56px;
}}
.header {{
background: var(--paper);
border: 1px solid var(--line);
border-radius: 14px;
padding: 18px 20px;
margin-bottom: 18px;
}}
.header h1 {{ margin: 0 0 6px; font-size: 1.5rem; }}
.header p {{ margin: 0; color: var(--muted); font-size: 0.95rem; }}
.header a {{ color: var(--accent); text-decoration: none; }}
.section {{
background: var(--paper);
border: 1px solid var(--line);
border-radius: 14px;
padding: 16px 18px;
margin-bottom: 14px;
overflow-x: auto;
}}
h2 {{
margin: 0 0 10px;
font-size: 1.05rem;
color: var(--accent);
border-bottom: 1px solid var(--line);
padding-bottom: 8px;
}}
pre, code {{
font-family: "JetBrains Mono", "Fira Code", monospace;
background: var(--code-bg);
}}
pre {{
padding: 12px;
border-radius: 10px;
border: 1px solid #e7ebf2;
overflow: auto;
}}
blockquote {{
margin: 14px 0;
padding: 16px 16px 14px 22px;
border-left: 4px solid var(--accent);
border-radius: 10px;
background: linear-gradient(90deg, #eef8f8 0%, #f9fdfd 100%);
color: #24313a;
font-weight: 600;
position: relative;
}}
blockquote::before {{
content: "“";
position: absolute;
left: 8px;
top: 2px;
font-size: 1.35rem;
line-height: 1;
color: #0b5f5f;
opacity: 0.7;
}}
blockquote > :first-child {{ margin-top: 0; }}
blockquote > :last-child {{ margin-bottom: 0; }}
q {{
color: #114f50;
font-weight: 700;
background: #edf8f8;
border-radius: 6px;
padding: 0 4px;
}}
.math-inline math {{
font-size: 1em;
vertical-align: middle;
}}
.math-block {{
margin: 10px 0;
padding: 8px 10px;
overflow-x: auto;
background: #f8fbff;
border: 1px solid #e2ecf8;
border-radius: 8px;
}}
.math-block math {{
font-size: 1.04em;
display: block;
}}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid var(--line); padding: 6px 8px; }}
img {{ max-width: 100%; height: auto; }}
</style>
</head>
<body>
<main>
<header class=\"header\">
<h1>{title}</h1>
</header>
{content_html}
</main>
</body>
</html>
"""
|
||||||
|
|
||||||
|
|
||||||
|
def save_problem_html(problem_id: str, html: str, force: bool) -> str:
    """Write *html* to storage/zeta/_static/<id>.html.

    Returns:
        "fetched" when the file was written, "skipped" when it already
        existed and *force* was not set.
    """
    destination = _problem_static_html_path(problem_id)
    # Make sure the _static directory exists before any existence check.
    destination.parent.mkdir(parents=True, exist_ok=True)

    if destination.exists() and not force:
        return "skipped"

    destination.write_text(html, encoding="utf-8")
    return "fetched"
|
||||||
|
|
||||||
|
|
||||||
# Top-level command group; subcommands are attached via cli.add_command below.
# (No docstring on purpose: it would surface in `--help` output.)
@click.group()
def cli():
    pass
|
||||||
@@ -791,6 +1297,76 @@ def find(keyword: str, completed: bool | None):
|
|||||||
click.echo(f" {status} {file_name}.{lang_name}")
|
click.echo(f" {status} {file_name}.{lang_name}")
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_and_store_problem(problem_id: str, force: bool) -> str:
    """Fetch one problem, build its offline HTML, and save it.

    Returns the save_problem_html result tag: "fetched" or "skipped".
    (Factored out: the original duplicated this sequence in both the
    single and batch branches of fetchprob.)
    """
    raw_html = fetch_boj_problem_html(problem_id)
    offline_html = make_offline_problem_html(problem_id, raw_html, force=force)
    return save_problem_html(problem_id, offline_html, force=force)


@click.command(name="fetchprob")
@click.argument("target", type=str, nargs=1, required=True)
@click.option("--force", "-f", is_flag=True, help="Overwrite existing HTML files")
def fetchprob(target: str, force: bool):
    """
    Fetch BOJ problem HTML into storage/zeta/_static.

    TARGET:
        zeta/<id>   Fetch one problem
        zeta        Fetch all detected problem ids under storage/zeta
    """
    location, problem_id = parse_fetchprob_target(target)
    # Defensive: parse_fetchprob_target already enforces this today.
    if location != "zeta":
        raise click.UsageError("only 'zeta' location is supported")

    # --- single mode ------------------------------------------------------
    if problem_id is not None:
        # Cheap pre-check avoids the network round-trip entirely.
        if _problem_static_html_path(problem_id).exists() and not force:
            click.echo(f"{problem_id}: skipped (already exists)")
            return

        result = _fetch_and_store_problem(problem_id, force)
        if result == "skipped":
            click.echo(f"{problem_id}: skipped (already exists)")
        else:
            click.echo(f"{problem_id}: fetched (offline processed + images)")
        return

    # --- batch mode -------------------------------------------------------
    ids = collect_zeta_problem_ids()
    if not ids:
        click.echo("No problem ids found in storage/zeta")
        return

    attempted = len(ids)
    fetched = 0
    skipped = 0
    failed = 0

    for pid in ids:
        try:
            if _problem_static_html_path(pid).exists() and not force:
                skipped += 1
                click.echo(f"{pid}: skipped")
                continue

            result = _fetch_and_store_problem(pid, force)
            if result == "skipped":
                skipped += 1
                click.echo(f"{pid}: skipped")
            else:
                fetched += 1
                click.echo(f"{pid}: fetched (offline processed + images)")
        except click.ClickException as e:
            # One bad problem must not abort the whole batch.
            failed += 1
            click.echo(f"{pid}: failed ({e.message})")

    click.echo()
    click.secho(
        (
            f"Summary - attempted: {attempted}, fetched: {fetched}, "
            f"skipped: {skipped}, failed: {failed}"
        ),
        fg="cyan",
        bold=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Register subcommands on the top-level click group.
cli.add_command(run)
cli.add_command(load)
||||||
@@ -799,6 +1375,7 @@ cli.add_command(export)
|
|||||||
cli.add_command(state)
cli.add_command(show)
cli.add_command(find)
# New in this change: BOJ offline problem fetcher.
cli.add_command(fetchprob)

# Script entry point.
if __name__ == "__main__":
    cli()
|
||||||
|
|||||||
Reference in New Issue
Block a user