diff --git a/run.py b/run.py index 08b5ed3..86a5aa0 100755 --- a/run.py +++ b/run.py @@ -1,37 +1,89 @@ #!/usr/bin/env python3 -import os -import sys -import pathlib import base64 -from enum import Enum, unique, auto -from dataclasses import dataclass +import os +import pathlib import re import subprocess +import sys import urllib.error import urllib.parse import urllib.request +from dataclasses import dataclass +from enum import Enum, auto, unique from html import escape, unescape -import yaml import click +import yaml try: from latex2mathml.converter import convert as latex_to_mathml except ImportError: latex_to_mathml = None -CFG_PATH = "./config.yml" -STATE_PATH = "./state.yml" +__version__ = "2.0.0" # new version -TC_DIR = "./_testcases" +CFG_PATH = pathlib.Path("./config.yml") +STATE_PATH = pathlib.Path("./state.yml") -TEMPLATES_DIR = "./templates" +TC_DIR = pathlib.Path("./_testcases") +TEMPLATES_DIR = pathlib.Path("./templates") +SRC_SPACE_DIR = pathlib.Path("./space") +STORAGE_DIR = pathlib.Path("./storage") +BUILD_DIR = pathlib.Path("./build") -SRC_SPACE_DIR = "./space" -STORAGE_DIR = "./storage" +# ====== +# Helper Functions +# ====== -BUILD_DIR = "./build" + +def _natural_sort_key(s: str): + """Natural sort key: splits string into text/number chunks for proper ordering.""" + return [int(c) if c.isdigit() else c.lower() for c in re.split(r"(\d+)", s)] + + +def _parse_range_string(range_str: str) -> list[int]: + """ + 범위 문자열을 정수 리스트로 변환. + * ..N: 1부터 N까지 + * M..N: M부터 N까지 + * N: 단일 숫자 + """ + if range_str.startswith(".."): + end_str = range_str[2:] + if not end_str.isdigit(): + raise ValueError("Invalid range format") + return list(range(1, int(end_str) + 1)) + + parts = range_str.split("..") + if len(parts) == 1: + if not parts[0].isdigit(): + raise ValueError("Invalid range format") + return [int(parts[0])] + elif len(parts) == 2: + if not parts[0].isdigit() or not parts[1].isdigit(): + raise ValueError("Invalid range format") + return list(range(int(parts[0]), int(parts[1]) + 1)) + else: + raise ValueError("Invalid range format") + + +def _parse_range_string_list(str_list) -> list[int]: + """ + 여러 범위 문자열을 정수 리스트로 변환하는 함수; 이때 중복은 제거함. + * e.g. ["1..3", "5", "7..9"] -> [1, 2, 3, 5, 7, 8, 9] + """ + result = set() + for s in str_list: + result.update(_parse_range_string(s)) + return list(result) + + +def _dispatch_target(target: str) -> tuple[str, str]: + splited = target.split("/") + if len(splited) != 2: + raise ValueError("Invalid target format. Expected format: /") + return splited[0], splited[1] @unique @@ -170,551 +222,155 @@ class Language(Enum): return Language.UNDEFINED -def natural_sort_key(s: str): - """Natural sort key: splits string into text/number chunks for proper ordering.""" - return [int(c) if c.isdigit() else c.lower() for c in re.split(r"(\d+)", s)] +class StateManager: + _instance: "StateManager | None" = None + _state: dict | None = None + def __new__(cls) -> "StateManager": + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance -def parse_range_string(range_str: str) -> list[int]: - """ - 범위 문자열을 정수 리스트로 변환. - * ..N: 1부터 N까지 - * M..N: M부터 N까지 - * N: 단일 숫자 - """ - if range_str.startswith(".."): - end_str = range_str[2:] - if not end_str.isdigit(): - raise ValueError("Invalid range format") - return list(range(1, int(end_str) + 1)) - - parts = range_str.split("..") - if len(parts) == 1: - if not parts[0].isdigit(): - raise ValueError("Invalid range format") - return [int(parts[0])] - elif len(parts) == 2: - if not parts[0].isdigit() or not parts[1].isdigit(): - raise ValueError("Invalid range format") - return list(range(int(parts[0]), int(parts[1]) + 1)) - else: - raise ValueError("Invalid range format") - - -def parse_range_string_list(str_list) -> list[int]: - """ - 여러 범위 문자열을 정수 리스트로 변환하는 함수; 이때 중복은 제거함. - * e.g. ["1..3", "5", "7..9"] -> [1, 2, 3, 5, 7, 8, 9] - """ - result = set() - for s in str_list: - result.update(parse_range_string(s)) - return list(result) - - -def parse_fetchprob_target(target: str) -> tuple[str, str | None]: - """ - fetchprob target parser. - - zeta/: single mode - - zeta: batch mode - """ - parts = target.split("/", 1) - location = parts[0].strip() - - if location != "zeta": - raise click.UsageError("fetchprob target must start with 'zeta'") - - if len(parts) == 1: - return location, None - - problem_id = parts[1].strip() - if not problem_id.isdigit(): - raise click.UsageError("problem id must be numeric (e.g. zeta/2447)") - - return location, problem_id - - -def extract_problem_id_from_stem(stem: str) -> str | None: - """ - Extract BOJ numeric id from file stem. - Accepted forms: , _, - - """ - m = re.match(r"^(\d+)(?:[_-].*)?$", stem) - return m.group(1) if m else None - - -def collect_zeta_problem_ids() -> list[str]: - """ - Collect problem ids from storage/zeta/* and storage/zeta/*/completed. - """ - zeta_dir = pathlib.Path(STORAGE_DIR) / "zeta" - if not zeta_dir.is_dir(): - raise click.ClickException(f"Storage location '{zeta_dir}' not found") - - ids: set[str] = set() - for lang_dir in sorted(zeta_dir.iterdir()): - if not lang_dir.is_dir() or lang_dir.name.startswith("_"): - continue - - for f in lang_dir.iterdir(): - if f.is_file(): - problem_id = extract_problem_id_from_stem(f.stem) - if problem_id: - ids.add(problem_id) - - completed_dir = lang_dir / "completed" - if completed_dir.is_dir(): - for f in completed_dir.iterdir(): - if f.is_file(): - problem_id = extract_problem_id_from_stem(f.stem) - if problem_id: - ids.add(problem_id) - - return sorted(ids, key=int) - - -def fetch_boj_problem_html(problem_id: str, timeout: int = 10) -> str: - """ - Download BOJ problem page raw HTML. - """ - url = f"https://www.acmicpc.net/problem/{problem_id}" - req = urllib.request.Request( - url, - headers={ - "User-Agent": ( - "Mozilla/5.0 (X11; Linux x86_64) " - "AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/123.0.0.0 Safari/537.36" - ) - }, - ) - - try: - with urllib.request.urlopen(req, timeout=timeout) as resp: - status = getattr(resp, "status", 200) - if status != 200: - raise click.ClickException( - f"failed to fetch problem {problem_id}: HTTP {status}" - ) - return resp.read().decode("utf-8", errors="replace") - except urllib.error.HTTPError as e: - raise click.ClickException( - f"failed to fetch problem {problem_id}: HTTP {e.code}" - ) from e - except urllib.error.URLError as e: - raise click.ClickException( - f"network error while fetching problem {problem_id}: {e.reason}" - ) from e - - -def _problem_static_html_path(problem_id: str) -> pathlib.Path: - return pathlib.Path(STORAGE_DIR) / "zeta" / "_static" / f"{problem_id}.html" - - -def _problem_static_assets_dir(problem_id: str) -> pathlib.Path: - return pathlib.Path(STORAGE_DIR) / "zeta" / "_static" / "assets" / problem_id - - -def _guess_image_mime(src_url: str, content_type: str | None) -> str: - if content_type: - mime = content_type.split(";", 1)[0].strip().lower() - if mime.startswith("image/"): - return mime - - parsed = urllib.parse.urlparse(src_url) - ext = pathlib.Path(parsed.path).suffix.lower() - ext_to_mime = { - ".jpg": "image/jpeg", - ".jpeg": "image/jpeg", - ".png": "image/png", - ".gif": "image/gif", - ".webp": "image/webp", - ".svg": "image/svg+xml", - ".bmp": "image/bmp", - ".ico": "image/x-icon", - } - return ext_to_mime.get(ext, "image/png") - - -def _download_image_for_offline( - problem_id: str, src_url: str, seq: int, force: bool -) -> str | None: - req = urllib.request.Request( - src_url, - headers={ - "User-Agent": ( - "Mozilla/5.0 (X11; Linux x86_64) " - "AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/123.0.0.0 Safari/537.36" - ), - "Referer": f"https://www.acmicpc.net/problem/{problem_id}", - }, - ) - - try: - with urllib.request.urlopen(req, timeout=10) as resp: - status = getattr(resp, "status", 200) - if status != 200: - return None - content_type = resp.headers.get("Content-Type") - image_bytes = resp.read() - except (urllib.error.HTTPError, urllib.error.URLError): - return None - - mime = _guess_image_mime(src_url, content_type) - encoded = base64.b64encode(image_bytes).decode("ascii") - return f"data:{mime};base64,{encoded}" - - -def _localize_images_in_html(problem_id: str, html_fragment: str, force: bool) -> str: - base_url = f"https://www.acmicpc.net/problem/{problem_id}" - counter = {"i": 0} - cache: dict[str, str] = {} - - pattern = re.compile( - r'(]*?\bsrc\s*=\s*)(["\']?)([^"\'>\s]+)(["\']?)', - flags=re.IGNORECASE, - ) - - def repl(m: re.Match) -> str: - prefix = m.group(1) - q1 = m.group(2) - src = m.group(3) - - if src.startswith("data:"): - return m.group(0) - - abs_url = urllib.parse.urljoin(base_url, src) - if abs_url in cache: - local_src = cache[abs_url] - quote = q1 if q1 else '"' - return f"{prefix}{quote}{local_src}{quote}" - - counter["i"] += 1 - local_src = _download_image_for_offline( - problem_id, abs_url, counter["i"], force=force - ) - if not local_src: - return m.group(0) - - cache[abs_url] = local_src - quote = q1 if q1 else '"' - return f"{prefix}{quote}{local_src}{quote}" - - return pattern.sub(repl, html_fragment) - - -def _extract_html_by_id(raw_html: str, tag: str, element_id: str) -> str | None: - pattern = rf"<{tag}[^>]*id=\"{re.escape(element_id)}\"[^>]*>(.*?)" - m = re.search(pattern, raw_html, flags=re.DOTALL | re.IGNORECASE) - if not m: - return None - return m.group(1).strip() - - -def _strip_tags(html_text: str) -> str: - text = re.sub(r"<[^>]+>", "", html_text, flags=re.DOTALL) - return " ".join(text.split()) - - -def _render_math_expressions(html_fragment: str) -> str: - """ - Convert TeX math delimiters to MathML for offline rendering. - - Inline: $...$ - - Block: $$...$$ - """ - if latex_to_mathml is None: - return html_fragment - - protected_blocks: list[str] = [] - - def protect(m: re.Match) -> str: - protected_blocks.append(m.group(0)) - return f"@@PROTECTED_{len(protected_blocks) - 1}@@" - - # Do not touch code/pre blocks. - temp = re.sub( - r"<(pre|code)\b[^>]*>.*?", - protect, - html_fragment, - flags=re.DOTALL | re.IGNORECASE, - ) - - def repl_block(m: re.Match) -> str: - expr = unescape(m.group(1).strip()) - if not expr: - return m.group(0) + def load(self) -> dict: + if self._state is not None: + return self._state try: - mathml = latex_to_mathml(expr) - return f'
{mathml}
' - except Exception: - return m.group(0) + with open(STATE_PATH, "r", encoding="utf-8") as f: + self._state = yaml.safe_load(f) or {} + except FileNotFoundError as exc: + raise click.ClickException(f"state.yml not found at {STATE_PATH}") from exc + except yaml.YAMLError as e: + raise click.ClickException(f"state.yml parse error: {e}") from e + return self._state # type: ignore[return-value] - def repl_inline(m: re.Match) -> str: - expr = unescape(m.group(1).strip()) - if not expr: - return m.group(0) - try: - mathml = latex_to_mathml(expr) - return f'{mathml}' - except Exception: - return m.group(0) + def save(self) -> None: + if self._state is None: + return + with open(STATE_PATH, "w", encoding="utf-8") as f: + yaml.safe_dump(self._state, f) - temp = re.sub(r"\$\$(.+?)\$\$", repl_block, temp, flags=re.DOTALL) - temp = re.sub( - r"(? dict | None: + state = self.load() + return state.get("space", {}).get(lang) - for i, block in enumerate(protected_blocks): - temp = temp.replace(f"@@PROTECTED_{i}@@", block) + def set_space(self, lang: str, data: dict) -> None: + state = self.load() + if "space" not in state: + state["space"] = {} + state["space"][lang] = data + self._state = state - return temp + def clear_space(self, lang: str) -> None: + state = self.load() + if "space" in state and lang in state["space"]: + del state["space"][lang] + self._state = state + + def reload(self) -> None: + self._state = None -def make_offline_problem_html(problem_id: str, raw_html: str, force: bool) -> str: - """ - Build a self-contained offline-friendly HTML page from BOJ raw HTML. - """ - title = _extract_html_by_id(raw_html, "span", "problem_title") - if not title: - title = f"BOJ {problem_id}" +class StorageManager: + _instance: "StorageManager | None" = None - blocks: list[str] = [] - core_specs = [ - ("problem_description", "문제"), - ("problem_input", "입력"), - ("problem_output", "출력"), - ("problem_limit", "제한"), - ("problem_hint", "힌트"), - ] + def __new__(cls) -> "StorageManager": + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance - for content_id, fallback_label in core_specs: - content = _extract_html_by_id(raw_html, "div", content_id) - if not content or not content.strip(): - continue + def register_location(self, location: str) -> None: + path = STORAGE_DIR / location + os.makedirs(path, exist_ok=True) - localized_content = _localize_images_in_html( - problem_id, - content, - force=force, + def check_location(self, location: str) -> bool: + path = STORAGE_DIR / location + return os.path.isdir(path) + + def register_location_lang(self, location: str, lang: Language) -> None: + path = STORAGE_DIR / location / lang.value + os.makedirs(path, exist_ok=True) + + def check_location_lang(self, location: str, lang: Language) -> bool: + path = STORAGE_DIR / location / lang.value + return os.path.isdir(path) + + def check_get_file( + self, location: str, lang: Language, filename: str + ) -> pathlib.Path | None: + completed_path = ( + STORAGE_DIR + / location + / lang.value + / "completed" + / f"{filename}.{lang.value}" ) - localized_content = _render_math_expressions(localized_content) - - blocks.append( - "\n".join( - [ - '
', - f"

{fallback_label}

", - f"{localized_content}", - "
", - ] - ) + uncompleted_path = ( + STORAGE_DIR / location / lang.value / f"{filename}.{lang.value}" ) - for sample_type, sample_label in ( - ("sampleinput", "예제 입력"), - ("sampleoutput", "예제 출력"), - ): - sample_pattern = rf"]*id=\"{sample_type}(\d+)\"[^>]*>(.*?)" - sample_matches = list( - re.finditer(sample_pattern, raw_html, flags=re.DOTALL | re.IGNORECASE) + if os.path.isfile(completed_path): + return completed_path + elif os.path.isfile(uncompleted_path): + return uncompleted_path + else: + return None + + def save_file( + self, location: str, lang: str, filename: str, content: bytes, completed: bool + ) -> None: + path = STORAGE_DIR / location / lang / ("completed" if completed else "") + os.makedirs(path, exist_ok=True) + with open(f"{path}/{filename}", "wb") as f: + f.write(content) + + def read_file(self, location: str, lang: Language, filename: str) -> bytes: + path = self.check_get_file(location, lang, filename) + if path is None: + raise FileNotFoundError( + f"File not found in storage: {location}/{filename}.{lang.value}" + ) + file_path = f"{path}/{filename}" + if not os.path.isfile(file_path): + raise FileNotFoundError(f"File not found in storage: {file_path}") + with open(file_path, "rb") as f: + return f.read() + + def mark_completed(self, location: str, lang: Language, filename: str) -> None: + uncompleted_file = ( + STORAGE_DIR / location / lang.value / f"{filename}.{lang.value}" ) - sample_matches.sort(key=lambda m: int(m.group(1))) - - for m in sample_matches: - idx = m.group(1) - section_html = m.group(2) - pre_match = re.search( - r"(]*>.*?)", - section_html, - flags=re.DOTALL | re.IGNORECASE, - ) - if not pre_match: - continue - - pre_html = _localize_images_in_html( - problem_id, - pre_match.group(1), - force=force, - ) - - h2_match = re.search( - r"]*>(.*?)", - section_html, - flags=re.DOTALL | re.IGNORECASE, - ) - if h2_match: - h2 = _strip_tags(h2_match.group(1)) - else: - h2 = f"{sample_label} {idx}" - - blocks.append( - "\n".join( - [ - '
', - f"

{h2}

", - pre_html, - "
", - ] - ) - ) - - if not blocks: - body_fallback = ( - '
' - "

원본 페이지

" - "

문제 본문 파싱에 실패하여 원본 HTML을 포함합니다.

" - f"
{escape(raw_html[:100000])}
" - "
" + completed_file = ( + STORAGE_DIR + / location + / lang.value + / "completed" + / f"{filename}.{lang.value}" ) - blocks.append(body_fallback) - source_url = f"https://www.acmicpc.net/problem/{problem_id}" - content_html = "\n".join(blocks) + os.makedirs(completed_file.parent, exist_ok=True) + if os.path.isfile(uncompleted_file): + os.rename(uncompleted_file, completed_file) - return f""" - - - - - BOJ {problem_id} - Offline - - - -
-
-

{title}

-
- {content_html} -
- - -""" + def unmark_completed(self, location: str, lang: Language, filename: str) -> None: + uncompleted_file = ( + STORAGE_DIR / location / lang.value / f"{filename}.{lang.value}" + ) + completed_file = ( + STORAGE_DIR + / location + / lang.value + / "completed" + / f"{filename}.{lang.value}" + ) + + os.makedirs(uncompleted_file.parent, exist_ok=True) + if os.path.isfile(completed_file): + os.rename(completed_file, uncompleted_file) -def save_problem_html(problem_id: str, html: str, force: bool) -> str: - """ - Save html to storage/zeta/_static/.html - Return: fetched | skipped - """ - static_dir = pathlib.Path(STORAGE_DIR) / "zeta" / "_static" - static_dir.mkdir(parents=True, exist_ok=True) - - dest = _problem_static_html_path(problem_id) - if dest.exists() and not force: - return "skipped" - - dest.write_text(html, encoding="utf-8") - return "fetched" +# ======================= +# MAIN CLI LOGIC +# ======================= @click.group() @@ -722,14 +378,29 @@ def cli(): pass +@click.command(name="register") +@click.argument("location", type=str, nargs=1, required=True) +def register(location: str): + """ + 새로운 location을 storage에 등록하는 명령어. + """ + storage_manager = StorageManager() + if storage_manager.check_location(location): + click.echo(f"Location '{location}' is already registered.") + else: + storage_manager.register_location(location) + click.echo(f"Location '{location}' registered successfully.") + @click.command(name="run") -@click.option("--from", "from_", type=str, required=True) -@click.option("--target", "-t", default=["1"], multiple=True) +@click.argument("target", type=str, nargs=1, required=True) +@click.option("--testcase", "-t", default=["1"], multiple=True) @click.option("--verbose/--no-verbose", "-v/-nv", default=True) -def run(from_: str, target: str, verbose: bool): +def run(target: str, testcase: str, verbose: bool): """ 지정된 언어로 빌드 및 실행하는 명령어 """ + loc, prob = _dispatch_target(target) + # Language 확인 from_language: Language = Language.convert_name(from_) @@ -760,7 +431,7 @@ def run(from_: str, target: str, verbose: bool): # 테케 분석 try: - tcs: list[int] = parse_range_string_list(target) + tcs: list[int] = _parse_range_string_list(target) except ValueError as e: raise click.ClickException(e) @@ -1148,7 +819,9 @@ def show(filter: str | None, completed: bool, show_all: bool): continue # uncompleted files - for f in sorted(lang_dir.iterdir(), key=lambda p: natural_sort_key(p.stem)): + for f in sorted( + lang_dir.iterdir(), key=lambda p: _natural_sort_key(p.stem) + ): if f.is_file(): entries.append((loc_name, lang_name, f.stem, False)) @@ -1156,7 +829,7 @@ def show(filter: str | None, completed: bool, show_all: bool): completed_dir = lang_dir / "completed" if completed_dir.is_dir(): for f in sorted( - completed_dir.iterdir(), key=lambda p: natural_sort_key(p.stem) + completed_dir.iterdir(), key=lambda p: _natural_sort_key(p.stem) ): if f.is_file(): entries.append((loc_name, lang_name, f.stem, True)) @@ -1275,7 +948,7 @@ def find(keyword: str, completed: bool | None): if completed is not None: entries = [e for e in entries if e[3] == completed] - entries.sort(key=lambda e: (e[0], e[1], natural_sort_key(e[2]))) + entries.sort(key=lambda e: (e[0], e[1], _natural_sort_key(e[2]))) if not entries: click.echo("No problems found.") @@ -1316,70 +989,9 @@ def find(keyword: str, completed: bool | None): @click.argument("target", type=str, nargs=1, required=True) @click.option("--force", "-f", is_flag=True, help="Overwrite existing HTML files") def fetchprob(target: str, force: bool): - """ - Fetch BOJ problem HTML into storage/zeta/_static. - - TARGET: - zeta/ Fetch one problem - zeta Fetch all detected problem ids under storage/zeta - """ - location, problem_id = parse_fetchprob_target(target) - if location != "zeta": - raise click.UsageError("only 'zeta' location is supported") - - if problem_id is not None: - if _problem_static_html_path(problem_id).exists() and not force: - click.echo(f"{problem_id}: skipped (already exists)") - return - - raw_html = fetch_boj_problem_html(problem_id) - offline_html = make_offline_problem_html(problem_id, raw_html, force=force) - result = save_problem_html(problem_id, offline_html, force=force) - if result == "skipped": - click.echo(f"{problem_id}: skipped (already exists)") - else: - click.echo(f"{problem_id}: fetched (offline processed + images)") - return - - ids = collect_zeta_problem_ids() - if not ids: - click.echo("No problem ids found in storage/zeta") - return - - attempted = len(ids) - fetched = 0 - skipped = 0 - failed = 0 - - for pid in ids: - try: - if _problem_static_html_path(pid).exists() and not force: - skipped += 1 - click.echo(f"{pid}: skipped") - continue - - raw_html = fetch_boj_problem_html(pid) - offline_html = make_offline_problem_html(pid, raw_html, force=force) - result = save_problem_html(pid, offline_html, force=force) - if result == "skipped": - skipped += 1 - click.echo(f"{pid}: skipped") - else: - fetched += 1 - click.echo(f"{pid}: fetched (offline processed + images)") - except click.ClickException as e: - failed += 1 - click.echo(f"{pid}: failed ({e.message})") - - click.echo() - click.secho( - ( - f"Summary - attempted: {attempted}, fetched: {fetched}, " - f"skipped: {skipped}, failed: {failed}" - ), - fg="cyan", - bold=True, - ) + """(disabled) Fetch BOJ problem HTML - BOJ is closing, keeping for future multi-location support.""" + del target, force + raise click.ClickException("fetchprob is currently disabled (BOJ closing)") cli.add_command(run)