|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +# Copyright 2009-present MongoDB, Inc. |
| 4 | +# |
| 5 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | +# you may not use this file except in compliance with the License. |
| 7 | +# You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, software |
| 12 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | +# See the License for the specific language governing permissions and |
| 15 | +# limitations under the License. |
| 16 | + |
| 17 | +""" |
| 18 | +Patches HTML files within the latest API doc directory (under APIDOCSPATH) to |
| 19 | +redirect users from `/api/current` to canonical URLs under `/api/mongocxx-X.Y.Z`. |
| 20 | +""" |
| 21 | + |
| 22 | +from concurrent.futures import ProcessPoolExecutor |
| 23 | +from packaging.version import Version, InvalidVersion |
| 24 | +from pathlib import Path |
| 25 | +from typing import List, Tuple |
| 26 | + |
| 27 | +import re |
| 28 | +import os |
| 29 | + |
| 30 | + |
def find_api_docs_path() -> str:
    """
    Locate the API docs directory named by the APIDOCSPATH environment variable.

    Returns the absolute form of that path. Raises RuntimeError when the
    variable is unset or empty, or when the path it names does not exist.
    """
    configured = os.environ.get('APIDOCSPATH')

    # An empty value is treated the same as a missing variable.
    if configured is None or configured == '':
        raise RuntimeError('APIDOCSPATH environment variable is not set!')

    if os.path.exists(configured):
        return os.path.abspath(configured)

    raise RuntimeError('path to API docs does not exist!')
| 43 | + |
| 44 | + |
def find_api_docs(api_docs_path: str) -> List[str]:
    """
    Return the sorted names of the API doc directories in `api_docs_path`.

    Only real directories directly inside `api_docs_path` are considered;
    symlinks are ignored. Names are ordered legacy first, then modern, and
    by version within each group. Example:
     - legacy-0.1.0
     - legacy-0.2.0
     - legacy-0.10.0
     - mongocxx-3.1.0
     - mongocxx-3.2.0
     - mongocxx-3.10.0
    Directories with a version suffix, e.g. `mongocxx-1.2.3-rc0`, are
    reported on stdout and left out of the result.

    Raises RuntimeError when a directory name, after removing its
    `legacy-`/`mongocxx-` prefix, is not a valid version.
    """

    def by_version(p: str) -> Tuple[bool, Version] | None:
        """Return the sort key for `p`, or None when `p` must be skipped."""
        is_legacy: bool = p.startswith('legacy-')
        version = p.removeprefix('legacy-') if is_legacy else p.removeprefix('mongocxx-')
        if '-' in version:
            print(f' - Skipping: {p}')
            return None
        try:
            parsed = Version(version)
        except InvalidVersion:
            raise RuntimeError(f'unexpected API doc name "{p}": APIDOCSPATH may not be correct!') from None
        # False sorts before True, so legacy docs precede modern ones.
        return (not is_legacy, parsed)

    # The context manager closes the scandir iterator even if a check raises.
    with os.scandir(api_docs_path) as entries:
        names = [entry.name for entry in entries if entry.is_dir() and not entry.is_symlink()]

    # Parse each name exactly once; the key serves both to filter and to sort.
    keyed = []
    for name in names:
        key = by_version(name)
        if key is not None:
            keyed.append((key, name))

    # Sort on the key alone so that equal versions keep their scan order.
    keyed.sort(key=lambda pair: pair[0])

    return [name for _, name in keyed]
| 77 | + |
| 78 | + |
def patch_redirect_current_pages(apidocspath, latest):
    """
    Apply `insert_current_redirect` to every `.html` file found under the
    `latest` directory of `apidocspath`, using a pool of worker processes.

    Any exception raised while patching a page is re-raised here.
    """
    latest_root = os.path.join(apidocspath, latest)

    # Walk the whole tree and keep only the HTML pages.
    html_pages = []
    for parent, _subdirs, names in os.walk(latest_root):
        html_pages.extend(
            entry
            for entry in (Path(os.path.join(parent, name)) for name in names)
            if entry.suffix == '.html'
        )

    # Leaving the `with` block waits for all submitted work to finish.
    with ProcessPoolExecutor() as pool:
        pending = [
            pool.submit(insert_current_redirect, apidocspath, html_page, latest)
            for html_page in html_pages
        ]

    # Surface the first worker failure, if any.
    for task in pending:
        task.result()
| 100 | + |
| 101 | + |
def insert_current_redirect(apidocspath, page, latest):
    """
    Insert a patch-tag comment, a canonical <link>, and a redirect <script>
    immediately before the line that closes the <head> section of `page`.

    `page` must be located under `<apidocspath>/<latest>`; its path relative
    to that directory is used to build the canonical URL. The file is
    rewritten in place. If a line before the end of <head> already contains
    the patch tag for `latest`, the file is left untouched.

    Raises RuntimeError when no line consisting solely of `</head>`
    (optionally indented) is found.
    """

    # Use forward slashes regardless of platform: the path ends up in a URL.
    path = Path(page).relative_to(os.path.join(apidocspath, latest)).as_posix()

    patch_tag = f'patch-apidocs-current-redirects: {latest}'
    end_of_head_re = re.compile(r'^(\s*)</head>$')

    with open(page, "r+") as file:
        lines = list(file)

        end_of_head = None
        indent = ''

        for idx, line in enumerate(lines):
            # Literal comparison: the version in the tag contains `.`, which
            # would act as a wildcard if the tag were used as a regex.
            if patch_tag in line:
                # This file has already been patched.
                return

            m = end_of_head_re.match(line)
            if m:
                # Patched index.html has 1-space indentation. The rest have none.
                indent = '' if m.group(1) == '' else ' '
                end_of_head = idx
                break

        # Checked on `end_of_head`, not the loop index: the loop index is set
        # for any non-empty file whether or not `</head>` was found.
        if end_of_head is None:
            raise RuntimeError(f'could not find end of `<head>` in {path}')

        patch = [
            # Patch tag to avoid repeated patch of the same file.
            indent + f'<!-- {patch_tag} -->\n',
            # Canonical URL. Inform search engines about the redirect.
            indent + f'<link rel="canonical" href="https://mongocxx.org/api/{latest}/{path}"/>\n',
            # Redirect script. Avoid generating history for the `/current`
            # page during the redirect.
            indent + '<script type="text/javascript">\n',
            indent + 'if (window.location.pathname.startsWith("/api/current/")) {\n',
            indent + ' window.location.replace(\n',
            indent + f' window.location.href.replace("/api/current/", "/api/{latest}/")\n',
            indent + ' )\n',
            indent + '}\n',
            indent + '</script>\n',
        ]

        # Splice the patch in just before the `</head>` line.
        lines[end_of_head:end_of_head] = patch

        # Rewrite the file from the start; truncate in case of leftover bytes.
        file.seek(0)
        file.writelines(lines)
        file.truncate()
| 162 | + |
| 163 | + |
def main():
    """
    Find the API docs under APIDOCSPATH and patch the pages of the one that
    sorts last so that `/current` redirects to it, printing progress.

    Raises RuntimeError when no API docs are found.
    """
    docs_root: str = find_api_docs_path()
    print(f'Patching API docs in: {docs_root}')

    print('Finding API docs...')
    found = find_api_docs(docs_root)
    if not found:
        raise RuntimeError('no API docs found: APIDOCSPATH may not be correct!')
    print('Finding API docs... done.')

    # The list is sorted, so its last entry is treated as the latest doc.
    oldest, newest = found[0], found[-1]
    print(f' - Found {len(found)} API docs: {oldest} ... {newest}')
    print(f' - Using {newest} as the latest API doc.')

    progress = f'Patching latest API doc pages to redirect from /current to /{newest}...'
    print(progress)
    patch_redirect_current_pages(docs_root, newest)
    print(f'{progress} done.')


if __name__ == '__main__':
    main()
0 commit comments