Skip to content

Commit 4c22810

Browse files
committed
compilable exe
1 parent 7f4189a commit 4c22810

File tree

5 files changed

+140
-7
lines changed

5 files changed

+140
-7
lines changed

docs/Windows.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@
1515

1616
2. Setup python dependencies
1717
```
18-
cd unstructured-api
19-
python -m pip install -r requirements/base.txt
20-
python -c "import nltk; nltk.download('punkt')"
21-
python -c "import nltk; nltk.download('averaged_perceptron_tagger')"
18+
cd <path/to/repo/>
19+
python -m pip install -r requirements/win-base.txt // uvloop does not support Windows
20+
python -c "import nltk; nltk.download('punkt', download_dir='nltk_data')"
21+
python -c "import nltk; nltk.download('averaged_perceptron_tagger', download_dir='nltk_data')"
22+
pip install httpx htmlBuilder pydantic_settings
2223
```
2324
2425
3. Launch the app
@@ -40,7 +41,7 @@
4041
python -m pip install -r requirements/win-base.txt // uvloop does not support Windows
4142
python -c "import nltk; nltk.download('punkt', download_dir='nltk_data')"
4243
python -c "import nltk; nltk.download('averaged_perceptron_tagger', download_dir='nltk_data')"
43-
pip install httpx htmlBuilder
44+
pip install httpx htmlBuilder pydantic_settings
4445
```
4546
4. Patch `sqlite3.dll`, because Python's built-in `sqlite3` does not work in this setup. From the [SQLite download page](https://www.sqlite.org/download.html), download [sqlite-dll-win-x64-3460000.zip](https://www.sqlite.org/2024/sqlite-dll-win-x64-3460000.zip), place it at the root of the repository, and unzip it there into a `sqlite-dll-win-x64-3460000` folder.
4647
5. Convert the unstructuredioapi repo into a Python package by updating `pyproject.toml`

prepline_general/api/app.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
import logging
66
import os
77

8-
from .general import router as general_router
9-
from .openapi import set_custom_openapi
8+
from prepline_general.api.general import router as general_router
9+
from prepline_general.api.openapi import set_custom_openapi
1010

1111
logger = logging.getLogger("unstructured_api")
1212

pyproject.toml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
1+
2+
[build-system]
3+
# setuptools-scm considers all files tracked by git to be data files
4+
requires = ["setuptools>=62.0", "setuptools-scm"]
5+
build-backend = "setuptools.build_meta"
6+
7+
[project]
8+
name = "prepline_general"
9+
description = "UnstructuredIO API"
10+
readme = "README.md"
11+
requires-python = "~=3.10"
12+
# keywords = ["one", "two"]
13+
license = { text = "Proprietary" }
14+
classifiers = [ # https://pypi.org/classifiers/
15+
"Development Status :: 3 - Alpha",
16+
"Programming Language :: Python :: 3 :: Only",
17+
"Intended Audience :: Information Technology",
18+
"Operating System :: Unix",
19+
]
20+
version="0.0.68"
21+
122
[tool.black]
223
line-length = 100
324

@@ -35,3 +56,6 @@ known-first-party = [
3556
"unstructured",
3657
"unstructured_inference",
3758
]
59+
60+
[tool.setuptools.packages.find]
61+
where = ["."]

unstructuredio_api.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import os
import sys

from pydantic_settings import BaseSettings, SettingsConfigDict
3+
4+
5+
class Config(BaseSettings):
    """Runtime settings for the standalone API launcher.

    Values are loaded by pydantic-settings from the process environment and
    from an optional UTF-8 ``.env`` file; keys that do not match a field are
    ignored.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    # Listening port, bind address and number of worker processes.
    unstapi_port: int = 6989
    unstapi_host: str = "0.0.0.0"
    unstapi_workers: int = 1

    # Path of the logging configuration file. Inside a PyInstaller bundle the
    # data directory is exposed as ``sys._MEIPASS``; anchoring the default there
    # keeps it valid when the executable is launched from a different working
    # directory. Outside a bundle the attribute does not exist and the previous
    # relative default ("_internal/config/logger_config.yaml") is used as-is.
    log_config: str = os.path.join(
        getattr(sys, "_MEIPASS", "_internal"), "config", "logger_config.yaml"
    )
11+
12+
13+
config = Config()
14+
15+
if __name__ == "__main__":
    from multiprocessing import freeze_support

    # Must be the first call in the entry point of a frozen executable so that
    # spawned worker processes bootstrap correctly. It has no effect when the
    # program is not frozen, so it is safe to call on every platform.
    freeze_support()

    import uvicorn

    # Report the platform that was actually detected rather than always
    # claiming Windows.
    if os.name == "nt":
        print("The system is Windows.")
    else:
        print("The system is not Windows.")

    # Serve the FastAPI app by import string (required for workers > 1),
    # using the host/port/worker/logging values resolved in ``config``.
    uvicorn.run(
        "prepline_general.api.app:app",
        reload=False,
        port=config.unstapi_port,
        host=config.unstapi_host,
        workers=config.unstapi_workers,
        log_config=config.log_config,
    )

unstructuredio_api.spec

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# -*- mode: python ; coding: utf-8 -*-
2+
3+
import os
4+
from pathlib import Path
5+
import sys
6+
from PyInstaller.utils.hooks import collect_all
7+
8+
binaries_list = [
9+
('C:\\Program Files\\LibreOffice\\program', 'libreoffice'), # modify this to point to where the LibreOffice is installed
10+
(Path('sqlite-dll-win-x64-3460000/sqlite3.dll').as_posix(), '.'), # modify this to point to where you unzip the sqlite3.dll
11+
(Path('sqlite-dll-win-x64-3460000/sqlite3.def').as_posix(), '.'), # modify this to point to where you unzip the sqlite3.def
12+
13+
]
14+
15+
datas_list = [
16+
(Path('logger_config.yaml').as_posix(), 'config'), # modify this to point to where the repo is
17+
(Path('nltk_data').as_posix(), 'nltk_data') # modify this to point to where nltk download the nltk data
18+
]
19+
20+
hiddenimports_list = []
21+
22+
def add_package(package_name):
    """Register every resource PyInstaller can collect for ``package_name``.

    ``collect_all`` yields the package's data files, binaries and hidden
    imports; each group is appended to the matching module-level list
    (``datas_list``, ``binaries_list``, ``hiddenimports_list``) that is later
    passed to ``Analysis``.
    """
    pkg_datas, pkg_binaries, pkg_hiddenimports = collect_all(package_name)
    for target, found in (
        (datas_list, pkg_datas),
        (binaries_list, pkg_binaries),
        (hiddenimports_list, pkg_hiddenimports),
    ):
        target.extend(found)
27+
28+
# Collect all resources from the package_name
29+
add_package('unstructured')
30+
add_package('effdet')
31+
add_package('onnxruntime')
32+
add_package('encodings')
33+
add_package('prepline_general')
34+
35+
a = Analysis(
36+
[Path('unstructuredio_api.py').as_posix()],
37+
pathex=[],
38+
binaries=binaries_list,
39+
datas=datas_list,
40+
hiddenimports=hiddenimports_list,
41+
hookspath=[],
42+
hooksconfig={},
43+
runtime_hooks=[],
44+
excludes=[],
45+
noarchive=False,
46+
optimize=0,
47+
)
48+
pyz = PYZ(a.pure)
49+
50+
exe = EXE(
51+
pyz,
52+
a.scripts,
53+
[],
54+
exclude_binaries=True,
55+
name='unstructuredio_api',
56+
debug=False,
57+
bootloader_ignore_signals=False,
58+
strip=False,
59+
upx=True,
60+
console=True,
61+
disable_windowed_traceback=False,
62+
argv_emulation=False,
63+
target_arch=None,
64+
codesign_identity=None,
65+
entitlements_file=None,
66+
)
67+
coll = COLLECT(
68+
exe,
69+
a.binaries,
70+
a.datas,
71+
strip=False,
72+
upx=True,
73+
upx_exclude=[],
74+
name='unstructuredio_api',
75+
)

0 commit comments

Comments
 (0)