Commit 66bcb8d

Merge branch 'main' into add-numpy-support
2 parents 7fc7bc3 + 8f35bdd

File tree: 7 files changed, +448 −154 lines

CHANGELOG.md

Lines changed: 5 additions & 1 deletion

```diff
@@ -9,4 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

-- Added first version of the changelog
+- Added first version of the changelog
+
+### Fixed
+
+- Performance bug in stop sequence check slowing down streaming.
```

Makefile

Lines changed: 49 additions & 0 deletions

```diff
@@ -0,0 +1,49 @@
+update:
+	poetry install
+	git submodule update --init --recursive
+
+update.vendor:
+	cd vendor/llama.cpp && git pull origin master
+
+build:
+	python3 setup.py develop
+
+build.cuda:
+	CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 python3 setup.py develop
+
+build.opencl:
+	CMAKE_ARGS="-DLLAMA_CLBLAST=on" FORCE_CMAKE=1 python3 setup.py develop
+
+build.openblas:
+	CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 python3 setup.py develop
+
+build.blis:
+	CMAKE_ARGS="-DLLAMA_OPENBLAS=on -DLLAMA_OPENBLAS_VENDOR=blis" FORCE_CMAKE=1 python3 setup.py develop
+
+build.sdist:
+	python3 setup.py sdist
+
+deploy.pypi:
+	python3 -m twine upload dist/*
+
+deploy.gh-docs:
+	mkdocs build
+	mkdocs gh-deploy
+
+clean:
+	- cd vendor/llama.cpp && make clean
+	- cd vendor/llama.cpp && rm libllama.so
+	- rm -rf _skbuild
+	- rm llama_cpp/libllama.so
+
+.PHONY: \
+	update \
+	update.vendor \
+	build \
+	build.cuda \
+	build.opencl \
+	build.openblas \
+	build.sdist \
+	deploy.pypi \
+	deploy.gh-docs \
+	clean
```
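One detail in the `clean` target: each command is prefixed with `-`, which is make's ignore-errors marker, so `clean` keeps going even when a file it tries to remove is already gone. A minimal sketch of that behavior (the `/tmp/demo.mk` path and filenames are illustrative; assumes GNU make is installed):

```shell
# Write a throwaway Makefile whose first command fails; the leading "-"
# tells make to ignore the failure and run the next command anyway.
printf 'clean:\n\t- rm /tmp/no-such-file-demo-xyz\n\t@echo still ran\n' > /tmp/demo.mk

# rm fails, make notes the error as "(ignored)", and the echo still executes.
make -f /tmp/demo.mk clean
```

Without the `-` prefix, the failing `rm` would abort the recipe and `clean` would stop at the first missing file.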

README.md

Lines changed: 11 additions & 0 deletions

````diff
@@ -155,6 +155,17 @@ To get started, clone the repository and install the package in development mode

 ```bash
 git clone --recurse-submodules git@github.com:abetlen/llama-cpp-python.git
+
+# Install with pip
+pip install -e .
+
+# if you want to use the fastapi / openapi server
+pip install -e .[server]
+
+# If you're a poetry user, installing will also include a virtual environment
+poetry install --all-extras
+. .venv/bin/activate
+
 # Will need to be re-run any time vendor/llama.cpp is updated
 python3 setup.py develop
 ```
````

llama_cpp/llama.py

Lines changed: 6 additions & 4 deletions

```diff
@@ -795,20 +795,22 @@ def _create_completion(
                 break

             if stream:
+                remaining_tokens = completion_tokens[returned_tokens:]
+                remaining_text = self.detokenize(remaining_tokens)
+                remaining_length = len(remaining_text)
+
                 # We want to avoid yielding any characters from
                 # the generated text if they are part of a stop
                 # sequence.
                 first_stop_position = 0
                 for s in stop_sequences:
-                    for i in range(len(s), 0, -1):
-                        if all_text.endswith(s[:i]):
+                    for i in range(min(len(s), remaining_length), 0, -1):
+                        if remaining_text.endswith(s[:i]):
                             if i > first_stop_position:
                                 first_stop_position = i
                             break

                 token_end_position = 0
-                remaining_tokens = completion_tokens[returned_tokens:]
-                remaining_length = len(self.detokenize(remaining_tokens))
                 for token in remaining_tokens:
                     token_end_position += len(self.detokenize([token]))
                     # Check if stop sequence is in the token
```

poetry.lock

Lines changed: 367 additions & 148 deletions
Some generated files are not rendered by default.

poetry.toml

Lines changed: 3 additions & 0 deletions

```diff
@@ -0,0 +1,3 @@
+[virtualenvs]
+in-project = true
+prefer-active-python = true
```

pyproject.toml

Lines changed: 7 additions & 1 deletion

```diff
@@ -15,7 +15,9 @@ include = [
 [tool.poetry.dependencies]
 python = "^3.8.1"
 typing-extensions = "^4.5.0"
-
+uvicorn = { version = "^0.21.1", optional = true }
+fastapi = { version = "^0.95.0", optional = true }
+sse-starlette = { version = "^1.3.3", optional = true }

 [tool.poetry.group.dev.dependencies]
 black = "^23.3.0"
@@ -25,6 +27,10 @@ mkdocstrings = {extras = ["python"], version = "^0.21.2"}
 mkdocs-material = "^9.1.14"
 pytest = "^7.3.1"
 httpx = "^0.24.1"
+scikit-build = "0.13"
+
+[tool.poetry.extras]
+server = ["uvicorn", "fastapi", "sse-starlette"]

 [build-system]
 requires = [
```
