Skip to content

Commit 1d0cbbc

Browse files
committed
fix: add new embedding models
1 parent 41964c5 commit 1d0cbbc

File tree

2 files changed

+37
-35
lines changed

2 files changed

+37
-35
lines changed

requirements-dev.lock

Lines changed: 33 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ altair==5.3.0
2121
# via streamlit
2222
annotated-types==0.7.0
2323
# via pydantic
24-
anthropic==0.26.1
24+
anthropic==0.28.1
2525
# via langchain-anthropic
26-
anyio==4.3.0
26+
anyio==4.4.0
2727
# via anthropic
2828
# via groq
2929
# via httpx
@@ -42,17 +42,17 @@ beautifulsoup4==4.12.3
4242
# via scrapegraphai
4343
blinker==1.8.2
4444
# via streamlit
45-
boto3==1.34.113
45+
boto3==1.34.127
4646
# via langchain-aws
47-
botocore==1.34.113
47+
botocore==1.34.127
4848
# via boto3
4949
# via s3transfer
5050
burr==0.22.1
5151
# via scrapegraphai
5252
cachetools==5.3.3
5353
# via google-auth
5454
# via streamlit
55-
certifi==2024.2.2
55+
certifi==2024.6.2
5656
# via httpcore
5757
# via httpx
5858
# via requests
@@ -67,7 +67,7 @@ contourpy==1.2.1
6767
# via matplotlib
6868
cycler==0.12.1
6969
# via matplotlib
70-
dataclasses-json==0.6.6
70+
dataclasses-json==0.6.7
7171
# via langchain
7272
# via langchain-community
7373
defusedxml==0.7.1
@@ -80,27 +80,26 @@ dnspython==2.6.1
8080
# via email-validator
8181
docutils==0.19
8282
# via sphinx
83-
email-validator==2.1.1
83+
email-validator==2.1.2
8484
# via fastapi
8585
faiss-cpu==1.8.0
8686
# via scrapegraphai
8787
fastapi==0.111.0
8888
# via burr
89-
# via fastapi-pagination
9089
fastapi-cli==0.0.4
9190
# via fastapi
92-
fastapi-pagination==0.12.24
91+
fastapi-pagination==0.12.25
9392
# via burr
94-
filelock==3.14.0
93+
filelock==3.15.1
9594
# via huggingface-hub
96-
fonttools==4.52.1
95+
fonttools==4.53.0
9796
# via matplotlib
9897
free-proxy==1.1.1
9998
# via scrapegraphai
10099
frozenlist==1.4.1
101100
# via aiohttp
102101
# via aiosignal
103-
fsspec==2024.5.0
102+
fsspec==2024.6.0
104103
# via huggingface-hub
105104
furo==2024.5.6
106105
# via scrapegraphai
@@ -116,9 +115,9 @@ google-api-core==2.19.0
116115
# via google-ai-generativelanguage
117116
# via google-api-python-client
118117
# via google-generativeai
119-
google-api-python-client==2.130.0
118+
google-api-python-client==2.133.0
120119
# via google-generativeai
121-
google-auth==2.29.0
120+
google-auth==2.30.0
122121
# via google-ai-generativelanguage
123122
# via google-api-core
124123
# via google-api-python-client
@@ -128,17 +127,17 @@ google-auth-httplib2==0.2.0
128127
# via google-api-python-client
129128
google-generativeai==0.5.4
130129
# via langchain-google-genai
131-
googleapis-common-protos==1.63.0
130+
googleapis-common-protos==1.63.1
132131
# via google-api-core
133132
# via grpcio-status
134133
graphviz==0.20.3
135134
# via burr
136135
# via scrapegraphai
137136
greenlet==3.0.3
138137
# via playwright
139-
groq==0.8.0
138+
groq==0.9.0
140139
# via langchain-groq
141-
grpcio==1.64.0
140+
grpcio==1.64.1
142141
# via google-api-core
143142
# via grpcio-status
144143
grpcio-status==1.62.2
@@ -160,7 +159,7 @@ httpx==0.27.0
160159
# via fastapi
161160
# via groq
162161
# via openai
163-
huggingface-hub==0.23.1
162+
huggingface-hub==0.23.4
164163
# via tokenizers
165164
idna==3.7
166165
# via anyio
@@ -178,15 +177,15 @@ jinja2==3.1.4
178177
# via fastapi
179178
# via pydeck
180179
# via sphinx
181-
jiter==0.4.0
180+
jiter==0.4.2
182181
# via anthropic
183182
jmespath==1.0.1
184183
# via boto3
185184
# via botocore
186185
jsonpatch==1.33
187186
# via langchain
188187
# via langchain-core
189-
jsonpointer==2.4
188+
jsonpointer==3.0.0
190189
# via jsonpatch
191190
jsonschema==4.22.0
192191
# via altair
@@ -219,7 +218,7 @@ langchain-openai==0.1.6
219218
# via scrapegraphai
220219
langchain-text-splitters==0.0.2
221220
# via langchain
222-
langsmith==0.1.63
221+
langsmith==0.1.77
223222
# via langchain
224223
# via langchain-community
225224
# via langchain-core
@@ -231,7 +230,7 @@ markdown-it-py==3.0.0
231230
# via rich
232231
markupsafe==2.1.5
233232
# via jinja2
234-
marshmallow==3.21.2
233+
marshmallow==3.21.3
235234
# via dataclasses-json
236235
matplotlib==3.9.0
237236
# via burr
@@ -257,10 +256,10 @@ numpy==1.26.4
257256
# via pydeck
258257
# via sf-hamilton
259258
# via streamlit
260-
openai==1.30.3
259+
openai==1.34.0
261260
# via burr
262261
# via langchain-openai
263-
orjson==3.10.3
262+
orjson==3.10.5
264263
# via fastapi
265264
# via langsmith
266265
packaging==23.2
@@ -303,7 +302,7 @@ pyasn1==0.6.0
303302
# via rsa
304303
pyasn1-modules==0.4.0
305304
# via google-auth
306-
pydantic==2.7.1
305+
pydantic==2.7.4
307306
# via anthropic
308307
# via burr
309308
# via fastapi
@@ -314,7 +313,7 @@ pydantic==2.7.1
314313
# via langchain-core
315314
# via langsmith
316315
# via openai
317-
pydantic-core==2.18.2
316+
pydantic-core==2.18.4
318317
# via pydantic
319318
pydeck==0.9.1
320319
# via streamlit
@@ -352,7 +351,7 @@ referencing==0.35.1
352351
# via jsonschema-specifications
353352
regex==2024.5.15
354353
# via tiktoken
355-
requests==2.32.2
354+
requests==2.32.3
356355
# via burr
357356
# via free-proxy
358357
# via google-api-core
@@ -375,7 +374,7 @@ s3transfer==0.10.1
375374
# via boto3
376375
semchunk==1.0.1
377376
# via scrapegraphai
378-
sf-hamilton==1.63.0
377+
sf-hamilton==1.66.1
379378
# via burr
380379
shellingham==1.5.4
381380
# via typer
@@ -418,7 +417,7 @@ starlette==0.37.2
418417
# via fastapi
419418
streamlit==1.35.0
420419
# via burr
421-
tenacity==8.3.0
420+
tenacity==8.4.1
422421
# via langchain
423422
# via langchain-community
424423
# via langchain-core
@@ -432,7 +431,7 @@ toml==0.10.2
432431
# via streamlit
433432
toolz==0.12.1
434433
# via altair
435-
tornado==6.4
434+
tornado==6.4.1
436435
# via streamlit
437436
tqdm==4.66.4
438437
# via google-generativeai
@@ -442,7 +441,7 @@ tqdm==4.66.4
442441
# via semchunk
443442
typer==0.12.3
444443
# via fastapi-cli
445-
typing-extensions==4.12.0
444+
typing-extensions==4.12.2
446445
# via anthropic
447446
# via fastapi
448447
# via fastapi-pagination
@@ -469,15 +468,15 @@ undetected-playwright==0.3.0
469468
# via scrapegraphai
470469
uritemplate==4.1.1
471470
# via google-api-python-client
472-
urllib3==1.26.18
471+
urllib3==2.2.2
473472
# via botocore
474473
# via requests
475-
uvicorn==0.29.0
474+
uvicorn==0.30.1
476475
# via burr
477476
# via fastapi
478477
uvloop==0.19.0
479478
# via uvicorn
480-
watchfiles==0.21.0
479+
watchfiles==0.22.0
481480
# via uvicorn
482481
websockets==12.0
483482
# via uvicorn

scrapegraphai/helpers/models_tokens.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@
6060
"stablelm-zephyr": 8192,
6161
"wizardlm2:8x22b": 65536,
6262
# embedding models
63-
"shaw/dmeta-embedding-zh": 8192,
63+
"shaw/dmeta-embedding-zh-small-q4": 8192,
64+
"shaw/dmeta-embedding-zh-q4": 8192,
65+
"chevalblanc/acge_text_embedding": 8192,
66+
"martcreation/dmeta-embedding-zh": 8192,
6467
"snowflake-arctic-embed": 8192,
6568
"mxbai-embed-large": 512
6669
},

0 commit comments

Comments
 (0)