Skip to content

Commit e1bd8b9

Browse files
committed
Updated code to use more parameters: the similarity threshold and the embedding model are now passed in as arguments.
1 parent ebaa0f3 commit e1bd8b9

File tree

1 file changed

+4
-6
lines changed

1 file changed

+4
-6
lines changed

app/backend/core/similarity_helper.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22
import pandas as pd
33

4-
from api.openai_api_helper import get_embedding
4+
from core.openai_api_helper import get_embedding
55

66
def cosine_similarity(a, b):
77
if len(a) > len(b):
@@ -10,22 +10,20 @@ def cosine_similarity(a, b):
1010
a = np.pad(a, (0, len(b) - len(a)), 'constant')
1111
return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
1212

13-
def get_similarity_dataframe(query: str, dataset: pd.core.frame.DataFrame, rows: int) -> pd.core.frame.DataFrame:
14-
SIMILARITIES_RESULTS_THRESHOLD = 0.70
15-
13+
def get_similarity_df(query, dataset, rows, similarities_results_threshold, embedding_model) -> pd.core.frame.DataFrame:
1614
# create a copy of the dataset
1715
dataset_vectors = dataset.copy()
1816

1917
# get the embeddings for the query
20-
query_embeddings = get_embedding(query)
18+
query_embeddings = get_embedding(query, embedding_model)
2119

2220
# create a new column with the calculated similarity for each row
2321
dataset_vectors["similarity"] = dataset_vectors["Embedding"].apply(
2422
lambda x: cosine_similarity(np.array(query_embeddings), np.array(x))
2523
)
2624

2725
# filter the videos by similarity
28-
mask = dataset_vectors["similarity"] >= SIMILARITIES_RESULTS_THRESHOLD
26+
mask = dataset_vectors["similarity"] >= similarities_results_threshold
2927
dataset_vectors = dataset_vectors[mask].copy()
3028

3129
# sort the videos by similarity

0 commit comments

Comments
 (0)