Skip to content

Commit 4bd17e0

Browse files
committed
added find_documents method
1 parent 75ed4ff commit 4bd17e0

File tree

1 file changed

+47
-16
lines changed

1 file changed

+47
-16
lines changed

Search_Engine/backend.py

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,21 @@ def __init__(self):
2222
connection object
2323
"""
2424
self.conn = sqlite3.connect("searchengine.db")
25-
self.cur = self.conn.cursor()
26-
res = self.cur.execute("SELECT name FROM sqlite_master WHERE name='IdToDoc'")
25+
cur = self.conn.cursor()
26+
res = cur.execute("SELECT name FROM sqlite_master WHERE name='IdToDoc'")
2727
tables_exist = res.fetchone()
2828

2929
if not tables_exist:
30-
self.cur.execute("CREATE TABLE IdToDoc(id INTEGER PRIMARY KEY, document TEXT)")
31-
self.cur.execute('CREATE TABLE WordToId (name TEXT, value TEXT)')
32-
self.cur.execute("INSERT INTO WordToId VALUES (?, ?)", ("index", "{}",))
30+
self.conn.execute("CREATE TABLE IdToDoc(id INTEGER PRIMARY KEY, document TEXT)")
31+
self.conn.execute('CREATE TABLE WordToId (name TEXT, value TEXT)')
32+
cur.execute("INSERT INTO WordToId VALUES (?, ?)", ("index", "{}",))
3333
# self.conn.commit()
3434

3535
# cur.execute("INSERT INTO DocumentStore (document) VALUES (?)", (document1,))
3636
# self.conn.commit()
37-
res = self.cur.execute("SELECT name FROM sqlite_master")
38-
print(res.fetchall())
37+
cur = self.conn.cursor()
38+
res = cur.execute("SELECT name FROM sqlite_master")
39+
# print(res.fetchall())
3940
# self.index = test_data['documents'][:-1]
4041
#
4142

@@ -55,7 +56,8 @@ def index_document(self, document):
5556
the document to the index WordToId
5657
"""
5758
row_id = self._add_to_IdToDoc(document)
58-
reverse_idx = self.cur.execute("SELECT value FROM WordToId WHERE name='index'").fetchone()[0]
59+
cur = self.conn.cursor()
60+
reverse_idx = cur.execute("SELECT value FROM WordToId WHERE name='index'").fetchone()[0]
5961
reverse_idx = json.loads(reverse_idx)
6062
document = document.split()
6163
for word in document:
@@ -65,8 +67,9 @@ def index_document(self, document):
6567
if row_id not in reverse_idx[word]: # incase the word has already been indexed
6668
reverse_idx[word].append(row_id)
6769
reverse_idx = json.dumps(reverse_idx)
68-
self.cur.execute("UPDATE WordToId SET value = (?) WHERE name='index'", (reverse_idx,))
69-
print(reverse_idx)
70+
cur = self.conn.cursor()
71+
cur.execute("UPDATE WordToId SET value = (?) WHERE name='index'", (reverse_idx,))
72+
# print(reverse_idx)
7073

7174
def _add_to_IdToDoc(self, document):
7275
"""
@@ -77,19 +80,47 @@ def _add_to_IdToDoc(self, document):
7780
into the db
7881
- retrieve and return the row id of the inserted document
7982
"""
80-
res = self.cur.execute("INSERT INTO IdToDoc (document) VALUES (?)", (document,))
83+
cur = self.conn.cursor()
84+
res = cur.execute("INSERT INTO IdToDoc (document) VALUES (?)", (document,))
8185
return res.lastrowid
8286

87+
def find_documents(self, search_term):
88+
cur = self.conn.cursor()
89+
reverse_idx = cur.execute("SELECT value FROM WordToId WHERE name='index'").fetchone()[0]
90+
reverse_idx = json.loads(reverse_idx)
91+
search_term = search_term.split(" ")
92+
all_docs_with_search_term = []
93+
for term in search_term:
94+
if term in reverse_idx:
95+
all_docs_with_search_term.append(reverse_idx[term])
8396

97+
if not all_docs_with_search_term: # the search term does not exist
98+
return []
8499

85-
def find_documents(self, search_term):
86-
pass
100+
common_idx_of_docs = set(all_docs_with_search_term[0])
101+
for idx in all_docs_with_search_term[1:]:
102+
common_idx_of_docs.intersection_update(idx)
103+
104+
if not common_idx_of_docs: # the search term does not exist
105+
return []
106+
107+
# print(common_idx_of_docs)
108+
self._documents_with_idx(common_idx_of_docs)
109+
110+
def _documents_with_idx(self, idxs):
111+
cur = self.conn.cursor()
112+
# reverse_idx = cur.execute("SELECT value FROM IdToDoc WHERE id=(?) or id=(?)", ()).fetchone()[0]
113+
sql="SELECT value FROM IdToDoc WHERE id in ({seq})".format(
114+
seq=','.join(['?']*len(idxs))
115+
)
116+
result = cur.execute(sql, idxs)
117+
print(result)
87118

88-
def _search_index(self):
89-
pass
90119

91120
if __name__ == "__main__":
    # Manual smoke run: index a few sample sentences, then query one word.
    engine = SearchEngine()
    sample_documents = (
        "we should all strive to be happy and happy again",
        "happiness is all you need",
        "no way should we be sad",
        "a cheerful heart is a happy one",
    )
    for text in sample_documents:
        engine.index_document(text)
    engine.find_documents("happy")

0 commit comments

Comments
 (0)