Skip to content

Commit 446fd9a

Browse files
committed
Added the method to add the document to the IdToDoc db
1 parent 1a741e7 commit 446fd9a

File tree

1 file changed

+57
-13
lines changed

1 file changed

+57
-13
lines changed

Search_Engine/backend.py

Lines changed: 57 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,31 @@
22
import test_data
33

44
class SearchEngine:
5-
def __init__(self, document1):
5+
"""
6+
It works by building a reverse index store that maps
7+
words to an id. To find the document(s) that contain
8+
a certain search term, we then take an intersection
9+
of the ids
10+
"""
11+
12+
def __init__(self):
613
"""
7-
- Initialize database.
14+
Return - None
15+
Input - None
16+
----------
17+
- Initialize database. we use sqlite3
818
- Check if the tables exist, if not create them
19+
- maintain a class level access to the database
20+
connection object
921
"""
1022
self.conn = sqlite3.connect("searchengine.db")
1123
cur = self.conn.cursor()
12-
res = cur.execute("SELECT name FROM sqlite_master WHERE name='IndexToWord'")
24+
res = cur.execute("SELECT name FROM sqlite_master WHERE name='IdToDoc'")
1325
tables_exist = res.fetchone()
1426
# tables_exist = res.fetchall()
1527
if not tables_exist:
16-
self.conn.execute("CREATE TABLE IndexToWord(id INTEGER PRIMARY KEY, document TEXT)")
17-
self.conn.execute('CREATE TABLE WordToIndex (store TEXT)')
28+
self.conn.execute("CREATE TABLE IdToDoc(id INTEGER PRIMARY KEY, document TEXT)")
29+
self.conn.execute('CREATE TABLE WordToId (store TEXT)')
1830
# self.conn.commit()
1931

2032
# cur.execute("INSERT INTO DocumentStore (document) VALUES (?)", (document1,))
@@ -25,13 +37,42 @@ def __init__(self, document1):
2537
#
2638

2739
def index_document(self, document):
28-
doc_num = 1
29-
for word in document:
30-
if word not in self.index:
31-
self.index[word] = set([doc_num])
32-
else:
33-
self.index.add(doc_num)
34-
print(self.index)
40+
"""
41+
Returns -
42+
Input - str: a string of words called document
43+
----------
44+
Indexes the document. It does this by performing two
45+
operations - add the document to the IdToDoc, then
46+
adds the words in the document to WordToId
47+
- takes in the document (str)
48+
- passes the document to a method to add the document
49+
to IdToDoc
50+
- retrieves the id of the inserted document
51+
- uses the id to call the method that adds the words of
52+
the document to the index WordToId
53+
"""
54+
self._add_to_IdToDoc(document)
55+
# self._add_to_WordToId(document)
56+
# doc_num = 1
57+
# for word in document:
58+
# if word not in self.index:
59+
# self.index[word] = set([doc_num])
60+
# else:
61+
# self.index.add(doc_num)
62+
# print(self.index)
63+
64+
def _add_to_IdToDoc(self, document):
65+
"""
66+
Returns - int: the id of the inserted document
67+
Input - str: a string of words called `document`
68+
---------
69+
- use the class-level connection object to insert the document
70+
into the db
71+
- retrieve and return the row id of the inserted document
72+
"""
73+
res = self.conn.execute("INSERT INTO IdToDoc (document) VALUES (?)", (document,))
74+
return res.lastrowid
75+
3576

3677

3778
def find_documents(self, search_term):
@@ -41,4 +82,7 @@ def _search_index(self):
4182
pass
4283

4384
if __name__ == "__main__":
    # Demo run: index a few sample sentences into the search engine.
    se = SearchEngine()
    for sample in (
        "we should all strive to be happy",
        "happiness is all you need",
        "no way should we be sad",
    ):
        se.index_document(sample)

0 commit comments

Comments
 (0)