2
2
import test_data
3
3
4
4
class SearchEngine :
5
- def __init__ (self , document1 ):
5
+ """
6
+ It works by building an inverted index store that maps
7
+ words to document ids. To find the document(s) that contain
8
+ a certain search term, we then take an intersection
9
+ of the ids
10
+ """
11
+
12
+ def __init__ (self ):
6
13
"""
7
- - Initialize database.
14
+ Return - None
15
+ Input - None
16
+ ----------
17
+ - Initialize database. we use sqlite3
8
18
- Check if the tables exist, if not create them
19
+ - maintain a class level access to the database
20
+ connection object
9
21
"""
10
22
self .conn = sqlite3 .connect ("searchengine.db" )
11
23
cur = self .conn .cursor ()
12
- res = cur .execute ("SELECT name FROM sqlite_master WHERE name='IndexToWord '" )
24
+ res = cur .execute ("SELECT name FROM sqlite_master WHERE name='IdToDoc '" )
13
25
tables_exist = res .fetchone ()
14
26
# tables_exist = res.fetchall()
15
27
if not tables_exist :
16
- self .conn .execute ("CREATE TABLE IndexToWord (id INTEGER PRIMARY KEY, document TEXT)" )
17
- self .conn .execute ('CREATE TABLE WordToIndex (store TEXT)' )
28
+ self .conn .execute ("CREATE TABLE IdToDoc (id INTEGER PRIMARY KEY, document TEXT)" )
29
+ self .conn .execute ('CREATE TABLE WordToId (store TEXT)' )
18
30
# self.conn.commit()
19
31
20
32
# cur.execute("INSERT INTO DocumentStore (document) VALUES (?)", (document1,))
@@ -25,13 +37,42 @@ def __init__(self, document1):
25
37
#
26
38
27
39
def index_document(self, document):
    """
    Returns - int: the row id assigned to the stored document
    Input   - str: a string of words called `document`
    ----------
    Indexes the document.
    - passes the document to `_add_to_IdToDoc`, which inserts it
      into the IdToDoc table
    - returns the id reported by that insert, so callers can refer
      to the stored document
    Adding the individual words to WordToId is not implemented yet.
    """
    doc_id = self._add_to_IdToDoc(document)
    # TODO: once a word-level indexing helper exists, feed it
    # `doc_id` and the words of `document` to populate WordToId.
    return doc_id
63
+
64
def _add_to_IdToDoc(self, document):
    """
    Returns - int: the id of the inserted document
    Input   - str: a string of words called `document`
    ---------
    - use the class-level connection object to insert the document
      into the IdToDoc table
    - commit the insert so the row is persisted
    - retrieve and return the row id of the inserted document
    """
    # sqlite3 opens an implicit transaction for INSERT statements.
    # Using the connection as a context manager commits on success
    # and rolls back if the statement raises.
    with self.conn:
        res = self.conn.execute(
            "INSERT INTO IdToDoc (document) VALUES (?)", (document,)
        )
    return res.lastrowid
75
+
35
76
36
77
37
78
def find_documents (self , search_term ):
@@ -41,4 +82,7 @@ def _search_index(self):
41
82
pass
42
83
43
84
if __name__ == "__main__":
    # Demo run: build an engine and index a few sample sentences.
    engine = SearchEngine()
    for sample in (
        "we should all strive to be happy",
        "happiness is all you need",
        "no way should we be sad",
    ):
        engine.index_document(sample)
0 commit comments