@@ -30,19 +30,16 @@ def __init__(self):
30
30
self .conn .execute ("CREATE TABLE IdToDoc(id INTEGER PRIMARY KEY, document TEXT)" )
31
31
self .conn .execute ('CREATE TABLE WordToId (name TEXT, value TEXT)' )
32
32
cur .execute ("INSERT INTO WordToId VALUES (?, ?)" , ("index" , "{}" ,))
33
- # self.conn.commit()
34
33
35
- # cur.execute("INSERT INTO DocumentStore (document) VALUES (?)", (document1,))
36
- # self.conn.commit()
37
34
cur = self .conn .cursor ()
38
- res = cur .execute ("SELECT name FROM sqlite_master" )
35
+ # res = cur.execute("SELECT name FROM sqlite_master")
39
36
# print(res.fetchall())
40
37
# self.index = test_data['documents'][:-1]
41
38
#
42
39
43
40
def index_document (self , document ):
44
41
"""
45
- Returns -
42
+ Returns - <sqlite3.Cursor object>
46
43
Input - str: a string of words called document
47
44
----------
48
45
Indexes the document. It does this by performing two
@@ -53,7 +50,8 @@ def index_document(self, document):
53
50
to IdToDoc
54
51
- retrieves the id of the inserted document
55
52
- uses the id to call the method that adds the words of
56
- the document to the index WordToId
53
+ the document to the reverse index WordToId if the word has not
54
+ already been indexed for this document
57
55
"""
58
56
row_id = self ._add_to_IdToDoc (document )
59
57
cur = self .conn .cursor ()
@@ -64,12 +62,12 @@ def index_document(self, document):
64
62
if word not in reverse_idx :
65
63
reverse_idx [word ] = [row_id ]
66
64
else :
67
- if row_id not in reverse_idx [word ]: # incase the word has already been indexed
65
+ if row_id not in reverse_idx [word ]:
68
66
reverse_idx [word ].append (row_id )
69
67
reverse_idx = json .dumps (reverse_idx )
70
68
cur = self .conn .cursor ()
71
- cur .execute ("UPDATE WordToId SET value = (?) WHERE name='index'" , (reverse_idx ,))
72
- # print(reverse_idx )
69
+ result = cur .execute ("UPDATE WordToId SET value = (?) WHERE name='index'" , (reverse_idx ,))
70
+ return ( result )
73
71
74
72
def _add_to_IdToDoc (self , document ):
75
73
"""
@@ -85,6 +83,16 @@ def _add_to_IdToDoc(self, document):
85
83
return res .lastrowid
86
84
87
85
def find_documents (self , search_term ):
86
+ """
87
+ Returns - [] when the search term is not found, otherwise the return value of the _find_documents_with_idx method
88
+ Input - str: a string of words called `search_term`
89
+ ---------
90
+ - retrieve the reverse index
91
+ - use the words contained in the search term to find the idxs of all the documents
92
+ that contain the word
93
+ - use idxs to call the _find_documents_with_idx method
94
+ - return the result of the called method
95
+ """
88
96
cur = self .conn .cursor ()
89
97
reverse_idx = cur .execute ("SELECT value FROM WordToId WHERE name='index'" ).fetchone ()[0 ]
90
98
reverse_idx = json .loads (reverse_idx )
@@ -104,9 +112,9 @@ def find_documents(self, search_term):
104
112
if not common_idx_of_docs : # the search term does not exist
105
113
return []
106
114
107
- return self ._documents_with_idx (common_idx_of_docs )
115
+ return self ._find_documents_with_idx (common_idx_of_docs )
108
116
109
- def _documents_with_idx (self , idxs ):
117
+ def _find_documents_with_idx (self , idxs ):
110
118
idxs = list (idxs )
111
119
cur = self .conn .cursor ()
112
120
sql = "SELECT document FROM IdToDoc WHERE id in ({seq})" .format (
@@ -119,7 +127,7 @@ def _documents_with_idx(self, idxs):
119
127
if __name__ == "__main__" :
120
128
se = SearchEngine ()
121
129
se .index_document ("we should all strive to be happy and happy again" )
122
- se .index_document ("happiness is all you need" )
130
+ print ( se .index_document ("happiness is all you need" ) )
123
131
se .index_document ("no way should we be sad" )
124
132
se .index_document ("a cheerful heart is a happy one" )
125
- se .find_documents ("happy" )
133
+ print ( se .find_documents ("happy" ) )
0 commit comments