@@ -22,20 +22,21 @@ def __init__(self):
22
22
connection object
23
23
"""
24
24
self .conn = sqlite3 .connect ("searchengine.db" )
25
- self . cur = self .conn .cursor ()
26
- res = self . cur .execute ("SELECT name FROM sqlite_master WHERE name='IdToDoc'" )
25
+ cur = self .conn .cursor ()
26
+ res = cur .execute ("SELECT name FROM sqlite_master WHERE name='IdToDoc'" )
27
27
tables_exist = res .fetchone ()
28
28
29
29
if not tables_exist :
30
- self .cur .execute ("CREATE TABLE IdToDoc(id INTEGER PRIMARY KEY, document TEXT)" )
31
- self .cur .execute ('CREATE TABLE WordToId (name TEXT, value TEXT)' )
32
- self . cur .execute ("INSERT INTO WordToId VALUES (?, ?)" , ("index" , "{}" ,))
30
+ self .conn .execute ("CREATE TABLE IdToDoc(id INTEGER PRIMARY KEY, document TEXT)" )
31
+ self .conn .execute ('CREATE TABLE WordToId (name TEXT, value TEXT)' )
32
+ cur .execute ("INSERT INTO WordToId VALUES (?, ?)" , ("index" , "{}" ,))
33
33
# self.conn.commit()
34
34
35
35
# cur.execute("INSERT INTO DocumentStore (document) VALUES (?)", (document1,))
36
36
# self.conn.commit()
37
- res = self .cur .execute ("SELECT name FROM sqlite_master" )
38
- print (res .fetchall ())
37
+ cur = self .conn .cursor ()
38
+ res = cur .execute ("SELECT name FROM sqlite_master" )
39
+ # print(res.fetchall())
39
40
# self.index = test_data['documents'][:-1]
40
41
#
41
42
@@ -55,7 +56,8 @@ def index_document(self, document):
55
56
the document to the index WordToId
56
57
"""
57
58
row_id = self ._add_to_IdToDoc (document )
58
- reverse_idx = self .cur .execute ("SELECT value FROM WordToId WHERE name='index'" ).fetchone ()[0 ]
59
+ cur = self .conn .cursor ()
60
+ reverse_idx = cur .execute ("SELECT value FROM WordToId WHERE name='index'" ).fetchone ()[0 ]
59
61
reverse_idx = json .loads (reverse_idx )
60
62
document = document .split ()
61
63
for word in document :
@@ -65,8 +67,9 @@ def index_document(self, document):
65
67
if row_id not in reverse_idx [word ]: # incase the word has already been indexed
66
68
reverse_idx [word ].append (row_id )
67
69
reverse_idx = json .dumps (reverse_idx )
68
- self .cur .execute ("UPDATE WordToId SET value = (?) WHERE name='index'" , (reverse_idx ,))
69
- print (reverse_idx )
70
+ cur = self .conn .cursor ()
71
+ cur .execute ("UPDATE WordToId SET value = (?) WHERE name='index'" , (reverse_idx ,))
72
+ # print(reverse_idx)
70
73
71
74
def _add_to_IdToDoc (self , document ):
72
75
"""
@@ -77,19 +80,47 @@ def _add_to_IdToDoc(self, document):
77
80
into the db
78
81
- retrieve and return the row id of the inserted document
79
82
"""
80
- res = self .cur .execute ("INSERT INTO IdToDoc (document) VALUES (?)" , (document ,))
83
+ cur = self .conn .cursor ()
84
+ res = cur .execute ("INSERT INTO IdToDoc (document) VALUES (?)" , (document ,))
81
85
return res .lastrowid
82
86
87
def find_documents(self, search_term):
    """Return the stored documents that contain the words of *search_term*.

    The inverted index (a JSON object mapping each word to the list of
    ``IdToDoc`` row ids containing it) is read from the ``WordToId`` row
    named ``'index'``.  The search term is split on whitespace and the id
    lists of all words that are present in the index are intersected.

    Note: words that are not in the index at all are ignored rather than
    forcing an empty result; only when *none* of the words are indexed is
    the result empty.

    Args:
        search_term: one or more whitespace-separated words.

    Returns:
        A list of document strings ordered by row id, or ``[]`` when
        nothing matches (or the index row does not exist).
    """
    cur = self.conn.cursor()
    row = cur.execute("SELECT value FROM WordToId WHERE name='index'").fetchone()
    if row is None:  # index row was never created
        return []
    reverse_idx = json.loads(row[0])

    # One posting list (list of row ids) per search word known to the index.
    postings = [
        reverse_idx[term] for term in search_term.split() if term in reverse_idx
    ]
    if not postings:  # none of the search words is indexed
        return []

    # Keep only the ids shared by every posting list.
    common_idx_of_docs = set(postings[0]).intersection(*postings[1:])
    if not common_idx_of_docs:  # no single document contains all the words
        return []

    return self._documents_with_idx(common_idx_of_docs)


def _documents_with_idx(self, idxs):
    """Fetch the documents stored under the given ``IdToDoc`` row ids.

    Args:
        idxs: an iterable (e.g. a set) of integer row ids.

    Returns:
        A list of document strings ordered by row id; ``[]`` when *idxs*
        is empty.
    """
    # sqlite3 only accepts a sequence or dict as parameters, never a set.
    ids = tuple(idxs)
    if not ids:
        return []

    # Only the "?" placeholders are formatted into the statement; the ids
    # themselves are always bound as parameters.
    sql = "SELECT document FROM IdToDoc WHERE id IN ({seq}) ORDER BY id".format(
        seq=",".join("?" * len(ids))
    )
    cur = self.conn.cursor()
    return [document for (document,) in cur.execute(sql, ids).fetchall()]
87
118
88
- def _search_index (self ):
89
- pass
90
119
91
120
if __name__ == "__main__":
    # Small manual demo: index a few sentences, then search for one word.
    se = SearchEngine()
    se.index_document("we should all strive to be happy and happy again")
    se.index_document("happiness is all you need")
    se.index_document("no way should we be sad")
    se.index_document("a cheerful heart is a happy one")
    # Show the search result instead of silently discarding it.
    print(se.find_documents("happy"))
0 commit comments