-
-
Notifications
You must be signed in to change notification settings - Fork 46.9k
Wavelet tree #4267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Wavelet tree #4267
Changes from 9 commits
79531e5
f97cc00
8e64f02
81f57e1
f21e0c1
fe050d7
5cb7a26
d6dfb71
58df1d4
49f7d6d
19b0ad8
5de688b
b96aefe
688bcb3
b93ddaf
53a8f22
a829cb7
4403f70
eda9f31
27b7fe1
1cfd947
ce5db3d
05e261d
f20ba34
b02a6fd
e798b75
2317d72
9ea6c1c
ced6008
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,191 @@ | ||||||||
from typing import List, Optional | ||||||||
|
||||||||
""" | ||||||||
A wavelet tree is a data structure designed to efficiently answer various range queries | ||||||||
for arrays. Wavelet trees are different from other binary trees in the sense that | ||||||||
the nodes are split based on the actual values of the elements and not on indices, | ||||||||
as is the case with segment trees or Fenwick trees. You can read more about them here: | ||||||||
1. https://users.dcc.uchile.cl/~jperez/papers/ioiconf16.pdf | ||||||||
2. https://www.youtube.com/watch?v=4aSv9PcecDw&t=811s | ||||||||
3. https://www.youtube.com/watch?v=CybAgVF-MMc&t=1178s | ||||||||
""" | ||||||||
|
||||||||
|
||||||||
class Node:
    """
    A single node of a wavelet tree.

    A freshly created node is a placeholder: every field holds -1 / None until
    build_tree fills it in.

    >>> node = Node(3)
    >>> node
    min_value: -1, max_value: -1
    >>> node.map_left
    [-1, -1, -1]
    >>> node.left is None and node.right is None
    True
    """

    def __init__(self, n: int) -> None:
        """
        Create an empty node for a sequence of n elements.

        :param n: number of elements of the sequence this node covers
        """
        # Smallest and largest value stored in this node's sequence.
        self.minn: int = -1
        self.maxx: int = -1
        # map_left[i] is the number of elements among the first i + 1 that are
        # sent to the left child; it stays at -1 for nodes that are never split.
        self.map_left: List[int] = [-1] * n
        self.left: Optional[Node] = None
        self.right: Optional[Node] = None

    def __repr__(self) -> str:
        """Return the value range covered by this node."""
        return f"min_value: {self.minn}, max_value: {self.maxx}"
anirudnits marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
|
||||||||
|
||||||||
def build_tree(arr: List[int]) -> "Node":
    """
    Build the wavelet tree for the list arr and return its root.

    :param arr: non-empty list of integers
    :return: root node of the constructed tree
    :raises ValueError: if arr is empty

    >>> root = build_tree([3, 1, 2, 3, 1])
    >>> root
    min_value: 1, max_value: 3
    >>> root.map_left
    [0, 1, 2, 2, 3]
    >>> root.left, root.right
    (min_value: 1, max_value: 2, min_value: 3, max_value: 3)
    >>> build_tree([])
    Traceback (most recent call last):
        ...
    ValueError: build_tree() requires a non-empty list
    """
    if not arr:
        # min()/max() would fail anyway; fail early with a clear message.
        raise ValueError("build_tree() requires a non-empty list")

    root = Node(len(arr))
    root.minn, root.maxx = min(arr), max(arr)

    # Leaf node case where the node contains only one unique value
    if root.minn == root.maxx:
        return root

    # Take the (floored) mean of the min and max element as the pivot and
    # partition arr into left_arr (elements <= pivot) and right_arr (the rest),
    # keeping the original order. Because minn <= pivot < maxx, both parts are
    # non-empty and the recursion below always terminates.
    pivot = (root.minn + root.maxx) // 2
    left_arr: List[int] = []
    right_arr: List[int] = []

    for index, num in enumerate(arr):
        if num <= pivot:
            left_arr.append(num)
        else:
            right_arr.append(num)
        # Number of elements in arr[0..index] that went to the left child.
        root.map_left[index] = len(left_arr)

    root.left = build_tree(left_arr)
    root.right = build_tree(right_arr)

    return root
|
||||||||
|
||||||||
def rank_from_start(node: Optional["Node"], num: int, i: int) -> int:
    """
    Return the number of occurrences of num in the interval [0, i] of the list.

    An empty tree (node is None) or a negative i gives 0. An i past the end of
    the list is clamped to the last index, so the whole list is counted.

    :param node: root of the (sub)tree to query
    :param num: value whose occurrences are counted
    :param i: last index (inclusive) of the interval

    >>> root = build_tree([2, 1, 4, 5, 6, 8, 9, 1, 2, 6, 7, 4, 2, 6, 5, 3, 2, 7])
    >>> rank_from_start(root, 6, 6)
    1
    >>> rank_from_start(root, 2, 0)
    1
    >>> rank_from_start(root, 1, 10)
    2
    >>> rank_from_start(root, 17, 7)
    0
    >>> rank_from_start(root, 2, 100)
    4
    >>> rank_from_start(root, 2, -1)
    0
    >>> rank_from_start(None, 2, 3)
    0
    """
    if node is None:
        return 0

    # map_left has one slot per element of the node, so its length bounds i.
    # Without this clamp a leaf root would answer i + 1 for any i.
    i = min(i, len(node.map_left) - 1)

    while node is not None and i >= 0:
        # Leaf node: every element equals node.minn
        if node.minn == node.maxx:
            return i + 1 if node.minn == num else 0

        pivot = (node.minn + node.maxx) // 2
        left_count = node.map_left[i]  # elements of [0, i] in the left child

        if num <= pivot:
            # go to the left subtree and map index i into it
            node, i = node.left, left_count - 1
        else:
            # go to the right subtree and map index i into it
            node, i = node.right, i - left_count

    # The mapped interval became empty on the way down.
    return 0
|
||||||||
|
||||||||
def rank(node: Optional["Node"], num: int, i: int, j: int) -> int:
    """
    Return the number of occurrences of num in the interval [i, j] of the list.

    An empty tree (node is None) or an empty interval (i > j) gives 0.

    :param node: root of the tree to query
    :param num: value whose occurrences are counted
    :param i: first index (inclusive) of the interval
    :param j: last index (inclusive) of the interval

    >>> root = build_tree([2, 1, 4, 5, 6, 8, 9, 1, 2, 6, 7, 4, 2, 6, 5, 3, 2, 7])
    >>> rank(root, 6, 3, 13)
    3
    >>> rank(root, 2, 0, 17)
    4
    >>> rank(root, 9, 2, 2)
    0
    >>> rank(root, 6, 13, 3)
    0
    >>> rank(None, 6, 3, 13)
    0
    """
    if node is None or i > j:
        return 0

    # occurrences in [i, j] = occurrences in [0, j] - occurrences in [0, i - 1]
    rank_till_j = rank_from_start(node, num, j)
    rank_before_i = rank_from_start(node, num, i - 1)

    return rank_till_j - rank_before_i
|
||||||||
|
||||||||
def quantile(node: Optional["Node"], k: int, i: int, j: int) -> int:
    """
    Return the kth smallest element in the interval [i, j] of the list, where k
    is 0-indexed.

    The interval is first clipped to the indices that exist in the node.
    Returns -1 when there is no such element: empty tree, empty interval,
    negative k, or k not smaller than the interval length.

    :param node: root of the (sub)tree to query
    :param k: 0-indexed rank of the wanted element inside the interval
    :param i: first index (inclusive) of the interval
    :param j: last index (inclusive) of the interval

    >>> root = build_tree([2, 1, 4, 5, 6, 8, 9, 1, 2, 6, 7, 4, 2, 6, 5, 3, 2, 7])
    >>> quantile(root, 2, 2, 5)
    6
    >>> quantile(root, 4, 2, 13)
    4
    >>> quantile(root, 0, 6, 6)
    9
    >>> quantile(root, 4, 2, 5)
    -1
    >>> quantile(root, -1, 2, 5)
    -1
    >>> quantile(None, 0, 0, 0)
    -1
    """
    if node is None:
        return -1

    # Clip the window to the node: map_left has one slot per element. This
    # stops a negative i from wrapping around in map_left[i - 1] and stops an
    # oversized j from making k look valid on a leaf.
    i = max(i, 0)
    j = min(j, len(node.map_left) - 1)

    if k < 0 or i > j or k > (j - i):
        return -1

    # Leaf node case
    if node.minn == node.maxx:
        return node.minn

    # Elements before index i / up to index j that live in the left subtree.
    left_before_i = node.map_left[i - 1] if i else 0
    left_upto_j = node.map_left[j]
    num_elements_in_left_tree = left_upto_j - left_before_i

    if num_elements_in_left_tree > k:
        # The answer is among the window's elements in the left subtree.
        return quantile(node.left, k, left_before_i, left_upto_j - 1)

    # Otherwise skip the left elements and continue in the right subtree.
    return quantile(
        node.right,
        k - num_elements_in_left_tree,
        i - left_before_i,
        j - left_upto_j,
    )
|
||||||||
|
||||||||
def range_counting(node: Optional["Node"], i: int, j: int, x: int, y: int) -> int:
    """
    Return the number of elements with a value in [x, y] in the interval [i, j]
    of the list.

    The index interval is first clipped to the indices that exist in the node,
    so an out-of-range i or j never inflates the count.

    :param node: root of the (sub)tree to query
    :param i: first index (inclusive) of the interval
    :param j: last index (inclusive) of the interval
    :param x: smallest value (inclusive) that is counted
    :param y: largest value (inclusive) that is counted

    >>> root = build_tree([2, 1, 4, 5, 6, 8, 9, 1, 2, 6, 7, 4, 2, 6, 5, 3, 2, 7])
    >>> range_counting(root, 1, 10, 3, 7)
    5
    >>> range_counting(root, 2, 2, 1, 4)
    1
    >>> range_counting(root, 0, 17, 1, 100)
    18
    >>> range_counting(root, 0, 100, 1, 100)
    18
    >>> range_counting(root, 1, 0, 1, 100)
    0
    >>> range_counting(root, 0, 17, 100, 1)
    0
    """
    if node is None:
        return 0

    # Clip the window to the node: map_left has one slot per element. The
    # "fully covered" shortcut below returns j - i + 1, so i and j must be
    # real indices.
    i = max(i, 0)
    j = min(j, len(node.map_left) - 1)

    if i > j or x > y:
        return 0

    # The node's value range and [x, y] do not overlap.
    if node.minn > y or node.maxx < x:
        return 0

    # Every value in the node lies in [x, y]: the whole window counts.
    if x <= node.minn and node.maxx <= y:
        return j - i + 1

    # Elements before index i / up to index j that live in the left subtree.
    left_before_i = node.map_left[i - 1] if i else 0
    left_upto_j = node.map_left[j]

    left = range_counting(node.left, left_before_i, left_upto_j - 1, x, y)
    right = range_counting(node.right, i - left_before_i, j - left_upto_j, x, y)

    return left + right
|
||||||||
|
||||||||
def main() -> None:
    """
    Doctest-only entry point: the body is empty, the examples below exercise
    the module's query functions on one sample list.

    >>> arr = [2, 1, 4, 5, 6, 8, 9, 1, 2, 6, 7, 4, 2, 6, 5, 3, 2, 7]
    >>> root = build_tree(arr)
    >>> root
    min_value: 1, max_value: 9
    >>> rank(root, 6, 3, 13)
    3
    >>> rank(root, 2, 0, 17)
    4
    >>> rank(root, 9, 2, 2)
    0
    >>> quantile(root, 2, 2, 5)
    6
    >>> quantile(root, 4, 2, 13)
    4
    >>> quantile(root, 0, 6, 6)
    9
    >>> quantile(root, 4, 2, 5)
    -1
    >>> range_counting(root, 1, 10, 3, 7)
    5
    >>> range_counting(root, 2, 2, 1, 4)
    1
    >>> range_counting(root, 0, 17, 1, 100)
    18
    >>> range_counting(root, 1, 0, 1, 100)
    0
    >>> range_counting(root, 0, 17, 100, 1)
    0
    """
cclauss marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
|
||||||||
|
||||||||
# Run every doctest in this module when the file is executed as a script.
if __name__ == "__main__":
    import doctest

    doctest.testmod()
Uh oh!
There was an error while loading. Please reload this page.