Skip to content

Commit bd2b184

Browse files
committed
New data structure: IntMap
IntMap[Key] is conceptually like a Map[Key, Int]. For now, only `apply` and `update` are supported but `remove` is not supported. The map is implemented by means of a perfect hashing scheme that is itself implemented in the parent class `PerfectHashing`. This maps keys to indices in a dense interval. Once we have the index, we can associate keys and values that are simply stored in arrays.
1 parent 785fea9 commit bd2b184

File tree

3 files changed

+264
-0
lines changed

3 files changed

+264
-0
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package dotty.tools.dotc.util
2+
3+
/** A dense map from some `Key` type to `Int`. Dense means: All keys and values
4+
* are stored in arrays from 0 up to the size of the map. Keys and values
5+
* can be obtained by index using `key(index)` and `value(index)`. Values
6+
* can also be stored using `setValue(index, value)`.
7+
*
8+
* It uses some privileged protected access to the internals of `PerfectHashing`.
9+
* @param initialCapacity Indicates the initial number of slots in the hash table.
10+
* The actual number of slots is always a power of 2, so the
11+
* initial size of the table will be the smallest power of two
12+
* that is equal or greater than the given `initialCapacity`.
13+
* Minimum value is 4.
14+
* @param capacityMultiple The minimum multiple of capacity relative to used elements.
15+
* The hash table will be re-sized once the number of elements
16+
* multiplied by capacityMultiple exceeds the current size of the hash table.
17+
* However, a table of size up to DenseLimit will be re-sized only
18+
* once the number of elements reaches the table's size.
19+
*/
20+
final class IntMap[Key](initialCapacity: Int = 8, capacityMultiple: Int = 2)
extends PerfectHashing[Key](initialCapacity, capacityMultiple):

  // The value stored for the key with index `i` lives in `values(i)`.
  // Keep the `= _` initializer: the superclass constructor calls `clear()`, which
  // calls the overridden `allocate` below and assigns this field before this class's
  // own initializer runs. The code relies on `= _` not resetting that assignment.
  private var values: Array[Int] = _

  /** The result of `apply` for a key that is not in the map. */
  def default: Int = -1

  /** Allocate the key storage in the superclass and a value array of the same capacity. */
  protected override def allocate(capacity: Int): Unit =
    super.allocate(capacity)
    values = new Array[Int](capacity)

  /** The value associated with key `k`, or else `default`. */
  def apply(k: Key): Int =
    val slot = index(k)
    if slot >= 0 then values(slot) else default

  /** Associate key `k` with value `v`, adding `k` to the map if it is not yet present. */
  def update(k: Key, v: Int): Unit =
    // `add` may grow the map and thereby replace `values`, so it has to finish
    // before `values` is read. Do not fold these two statements into one.
    val slot = add(k)
    values(slot) = v

  /** Grow the backing arrays, carrying the existing values over to the new value array. */
  protected override def growTable(): Unit =
    val previous = values
    super.growTable() // re-allocates through `allocate`, which replaces `values`
    Array.copy(previous, 0, values, 0, previous.length)

  /** The stored values, in index order. */
  def valuesIterator: Iterator[Int] = values.iterator.take(size)

  /** All key/value pairs, in index order. */
  def iterator: Iterator[(Key, Int)] = keysIterator.zip(valuesIterator)

  /** The value stored at index `i` */
  def value(i: Int): Int = values(i)

  /** Change the value stored at index `i` to `v` */
  def setValue(i: Int, v: Int): Unit = values(i) = v

  override def toString: String =
    val entries = for (k, v) <- iterator yield s"$k -> $v"
    entries.mkString("IntMap(", ", ", ")")
end IntMap
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
package dotty.tools.dotc.util
2+
3+
object PerfectHashing:

  /** The largest capacity for which dense packing is used.
   *  A map whose capacity is at most `DenseLimit` finds keys by scanning its key array
   *  linearly; a map with a larger capacity uses a hash table instead.
   */
  inline val DenseLimit = 16

/** A map that maps keys to unique integers in a dense interval starting at 0.
 *  Keys are numbered in the order in which they are added.
 *
 *  @param  initialCapacity   Indicates the initial number of slots in the hash table.
 *                            The actual number of slots is always a power of 2, so the
 *                            initial size of the table will be the smallest power of two
 *                            that is equal or greater than the given `initialCapacity`.
 *                            Minimum value is 4.
 *  @param  capacityMultiple  The minimum multiple of capacity relative to used elements.
 *                            The hash table will be re-sized once the number of elements
 *                            multiplied by capacityMultiple exceeds the current size of the hash table.
 *                            However, a table of size up to DenseLimit will be re-sized only
 *                            once the number of elements reaches the table's size.
 */
class PerfectHashing[Key](initialCapacity: Int = 8, capacityMultiple: Int = 2):
  import PerfectHashing.DenseLimit

  /** The number of keys stored so far; also the index the next new key will receive. */
  private var used: Int = _

  /** Hash table with linear probing. A slot holds `index + 1` of a key, 0 marks a free slot.
   *  Only allocated while the map is not dense, `null` otherwise.
   */
  private var table: Array[Int] = _

  /** The keys, stored at their index. Only the first `used` elements are meaningful. */
  private var keys: Array[AnyRef] = _

  clear()

  /** Allocate fresh, empty backing storage for `capacity` keys.
   *  A hash table is only allocated if `capacity` exceeds `DenseLimit`.
   */
  protected def allocate(capacity: Int): Unit =
    keys = new Array[AnyRef](capacity)
    if capacity > DenseLimit then
      table = new Array[Int](capacity * roundToPower(capacityMultiple))
    else
      // Dense maps never consult `table`. Release a table left over from a previous,
      // larger configuration (e.g. after `clear()`) instead of keeping it reachable.
      table = null

  /** For positive `n`: `n` itself if it is a power of two, else the next larger power of two. */
  private def roundToPower(n: Int): Int =
    if Integer.bitCount(n) == 1 then n
    else 1 << (32 - Integer.numberOfLeadingZeros(n))

  /** Remove keys from this map and set back to initial configuration */
  def clear(): Unit =
    used = 0
    allocate(roundToPower(initialCapacity max 4))

  /** The number of keys */
  final def size: Int = used

  /** The number of keys that can be stored without growing the tables */
  final def capacity: Int = keys.length

  /** True if keys are looked up by linear scan rather than through the hash table. */
  private final def isDense: Boolean = capacity <= DenseLimit

  /** Hashcode, by default `x.hashCode`, can be overridden */
  protected def hash(x: Key): Int = x.hashCode

  /** Equality, by default `equals`, can be overridden */
  protected def isEqual(x: Key, y: Key): Boolean = x.equals(y)

  /** Does the key stored at index `entry` equal `k`? */
  private def matches(entry: Int, k: Key): Boolean = isEqual(key(entry), k)

  // Table positions wrap around using a bit mask, which requires `table.length`
  // to be a power of two.
  private def tableIndex(x: Int): Int = x & (table.length - 1)
  private def firstIndex(k: Key): Int = tableIndex(hash(k))
  private def nextIndex(idx: Int): Int = tableIndex(idx + 1)

  /** The key at index `idx` */
  def key(idx: Int): Key = keys(idx).asInstanceOf[Key]

  private def setKey(e: Int, k: Key): Unit = keys(e) = k.asInstanceOf[AnyRef]

  // Table slots store `index + 1`, so that the zero of a freshly allocated array reads
  // back as entry -1, i.e. "free".
  private def entry(idx: Int): Int = table(idx) - 1
  private def setEntry(idx: Int, entry: Int): Unit = table(idx) = entry + 1

  /** An index `idx` such that `key(idx) == k`, or -1 if no such index exists */
  def index(k: Key): Int =
    if isDense then
      var e = 0
      while e < used do
        if matches(e, k) then return e
        e += 1
      -1
    else
      // Probe linearly from the key's home slot until the key or a free slot is found.
      // On a free slot `e` is -1, which is also the "not found" result.
      var idx = firstIndex(k)
      var e = entry(idx)
      while e >= 0 && !matches(e, k) do
        idx = nextIndex(idx)
        e = entry(idx)
      e

  /** An index `idx` such that key(idx) == k.
   *  If no such index exists, create an entry with an index one
   *  larger than the previous one.
   */
  def add(k: Key): Int =
    if isDense then
      var e = 0
      while e < used do
        if matches(e, k) then return e
        e += 1
    else
      var idx = firstIndex(k)
      var e = entry(idx)
      while e >= 0 do
        if matches(e, k) then return e
        idx = nextIndex(idx)
        e = entry(idx)
      // `idx` now denotes the free slot that ended the probe; claim it for the new key.
      setEntry(idx, used)
    end if
    // Reached only if `k` was not present: store it under the next free index.
    setKey(used, k)
    used = used + 1
    // Grow eagerly so that there is always room for the next key.
    if used == capacity then growTable()
    used - 1

  /** Enter the indices of all stored keys into the (empty) hash table. */
  private def rehash(): Unit =
    var e = 0
    while e < used do
      var idx = firstIndex(key(e))
      while entry(idx) >= 0 do idx = nextIndex(idx)
      setEntry(idx, e)
      e += 1

  /** Grow backing arrays */
  protected def growTable(): Unit =
    val oldKeys = keys
    allocate(capacity * 2)
    Array.copy(oldKeys, 0, keys, 0, oldKeys.length)
    // `allocate` produced an empty hash table, so all keys have to be entered again.
    if !isDense then rehash()

  /** The keys of this map, in index order. */
  def keysIterator: Iterator[Key] =
    keys.iterator.take(used).asInstanceOf[Iterator[Key]]
end PerfectHashing
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/** A source of values of type `T`. Every call to `generate` asks for another value. */
trait Generator[+T]:
  outer =>

  /** Produce a value. */
  def generate: T

  /** A generator that yields the values of this generator transformed by `f`. */
  def map[S](f: T => S): Generator[S] = new Generator[S]:
    def generate: S = f(outer.generate)

  /** A generator that draws a value from this generator, turns it into a generator
   *  with `f`, and yields a value drawn from that one.
   */
  def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S]:
    def generate: S =
      val next = f(outer.generate)
      next.generate
8+
9+
object Generator:
  /** Exclusive upper bound of the numbers produced by `nums`. */
  val NumLimit = 300

  /** Number of random operations executed by the test. */
  val Iterations = 10000

  /** Random `Int`s drawn from an unseeded `java.util.Random`. */
  given integers as Generator[Int]:
    val rand = new java.util.Random
    def generate = rand.nextInt()

  /** `true` whenever the underlying random integer is positive. */
  given booleans as Generator[Boolean] =
    integers.map(_ > 0)

  /** Non-negative random integers below `end`. */
  def range(end: Int): Generator[Int] =
    integers.map(x => math.abs(x % end))

  /** The kinds of operation the test can perform on a map. */
  enum Op:
    case Lookup, Update, Remove
  export Op._

  /** Random operations: out of ten draws, four are lookups, four updates and two removals. */
  given ops as Generator[Op] =
    range(10).map { n =>
      if n < 4 then Lookup
      else if n < 8 then Update
      else Remove
    }

  /** Random boxed integers in `0 until NumLimit`, used as map keys. */
  val nums: Generator[Integer] = range(NumLimit).map(n => Integer(n))
35+
36+
/** Randomized test: applies the same random operations to an `IntMap` and to a
 *  `mutable.HashMap` and asserts that both agree after every step.
 */
@main def Test =
  import Generator._

  // `map1` is the map under test, `map2` the reference implementation.
  val map1 = dotty.tools.dotc.util.IntMap[Integer]()
  val map2 = scala.collection.mutable.HashMap[Integer, Integer]()

  /** Turn a negative lookup result of `map1` into `None`, anything else into `Some`. */
  def toOption(n: Int): Option[Integer] =
    if n >= 0 then Some(n) else None

  /** Assert that both maps hold exactly the same bindings. */
  def checkSame() =
    assert(map1.size == map2.size)
    map1.iterator.foreach { case (k, v) =>
      assert(map2.get(k) == Some(v), s"difference: $map1 / $map2, k = $k, v1 = $v, get2 = ${map2.get(k)}")
    }
    map2.iterator.foreach { case (k, v) =>
      assert(toOption(map1(k)) == Some(v))
    }

  /** Assert that looking up `num` gives the same answer in both maps. */
  def lookupTest(num: Integer) =
    val expected = map2.get(num)
    val actual = toOption(map1(num))
    assert(actual == expected)

  /** Bind `num` to itself in both maps, then compare the complete contents. */
  def updateTest(num: Integer) =
    lookupTest(num)
    map1.update(num, num)
    map2.update(num, num)
    checkSame()

  // Removal is currently a no-op in this test. The intended body was:
  //   map1.remove(num)
  //   map2.remove(num)
  //   checkSame()
  def removeTest(num: Integer) = ()

  (0 until Iterations).foreach { _ =>
    // Draw the key before the operation, as both come from the same random source.
    val num = nums.generate
    val op = Generator.ops.generate
    op match
      case Lookup => lookupTest(num)
      case Update => updateTest(num)
      case Remove => removeTest(num)
  }

0 commit comments

Comments
 (0)