Skip to content

Commit 8a7cddb

Browse files
committed
Fix the uniqueItems implementation to accommodate non-primitive values
JavaScript Sets do not work in the same fashion as HashSets do in Java, in that object equivalence is determined by reference, not by value. In order to work around this, this commit adds a duplicate finder that works on the same principals as a value-based hashtable: it creates a hash of the object's value to bucket objects and then uses deep equality to determine if a hash collision is actually a duplicate value.
1 parent eb9eda0 commit 8a7cddb

File tree

8 files changed

+259
-13
lines changed

8 files changed

+259
-13
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
dist/

smithy-typescript-ssdk-libs/server-apigateway/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
"postbuild": "rimraf dist/types/ts3.4 && downlevel-dts dist/types dist/types/ts3.4",
1212
"test": "jest --passWithNoTests",
1313
"clean": "rimraf dist",
14-
"lint": "yarn run eslint -c ../.eslintrc.js **/src/**/*.ts",
15-
"format": "prettier --config ../prettier.config.js --write **/*.{ts,js,md,json}"
14+
"lint": "yarn run eslint -c ../.eslintrc.js \"src/**/*.ts\"",
15+
"format": "prettier --config ../prettier.config.js --ignore-path ../.prettierignore --write \"**/*.{ts,md,json}\""
1616
},
1717
"repository": {
1818
"type": "git",

smithy-typescript-ssdk-libs/server-common/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
"postbuild": "rimraf dist/types/ts3.4 && downlevel-dts dist/types dist/types/ts3.4",
1212
"test": "jest",
1313
"clean": "rimraf dist",
14-
"format": "prettier --config ../prettier.config.js --write **/*.{ts,js,md,json}"
14+
"lint": "yarn run eslint -c ../.eslintrc.js \"src/**/*.ts\"",
15+
"format": "prettier --config ../prettier.config.js --ignore-path ../.prettierignore --write \"**/*.{ts,md,json}\""
1516
},
1617
"repository": {
1718
"type": "git",

smithy-typescript-ssdk-libs/server-common/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ export * as httpbinding from "./httpbinding";
1717
export * from "./accept";
1818
export * from "./errors";
1919
export * from "./validation";
20+
export * from "./unique";
2021

2122
import { HttpRequest, HttpResponse } from "@aws-sdk/protocol-http";
2223
import { SmithyException } from "@aws-sdk/smithy-client";
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/*
2+
* Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License").
5+
* You may not use this file except in compliance with the License.
6+
* A copy of the License is located at
7+
*
8+
* http://aws.amazon.com/apache2.0
9+
*
10+
* or in the "license" file accompanying this file. This file is distributed
11+
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
* express or implied. See the License for the specific language governing
13+
* permissions and limitations under the License.
14+
*/
15+
16+
import * as util from "util";
17+
18+
import { findDuplicates, Input } from "./unique";
19+
20+
describe("findDuplicates", () => {
21+
describe("finds duplicates in", () => {
22+
it("strings", () => {
23+
expect(findDuplicates(["a", "b", "c", "a", "b", "a"])).toEqual(["a", "b"]);
24+
expect(findDuplicates(["x", "y", "z", "a", "b", "c", "a", "b"])).toEqual(["a", "b"]);
25+
});
26+
it("numbers", () => {
27+
expect(findDuplicates([1, 2, 3, 4, 1, 2])).toEqual([1, 2]);
28+
});
29+
it("booleans", () => {
30+
expect(findDuplicates([true, false, true])).toEqual([true]);
31+
});
32+
it("arrays", () => {
33+
expect(
34+
findDuplicates([
35+
[5, 6],
36+
[2, 3],
37+
[1, 2],
38+
[3, 4],
39+
[1, 2],
40+
])
41+
).toEqual([[1, 2]]);
42+
});
43+
it("Dates", () => {
44+
expect(findDuplicates([new Date(1000), new Date(2000), new Date(1000)])).toEqual([new Date(1000)]);
45+
});
46+
it("Blobs", () => {
47+
expect(findDuplicates([Uint8Array.of(1, 2, 3), Uint8Array.of(4, 5, 6), Uint8Array.of(4, 5, 6)])).toEqual([
48+
Uint8Array.of(4, 5, 6),
49+
]);
50+
});
51+
it("objects", () => {
52+
expect(findDuplicates([{ a: "b" }, { b: [1, 2, 3] }, { a: "b" }, { a: "b" }])).toEqual([{ a: "b" }]);
53+
expect(findDuplicates([{ a: "b" }, { b: 1, c: 2 }, { c: 2, b: 1 }])).toEqual([{ b: 1, c: 2 }]);
54+
});
55+
it("deeply nested objects", () => {
56+
expect(
57+
findDuplicates([
58+
{ a: { b: { c: [1, { d: 2 }, [3]] } } },
59+
2,
60+
[3, 4],
61+
{ b: "c" },
62+
{ a: { b: { c: [1, { d: 2 }, [3]] } } },
63+
])
64+
).toEqual([{ a: { b: { c: [1, { d: 2 }, [3]] } } }]);
65+
});
66+
});
67+
describe("correctly does not find duplicates in", () => {
68+
it("strings", () => {
69+
expect(findDuplicates(["a", "b", "c"])).toEqual([]);
70+
});
71+
it("numbers", () => {
72+
expect(findDuplicates([1, 2, 3, 4])).toEqual([]);
73+
expect(findDuplicates([1, 2, "1", "2"])).toEqual([]);
74+
});
75+
it("booleans", () => {
76+
expect(findDuplicates([true, false])).toEqual([]);
77+
expect(findDuplicates([true, false, "true", "false"])).toEqual([]);
78+
});
79+
it("arrays", () => {
80+
expect(
81+
findDuplicates([
82+
[1, 2],
83+
[2, 3],
84+
[3, 4],
85+
])
86+
).toEqual([]);
87+
expect(
88+
findDuplicates([
89+
[1, 2],
90+
["1", "2"],
91+
])
92+
).toEqual([]);
93+
});
94+
it("objects", () => {
95+
expect(findDuplicates([{ a: "b" }, { b: [1, 2, 3] }])).toEqual([]);
96+
expect(findDuplicates([{ a: 1 }, { a: "1" }])).toEqual([]);
97+
});
98+
it("Dates", () => {
99+
expect(findDuplicates([new Date(100), new Date(200), new Date(101)])).toEqual([]);
100+
});
101+
it("blobs", () => {
102+
expect(findDuplicates([Uint8Array.of(1, 2, 3), Uint8Array.of(1, 2)])).toEqual([]);
103+
});
104+
});
105+
106+
// This is relatively slow and may be flaky if the input size is tuned to let it run reasonably fast
107+
it.skip("is faster than the naive implementation", () => {
108+
const input: Input[] = [true, false, 1, 2, 3, 4, 5, 6];
109+
for (let i = 0; i < 10_000; i++) {
110+
input.push({ a: [1, 2, 3, i], b: { nested: [true] } });
111+
}
112+
113+
const uniqueStart = Date.now();
114+
expect(findDuplicates(input)).toEqual([]);
115+
const uniqueEnd = Date.now();
116+
const uniqueDuration = (uniqueEnd - uniqueStart) / 1000;
117+
118+
console.log(`findDuplicates finished in ${uniqueDuration} seconds`);
119+
120+
const naiveStart = Date.now();
121+
expect(naivefindDuplicates(input)).toEqual([]);
122+
const naiveEnd = Date.now();
123+
const naiveDuration = (naiveEnd - naiveStart) / 1000;
124+
125+
console.log(`naivefindDuplicates finished in ${naiveDuration} seconds`);
126+
127+
expect(naiveDuration).toBeGreaterThan(uniqueDuration);
128+
});
129+
130+
// This isn't even correct! It should be even slower, it just returns the first duplicate!
131+
function naivefindDuplicates(input: Array<Input>): Input | undefined {
132+
for (let i = 0; i < input.length - 1; i++) {
133+
for (let j = i + 1; j < input.length; j++) {
134+
if (util.isDeepStrictEqual(input[i], input[j])) {
135+
return [input[i]];
136+
}
137+
}
138+
}
139+
return [];
140+
}
141+
});
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
* Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License").
5+
* You may not use this file except in compliance with the License.
6+
* A copy of the License is located at
7+
*
8+
* http://aws.amazon.com/apache2.0
9+
*
10+
* or in the "license" file accompanying this file. This file is distributed
11+
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
* express or implied. See the License for the specific language governing
13+
* permissions and limitations under the License.
14+
*/
15+
16+
import { createHash } from "crypto";
17+
import * as util from "util";
18+
19+
/**
20+
* A shortcut for JSON and Smithy primitives, as well as documents and Smithy-
21+
* modeled structures composed of those primitives
22+
*/
23+
export type Input = { [key: string]: Input } | Array<Input> | Date | Uint8Array | string | number | boolean | null;
24+
25+
/**
26+
* Returns an array of duplicated values in the input. This is equivalent to using
27+
* {@link util#isDeepStrictEqual} to compare every member of the input to all the
28+
* other members, but with an optimization to make the runtime complexity O(n)
29+
* instead of O(n^2).
30+
*
31+
* @param input an array of {@link Input}
32+
* @return an array containing one instance of every duplicated member of the input,
33+
* or an empty array if there are no duplicates
34+
*/
35+
export const findDuplicates = (input: Array<Input>): Array<Input> => {
36+
const potentialCollisions: { [hash: string]: { value: Input; alreadyFound: boolean }[] } = {};
37+
const collisions: Array<Input> = [];
38+
39+
for (const value of input) {
40+
const valueHash = hash(value);
41+
if (!potentialCollisions.hasOwnProperty(valueHash)) {
42+
potentialCollisions[valueHash] = [{ value: value, alreadyFound: false }];
43+
} else {
44+
let duplicateFound = false;
45+
for (const potentialCollision of potentialCollisions[valueHash]) {
46+
if (util.isDeepStrictEqual(value, potentialCollision.value)) {
47+
duplicateFound = true;
48+
if (!potentialCollision.alreadyFound) {
49+
collisions.push(value);
50+
potentialCollision.alreadyFound = true;
51+
}
52+
}
53+
}
54+
if (!duplicateFound) {
55+
potentialCollisions[valueHash].push({ value: value, alreadyFound: false });
56+
}
57+
}
58+
}
59+
return collisions;
60+
};
61+
62+
const hash = (input: Input): string => {
63+
return createHash("sha256").update(canonicalize(input)).digest("base64");
64+
};
65+
66+
/**
67+
* Since node's hash functions operate on strings or buffers, we need a canonical format for
68+
* our objects in order to hash them correctly. This function turns them into string representations
69+
* where the types are encoded in order to avoid ambiguity, for instance, between the string "1" and
70+
* the number 1. This method sorts object keys lexicographically in order to maintain consistency.
71+
*
72+
* This doesn't just call JSON.stringify because we want to have firm control over the ordering of map
73+
* keys and the handling of blobs and dates
74+
*
75+
* @param input a JSON-like object
76+
* @return a canonical string representation
77+
*/
78+
const canonicalize = (input: Input): string => {
79+
if (input === null) {
80+
return "null()";
81+
}
82+
if (typeof input === "string" || typeof input === "number" || typeof input === "boolean") {
83+
return `${typeof input}(${input.toString()})`;
84+
}
85+
if (Array.isArray(input)) {
86+
return "array(" + input.map((i) => canonicalize(i)).join(",") + ")";
87+
}
88+
if (input instanceof Date) {
89+
return "date(" + input.getTime() + ")";
90+
}
91+
if (input instanceof Uint8Array) {
92+
// hashing the blob just to avoid allocating another base64 copy of its data
93+
return "blob(" + createHash("sha256").update(input).digest("base64") + ")";
94+
}
95+
96+
const contents: Array<string> = [];
97+
for (const key of Object.keys(input).slice().sort()) {
98+
contents.push("key(" + key + ")->value(" + canonicalize(input[key]) + ")");
99+
}
100+
return "map(" + contents.join(",") + ")";
101+
};

smithy-typescript-ssdk-libs/server-common/src/validation/validators.spec.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,4 +231,12 @@ describe("uniqueItems", () => {
231231
path: "aField",
232232
});
233233
});
234+
describe("supports objects", () => {
235+
expect(validator.validate([{ a: 1 }, { b: 2 }], "aField")).toBeNull();
236+
expect(validator.validate([{ a: 1 }, { b: 2 }, { a: 1 }], "aField")).toEqual({
237+
constraintType: "uniqueItems",
238+
failureValue: [{ a: 1 }],
239+
path: "aField",
240+
});
241+
});
234242
});

smithy-typescript-ssdk-libs/server-common/src/validation/validators.ts

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import { RE2 } from "re2-wasm";
1717

18+
import { findDuplicates } from "../unique";
1819
import {
1920
EnumValidationFailure,
2021
LengthValidationFailure,
@@ -300,17 +301,9 @@ export class UniqueItemsValidator implements SingleConstraintValidator<Array<any
300301
return null;
301302
}
302303

303-
const repeats = new Set<any>();
304-
const uniqueValues = new Set<any>();
305-
for (const i of input) {
306-
if (uniqueValues.has(i)) {
307-
repeats.add(i);
308-
} else {
309-
uniqueValues.add(i);
310-
}
311-
}
304+
const repeats = findDuplicates(input);
312305

313-
if (repeats.size > 0) {
306+
if (repeats.length > 0) {
314307
return {
315308
constraintType: "uniqueItems",
316309
path: path,

0 commit comments

Comments
 (0)