Skip to content

Commit c568610

Browse files
committed
Fix the uniqueItems implementation to accommodate non-primitive values
JavaScript Sets do not work in the same fashion as HashSets do in Java, in that object equivalence is determined by reference, not by value. In order to work around this, this commit adds a duplicate finder that works on the same principals as a value-based hashtable: it creates a hash of the object's value to bucket objects and then uses deep equality to determine if a hash collision is actually a duplicate value.
1 parent 41d0317 commit c568610

File tree

8 files changed

+254
-13
lines changed

8 files changed

+254
-13
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
dist/

smithy-typescript-ssdk-libs/server-apigateway/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
"postbuild": "rimraf dist/types/ts3.4 && downlevel-dts dist/types dist/types/ts3.4",
1212
"test": "jest --passWithNoTests",
1313
"clean": "rimraf dist",
14-
"lint": "yarn run eslint -c ../.eslintrc.js **/src/**/*.ts",
15-
"format": "prettier --config ../prettier.config.js --write **/*.{ts,js,md,json}"
14+
"lint": "yarn run eslint -c ../.eslintrc.js \"src/**/*.ts\"",
15+
"format": "prettier --config ../prettier.config.js --ignore-path ../.prettierignore --write \"**/*.{ts,md,json}\""
1616
},
1717
"repository": {
1818
"type": "git",

smithy-typescript-ssdk-libs/server-common/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
"postbuild": "rimraf dist/types/ts3.4 && downlevel-dts dist/types dist/types/ts3.4",
1212
"test": "jest",
1313
"clean": "rimraf dist",
14-
"format": "prettier --config ../prettier.config.js --write **/*.{ts,js,md,json}"
14+
"lint": "yarn run eslint -c ../.eslintrc.js \"src/**/*.ts\"",
15+
"format": "prettier --config ../prettier.config.js --ignore-path ../.prettierignore --write \"**/*.{ts,md,json}\""
1516
},
1617
"repository": {
1718
"type": "git",

smithy-typescript-ssdk-libs/server-common/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ export * as httpbinding from "./httpbinding";
1717
export * from "./accept";
1818
export * from "./errors";
1919
export * from "./validation";
20+
export * from "./unique";
2021

2122
import { HttpRequest, HttpResponse } from "@aws-sdk/protocol-http";
2223
import { SmithyException } from "@aws-sdk/smithy-client";
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/*
2+
* Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License").
5+
* You may not use this file except in compliance with the License.
6+
* A copy of the License is located at
7+
*
8+
* http://aws.amazon.com/apache2.0
9+
*
10+
* or in the "license" file accompanying this file. This file is distributed
11+
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
* express or implied. See the License for the specific language governing
13+
* permissions and limitations under the License.
14+
*/
15+
16+
import * as util from "util";
17+
18+
import { findDuplicates, Input } from "./unique";
19+
20+
describe("findDuplicates", () => {
21+
describe("finds duplicates in", () => {
22+
it("strings", () => {
23+
expect(findDuplicates(["a", "b", "c", "a", "b", "a"])).toEqual(["a", "b"]);
24+
expect(findDuplicates(["x", "y", "z", "a", "b", "c", "a", "b"])).toEqual(["a", "b"]);
25+
});
26+
it("numbers", () => {
27+
expect(findDuplicates([1, 2, 3, 4, 1, 2])).toEqual([1, 2]);
28+
});
29+
it("booleans", () => {
30+
expect(findDuplicates([true, false, true])).toEqual([true]);
31+
});
32+
it("arrays", () => {
33+
expect(
34+
findDuplicates([
35+
[5, 6],
36+
[2, 3],
37+
[1, 2],
38+
[3, 4],
39+
[1, 2],
40+
])
41+
).toEqual([[1, 2]]);
42+
});
43+
it("Dates", () => {
44+
expect(findDuplicates([new Date(1000), new Date(2000), new Date(1000)])).toEqual([new Date(1000)]);
45+
});
46+
it("Blobs", () => {
47+
expect(findDuplicates([Uint8Array.of(1, 2, 3), Uint8Array.of(4, 5, 6), Uint8Array.of(4, 5, 6)])).toEqual([
48+
Uint8Array.of(4, 5, 6),
49+
]);
50+
});
51+
it("objects", () => {
52+
expect(findDuplicates([{ a: "b" }, { b: [1, 2, 3] }, { a: "b" }, { a: "b" }])).toEqual([{ a: "b" }]);
53+
expect(findDuplicates([{ a: "b" }, { b: 1, c: 2 }, { c: 2, b: 1 }])).toEqual([{ b: 1, c: 2 }]);
54+
});
55+
it("deeply nested objects", () => {
56+
expect(
57+
findDuplicates([
58+
{ a: { b: { c: [1, { d: 2 }, [3]] } } },
59+
2,
60+
[3, 4],
61+
{ b: "c" },
62+
{ a: { b: { c: [1, { d: 2 }, [3]] } } },
63+
])
64+
).toEqual([{ a: { b: { c: [1, { d: 2 }, [3]] } } }]);
65+
});
66+
});
67+
describe("correctly does not find duplicates in", () => {
68+
it("strings", () => {
69+
expect(findDuplicates(["a", "b", "c"])).toEqual([]);
70+
});
71+
it("numbers", () => {
72+
expect(findDuplicates([1, 2, 3, 4])).toEqual([]);
73+
expect(findDuplicates([1, 2, "1", "2"])).toEqual([]);
74+
});
75+
it("booleans", () => {
76+
expect(findDuplicates([true, false])).toEqual([]);
77+
expect(findDuplicates([true, false, "true", "false"])).toEqual([]);
78+
});
79+
it("arrays", () => {
80+
expect(
81+
findDuplicates([
82+
[1, 2],
83+
[2, 3],
84+
[3, 4],
85+
])
86+
).toEqual([]);
87+
expect(
88+
findDuplicates([
89+
[1, 2],
90+
["1", "2"],
91+
])
92+
).toEqual([]);
93+
});
94+
it("objects", () => {
95+
expect(findDuplicates([{ a: "b" }, { b: [1, 2, 3] }])).toEqual([]);
96+
expect(findDuplicates([{ a: 1 }, { a: "1" }])).toEqual([]);
97+
});
98+
it("Dates", () => {
99+
expect(findDuplicates([new Date(100), new Date(200), new Date(101)])).toEqual([]);
100+
});
101+
it("blobs", () => {
102+
expect(findDuplicates([Uint8Array.of(1, 2, 3), Uint8Array.of(1, 2)])).toEqual([]);
103+
});
104+
});
105+
106+
// This is relatively slow and may be flaky if the input size is tuned to let it run reasonably fast
107+
it.skip("is faster than the naive implementation", () => {
108+
const input: Input[] = [true, false, 1, 2, 3, 4, 5, 6];
109+
for (let i = 0; i < 10_000; i++) {
110+
input.push({ a: [1, 2, 3, i], b: { nested: [true] } });
111+
}
112+
113+
const uniqueStart = Date.now();
114+
expect(findDuplicates(input)).toEqual([]);
115+
const uniqueEnd = Date.now();
116+
const uniqueDuration = (uniqueEnd - uniqueStart) / 1000;
117+
118+
console.log(`findDuplicates finished in ${uniqueDuration} seconds`);
119+
120+
const naiveStart = Date.now();
121+
expect(naivefindDuplicates(input)).toEqual([]);
122+
const naiveEnd = Date.now();
123+
const naiveDuration = (naiveEnd - naiveStart) / 1000;
124+
125+
console.log(`naivefindDuplicates finished in ${naiveDuration} seconds`);
126+
127+
expect(naiveDuration).toBeGreaterThan(uniqueDuration);
128+
});
129+
130+
// This isn't even correct! It should be even slower, it just returns the first duplicate!
131+
function naivefindDuplicates(input: Array<Input>): Input | undefined {
132+
for (let i = 0; i < input.length - 1; i++) {
133+
for (let j = i + 1; j < input.length; j++) {
134+
if (util.isDeepStrictEqual(input[i], input[j])) {
135+
return [input[i]];
136+
}
137+
}
138+
}
139+
return [];
140+
}
141+
});
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
* Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License").
5+
* You may not use this file except in compliance with the License.
6+
* A copy of the License is located at
7+
*
8+
* http://aws.amazon.com/apache2.0
9+
*
10+
* or in the "license" file accompanying this file. This file is distributed
11+
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
* express or implied. See the License for the specific language governing
13+
* permissions and limitations under the License.
14+
*/
15+
16+
import { createHash } from "crypto";
17+
import * as util from "util";
18+
19+
/**
20+
* A shortcut for document-like objects or Smithy structures.
21+
*/
22+
export type Input = { [key: string]: Input } | Array<Input> | Date | Uint8Array | string | number | boolean | null;
23+
24+
/**
25+
* Returns an array of duplicated values in the input. Unlike equals, duplicate
26+
* is determined by semantic equivalence (two objects representing the same
27+
* data, not just two objects at the same location in memory).
28+
*
29+
* @param input an array of JSON- or Smithy-modeled objects.
30+
*/
31+
export const findDuplicates = (input: Array<Input>): Array<Input> => {
32+
const potentialCollisions: { [hash: string]: { value: Input; alreadyFound: boolean }[] } = {};
33+
const collisions: Array<Input> = [];
34+
35+
for (const value of input) {
36+
const valueHash = hash(value);
37+
if (!potentialCollisions.hasOwnProperty(valueHash)) {
38+
potentialCollisions[valueHash] = [{ value: value, alreadyFound: false }];
39+
} else {
40+
let duplicateFound = false;
41+
for (const potentialCollision of potentialCollisions[valueHash]) {
42+
if (util.isDeepStrictEqual(value, potentialCollision.value)) {
43+
duplicateFound = true;
44+
if (!potentialCollision.alreadyFound) {
45+
collisions.push(value);
46+
}
47+
potentialCollision.alreadyFound = true;
48+
}
49+
}
50+
if (!duplicateFound) {
51+
potentialCollisions[valueHash].push({ value: value, alreadyFound: false });
52+
}
53+
}
54+
}
55+
return collisions;
56+
};
57+
58+
const hash = (input: Input): string => {
59+
return createHash("sha256").update(canonicalize(input)).digest("base64");
60+
};
61+
62+
/**
63+
* Since node's hash functions operate on strings or buffers, we need a canonical format for
64+
* our objects in order to hash them correctly. This function turns them into string representations
65+
* where the types are encoded in order to avoid ambiguity, for instance, between the string "1" and
66+
* the number 1. This method sorts object keys lexicographically in order to maintain consistency.
67+
*
68+
* This doesn't just call JSON.stringify because we want to have firm control over the ordering of map
69+
* keys and the handling of blobs
70+
*
71+
* @param input a JSON-like object
72+
* @return a canonical string representation
73+
*/
74+
const canonicalize = (input: Input): string => {
75+
if (input === null) {
76+
return "null()";
77+
}
78+
if (typeof input === "string" || typeof input === "number" || typeof input === "boolean") {
79+
return `${typeof input}(${input.toString()})`;
80+
}
81+
if (Array.isArray(input)) {
82+
return "array(" + input.map((i) => canonicalize(i)).join(",") + ")";
83+
}
84+
if (input instanceof Date) {
85+
return "date(" + input.getTime() + ")";
86+
}
87+
if (input instanceof Uint8Array) {
88+
return "blob(" + createHash("sha256").update(input).digest("base64") + ")";
89+
}
90+
91+
const contents: Array<string> = [];
92+
for (const key of Object.keys(input).slice().sort()) {
93+
contents.push("key(" + key + ")->value(" + canonicalize(input[key]) + ")");
94+
}
95+
return "map(" + contents.join(",") + ")";
96+
};

smithy-typescript-ssdk-libs/server-common/src/validation/validators.spec.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,4 +231,12 @@ describe("uniqueItems", () => {
231231
path: "aField",
232232
});
233233
});
234+
describe("supports documents", () => {
235+
expect(validator.validate([{ a: 1 }, { b: 2 }], "aField")).toBeNull();
236+
expect(validator.validate([{ a: 1 }, { b: 2 }, { a: 1 }], "aField")).toEqual({
237+
constraintType: "uniqueItems",
238+
failureValue: [{ a: 1 }],
239+
path: "aField",
240+
});
241+
});
234242
});

smithy-typescript-ssdk-libs/server-common/src/validation/validators.ts

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import { RE2 } from "re2-wasm";
1717

18+
import { findDuplicates } from "../unique";
1819
import {
1920
EnumValidationFailure,
2021
LengthValidationFailure,
@@ -300,17 +301,9 @@ export class UniqueItemsValidator implements SingleConstraintValidator<Array<any
300301
return null;
301302
}
302303

303-
const repeats = new Set<any>();
304-
const uniqueValues = new Set<any>();
305-
for (const i of input) {
306-
if (uniqueValues.has(i)) {
307-
repeats.add(i);
308-
} else {
309-
uniqueValues.add(i);
310-
}
311-
}
304+
const repeats = findDuplicates(input);
312305

313-
if (repeats.size > 0) {
306+
if (repeats.length > 0) {
314307
return {
315308
constraintType: "uniqueItems",
316309
path: path,

0 commit comments

Comments
 (0)