aws · jeskew · Jun 12, 2017 · May 23, 2017 · May 26, 2017 · May 27, 2017
diff --git a/.gitignore b/.gitignore
@@ -20,3 +20,4 @@ jspm_packages
 .yarn-integrity
 
 lerna-debug.log
+packages/*/package-lock.json
diff --git a/packages/util-utf8-browser/.gitignore b/packages/util-utf8-browser/.gitignore
@@ -0,0 +1,4 @@
+/node_modules/
+*.js
+*.js.map
+*.d.ts
diff --git a/packages/util-utf8-browser/__tests__/index.ts b/packages/util-utf8-browser/__tests__/index.ts
@@ -0,0 +1,70 @@
+import {fromUtf8, toUtf8} from '../';
+
+// Swap the pure JS implementation for spies so the tests can observe which
+// implementation the package entry point delegates to.
+jest.mock('../lib/pureJs', () => ({
+    fromUtf8: jest.fn(() => new Uint8Array(0)),
+    toUtf8: jest.fn(() => ''),
+}));
+import {
+    fromUtf8 as jsFromUtf8,
+    toUtf8 as jsToUtf8,
+} from '../lib/pureJs';
+
+// Swap the Encoding API-backed implementation for spies as well.
+jest.mock('../lib/whatwgEncodingApi', () => ({
+    fromUtf8: jest.fn(() => new Uint8Array(0)),
+    toUtf8: jest.fn(() => ''),
+}));
+import {
+    fromUtf8 as textEncoderFromUtf8,
+    toUtf8 as textEncoderToUtf8,
+} from '../lib/whatwgEncodingApi';
+
+// Forget the calls recorded by every spy so each test starts from zero.
+beforeEach(() => {
+    const spies: Array<any> = [
+        jsFromUtf8,
+        jsToUtf8,
+        textEncoderFromUtf8,
+        textEncoderToUtf8,
+    ];
+
+    for (const spy of spies) {
+        spy.mockClear();
+    }
+});
+
+describe('fromUtf8', () => {
+    it('should use the Encoding API if available', () => {
+        // Install a function-valued global for the duration of this test.
+        (global as any).TextEncoder = jest.fn() as any;
+
+        fromUtf8('foo');
+
+        expect(textEncoderFromUtf8).toHaveBeenCalledTimes(1);
+        expect(jsFromUtf8).toHaveBeenCalledTimes(0);
+    });
+
+    it('should use a JS implementation otherwise', () => {
+        // Remove the global again so the fallback path is exercised.
+        delete (global as any).TextEncoder;
+
+        fromUtf8('foo');
+
+        expect(textEncoderFromUtf8).toHaveBeenCalledTimes(0);
+        expect(jsFromUtf8).toHaveBeenCalledTimes(1);
+    });
+});
+
+describe('toUtf8', () => {
+    it('should use the Encoding API if available', () => {
+        // Install a function-valued global for the duration of this test.
+        (global as any).TextDecoder = jest.fn() as any;
+
+        toUtf8(new Uint8Array(0));
+
+        expect(textEncoderToUtf8).toHaveBeenCalledTimes(1);
+        expect(jsToUtf8).toHaveBeenCalledTimes(0);
+    });
+
+    it('should use a JS implementation otherwise', () => {
+        // Remove the global again so the fallback path is exercised.
+        delete (global as any).TextDecoder;
+
+        toUtf8(new Uint8Array(0));
+
+        expect(textEncoderToUtf8).toHaveBeenCalledTimes(0);
+        expect(jsToUtf8).toHaveBeenCalledTimes(1);
+    });
+});
diff --git a/packages/util-utf8-browser/__tests__/pureJs.ts b/packages/util-utf8-browser/__tests__/pureJs.ts
@@ -0,0 +1,26 @@
+import {fromUtf8, toUtf8} from '../lib/pureJs';
+
+// Fixture table shared by the tests below: each key is a JS string and each
+// value is the byte sequence the tests pair with that string.
+const utf8StringsToByteArrays: {[key: string]: Uint8Array} = {
+    'ABC': new Uint8Array(['A'.charCodeAt(0), 'B'.charCodeAt(0), 'C'.charCodeAt(0)]),
+    '🐎👱❤': new Uint8Array([240, 159, 144, 142, 240, 159, 145, 177, 226, 157, 164]),
+    '☃💩': new Uint8Array([226, 152, 131, 240, 159, 146, 169]),
+    'The rain in Spain falls mainly on the plain.': new Uint8Array([84, 104, 101, 32, 114, 97, 105, 110, 32, 105, 110, 32, 83, 112, 97, 105, 110, 32, 102, 97, 108, 108, 115, 32, 109, 97, 105, 110, 108, 121, 32, 111, 110, 32, 116, 104, 101, 32, 112, 108, 97, 105, 110, 46 ]),
+    'دست‌نوشته‌ها نمی‌سوزند': new Uint8Array([216, 175, 216, 179, 216, 170, 226, 128, 140, 217, 134, 217, 136, 216, 180, 216, 170, 217, 135, 226, 128, 140, 217, 135, 216, 167, 32, 217, 134, 217, 133, 219, 140, 226, 128, 140, 216, 179, 217, 136, 216, 178, 217, 134, 216, 175]),
+    'Рукописи не горят': new Uint8Array([208, 160, 209, 131, 208, 186, 208, 190, 208, 191, 208, 184, 209, 129, 208, 184, 32, 208, 189, 208, 181, 32, 208, 179, 208, 190, 209, 128, 209, 143, 209, 130 ]),
+};
+
+describe('fromUtf8', () => {
+    // One test per fixture entry: encoding the key must yield its paired bytes.
+    for (const string of Object.keys(utf8StringsToByteArrays)) {
+        it(`should UTF-8 encode "${string}" to the correct value`, () => {
+            expect(fromUtf8(string)).toEqual(utf8StringsToByteArrays[string]);
+        });
+    }
+});
+
+describe('toUtf8', () => {
+    // One test per fixture entry: decoding the bytes must yield the key.
+    for (const string of Object.keys(utf8StringsToByteArrays)) {
+        it(`should derive "${string}" from the UTF-8 encoded bytes`, () => {
+            expect(toUtf8(utf8StringsToByteArrays[string])).toBe(string);
+        });
+    }
+});
diff --git a/packages/util-utf8-browser/__tests__/whatwgEncodingApi.ts b/packages/util-utf8-browser/__tests__/whatwgEncodingApi.ts
@@ -0,0 +1,51 @@
+import {
+    fromUtf8,
+    toUtf8,
+} from '../lib/whatwgEncodingApi';
+
+// Install fresh mock constructors on the global object before every test.
+beforeEach(() => {
+    // Stand-ins for the objects the mocked constructors hand back.
+    const fakeDecoder = {decode: jest.fn(() => '')};
+    const fakeEncoder = {encode: jest.fn(() => new Uint8Array(0))};
+
+    const globals = global as any;
+    globals.TextDecoder = jest.fn(() => fakeDecoder) as any;
+    globals.TextEncoder = jest.fn(() => fakeEncoder) as any;
+});
+
+// Minimal constructor shapes so the tests can `new` the mocked globals.
+type TextDecoderCtor = new () => any;
+type TextEncoderCtor = new () => any;
+
+declare const TextDecoder: TextDecoderCtor;
+declare const TextEncoder: TextEncoderCtor;
+
+describe('WHATWG encoding spec compliant environment UTF-8 handling', () => {
+    it('should use the global TextDecoder to decode UTF-8', () => {
+        // Grab the object the mocked constructor returns, then discard the
+        // construction this lookup itself recorded.
+        const decoderHandle = new TextDecoder();
+        (TextDecoder as any).mockClear();
+        expect(TextDecoder).toHaveBeenCalledTimes(0);
+
+        toUtf8(new Uint8Array(0));
+
+        expect(TextDecoder).toHaveBeenCalledTimes(1);
+        expect(decoderHandle.decode).toHaveBeenCalledTimes(1);
+    });
+
+    it('should use the global TextEncoder to encode UTF-8', () => {
+        // Grab the object the mocked constructor returns, then discard the
+        // construction this lookup itself recorded.
+        const encoderHandle = new TextEncoder();
+        (TextEncoder as any).mockClear();
+        expect(TextEncoder).toHaveBeenCalledTimes(0);
+
+        fromUtf8('string');
+
+        expect(TextEncoder).toHaveBeenCalledTimes(1);
+        expect(encoderHandle.encode).toHaveBeenCalledTimes(1);
+    });
+});
diff --git a/packages/util-utf8-browser/index.ts b/packages/util-utf8-browser/index.ts
@@ -0,0 +1,27 @@
+import {
+    fromUtf8 as jsFromUtf8,
+    toUtf8 as jsToUtf8,
+} from './lib/pureJs';
+import {
+    fromUtf8 as textEncoderFromUtf8,
+    toUtf8 as textEncoderToUtf8,
+} from './lib/whatwgEncodingApi';
+
+// Ambient declarations for the Encoding API globals. They are typed as
+// possibly undefined because the functions in this file probe for them with
+// `typeof` before choosing an implementation.
+declare const TextDecoder: Function|undefined;
+declare const TextEncoder: Function|undefined;
+
+/**
+ * Encodes a string as UTF-8 bytes. Delegates to the Encoding API-backed
+ * implementation when a global `TextEncoder` function exists, and to the
+ * pure JS implementation otherwise.
+ */
+export function fromUtf8(input: string): Uint8Array {
+    const encode = typeof TextEncoder === 'function'
+        ? textEncoderFromUtf8
+        : jsFromUtf8;
+
+    return encode(input);
+}
+
+/**
+ * Decodes UTF-8 bytes into a string. Delegates to the Encoding API-backed
+ * implementation when a global `TextDecoder` function exists, and to the
+ * pure JS implementation otherwise.
+ */
+export function toUtf8(input: Uint8Array): string {
+    const decode = typeof TextDecoder === 'function'
+        ? textEncoderToUtf8
+        : jsToUtf8;
+
+    return decode(input);
+}
diff --git a/packages/util-utf8-browser/lib/pureJs.ts b/packages/util-utf8-browser/lib/pureJs.ts
@@ -0,0 +1,80 @@
+/**
+ * Converts a JS string from its native UCS-2/UTF-16 representation into a
+ * Uint8Array of the bytes used to represent the equivalent characters in UTF-8.
+ *
+ * Cribbed from the `goog.crypt.stringToUtf8ByteArray` function in the Google
+ * Closure library, though updated to use typed arrays.
+ *
+ * An unpaired surrogate code unit is not rejected; it is emitted as a
+ * three-byte sequence like any other code unit at or above 0x800.
+ *
+ * @param input The string to encode.
+ * @returns A newly allocated array holding the UTF-8 encoded bytes.
+ */
+export function fromUtf8(input: string): Uint8Array {
+    const bytes: Array<number> = [];
+    for (let i = 0, len = input.length; i < len; i++) {
+        const value = input.charCodeAt(i);
+        if (value < 0x80) {
+            // One byte: 0xxxxxxx
+            bytes.push(value);
+        } else if (value < 0x800) {
+            // Two bytes: 110xxxxx 10xxxxxx
+            bytes.push(
+                (value >> 6) | 0b11000000,
+                (value & 0b111111) | 0b10000000
+            );
+        } else if (
+            i + 1 < len &&
+            ((value & 0xfc00) === 0xd800) &&
+            ((input.charCodeAt(i + 1) & 0xfc00) === 0xdc00)
+        ) {
+            // A high surrogate followed by a low surrogate: combine both code
+            // units into one code point (consuming the second unit) and emit
+            // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+            const surrogatePair = 0x10000 +
+                ((value & 0b1111111111) << 10) +
+                (input.charCodeAt(++i) & 0b1111111111);
+            bytes.push(
+                (surrogatePair >> 18) | 0b11110000,
+                ((surrogatePair >> 12) & 0b111111) | 0b10000000,
+                ((surrogatePair >> 6) & 0b111111) | 0b10000000,
+                (surrogatePair & 0b111111) | 0b10000000
+            );
+        } else {
+            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+            bytes.push(
+                (value >> 12) | 0b11100000,
+                ((value >> 6) & 0b111111) | 0b10000000,
+                (value & 0b111111) | 0b10000000
+            );
+        }
+    }
+
+    // Use the constructor rather than the ES2015 static `Uint8Array.from`:
+    // engines old enough to need a hand-written encoder may not provide it.
+    return new Uint8Array(bytes);
+}
+
+/**
+ * Converts a typed array of bytes containing UTF-8 data into a native JS
+ * string.
+ *
+ * Partly cribbed from the `goog.crypt.utf8ByteArrayToString` function in the
+ * Google Closure library, though updated to use typed arrays and to better
+ * handle astral plane code points.
+ *
+ * The one- to three-byte branches do not validate their input. Four-byte
+ * sequences are handed to `decodeURIComponent`, so a malformed one throws.
+ *
+ * @param input The UTF-8 encoded bytes to decode.
+ * @returns The decoded string.
+ * @throws URIError if `decodeURIComponent` rejects a four-byte sequence.
+ */
+export function toUtf8(input: Uint8Array): string {
+    let decoded = '';
+    for (let i = 0, len = input.length; i < len; i++) {
+        const byte = input[i];
+        if (byte < 0x80) {
+            // One byte: 0xxxxxxx
+            decoded += String.fromCharCode(byte);
+        } else if (0b11000000 <= byte && byte < 0b11100000) {
+            // Two bytes: 110xxxxx 10xxxxxx
+            const nextByte = input[++i];
+            decoded += String.fromCharCode(
+                (byte & 0b11111) << 6 | (nextByte & 0b111111)
+            );
+        } else if (0b11110000 <= byte) {
+            // Lead byte of a four-byte sequence. No upper bound is tested
+            // because a Uint8Array element cannot exceed 0xff. The bytes are
+            // percent-encoded and decoded by the runtime, which yields the
+            // surrogate pair for the astral code point.
+            const sequence = [byte, input[++i], input[++i], input[++i]];
+            const encoded = '%' + sequence
+                .map(byteValue => byteValue.toString(16))
+                .join('%');
+            decoded += decodeURIComponent(encoded);
+        } else {
+            // Everything else is read as three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+            decoded += String.fromCharCode(
+                (byte & 0b1111) << 12 |
+                (input[++i] & 0b111111) << 6 |
+                (input[++i] & 0b111111)
+            );
+        }
+    }
+
+    return decoded;
+}
diff --git a/packages/util-utf8-browser/lib/whatwgEncodingApi.ts b/packages/util-utf8-browser/lib/whatwgEncodingApi.ts
@@ -0,0 +1,50 @@
+/**
+ * A declaration of the global TextEncoder and TextDecoder constructors.
+ *
+ * The interfaces live in a namespace and only the two constructor types are
+ * exported from it; the ambient constants declared after the namespace are
+ * typed with those constructor types.
+ *
+ * @see https://encoding.spec.whatwg.org/
+ */
+namespace Encoding {
+    // Options accepted by the TextDecoder constructor.
+    interface TextDecoderOptions {
+        fatal?: boolean;
+        ignoreBOM?: boolean;
+    }
+
+    // Options accepted by TextDecoder#decode.
+    interface TextDecodeOptions {
+        stream?: boolean;
+    }
+
+    // Instance shape produced by the TextDecoder constructor.
+    interface TextDecoder {
+        readonly encoding: string;
+        readonly fatal: boolean;
+        readonly ignoreBOM: boolean;
+        decode(
+            input?: ArrayBuffer|ArrayBufferView,
+            options?: TextDecodeOptions
+        ): string;
+    }
+
+    export interface TextDecoderConstructor {
+        new (label?: string, options?: TextDecoderOptions): TextDecoder;
+    }
+
+    // Instance shape produced by the TextEncoder constructor.
+    interface TextEncoder {
+        readonly encoding: 'utf-8';
+        encode(input?: string): Uint8Array;
+    }
+
+    export interface TextEncoderConstructor {
+        new (): TextEncoder;
+    }
+}
+
+// Ambient declaration only: no value is defined here, the name is expected
+// to exist at runtime.
+declare const TextDecoder: Encoding.TextDecoderConstructor;
+
+// Ambient declaration only: no value is defined here, the name is expected
+// to exist at runtime.
+declare const TextEncoder: Encoding.TextEncoderConstructor;
+
+/**
+ * Encodes a string as UTF-8 with a freshly constructed `TextEncoder`.
+ */
+export function fromUtf8(input: string): Uint8Array {
+    const encoder = new TextEncoder();
+
+    return encoder.encode(input);
+}
+
+/**
+ * Decodes UTF-8 bytes with a freshly constructed `TextDecoder` created for
+ * the 'utf-8' label.
+ */
+export function toUtf8(input: Uint8Array): string {
+    const decoder = new TextDecoder('utf-8');
+
+    return decoder.decode(input);
+}
diff --git a/packages/util-utf8-browser/package.json b/packages/util-utf8-browser/package.json
@@ -0,0 +1,20 @@
+{
+  "name": "@aws/util-utf8-browser",
+  "private": true,
+  "version": "0.0.1",
+  "description": "A browser UTF-8 string <-> UInt8Array converter",
+  "main": "index.js",
+  "scripts": {
+    "prepublishOnly": "tsc",
+    "pretest": "tsc",
+    "test": "jest"
+  },
+  "author": "[email protected]",
+  "license": "UNLICENSED",
+  "devDependencies": {
+    "@types/jest": "^19.2.2",
+    "@types/node": "^7.0.12",
+    "jest": "^19.0.2",
+    "typescript": "^2.3"
+  }
+}
diff --git a/packages/util-utf8-browser/tsconfig.json b/packages/util-utf8-browser/tsconfig.json
@@ -0,0 +1,10 @@
+{
+  "compilerOptions": {
+    "target": "es5",
+    "module": "commonjs",
+    "declaration": true,
+    "sourceMap": true,
+    "strict": true,
+    "stripInternal": true
+  }
+}
diff --git a/packages/util-utf8-node/.gitignore b/packages/util-utf8-node/.gitignore
@@ -0,0 +1,4 @@
+/node_modules/
+*.js
+*.js.map
+*.d.ts
diff --git a/packages/util-utf8-node/__tests__/index.ts b/packages/util-utf8-node/__tests__/index.ts
@@ -0,0 +1,34 @@
+import {fromUtf8, toUtf8} from "../";
+
+// Fixture table shared by the tests below: each key is a JS string and each
+// value is the byte sequence the tests pair with that string.
+const utf8StringsToByteArrays: {[key: string]: Uint8Array} = {
+    'ABC': new Uint8Array(['A'.charCodeAt(0), 'B'.charCodeAt(0), 'C'.charCodeAt(0)]),
+    '🐎👱❤': new Uint8Array([240, 159, 144, 142, 240, 159, 145, 177, 226, 157, 164]),
+    '☃💩': new Uint8Array([226, 152, 131, 240, 159, 146, 169]),
+    'The rain in Spain falls mainly on the plain.': new Uint8Array([84, 104, 101, 32, 114, 97, 105, 110, 32, 105, 110, 32, 83, 112, 97, 105, 110, 32, 102, 97, 108, 108, 115, 32, 109, 97, 105, 110, 108, 121, 32, 111, 110, 32, 116, 104, 101, 32, 112, 108, 97, 105, 110, 46 ]),
+    'دست‌نوشته‌ها نمی‌سوزند': new Uint8Array([216, 175, 216, 179, 216, 170, 226, 128, 140, 217, 134, 217, 136, 216, 180, 216, 170, 217, 135, 226, 128, 140, 217, 135, 216, 167, 32, 217, 134, 217, 133, 219, 140, 226, 128, 140, 216, 179, 217, 136, 216, 178, 217, 134, 216, 175]),
+    'Рукописи не горят': new Uint8Array([208, 160, 209, 131, 208, 186, 208, 190, 208, 191, 208, 184, 209, 129, 208, 184, 32, 208, 189, 208, 181, 32, 208, 179, 208, 190, 209, 128, 209, 143, 209, 130 ]),
+};
+
+describe('fromUtf8', () => {
+    // One test per fixture entry: encoding the key must yield its paired bytes.
+    for (const string of Object.keys(utf8StringsToByteArrays)) {
+        it(`should UTF-8 encode "${string}" to the correct value`, () => {
+            expect(fromUtf8(string)).toEqual(utf8StringsToByteArrays[string]);
+        });
+    }
+
+    // A non-string argument must be rejected rather than converted.
+    it('should throw when given a number', () => {
+        expect(() => fromUtf8(255 as any)).toThrow();
+    });
+});
+
+describe('toUtf8', () => {
+    // One test per fixture entry: decoding the bytes must yield the key.
+    for (const string of Object.keys(utf8StringsToByteArrays)) {
+        it(`should derive "${string}" from the UTF-8 encoded bytes`, () => {
+            expect(toUtf8(utf8StringsToByteArrays[string])).toBe(string);
+        });
+    }
+
+    // A non-array argument must be rejected rather than converted.
+    it('should throw when given a number', () => {
+        expect(() => toUtf8(255 as any)).toThrow();
+    });
+});
Original file line number	Diff line number	Diff line change
Expand Up		@@ -20,3 +20,4 @@ jspm_packages
		.yarn-integrity

		lerna-debug.log
		packages/*/package-lock.json