Skip to content

Stub unicode complexity #11960

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 18, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 44 additions & 72 deletions stdlib/public/stubs/UnicodeNormalization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,26 @@

#include "../SwiftShims/UnicodeShims.h"

#if !defined(__APPLE__)
#include "swift/Basic/Lazy.h"
#include "swift/Runtime/Config.h"
#include "swift/Runtime/Debug.h"
#include <stdint.h>

#include <algorithm>
#include <mutex>
#include <assert.h>
#if defined(__APPLE__)

// Declare a few external functions to avoid a dependency on ICU headers.
// These are the ubrk_* break-iterator entry points, given C linkage so the
// names match the symbols exported by the ICU library.
extern "C" {
// Incomplete struct type: it is only ever handled through pointers here.
typedef struct UBreakIterator UBreakIterator;
// Enumerator-less stand-ins for the ICU enums. No enumerators are named in
// these declarations; callers in this file obtain values by casting from the
// corresponding Swift shim types.
typedef enum UBreakIteratorType {} UBreakIteratorType;
typedef enum UErrorCode {} UErrorCode;
// 16-bit code unit type used by the text parameters below.
typedef uint16_t UChar;

void ubrk_close(UBreakIterator *);
UBreakIterator *ubrk_open(UBreakIteratorType, const char *, const UChar *,
int32_t, UErrorCode *);
int32_t ubrk_preceding(UBreakIterator *, int32_t);
int32_t ubrk_following(UBreakIterator *, int32_t);
void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *);
}

#else

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdocumentation"
Expand All @@ -36,6 +48,16 @@

#pragma clang diagnostic pop

#endif

#if !defined(__APPLE__)
#include "swift/Basic/Lazy.h"
#include "swift/Runtime/Config.h"
#include "swift/Runtime/Debug.h"

#include <algorithm>
#include <mutex>
#include <assert.h>

static const UCollator *MakeRootCollator() {
UErrorCode ErrorCode = U_ZERO_ERROR;
Expand Down Expand Up @@ -88,11 +110,7 @@ class ASCIICollation {
for (unsigned char c = 0; c < 128; ++c) {
UErrorCode ErrorCode = U_ZERO_ERROR;
intptr_t NumCollationElts = 0;
#if defined(__CYGWIN__) || defined( _MSC_VER) || defined(__linux__)
UChar Buffer[1];
#else
uint16_t Buffer[1];
#endif
Buffer[0] = c;

UCollationElements *CollationIterator =
Expand Down Expand Up @@ -129,19 +147,13 @@ swift::_swift_stdlib_unicode_compare_utf16_utf16(const uint16_t *LeftString,
int32_t LeftLength,
const uint16_t *RightString,
int32_t RightLength) {
#if defined(__CYGWIN__) || defined( _MSC_VER) || defined(__linux__)
// ICU UChar type is platform dependent. In Cygwin, it is defined
// as wchar_t which size is 2. It seems that the underlying binary
// representation is same with swift utf16 representation.
// On Clang 4.0 under a recent Linux, ICU uses the built-in char16_t type.
return ucol_strcoll(GetRootCollator(),
reinterpret_cast<const UChar *>(LeftString), LeftLength,
reinterpret_cast<const UChar *>(RightString), RightLength);
#else
return ucol_strcoll(GetRootCollator(),
LeftString, LeftLength,
RightString, RightLength);
#endif
reinterpret_cast<const UChar *>(LeftString), LeftLength,
reinterpret_cast<const UChar *>(RightString), RightLength);
}

/// Compares the strings via the Unicode Collation Algorithm on the root locale.
Expand All @@ -159,12 +171,8 @@ swift::_swift_stdlib_unicode_compare_utf8_utf16(const unsigned char *LeftString,
UErrorCode ErrorCode = U_ZERO_ERROR;

uiter_setUTF8(&LeftIterator, reinterpret_cast<const char *>(LeftString), LeftLength);
#if defined(__CYGWIN__) || defined( _MSC_VER) || defined(__linux__)
uiter_setString(&RightIterator, reinterpret_cast<const UChar *>(RightString),
RightLength);
#else
uiter_setString(&RightIterator, RightString, RightLength);
#endif

uint32_t Diff = ucol_strcollIter(GetRootCollator(),
&LeftIterator, &RightIterator, &ErrorCode);
Expand Down Expand Up @@ -202,14 +210,9 @@ swift::_swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *LeftString,
void *swift::_swift_stdlib_unicodeCollationIterator_create(
const __swift_uint16_t *Str, __swift_uint32_t Length) {
UErrorCode ErrorCode = U_ZERO_ERROR;
#if defined(__CYGWIN__) || defined( _MSC_VER) || defined(__linux__)
UCollationElements *CollationIterator = ucol_openElements(
GetRootCollator(), reinterpret_cast<const UChar *>(Str), Length,
&ErrorCode);
#else
UCollationElements *CollationIterator = ucol_openElements(
GetRootCollator(), Str, Length, &ErrorCode);
#endif
UCollationElements *CollationIterator =
ucol_openElements(GetRootCollator(), reinterpret_cast<const UChar *>(Str),
Length, &ErrorCode);
if (U_FAILURE(ErrorCode)) {
swift::crash("_swift_stdlib_unicodeCollationIterator_create: ucol_openElements() failed.");
}
Expand Down Expand Up @@ -247,17 +250,11 @@ swift::_swift_stdlib_unicode_strToUpper(uint16_t *Destination,
const uint16_t *Source,
int32_t SourceLength) {
UErrorCode ErrorCode = U_ZERO_ERROR;
#if defined(__CYGWIN__) || defined( _MSC_VER) || defined(__linux__)
uint32_t OutputLength = u_strToUpper(reinterpret_cast<UChar *>(Destination),
DestinationCapacity,
reinterpret_cast<const UChar *>(Source),
SourceLength,
"", &ErrorCode);
#else
uint32_t OutputLength = u_strToUpper(Destination, DestinationCapacity,
Source, SourceLength,
"", &ErrorCode);
#endif
if (U_FAILURE(ErrorCode) && ErrorCode != U_BUFFER_OVERFLOW_ERROR) {
swift::crash("u_strToUpper: Unexpected error uppercasing unicode string.");
}
Expand All @@ -274,17 +271,11 @@ swift::_swift_stdlib_unicode_strToLower(uint16_t *Destination,
const uint16_t *Source,
int32_t SourceLength) {
UErrorCode ErrorCode = U_ZERO_ERROR;
#if defined(__CYGWIN__) || defined( _MSC_VER) || defined(__linux__)
uint32_t OutputLength = u_strToLower(reinterpret_cast<UChar *>(Destination),
DestinationCapacity,
reinterpret_cast<const UChar *>(Source),
SourceLength,
"", &ErrorCode);
#else
uint32_t OutputLength = u_strToLower(Destination, DestinationCapacity,
Source, SourceLength,
"", &ErrorCode);
#endif
if (U_FAILURE(ErrorCode) && ErrorCode != U_BUFFER_OVERFLOW_ERROR) {
swift::crash("u_strToLower: Unexpected error lowercasing unicode string.");
}
Expand All @@ -303,28 +294,6 @@ template <typename T, typename U> const T *ptr_cast(const U *p) {
}
}

// NOTE(review): an equivalent set of declarations also appears near the top of
// this listing; this region looks like the pre-change side of the diff.
// Confirm which copy is live before editing.
#if defined(__APPLE__)
#include <stdint.h>
extern "C" {
// Declare a few external functions to avoid a dependency on ICU headers.
// UBreakIterator is left incomplete and the two enums have no enumerators:
// only pointers and casted values of these types are used.
typedef struct UBreakIterator UBreakIterator;
typedef enum UBreakIteratorType {} UBreakIteratorType;
typedef enum UErrorCode {} UErrorCode;
// 16-bit code unit type used by the text parameters below.
typedef uint16_t UChar;

void ubrk_close(UBreakIterator *);
UBreakIterator *ubrk_open(UBreakIteratorType, const char *, const UChar *,
int32_t, UErrorCode *);
int32_t ubrk_preceding(UBreakIterator *, int32_t);
int32_t ubrk_following(UBreakIterator *, int32_t);
void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *);
}

// Force an autolink with ICU
// The file-scope asm statement emits a `.linker_option` assembler directive
// carrying "-licucore", so no explicit -l flag is needed on the link line.
asm(".linker_option \"-licucore\"\n");

#endif // defined(__APPLE__)

void swift::__swift_stdlib_ubrk_close(
swift::__swift_stdlib_UBreakIterator *bi) {
ubrk_close(ptr_cast<UBreakIterator>(bi));
Expand All @@ -333,16 +302,10 @@ void swift::__swift_stdlib_ubrk_close(
/// Opens an ICU break iterator through ubrk_open and hands it back as the
/// Swift shim iterator type.
///
/// \param type       break-iterator kind; static_cast to UBreakIteratorType.
/// \param locale     forwarded to ubrk_open unchanged.
/// \param text       16-bit code units; passed to ubrk_open as const UChar *.
/// \param textLength forwarded to ubrk_open unchanged.
/// \param status     converted with ptr_cast to UErrorCode * for ubrk_open.
/// \returns the pointer returned by ubrk_open, converted with ptr_cast.
//
// NOTE(review): this span interleaves both sides of a diff (the
// platform-conditional #if/#else variants and the unconditional
// reinterpret_cast form). Confirm which lines are live before editing.
swift::__swift_stdlib_UBreakIterator *swift::__swift_stdlib_ubrk_open(
swift::__swift_stdlib_UBreakIteratorType type, const char *locale,
const uint16_t *text, int32_t textLength, __swift_stdlib_UErrorCode *status) {
#if defined(__CYGWIN__) || defined( _MSC_VER) || defined(__linux__)
return ptr_cast<swift::__swift_stdlib_UBreakIterator>(
ubrk_open(static_cast<UBreakIteratorType>(type), locale,
reinterpret_cast<const UChar*>(text), textLength,
reinterpret_cast<const UChar *>(text), textLength,
ptr_cast<UErrorCode>(status)));
#else
return ptr_cast<swift::__swift_stdlib_UBreakIterator>(
ubrk_open(static_cast<UBreakIteratorType>(type), locale, text, textLength,
ptr_cast<UErrorCode>(status)));
#endif
}

int32_t
Expand All @@ -356,9 +319,18 @@ swift::__swift_stdlib_ubrk_following(swift::__swift_stdlib_UBreakIterator *bi,
int32_t offset) {
return ubrk_following(ptr_cast<UBreakIterator>(bi), offset);
}

/// Forwards to ICU's ubrk_setText.
///
/// The iterator, text pointer and status pointer are each converted from their
/// Swift shim types to the ICU counterparts with ptr_cast; textLength is passed
/// through unchanged. Nothing is returned.
void swift::__swift_stdlib_ubrk_setText(
    swift::__swift_stdlib_UBreakIterator *bi, const __swift_stdlib_UChar *text,
    __swift_int32_t textLength, __swift_stdlib_UErrorCode *status) {
  auto *icuIterator = ptr_cast<UBreakIterator>(bi);
  auto *icuText = ptr_cast<UChar>(text);
  auto *icuStatus = ptr_cast<UErrorCode>(status);
  ubrk_setText(icuIterator, icuText, textLength, icuStatus);
}

// Force an autolink with ICU
// Mach-O targets: a file-scope asm statement emits a `.linker_option`
// directive carrying "-licucore".
// Windows targets: `#pragma comment(lib, ...)` names icucore.lib as a library
// to link.
// Any other target gets no autolink request from this block.
#if defined(__MACH__)
asm(".linker_option \"-licucore\"\n");
#elif defined(_WIN32)
#pragma comment(lib, "icucore.lib")
#endif // defined(__MACH__)