Skip to content

Commit feafcba

Browse files
committed
Migrate to karacteristics library
1 parent ecd5681 commit feafcba

File tree

4 files changed

+40
-48
lines changed

4 files changed

+40
-48
lines changed

build.gradle.kts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@ plugins {
88
allprojects {
99
repositories {
1010
mavenCentral()
11+
mavenLocal()
1112
}
1213
}
1314

1415
apiValidation {
15-
ignoredProjects += listOf("benchmark", "test-suites", "generator")
16+
ignoredProjects += listOf("benchmark", "test-suites")
1617
}
1718

1819
val ossrhUsername: String by project.ext

gradle/libs.versions.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ graphql-ktor = { group = "com.expediagroup", name = "graphql-kotlin-ktor-client"
5050
clikt = { group = "com.github.ajalt.clikt", name = "clikt", version = "4.4.0" }
5151
kotlin-codepoints = { group = "de.cketti.unicode", name = "kotlin-codepoints", version = "0.9.0" }
5252
normalize = { group = "com.doist.x", name = "normalize", version = "1.1.1" }
53+
karacteristics = { group = "io.github.optimumcode", name = "karacteristics", version = "0.0.2-SNAPSHOT" }
5354

5455
[bundles]
5556
openapi = ["openapi-validator", "openapi-interfaces", "openapi-jackson"]

json-schema-validator/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ kotlin {
192192
) {
193193
because("simplifies work with unicode codepoints")
194194
}
195+
implementation(libs.karacteristics)
195196
}
196197
}
197198

json-schema-validator/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/IdnHostnameFormatValidator.kt

Lines changed: 36 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -9,26 +9,27 @@ import io.github.optimumcode.json.schema.internal.formats.IdnHostnameFormatValid
99
import io.github.optimumcode.json.schema.internal.formats.IdnHostnameFormatValidator.BidiLabelType.RTL
1010
import io.github.optimumcode.json.schema.internal.hostname.Punycode
1111
import io.github.optimumcode.json.schema.internal.hostname.isNormalized
12-
import io.github.optimumcode.json.schema.internal.unicode.CharacterCategory
13-
import io.github.optimumcode.json.schema.internal.unicode.CharacterCategory.ENCLOSING_MARK
14-
import io.github.optimumcode.json.schema.internal.unicode.CharacterCategory.NONSPACING_MARK
15-
import io.github.optimumcode.json.schema.internal.unicode.CharacterCategory.SPACING_MARK
16-
import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality
17-
import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.ARABIC_LETTER
18-
import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.ARABIC_NUMBER
19-
import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.BOUNDARY_NEUTRAL
20-
import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.COMMON_SEPARATOR
21-
import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.EUROPEAN_NUMBER
22-
import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.EUROPEAN_SEPARATOR
23-
import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.EUROPEAN_TERMINATOR
24-
import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.LEFT_TO_RIGHT
25-
import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.OTHER_NEUTRAL
26-
import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.RIGHT_TO_LEFT
27-
import io.github.optimumcode.json.schema.internal.unicode.DerivedProperties
28-
import io.github.optimumcode.json.schema.internal.unicode.JoiningType
2912
import io.github.optimumcode.json.schema.internal.util.forEachCodePointIndexed
13+
import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.ARABIC_LETTER
14+
import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.ARABIC_NUMBER
15+
import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.BOUNDARY_NEUTRAL
16+
import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.COMMON_SEPARATOR
17+
import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.EUROPEAN_NUMBER
18+
import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.EUROPEAN_SEPARATOR
19+
import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.EUROPEAN_TERMINATOR
20+
import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.LEFT_TO_RIGHT
21+
import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.OTHER_NEUTRAL
22+
import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.RIGHT_TO_LEFT
23+
import io.github.optimumcode.karacteristics.CodepointCategory.ENCLOSING_MARK
24+
import io.github.optimumcode.karacteristics.CodepointCategory.NONSPACING_MARK
25+
import io.github.optimumcode.karacteristics.CodepointCategory.SPACING_MARK
26+
import io.github.optimumcode.karacteristics.CodepointDerivedProperty
27+
import io.github.optimumcode.karacteristics.CodepointJoiningType
28+
import io.github.optimumcode.karacteristics.bidirectionalClass
29+
import io.github.optimumcode.karacteristics.category
30+
import io.github.optimumcode.karacteristics.contains
3031
import kotlin.math.abs
31-
import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.NONSPACING_MARK as NONSPACING_MARK_DIRECTIONALITY
32+
import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.NONSPACING_MARK as NONSPACING_MARK_DIRECTIONALITY
3233

3334
private const val GREEK_LOWER_NUMERAL_SIGN: Int = 0x0375
3435
private const val HEBREW_GERESH: Int = 0x05F3
@@ -57,7 +58,7 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() {
5758
value.forEachLabel {
5859
it.forEachCodePointIndexed { _, codePoint ->
5960
isBidiDomainName = isBidiDomainName ||
60-
when (getDirectionality(codePoint)) {
61+
when (codePoint.bidirectionalClass) {
6162
RIGHT_TO_LEFT,
6263
ARABIC_LETTER,
6364
ARABIC_NUMBER,
@@ -131,7 +132,7 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() {
131132

132133
val bidiLabelType: BidiLabelType =
133134
if (isBidiDomainName) {
134-
when (getDirectionality(firstCodePoint)) {
135+
when (firstCodePoint.bidirectionalClass) {
135136
LEFT_TO_RIGHT,
136137
-> LTR
137138

@@ -171,7 +172,7 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() {
171172
//
172173
// Check absence of opposite directionality
173174
// Point 4 https://datatracker.ietf.org/doc/html/rfc5893#section-2
174-
isExtendedArabicIndicDigit(codePoint) || EUROPEAN_NUMBER.characterData.contains(codePoint) -> -1
175+
isExtendedArabicIndicDigit(codePoint) || codePoint in EUROPEAN_NUMBER -> -1
175176
else -> 0
176177
}
177178
if (abs(currentArabicDigitStatus - arabicDigitStatus) > 1) {
@@ -239,10 +240,10 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() {
239240
-> false
240241

241242
else ->
242-
DerivedProperties.DISALLOWED.contains(codePoint) ||
243-
DerivedProperties.UNASSIGNED.contains(codePoint) ||
244-
DerivedProperties.CONTEXTJ.contains(codePoint) ||
245-
DerivedProperties.CONTEXTO.contains(codePoint)
243+
codePoint in CodepointDerivedProperty.DISALLOWED ||
244+
codePoint in CodepointDerivedProperty.UNASSIGNED ||
245+
codePoint in CodepointDerivedProperty.CONTEXTJ ||
246+
codePoint in CodepointDerivedProperty.CONTEXTO
246247
}
247248
}
248249

@@ -253,7 +254,7 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() {
253254
if (bidiLabelType == NONE) {
254255
return false
255256
}
256-
return when (val directionality = getDirectionality(codePoint)) {
257+
return when (val directionality = codePoint.bidirectionalClass) {
257258
EUROPEAN_NUMBER,
258259
EUROPEAN_SEPARATOR,
259260
COMMON_SEPARATOR,
@@ -297,10 +298,10 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() {
297298
}
298299
var index = unicode.length
299300
// Zero or more characters with Bidi property NSM are allowed at the end
300-
while (index > 0 && getDirectionality(unicode.codePointBefore(index)) == NONSPACING_MARK_DIRECTIONALITY) {
301+
while (index > 0 && unicode.codePointBefore(index).bidirectionalClass == NONSPACING_MARK_DIRECTIONALITY) {
301302
index--
302303
}
303-
val lastCodepointDirectionality = getDirectionality(unicode.codePointBefore(index))
304+
val lastCodepointDirectionality = unicode.codePointBefore(index).bidirectionalClass
304305
return when (bidiLabelType) {
305306
NONE -> false
306307
RTL ->
@@ -456,7 +457,7 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() {
456457
return false
457458
}
458459
var j = index
459-
while (0 < j && JoiningType.TRANSPARENT.contains(unicode.codePointBefore(j))) {
460+
while (0 < j && unicode.codePointBefore(j) in CodepointJoiningType.TRANSPARENT) {
460461
j -= 1
461462
}
462463
if (j == 0) {
@@ -465,8 +466,8 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() {
465466
}
466467
val beforeFirstTransparent = unicode.codePointBefore(j)
467468
if (
468-
!JoiningType.LEFT_JOINING.contains(beforeFirstTransparent) &&
469-
!JoiningType.DUAL_JOINING.contains(beforeFirstTransparent)
469+
beforeFirstTransparent !in CodepointJoiningType.LEFT_JOINING &&
470+
beforeFirstTransparent !in CodepointJoiningType.DUAL_JOINING
470471
) {
471472
return true
472473
}
@@ -476,20 +477,20 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() {
476477
// Must have joining type T after
477478
return true
478479
}
479-
while (j < len && JoiningType.TRANSPARENT.contains(unicode.codePointAt(j))) {
480+
while (j < len && unicode.codePointAt(j) in CodepointJoiningType.TRANSPARENT) {
480481
j += 1
481482
}
482483
if (j == len) {
483484
// Must have joining type R or D after last T type
484485
return true
485486
}
486487
val afterLastTransparent = unicode.codePointAt(j)
487-
return !JoiningType.RIGHT_JOINING.contains(afterLastTransparent) &&
488-
!JoiningType.DUAL_JOINING.contains(afterLastTransparent)
488+
return afterLastTransparent !in CodepointJoiningType.RIGHT_JOINING &&
489+
afterLastTransparent !in CodepointJoiningType.DUAL_JOINING
489490
}
490491

491492
private fun isLeadingCombiningMark(codePoint: Int): Boolean =
492-
when (getCategory(codePoint)) {
493+
when (codePoint.category) {
493494
NONSPACING_MARK,
494495
SPACING_MARK,
495496
ENCLOSING_MARK,
@@ -519,18 +520,6 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() {
519520
return value.length
520521
}
521522

522-
private fun getCategory(codePoint: Int): CharacterCategory {
523-
return CharacterCategory.entries.first {
524-
it.characterData.contains(codePoint)
525-
}
526-
}
527-
528-
private fun getDirectionality(codePoint: Int): CharacterDirectionality {
529-
return CharacterDirectionality.entries.first {
530-
it.characterData.contains(codePoint)
531-
}
532-
}
533-
534523
@Suppress("detekt:MagicNumber")
535524
// Inclusive bounds check: true only for code points from U+0660 through U+0669.
private fun isArabicIndicDigit(code: Int): Boolean = code >= 0x0660 && code <= 0x0669
536525

0 commit comments

Comments
 (0)