Skip to content

Commit cdfe7dc

Browse files
authored
Add email and idn-email format support (#103)
Related to #54
1 parent ddaf1bd commit cdfe7dc

File tree

13 files changed

+258
-48
lines changed

13 files changed

+258
-48
lines changed

README.md

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -332,23 +332,10 @@ val valid = schema.validate(elementToValidate, errors::add)
332332

333333
## Format assertion
334334

335-
The library supports `format` assertion. Not all formats are supported yet. The supported formats are:
336-
* date
337-
* time
338-
* date-time
339-
* duration
340-
* json-pointer
341-
* relative-json-pointer
342-
* ipv4
343-
* ipv6
344-
* uuid
345-
* hostname
346-
* idn-hostname
347-
* uri
348-
* uri-reference
349-
* uri-template
350-
* iri
351-
* iri-reference
335+
The library supports `format` assertion.
336+
Almost all formats from [JSON schema draft 2020-12](https://json-schema.org/draft/2020-12/draft-bhutton-json-schema-validation-01#section-7.3) are supported.
337+
Unsupported formats:
338+
* regex
352339

353340
There is also an API for implementing user-defined format validation.
354341
The [FormatValidator](src/commonMain/kotlin/io/github/optimumcode/json/schema/ValidationError.kt) interface can be used for that.

src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ import io.github.optimumcode.json.schema.internal.factories.AbstractAssertionFac
1616
import io.github.optimumcode.json.schema.internal.formats.DateFormatValidator
1717
import io.github.optimumcode.json.schema.internal.formats.DateTimeFormatValidator
1818
import io.github.optimumcode.json.schema.internal.formats.DurationFormatValidator
19+
import io.github.optimumcode.json.schema.internal.formats.EmailFormatValidator
1920
import io.github.optimumcode.json.schema.internal.formats.HostnameFormatValidator
21+
import io.github.optimumcode.json.schema.internal.formats.IdnEmailFormatValidator
2022
import io.github.optimumcode.json.schema.internal.formats.IdnHostnameFormatValidator
2123
import io.github.optimumcode.json.schema.internal.formats.IpV4FormatValidator
2224
import io.github.optimumcode.json.schema.internal.formats.IpV6FormatValidator
@@ -82,6 +84,8 @@ internal sealed class FormatAssertionFactory(
8284
"iri" to IriFormatValidator,
8385
"iri-reference" to IriReferenceFormatValidator,
8486
"uri-template" to UriTemplateFormatValidator,
87+
"email" to EmailFormatValidator,
88+
"idn-email" to IdnEmailFormatValidator,
8589
)
8690
}
8791
}
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
package io.github.optimumcode.json.schema.internal.formats
2+
3+
import de.cketti.codepoints.CodePoints
4+
import de.cketti.codepoints.codePointAt
5+
import io.github.optimumcode.json.schema.FormatValidationResult
6+
import io.github.optimumcode.json.schema.FormatValidator
7+
import io.github.optimumcode.json.schema.internal.util.allCodepoints
8+
9+
// Delimiter between the local part and the domain part of an address.
private const val AT_CHAR = '@'

// Brackets that wrap an address literal in the domain part, e.g. test@[127.0.0.1].
private const val IP_PART_START = '['
private const val IP_PART_END = ']'

// Character that opens and closes a quoted local part.
private const val QUOTE = '"'

// Code point of the escape character used for quoted pairs inside a quoted local part.
private const val BACK_SLASH = '\\'.code

// Tag that must precede an IPv6 address inside an address literal.
private const val IPV6_PREFIX = "IPv6:"

// Highest code point that is still treated as ASCII.
internal const val MAX_ASCII_CODEPOINT = 0x7F

/**
 * Base implementation for e-mail format validation.
 *
 * The value is split at the last `@` into a local part and a domain part:
 * * the local part is either a dot-separated sequence of atoms or a quoted string;
 * * the domain part is either an address literal in square brackets
 *   (IPv4, or IPv6 with the `IPv6:` tag) or a hostname checked by [hostnameValidator].
 *
 * Subclasses can widen the set of accepted code points by overriding [isAText] and [isValidQText].
 */
internal abstract class AbstractEmailFormatValidator(
    private val hostnameValidator: AbstractStringFormatValidator,
) : AbstractStringFormatValidator() {
    override fun validate(value: String): FormatValidationResult {
        val atIndex = value.lastIndexOf(AT_CHAR)
        // Rejects empty input, a missing '@', an empty local part and an empty domain part at once:
        // the delimiter has to sit strictly between the first and the last character.
        if (atIndex !in 1 until value.lastIndex) {
            return FormatValidator.Invalid()
        }
        val local = value.substring(0, atIndex)
        val domain = value.substring(atIndex + 1)
        return when {
            !isValidLocalPart(local) -> FormatValidator.Invalid()
            !isValidDomainPart(domain) -> FormatValidator.Invalid()
            else -> FormatValidator.Valid()
        }
    }

    /**
     * Validates the part after `@`: a bracketed address literal or a hostname.
     */
    private fun isValidDomainPart(domainPart: String): Boolean {
        val isAddressLiteral = domainPart.startsWith(IP_PART_START) && domainPart.endsWith(IP_PART_END)
        if (!isAddressLiteral) {
            return hostnameValidator.validate(domainPart).isValid()
        }
        // strip the surrounding brackets
        val address = domainPart.substring(1, domainPart.lastIndex)
        val result =
            if (address.startsWith(IPV6_PREFIX)) {
                IpV6FormatValidator.validate(address.substring(IPV6_PREFIX.length))
            } else {
                // anything without the IPv6 tag must be a plain IPv4 address
                IpV4FormatValidator.validate(address)
            }
        return result.isValid()
    }

    /**
     * Validates the part before `@`.
     * A quote on either side selects quoted-string validation, so a single unbalanced quote is rejected there.
     */
    private fun isValidLocalPart(localPart: String): Boolean {
        val hasQuote = localPart.startsWith(QUOTE) || localPart.endsWith(QUOTE)
        return if (hasQuote) isValidQuotedString(localPart) else isValidDotString(localPart)
    }

    /**
     * Validates an unquoted local part: non-empty atoms separated by single dots.
     */
    private fun isValidDotString(localPart: String): Boolean =
        Validation.eachSeparatedPart(localPart, separator = '.') { atom ->
            atom.isNotEmpty() && atom.allCodepoints(::isAText)
        }

    /**
     * Returns `true` if [codepoint] is allowed inside an atom of an unquoted local part.
     * This implementation accepts only ASCII letters, digits and the special characters listed below.
     */
    protected open fun isAText(codepoint: Int): Boolean {
        if (codepoint > MAX_ASCII_CODEPOINT) {
            return false
        }
        val char = codepoint.toChar()
        return Validation.isAlpha(char) || Validation.isDigit(char) || isSpecialCharacter(char)
    }

    /**
     * Returns `true` for the punctuation characters that are allowed in an atom.
     */
    private fun isSpecialCharacter(char: Char): Boolean =
        when (char) {
            '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '}', '~', '|' -> true
            else -> false
        }

    /**
     * Validates a quoted local part: it must start and end with a quote
     * and have non-empty content between the quotes.
     */
    private fun isValidQuotedString(localPart: String): Boolean {
        // two quotes plus at least one character of content
        val isProperlyQuoted = localPart.length > 2 && localPart.startsWith(QUOTE) && localPart.endsWith(QUOTE)
        return isProperlyQuoted && isValidQuotedContent(localPart.substring(1, localPart.lastIndex))
    }

    /**
     * Validates the content between the quotes.
     * Every code point must either satisfy [isValidQText] or be a backslash
     * followed by a code point from the range `' '..'~'`.
     */
    private fun isValidQuotedContent(quotedContent: String): Boolean {
        // the caller guarantees that the content is not empty
        var position = 0
        while (position < quotedContent.length) {
            val current = quotedContent.codePointAt(position)
            position += CodePoints.charCount(current)
            if (current == BACK_SLASH) {
                if (position >= quotedContent.length) {
                    // a trailing backslash has nothing to escape, e.g. "\"
                    return false
                }
                val escaped = quotedContent.codePointAt(position)
                if (escaped < ' '.code || escaped > '~'.code) {
                    // invalid quoted pair
                    return false
                }
                // the escaped code point is in the ASCII range, so it always occupies one char
                position += 1
            } else if (!isValidQText(current)) {
                return false
            }
        }
        return true
    }

    /**
     * Returns `true` if [codepoint] may appear unescaped inside a quoted local part.
     * The backslash falls into the accepted range but never reaches this method:
     * the caller handles it explicitly as the start of a quoted pair.
     */
    protected open fun isValidQText(codepoint: Int): Boolean =
        codepoint in ' '.code..'~'.code && codepoint != QUOTE.code
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
package io.github.optimumcode.json.schema.internal.formats
2+
3+
/**
 * Validator for the `email` format.
 * Uses [HostnameFormatValidator] for the domain part and keeps the ASCII-only
 * local-part rules from [AbstractEmailFormatValidator].
 */
internal object EmailFormatValidator : AbstractEmailFormatValidator(HostnameFormatValidator)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package io.github.optimumcode.json.schema.internal.formats
2+
3+
/**
 * Validator for the `idn-email` format.
 * Uses [IdnHostnameFormatValidator] for the domain part and additionally accepts
 * non-ASCII code points in both the unquoted and the quoted local part.
 */
internal object IdnEmailFormatValidator : AbstractEmailFormatValidator(IdnHostnameFormatValidator) {
    /** Accepts any non-ASCII code point in addition to what the base class allows in an atom. */
    override fun isAText(codepoint: Int): Boolean = isUtf8NonAscii(codepoint) || super.isAText(codepoint)

    /** Accepts any non-ASCII code point in addition to what the base class allows in a quoted string. */
    override fun isValidQText(codepoint: Int): Boolean = isUtf8NonAscii(codepoint) || super.isValidQText(codepoint)

    /**
     * The spec is quite clear about which codepoints are allowed.
     * So, this method allows all codepoints that are greater than 0x7F
     */
    private fun isUtf8NonAscii(codepoint: Int): Boolean = MAX_ASCII_CODEPOINT < codepoint
}

src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package io.github.optimumcode.json.schema.internal.formats
22

3+
import io.github.optimumcode.json.schema.internal.formats.Validation.isAlpha
4+
import io.github.optimumcode.json.schema.internal.formats.Validation.isDigit
5+
36
internal object UriSpec {
47
const val SCHEMA_DELIMITER = ':'
58
const val QUERY_DELIMITER = '?'
@@ -268,10 +271,6 @@ internal object UriSpec {
268271
return str[index] == '%' && isHexDigit(str[index + 1]) && isHexDigit(str[index + 2])
269272
}
270273

271-
fun isAlpha(c: Char): Boolean = c in 'a'..'z' || c in 'A'..'Z'
272-
273-
fun isDigit(c: Char): Boolean = c in '0'..'9'
274-
275274
private fun isPChar(c: Char): Boolean = isUnreserved(c) || isSubDelimiter(c) || c == ':' || c == '@'
276275

277276
private fun isUnreserved(c: Char): Boolean = isAlpha(c) || isDigit(c) || c == '_' || c == '-' || c == '.' || c == '~'

src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriTemplateFormatValidator.kt

Lines changed: 3 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import de.cketti.codepoints.CodePoints
44
import de.cketti.codepoints.codePointAt
55
import io.github.optimumcode.json.schema.FormatValidationResult
66
import io.github.optimumcode.json.schema.FormatValidator
7+
import io.github.optimumcode.json.schema.internal.formats.Validation.eachSeparatedPart
78

89
internal object UriTemplateFormatValidator : AbstractStringFormatValidator() {
910
private const val EXPRESSION_START = '{'.code
@@ -121,28 +122,6 @@ internal object UriTemplateFormatValidator : AbstractStringFormatValidator() {
121122
return eachSeparatedPart(varList, separator = ',', ::isValidVarSpec)
122123
}
123124

124-
private inline fun eachSeparatedPart(
125-
value: String,
126-
separator: Char,
127-
isValid: (String) -> Boolean,
128-
): Boolean {
129-
var lastSeparator = -1
130-
do {
131-
val separatorIndex = value.indexOf(separator, startIndex = lastSeparator + 1)
132-
val part =
133-
if (separatorIndex < 0) {
134-
value.substring(lastSeparator + 1)
135-
} else {
136-
value.substring(lastSeparator + 1, separatorIndex)
137-
}
138-
if (!isValid(part)) {
139-
return false
140-
}
141-
lastSeparator = separatorIndex
142-
} while (separatorIndex > 0)
143-
return true
144-
}
145-
146125
private fun isValidVarSpec(varSpec: String): Boolean {
147126
if (varSpec.isEmpty()) {
148127
return false
@@ -172,7 +151,7 @@ internal object UriTemplateFormatValidator : AbstractStringFormatValidator() {
172151
return eachSeparatedPart(varName, separator = '.') { part ->
173152
part.isNotEmpty() &&
174153
UriSpec.hasValidCharsOrPctEncoded(part) {
175-
UriSpec.isAlpha(it) || UriSpec.isDigit(it) || it == '_'
154+
Validation.isAlpha(it) || Validation.isDigit(it) || it == '_'
176155
}
177156
}
178157
}
@@ -186,7 +165,7 @@ internal object UriTemplateFormatValidator : AbstractStringFormatValidator() {
186165
// too long value
187166
return false
188167
}
189-
return maxLength.all(UriSpec::isDigit)
168+
return maxLength.all(Validation::isDigit)
190169
}
191170

192171
private fun isOperator(char: Char): Boolean =
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package io.github.optimumcode.json.schema.internal.formats
2+
3+
/**
 * Small character and string helpers shared by the format validators.
 */
internal object Validation {
    /** Returns `true` if [c] is an ASCII letter (`a`-`z` or `A`-`Z`). */
    fun isAlpha(c: Char): Boolean = c in 'a'..'z' || c in 'A'..'Z'

    /** Returns `true` if [c] is an ASCII digit (`0`-`9`). */
    fun isDigit(c: Char): Boolean = c in '0'..'9'

    /**
     * Splits [value] by [separator] and checks every part with [isValid].
     *
     * Parts are produced lazily from left to right, and the check stops at the first part that is rejected.
     * Empty parts (leading, trailing or between two adjacent separators) are passed to [isValid] as well,
     * so the caller decides whether they are acceptable.
     *
     * @param value the string to split
     * @param separator the character that delimits the parts
     * @param isValid predicate applied to each part
     * @return `true` if every part satisfies [isValid], `false` otherwise
     */
    inline fun eachSeparatedPart(
        value: String,
        separator: Char,
        isValid: (String) -> Boolean,
    ): Boolean {
        var lastSeparator = -1
        do {
            val separatorIndex = value.indexOf(separator, startIndex = lastSeparator + 1)
            val part =
                if (separatorIndex < 0) {
                    // no more separators: the rest of the string is the last part
                    value.substring(lastSeparator + 1)
                } else {
                    value.substring(lastSeparator + 1, separatorIndex)
                }
            if (!isValid(part)) {
                return false
            }
            lastSeparator = separatorIndex
            // `>= 0` (not `> 0`): a separator at index 0 must not end the loop,
            // otherwise everything after a leading separator would be skipped
        } while (separatorIndex >= 0)
        return true
    }
}

src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/util/UnicodeUtil.kt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,13 @@ internal inline fun CharSequence.forEachCodePointIndexed(
4343
}
4444
block(startIndex, firstChar.code)
4545
}
46+
}
47+
48+
/**
 * Checks the code points of the receiver against [condition].
 * Iteration is delegated to [forEachCodePointIndexed] and stops at the first code point that is rejected.
 *
 * @param condition predicate applied to each code point
 * @return `false` as soon as [condition] rejects a code point, `true` otherwise
 */
internal fun CharSequence.allCodepoints(condition: (Int) -> Boolean): Boolean {
    forEachCodePointIndexed { _, codePoint ->
        if (condition(codePoint)) {
            // accepted: move on to the next code point
            return@forEachCodePointIndexed
        }
        return false
    }
    return true
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package io.github.optimumcode.json.schema.assertions.general.format
2+
3+
import io.kotest.core.spec.style.FunSpec
4+
5+
/**
 * Test cases for the `email` format: unquoted and quoted local parts,
 * hostnames and address literals in the domain part.
 */
class JsonSchemaEmailFormatValidationTest : FunSpec() {
    init {
        formatValidationTestSuite(
            format = "email",
            validTestCases =
                listOf(
                    "a2!#$%&'*+-/=?^_`{}~|@domain.com",
                    "\"\\\"\\ \\@\\!\\#\\[\\]\\~\"@example.com",
                    "\" !#[]~a2\"@example.com",
                    "test@[127.0.0.1]",
                    "test@[IPv6:FF01::101]",
                ),
            invalidTestCases =
                listOf(
                    TestCase("", "empty email"),
                    TestCase("@example.com", "empty local part"),
                    TestCase("test@", "empty domain part"),
                    TestCase("\"\"@example.com", "empty quoted string"),
                    // local part opens a quote that is never closed
                    TestCase("\"test@example.com", "only start quote"),
                    TestCase("test\"@example.com", "only end quote"),
                    TestCase("\"test\\\"@example.com", "quoted last quote"),
                    TestCase("\"te\\\nst\"@example.com", "invalid quoted character < space"),
                    TestCase("\"te\\\u007fst\"@example.com", "invalid quoted character > ~"),
                    TestCase("\"te\"st\"@example.com", "invalid character in quoted string"),
                    TestCase("test@[127.0.0.300]", "invalid IPv4 in domain part"),
                    TestCase("test@[IPv6:1:2:3:4:5:6:7:8:9]", "invalid IPv6 in domain part"),
                    TestCase("test@[FF01::101]", "valid IPv6 in domain part without prefix"),
                    TestCase("test@hostname.", "valid hostname in domain part"),
                    TestCase("te\nst@hostname", "invalid character < space"),
                    TestCase("te\u007fst@hostname", "invalid character > ~"),
                    TestCase("\"te\nst\"@hostname", "invalid character in quoted local part < space"),
                    TestCase("\"te\u007fst\"@hostname", "invalid character in quoted local part > ~"),
                ),
        )
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package io.github.optimumcode.json.schema.assertions.general.format
2+
3+
import io.kotest.core.spec.style.FunSpec
4+
5+
/**
 * Test cases for the `idn-email` format: non-ASCII code points in the local part
 * (unquoted and quoted) and in the domain part.
 */
class JsonSchemaIdnEmailFormatValidationTest : FunSpec() {
    init {
        // addresses with non-ASCII characters on both sides of '@'
        val accepted =
            listOf(
                "실례@실례.테스트",
                "\"실a\\~례\"@실례.테스트",
            )
        // DEL (0x7F) is neither an allowed ASCII character nor a non-ASCII code point
        val rejected =
            listOf(
                TestCase("\u007F례@실례.테스트", "invalid codepoint in local part"),
                TestCase("\"\u007F\"@실례.테스트", "invalid codepoint in quoted local part"),
            )
        formatValidationTestSuite(
            format = "idn-email",
            validTestCases = accepted,
            invalidTestCases = rejected,
        )
    }
}

src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaUriTemplateFormatValidationTest.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,15 @@ class JsonSchemaUriTemplateFormatValidationTest : FunSpec() {
1515
"https://simple.uri",
1616
"https://test%20uri.com",
1717
"https://testname/{first%20name}",
18-
"https://testname/{first.name}",
18+
"https://testname/{name_1.name_2}",
1919
"https://\u00a0\ud7ff\uf900\ufdcf\ufdf0\uffef\uf8ff",
2020
),
2121
invalidTestCases =
2222
listOf(
2323
TestCase("https://example.com/{}", "empty expression"),
2424
TestCase("https://example.com/{,}", "empty expression with var delimiter"),
2525
TestCase("https://example.com/{test.}", "empty expression with name delimiter"),
26+
TestCase("https://example.com/{te~st}", "invalid character in var name"),
2627
TestCase("https://example.com/}", "end expression without start"),
2728
TestCase("https://example.com/{t{e}st}", "expression inside expression"),
2829
TestCase("https://example.com/{test:0}", "leading zero"),

0 commit comments

Comments
 (0)