Skip to content

Commit 6038f51

Browse files
authored
Add variants to analysis classes, Add IndexState.data_stream (#727)
1 parent: 008a2cc · commit: 6038f51

File tree

13 files changed: 2,103 additions and 131 deletions.

output/schema/schema.json

Lines changed: 1724 additions & 83 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/typescript/types.ts

Lines changed: 184 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

specification/_types/analysis/StopWords.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,4 +23,4 @@
2323
* Also accepts an array of stop words.
2424
* @class_serializer: StopWordsFormatter
2525
*/
26-
export type StopWords = string[]
26+
export type StopWords = string | string[]

specification/_types/analysis/analyzers.ts

Lines changed: 55 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -22,66 +22,100 @@ import { integer } from '@_types/Numeric'
2222
import { Language, SnowballLanguage } from './languages'
2323
import { StopWords } from './StopWords'
2424
import { NoriDecompoundMode } from './tokenizers'
25+
import { IcuAnalyzer } from './icu-plugin'
26+
import { KuromojiAnalyzer } from './kuromoji-plugin'
2527

26-
export class AnalyzerBase {
27-
type: string
28-
version: VersionString
29-
}
30-
31-
export class CustomAnalyzer extends AnalyzerBase {
32-
char_filter: string[]
33-
filter: string[]
34-
position_increment_gap: integer
35-
position_offset_gap: integer
28+
export class CustomAnalyzer {
29+
type: 'custom'
30+
char_filter?: string[]
31+
filter?: string[]
32+
position_increment_gap?: integer
33+
position_offset_gap?: integer
3634
tokenizer: string
3735
}
3836

39-
export class FingerprintAnalyzer extends AnalyzerBase {
37+
export class FingerprintAnalyzer {
38+
type: 'fingerprint'
39+
version: VersionString
4040
max_output_size: integer
4141
preserve_original: boolean
4242
separator: string
4343
stopwords: StopWords
4444
stopwords_path: string
4545
}
4646

47-
export class KeywordAnalyzer extends AnalyzerBase {}
47+
export class KeywordAnalyzer {
48+
type: 'keyword'
49+
version: VersionString
50+
}
4851

49-
export class LanguageAnalyzer extends AnalyzerBase {
52+
export class LanguageAnalyzer {
53+
type: 'language'
54+
version: VersionString
5055
language: Language
5156
stem_exclusion: string[]
5257
stopwords: StopWords
5358
stopwords_path: string
54-
type: string
5559
}
5660

57-
export class NoriAnalyzer extends AnalyzerBase {
61+
export class NoriAnalyzer {
62+
type: 'nori'
63+
version: VersionString
5864
decompound_mode: NoriDecompoundMode
5965
stoptags: string[]
6066
user_dictionary: string
6167
}
6268

63-
export class PatternAnalyzer extends AnalyzerBase {
69+
export class PatternAnalyzer {
70+
type: 'pattern'
71+
version: VersionString
6472
flags: string
6573
lowercase: boolean
6674
pattern: string
6775
stopwords: StopWords
6876
}
6977

70-
export class SimpleAnalyzer extends AnalyzerBase {}
78+
export class SimpleAnalyzer {
79+
type: 'simple'
80+
version: VersionString
81+
}
7182

72-
export class SnowballAnalyzer extends AnalyzerBase {
83+
export class SnowballAnalyzer {
84+
type: 'snowball'
85+
version: VersionString
7386
language: SnowballLanguage
7487
stopwords: StopWords
7588
}
7689

77-
export class StandardAnalyzer extends AnalyzerBase {
90+
export class StandardAnalyzer {
91+
type: 'standard'
7892
max_token_length: integer
7993
stopwords: StopWords
8094
}
8195

82-
export class StopAnalyzer extends AnalyzerBase {
96+
export class StopAnalyzer {
97+
type: 'stop'
98+
version: VersionString
8399
stopwords: StopWords
84100
stopwords_path: string
85101
}
86102

87-
export class WhitespaceAnalyzer extends AnalyzerBase {}
103+
export class WhitespaceAnalyzer {
104+
type: 'whitespace'
105+
version: VersionString
106+
}
107+
108+
/** @variants internal tag='type' */
109+
export type Analyzer =
110+
| CustomAnalyzer
111+
| FingerprintAnalyzer
112+
| KeywordAnalyzer
113+
| LanguageAnalyzer
114+
| NoriAnalyzer
115+
| PatternAnalyzer
116+
| SimpleAnalyzer
117+
| StandardAnalyzer
118+
| StopAnalyzer
119+
| WhitespaceAnalyzer
120+
| IcuAnalyzer
121+
| KuromojiAnalyzer

specification/_types/analysis/char_filters.ts

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,23 +21,27 @@ import { VersionString } from '@_types/common'
2121
import { PatternReplaceTokenFilter } from './token_filters'
2222

2323
export class CharFilterBase {
24-
type: string
2524
version?: VersionString
2625
}
2726

27+
/** @variants internal tag='type' */
2828
export type CharFilter =
2929
| HtmlStripCharFilter
3030
| MappingCharFilter
3131
| PatternReplaceTokenFilter
3232

33-
export class HtmlStripCharFilter extends CharFilterBase {}
33+
export class HtmlStripCharFilter extends CharFilterBase {
34+
type: 'html_strip'
35+
}
3436

3537
export class MappingCharFilter extends CharFilterBase {
38+
type: 'mapping'
3639
mappings: string[]
3740
mappings_path: string
3841
}
3942

4043
export class PatternReplaceCharFilter extends CharFilterBase {
44+
type: 'pattern_replace'
4145
flags: string
4246
pattern: string
4347
replacement: string

specification/_types/analysis/icu-plugin.ts

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
* under the License.
1818
*/
1919

20-
import { AnalyzerBase } from './analyzers'
2120
import { CharFilterBase } from './char_filters'
2221
import { TokenizerBase } from './tokenizers'
2322
import { TokenFilterBase } from './token_filters'
@@ -41,10 +40,12 @@ export class IcuNormalizationCharFilter extends CharFilterBase {
4140
}
4241

4342
export class IcuFoldingTokenFilter extends TokenFilterBase {
43+
type: 'icu_folding'
4444
unicode_set_filter: string
4545
}
4646

4747
export class IcuCollationTokenFilter extends TokenFilterBase {
48+
type: 'icu_collation'
4849
alternate: IcuCollationAlternate
4950
caseFirst: IcuCollationCaseFirst
5051
caseLevel: boolean
@@ -58,7 +59,8 @@ export class IcuCollationTokenFilter extends TokenFilterBase {
5859
variant: string
5960
}
6061

61-
export class IcuAnalyzer extends AnalyzerBase {
62+
export class IcuAnalyzer {
63+
type: 'icu_analyzer'
6264
method: IcuNormalizationType
6365
mode: IcuNormalizationMode
6466
}

specification/_types/analysis/kuromoji-plugin.ts

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,30 +18,34 @@
1818
*/
1919

2020
import { integer } from '@_types/Numeric'
21-
import { AnalyzerBase } from './analyzers'
2221
import { CharFilterBase } from './char_filters'
2322
import { TokenizerBase } from './tokenizers'
2423
import { TokenFilterBase } from './token_filters'
2524

26-
export class KuromojiAnalyzer extends AnalyzerBase {
25+
export class KuromojiAnalyzer {
26+
type: 'kuromoji'
2727
mode: KuromojiTokenizationMode
2828
user_dictionary: string
2929
}
3030

3131
export class KuromojiIterationMarkCharFilter extends CharFilterBase {
32+
type: 'kuromoji_iteration_mark'
3233
normalize_kana: boolean
3334
normalize_kanji: boolean
3435
}
3536

3637
export class KuromojiPartOfSpeechTokenFilter extends TokenFilterBase {
38+
type: 'kuromoji_part_of_speech'
3739
stoptags: string[]
3840
}
3941

4042
export class KuromojiReadingFormTokenFilter extends TokenFilterBase {
43+
type: 'kuromoji_readingform'
4144
use_romaji: boolean
4245
}
4346

4447
export class KuromojiStemmerTokenFilter extends TokenFilterBase {
48+
type: 'kuromoji_stemmer'
4549
minimum_length: integer
4650
}
4751

@@ -52,6 +56,7 @@ export enum KuromojiTokenizationMode {
5256
}
5357

5458
export class KuromojiTokenizer extends TokenizerBase {
59+
type: 'kuromoji_tokenizer'
5560
discard_punctuation: boolean
5661
mode: KuromojiTokenizationMode
5762
nbest_cost: integer
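(File path not captured in this extract; the hunk below adds a new 24-line file that defines `CustomNormalizer`, presumably under specification/_types/analysis/ — confirm against the commit.)

Lines changed: 24 additions & 0 deletions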
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
export class CustomNormalizer {
21+
type: 'custom'
22+
char_filter?: string[]
23+
filter?: string[]
24+
}

specification/_types/analysis/phonetic-plugin.ts

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ export enum PhoneticEncoder {
3737

3838
export enum PhoneticLanguage {
3939
any = 0,
40-
comomon = 1,
40+
common = 1,
4141
cyrillic = 2,
4242
english = 3,
4343
french = 4,
@@ -62,6 +62,7 @@ export enum PhoneticRuleType {
6262
}
6363

6464
export class PhoneticTokenFilter extends TokenFilterBase {
65+
type: 'phonetic'
6566
encoder: PhoneticEncoder
6667
languageset: PhoneticLanguage[]
6768
max_code_len: integer

0 commit comments

Comments
 (0)