Skip to content

Commit bcea602

Browse files
authored
Adds HtmlStripProcessor and UriPartsProcessor (#2835)
1 parent 4209681 commit bcea602

File tree

1 file changed

+58
-0
lines changed

1 file changed

+58
-0
lines changed

specification/ingest/_types/Processors.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,12 @@ export class ProcessorContainer {
130130
* @doc_id gsub-processor
131131
*/
132132
gsub?: GsubProcessor
133+
/**
134+
* Removes HTML tags from the field.
135+
* If the field is an array of strings, HTML tags will be removed from all members of the array.
136+
* @doc_id htmlstrip-processor
137+
*/
138+
html_strip?: HtmlStripProcessor
133139
/**
134140
* Uses a pre-trained data frame analytics model or a model deployed for natural language processing tasks to infer against the data that is being ingested in the pipeline.
135141
* @doc_id inference-processor
@@ -230,6 +236,12 @@ export class ProcessorContainer {
230236
* @doc_id urldecode-processor
231237
*/
232238
urldecode?: UrlDecodeProcessor
239+
/**
240+
* Parses a Uniform Resource Identifier (URI) string and extracts its components as an object.
241+
* This URI object includes properties for the URI’s domain, path, fragment, port, query, scheme, user info, username, and password.
242+
* @doc_id uri-parts-processor
243+
*/
244+
uri_parts?: UriPartsProcessor
233245
/**
234246
* The `user_agent` processor extracts details from the user agent string a browser sends with its web requests.
235247
* This processor adds this information by default under the `user_agent` field.
@@ -722,6 +734,24 @@ export class GsubProcessor extends ProcessorBase {
722734
target_field?: Field
723735
}
724736

737+
/**
 * Ingest processor that removes HTML tags from a field.
 * If the field is an array of strings, HTML tags are removed from every member of the array.
 */
export class HtmlStripProcessor extends ProcessorBase {
738+
/**
739+
* The string-valued field to remove HTML tags from.
740+
*/
741+
field: Field
742+
/**
743+
* If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document.
744+
* @server_default false
745+
*/
746+
ignore_missing?: boolean
747+
/**
748+
* The field to assign the stripped value to.
749+
* By default, the `field` is updated in-place.
750+
* @server_default field
751+
*/
752+
target_field?: Field
753+
}
754+
725755
export class InferenceProcessor extends ProcessorBase {
726756
/**
727757
* The ID or alias for the trained model, or the ID of the deployment.
@@ -1174,3 +1204,31 @@ export class UrlDecodeProcessor extends ProcessorBase {
11741204
*/
11751205
target_field?: Field
11761206
}
1207+
1208+
/**
 * Ingest processor that parses a Uniform Resource Identifier (URI) string and extracts its components as an object.
 * The resulting URI object includes properties for the domain, path, fragment, port, query, scheme, user info, username, and password.
 */
export class UriPartsProcessor extends ProcessorBase {
1209+
/**
1210+
* Field containing the URI string.
1211+
*/
1212+
field: Field
1213+
/**
1214+
* If `true` and `field` does not exist, the processor quietly exits without modifying the document.
1215+
* @server_default false
1216+
*/
1217+
ignore_missing?: boolean
1218+
/**
1219+
* If `true`, the processor copies the unparsed URI to `<target_field>.original`.
1220+
* @server_default true
1221+
*/
1222+
keep_original?: boolean
1223+
/**
1224+
* If `true`, the processor removes the `field` after parsing the URI string.
1225+
* If parsing fails, the processor does not remove the `field`.
1226+
* @server_default false
1227+
*/
1228+
remove_if_successful?: boolean
1229+
/**
1230+
* Output field for the URI object.
1231+
* @server_default url
1232+
*/
1233+
target_field?: Field
1234+
}

0 commit comments

Comments
 (0)