@@ -74,11 +74,11 @@ export interface BulkStats {
74
74
aborted : boolean
75
75
}
76
76
77
- interface IndexAction {
77
/** Action line of a bulk `index` operation: the operation metadata keyed by `index`. */
interface IndexActionOperation {
  index: T.BulkIndexOperation
}
80
80
81
- interface CreateAction {
81
/** Action line of a bulk `create` operation: the operation metadata keyed by `create`. */
interface CreateActionOperation {
  create: T.BulkCreateOperation
}
84
84
@@ -90,7 +90,9 @@ interface DeleteAction {
90
90
delete : T . BulkDeleteOperation
91
91
}
92
92
93
- type UpdateAction = [ UpdateActionOperation , Record < string , any > ]
93
// What `onDocument` may return for each kind of bulk operation.
//
// `create` and `index` accept either the bare operation object or an
// `[operation, document]` tuple. When the bare form is returned, the bulk helper
// uses the datasource chunk itself as the document line.
type CreateAction = CreateActionOperation | [CreateActionOperation, unknown]
type IndexAction = IndexActionOperation | [IndexActionOperation, unknown]
// `update` is always a tuple; the second element is spread next to `doc`
// when the helper builds the update payload line.
type UpdateAction = [UpdateActionOperation, T.BulkUpdateAction]
// Any action `onDocument` is allowed to return.
type Action = IndexAction | CreateAction | UpdateAction | DeleteAction
95
97
96
98
export interface OnDropDocument < TDocument = unknown > {
@@ -101,6 +103,24 @@ export interface OnDropDocument<TDocument = unknown> {
101
103
retried : boolean
102
104
}
103
105
106
/**
 * A single entry of a bulk response's `items` array: an object keyed by the
 * operation type. The helpers read its first key to learn which operation
 * the entry belongs to.
 */
type BulkResponseItem = Partial<Record<T.BulkOperationType, T.BulkResponseItem>>
107
+
108
/**
 * Argument handed to the bulk helper's `onSuccess` callback.
 */
export interface OnSuccessDocument<TDocument = unknown> {
  /** The bulk response item reported for the operation. */
  result: BulkResponseItem
  /**
   * The deserialized payload line that was sent for the operation.
   * Optional: `delete` operations have no payload line.
   */
  document?: TDocument
}
112
+
113
/**
 * A bulk response item paired with the request lines it was produced from.
 */
interface ZippedResult<TDocument = unknown> {
  /** The bulk response item for the operation. */
  result: BulkResponseItem
  /** The serialized request lines, exactly as they appear in the bulk body. */
  raw: {
    action: string
    document?: string
  }
  // this is a function so that deserialization is only done when needed
  // to avoid a performance hit
  document?: () => TDocument
}
123
+
104
124
export interface BulkHelperOptions < TDocument = unknown > extends T . BulkRequest {
105
125
datasource : TDocument [ ] | Buffer | Readable | AsyncIterator < TDocument >
106
126
onDocument : ( doc : TDocument ) => Action
@@ -110,6 +130,7 @@ export interface BulkHelperOptions<TDocument = unknown> extends T.BulkRequest {
110
130
retries ?: number
111
131
wait ?: number
112
132
onDrop ?: ( doc : OnDropDocument < TDocument > ) => void
133
+ onSuccess ?: ( doc : OnSuccessDocument ) => void
113
134
}
114
135
115
136
export interface BulkHelper < T > extends Promise < BulkStats > {
@@ -379,7 +400,7 @@ export default class Helpers {
379
400
clearTimeout ( timeoutRef )
380
401
}
381
402
382
- // In some cases the previos http call does not have finished,
403
+ // In some cases the previous http call has not finished,
383
404
// or we didn't reach the flush bytes threshold, so we force one last operation.
384
405
if ( loadedOperations > 0 ) {
385
406
const send = await semaphore ( )
@@ -415,8 +436,8 @@ export default class Helpers {
415
436
// to guarantee that no more than the number of operations
416
437
// allowed to run at the same time are executed.
417
438
// It returns a semaphore function which resolves in the next tick
418
- // if we didn't reach the maximim concurrency yet, otherwise it returns
419
- // a promise that resolves as soon as one of the running request has finshed .
439
+ // if we didn't reach the maximum concurrency yet, otherwise it returns
440
+ // a promise that resolves as soon as one of the running requests has finished .
420
441
// The semaphore function resolves a send function, which will be used
421
442
// to send the actual msearch request.
422
443
// It also returns a finish function, which returns a promise that is resolved
@@ -548,6 +569,9 @@ export default class Helpers {
548
569
retries = this [ kMaxRetries ] ,
549
570
wait = 5000 ,
550
571
onDrop = noop ,
572
+ // onSuccess does not default to noop, to avoid the performance hit
573
+ // of deserializing every document in the bulk request
574
+ onSuccess,
551
575
...bulkOptions
552
576
} = options
553
577
@@ -620,26 +644,25 @@ export default class Helpers {
620
644
let chunkBytes = 0
621
645
timeoutRef = setTimeout ( onFlushTimeout , flushInterval ) // eslint-disable-line
622
646
623
- // @ts -expect-error datasoruce is an iterable
647
+ // @ts -expect-error datasource is an iterable
624
648
for await ( const chunk of datasource ) {
625
649
if ( shouldAbort ) break
626
650
timeoutRef . refresh ( )
627
- const action = onDocument ( chunk )
628
- const operation = Array . isArray ( action )
629
- ? Object . keys ( action [ 0 ] ) [ 0 ]
630
- : Object . keys ( action ) [ 0 ]
651
+ const result = onDocument ( chunk )
652
+ const [ action , payload ] = Array . isArray ( result ) ? result : [ result , chunk ]
653
+ const operation = Object . keys ( action ) [ 0 ]
631
654
if ( operation === 'index' || operation === 'create' ) {
632
655
actionBody = serializer . serialize ( action )
633
- payloadBody = typeof chunk === 'string' ? chunk : serializer . serialize ( chunk )
656
+ payloadBody = typeof payload === 'string'
657
+ ? payload
658
+ : serializer . serialize ( payload )
634
659
chunkBytes += Buffer . byteLength ( actionBody ) + Buffer . byteLength ( payloadBody )
635
660
bulkBody . push ( actionBody , payloadBody )
636
661
} else if ( operation === 'update' ) {
637
- // @ts -expect-error in case of update action is an array
638
- actionBody = serializer . serialize ( action [ 0 ] )
662
+ actionBody = serializer . serialize ( action )
639
663
payloadBody = typeof chunk === 'string'
640
664
? `{"doc":${ chunk } }`
641
- // @ts -expect-error in case of update action is an array
642
- : serializer . serialize ( { doc : chunk , ...action [ 1 ] } )
665
+ : serializer . serialize ( { doc : chunk , ...payload } )
643
666
chunkBytes += Buffer . byteLength ( actionBody ) + Buffer . byteLength ( payloadBody )
644
667
bulkBody . push ( actionBody , payloadBody )
645
668
} else if ( operation === 'delete' ) {
@@ -653,15 +676,16 @@ export default class Helpers {
653
676
654
677
if ( chunkBytes >= flushBytes ) {
655
678
stats . bytes += chunkBytes
656
- const send = await semaphore ( )
657
- send ( bulkBody . slice ( ) )
679
+ const bulkBodyCopy = bulkBody . slice ( )
658
680
bulkBody . length = 0
659
681
chunkBytes = 0
682
+ const send = await semaphore ( )
683
+ send ( bulkBodyCopy )
660
684
}
661
685
}
662
686
663
687
clearTimeout ( timeoutRef )
664
- // In some cases the previos http call does not have finished,
688
+ // In some cases the previous http call has not finished,
665
689
// or we didn't reach the flush bytes threshold, so we force one last operation.
666
690
if ( ! shouldAbort && chunkBytes > 0 ) {
667
691
const send = await semaphore ( )
@@ -697,8 +721,8 @@ export default class Helpers {
697
721
// to guarantee that no more than the number of operations
698
722
// allowed to run at the same time are executed.
699
723
// It returns a semaphore function which resolves in the next tick
700
- // if we didn't reach the maximim concurrency yet, otherwise it returns
701
- // a promise that resolves as soon as one of the running request has finshed .
724
+ // if we didn't reach the maximum concurrency yet, otherwise it returns
725
+ // a promise that resolves as soon as one of the running requests has finished .
702
726
// The semaphore function resolves a send function, which will be used
703
727
// to send the actual bulk request.
704
728
// It also returns a finish function, which returns a promise that is resolved
@@ -805,57 +829,93 @@ export default class Helpers {
805
829
callback ( )
806
830
}
807
831
832
/**
 * Zips bulk response items (the action's result) with the original document body.
 * The raw string version of action and document lines are also included.
 *
 * `bulkBody` is walked in lockstep with `responseItems`: a `delete` item consumes a
 * single line (the action), any other operation consumes two (action + document).
 * The response items are therefore assumed to be in the same order as the
 * operations serialized in `bulkBody`.
 *
 * @param responseItems - the `items` array of a bulk response
 * @param bulkBody - the serialized lines that were sent in the bulk request
 * @returns one entry per response item, in the same order as `responseItems`
 */
function zipBulkResults (responseItems: BulkResponseItem[], bulkBody: string[]): ZippedResult[] {
  // Typed accumulator: every pushed literal is checked against ZippedResult,
  // so no type assertion is needed.
  const zipped: ZippedResult[] = []
  let indexSlice = 0

  for (const result of responseItems) {
    // A response item is keyed by its operation type.
    const operation = Object.keys(result)[0]
    const action = bulkBody[indexSlice]

    if (operation === 'delete') {
      // delete operations have no document line
      zipped.push({ result, raw: { action } })
      indexSlice += 1
    } else {
      const document = bulkBody[indexSlice + 1]
      zipped.push({
        result,
        raw: { action, document },
        // this is a function so that deserialization is only done when needed
        // to avoid a performance hit
        document: () => serializer.deserialize(document)
      })
      indexSlice += 2
    }
  }

  return zipped
}
867
+
808
868
function tryBulk ( bulkBody : string [ ] , callback : ( err : Error | null , bulkBody : string [ ] ) => void ) : void {
809
869
if ( shouldAbort ) return callback ( null , [ ] )
810
870
client . bulk ( Object . assign ( { } , bulkOptions , { body : bulkBody } ) , reqOptions as TransportRequestOptionsWithMeta )
811
871
. then ( response => {
812
872
const result = response . body
873
+ const results = zipBulkResults ( result . items , bulkBody )
874
+
813
875
if ( ! result . errors ) {
814
876
stats . successful += result . items . length
815
- for ( const item of result . items ) {
816
- if ( item . update ?. result === 'noop' ) {
877
+ for ( const item of results ) {
878
+ const { result, document = noop } = item
879
+ if ( result . update ?. result === 'noop' ) {
817
880
stats . noop ++
818
881
}
882
+ if ( onSuccess != null ) onSuccess ( { result, document : document ( ) } )
819
883
}
820
884
return callback ( null , [ ] )
821
885
}
822
886
const retry = [ ]
823
- const { items } = result
824
- let indexSlice = 0
825
- for ( let i = 0 , len = items . length ; i < len ; i ++ ) {
826
- const action = items [ i ]
827
- const operation = Object . keys ( action ) [ 0 ]
887
+ for ( const item of results ) {
888
+ const { result, raw, document = noop } = item
889
+ const operation = Object . keys ( result ) [ 0 ]
828
890
// @ts -expect-error
829
- const responseItem = action [ operation as keyof T . BulkResponseItemContainer ]
891
+ const responseItem = result [ operation as keyof T . BulkResponseItemContainer ]
830
892
assert ( responseItem !== undefined , 'The responseItem is undefined, please file a bug report' )
831
893
832
894
if ( responseItem . status >= 400 ) {
833
895
// 429 is the only status code where we might want to retry
834
896
// a document, because it was not an error in the document itself,
835
- // but the ES node were handling too many operations.
897
+ // but the ES node was handling too many operations.
836
898
if ( responseItem . status === 429 ) {
837
- retry . push ( bulkBody [ indexSlice ] )
899
+ retry . push ( raw . action )
838
900
/* istanbul ignore next */
839
901
if ( operation !== 'delete' ) {
840
- retry . push ( bulkBody [ indexSlice + 1 ] )
902
+ retry . push ( raw . document ?? '' )
841
903
}
842
904
} else {
843
905
onDrop ( {
844
906
status : responseItem . status ,
845
907
error : responseItem . error ?? null ,
846
- operation : serializer . deserialize ( bulkBody [ indexSlice ] ) ,
908
+ operation : serializer . deserialize ( raw . action ) ,
847
909
// @ts -expect-error
848
- document : operation !== 'delete'
849
- ? serializer . deserialize ( bulkBody [ indexSlice + 1 ] )
850
- : null ,
910
+ document : document ( ) ,
851
911
retried : isRetrying
852
912
} )
853
913
stats . failed += 1
854
914
}
855
915
} else {
856
916
stats . successful += 1
917
+ if ( onSuccess != null ) onSuccess ( { result, document : document ( ) } )
857
918
}
858
- operation === 'delete' ? indexSlice += 1 : indexSlice += 2
859
919
}
860
920
callback ( null , retry )
861
921
} )
0 commit comments