@@ -36,6 +36,7 @@ import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.SaveMode
 import org.apache.spark.sql.types.ArrayType
 import org.apache.spark.sql.types.IntegerType
+import org.apache.spark.sql.types.MapType
 import org.apache.spark.sql.types.StringType
 import org.apache.spark.sql.types.StructField
 import org.apache.spark.sql.types.StructType
@@ -2306,6 +2307,108 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     assertEquals(0, result(0).size)
   }
 
+  @Test
+  def testNestedFieldsUpsert(): Unit = {
+    val update_params = "new_samples: samples"
+    val update_script = "ctx._source.samples = params.new_samples"
+    val es_conf = Map(
+      "es.mapping.id" -> "id",
+      "es.write.operation" -> "upsert",
+      "es.update.script.params" -> update_params,
+      "es.update.script.inline" -> update_script
+    )
+    // First do an upsert with two completely new rows:
+    var data = Seq(Row("2", List(Row("hello"), Row("world"))), Row("1", List()))
+    var rdd: RDD[Row] = sc.parallelize(data)
+    val schema = new StructType()
+      .add("id", StringType, nullable = false)
+      .add("samples", new ArrayType(new StructType()
+        .add("text", StringType), true))
+    var df = sqc.createDataFrame(rdd, schema)
+    df.write.format("org.elasticsearch.spark.sql").options(es_conf).mode(SaveMode.Append).save("nested_fields_upsert_test")
+
+    val reader = sqc.read.schema(schema).format("org.elasticsearch.spark.sql").option("es.read.field.as.array.include", "samples")
+    var resultDf = reader.load("nested_fields_upsert_test")
+    assertEquals(2, resultDf.count())
+    var samples = resultDf.where(resultDf("id").equalTo("2")).select("samples").first().getAs[IndexedSeq[Row]](0)
+    assertEquals(2, samples.size)
+    assertEquals("hello", samples(0).get(0))
+    assertEquals("world", samples(1).get(0))
+
+    // Now, do an upsert on the one with the empty samples list:
+    data = Seq(Row("1", List(Row("goodbye"), Row("world"))))
+    rdd = sc.parallelize(data)
+    df = sqc.createDataFrame(rdd, schema)
+    df.write.format("org.elasticsearch.spark.sql").options(es_conf).mode(SaveMode.Append).save("nested_fields_upsert_test")
+
+    resultDf = reader.load("nested_fields_upsert_test")
+    samples = resultDf.where(resultDf("id").equalTo("1")).select("samples").first().getAs[IndexedSeq[Row]](0)
+    assertEquals(2, samples.size)
+    assertEquals("goodbye", samples(0).get(0))
+    assertEquals("world", samples(1).get(0))
+
+    // Finally, an upsert on the row that had samples values:
+    data = Seq(Row("2", List(Row("goodbye"), Row("again"))))
+    rdd = sc.parallelize(data)
+    df = sqc.createDataFrame(rdd, schema)
+    df.write.format("org.elasticsearch.spark.sql").options(es_conf).mode(SaveMode.Append).save("nested_fields_upsert_test")
+
+    resultDf = reader.load("nested_fields_upsert_test")
+    samples = resultDf.where(resultDf("id").equalTo("2")).select("samples").first().getAs[IndexedSeq[Row]](0)
+    assertEquals(2, samples.size)
+    assertEquals("goodbye", samples(0).get(0))
+    assertEquals("again", samples(1).get(0))
+  }
+
+  @Test
+  def testMapsUpsert(): Unit = {
+    val update_params = "new_samples: samples"
+    val update_script = "ctx._source.samples = params.new_samples"
+    val es_conf = Map(
+      "es.mapping.id" -> "id",
+      "es.write.operation" -> "upsert",
+      "es.update.script.params" -> update_params,
+      "es.update.script.inline" -> update_script
+    )
+    // First do an upsert with two completely new rows:
+    var data = Seq(Row("2", Map(("hello", "world"))), Row("1", Map()))
+    var rdd: RDD[Row] = sc.parallelize(data)
+    val schema = new StructType()
+      .add("id", StringType, nullable = false)
+      .add("samples", new MapType(StringType, StringType, true))
+    var df = sqc.createDataFrame(rdd, schema)
+    df.write.format("org.elasticsearch.spark.sql").options(es_conf).mode(SaveMode.Append).save("map_fields_upsert_test")
+
+    val reader = sqc.read.format("org.elasticsearch.spark.sql")
+    var resultDf = reader.load("map_fields_upsert_test")
+    assertEquals(2, resultDf.count())
+    var samples = resultDf.where(resultDf("id").equalTo("2")).select("samples").first()
+    assertEquals(1, samples.size)
+    assertEquals("world", samples.get(0).asInstanceOf[Row].get(0))
+
+    // Now, do an upsert on the one with the empty samples map:
+    data = Seq(Row("1", Map(("goodbye", "all"))))
+    rdd = sc.parallelize(data)
+    df = sqc.createDataFrame(rdd, schema)
+    df.write.format("org.elasticsearch.spark.sql").options(es_conf).mode(SaveMode.Append).save("map_fields_upsert_test")
+
+    resultDf = reader.load("map_fields_upsert_test")
+    samples = resultDf.where(resultDf("id").equalTo("1")).select("samples").first()
+    assertEquals(1, samples.size)
+    assertEquals("all", samples.get(0).asInstanceOf[Row].get(0))
+
+    // Finally, an upsert on the row that had samples values:
+    data = Seq(Row("2", Map(("goodbye", "again"))))
+    rdd = sc.parallelize(data)
+    df = sqc.createDataFrame(rdd, schema)
+    df.write.format("org.elasticsearch.spark.sql").options(es_conf).mode(SaveMode.Append).save("map_fields_upsert_test")
+
+    resultDf = reader.load("map_fields_upsert_test")
+    samples = resultDf.where(resultDf("id").equalTo("2")).select("samples").first()
+    assertEquals(1, samples.size)
+    assertEquals("again", samples.get(0).asInstanceOf[Row].get(0))
+  }
+
   @Test
   def testWildcard() {
    val mapping = wrapMapping("data", s"""{
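For context, a minimal sketch of the upsert-with-script write pattern these tests exercise, outside the test harness. The SparkSession setup, the sample data, and the index name "my_index" are assumptions for illustration; the es.* option keys and the inline script are taken verbatim from the test configuration above.

import org.apache.spark.sql.{SaveMode, SparkSession}

object UpsertSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("es-upsert-sketch").getOrCreate()

    // Two columns: "id" becomes the document _id, "samples" is the field
    // the script replaces when the document already exists.
    val df = spark.createDataFrame(Seq(("1", "first"), ("2", "second")))
      .toDF("id", "samples")

    df.write
      .format("org.elasticsearch.spark.sql")
      .option("es.mapping.id", "id")           // route each row to _id = id
      .option("es.write.operation", "upsert")  // insert when absent, script-update when present
      // Bind the script parameter "new_samples" to the "samples" column:
      .option("es.update.script.params", "new_samples:samples")
      .option("es.update.script.inline", "ctx._source.samples = params.new_samples")
      .mode(SaveMode.Append)
      .save("my_index")                        // hypothetical index name

    spark.stop()
  }
}

Note also the read side of the nested-fields test: Elasticsearch mappings do not record whether a field holds a single value or an array, so the test passes es.read.field.as.array.include=samples to tell the connector to surface "samples" as an array when reading back.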