Skip to content

Commit ebc2303

Browse files
authored
Documenting that we do not support dots in field names (#1900)
Es-hadoop does not support fields with dots in their names (#853). Adding support is likely to cause more problems than it fixes. So this commit documents that we do not support them, and adds a better error message.
1 parent 77911f4 commit ebc2303

File tree

3 files changed

+29
-3
lines changed

3 files changed

+29
-3
lines changed

docs/src/reference/asciidoc/core/mapping.adoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,3 +144,10 @@ Explicit or manual mapping should be considered when the defaults need to be ove
144144
Refer to {es} {ref}/indices-create-index.html[create index] and {ref}/indices-put-mapping.html[mapping] documentation on how to define an index and its types - note that these need to be present *before* data is being uploaded to {es} (otherwise automatic mapping will be used by {es}, if enabled).
145145

146146
TIP: In most cases, {ref}/indices-templates.html[templates] are quite handy as they are automatically applied to new indices created that match the pattern; in other words instead of defining the mapping per index, one can just define the template once and then have it applied to all indices that match its pattern.
147+
148+
[float]
149+
[[limitations]]
150+
=== Limitations
151+
152+
{es} allows field names to contain dots ('.'). But {esh} does not support them, and fails when reading or writing fields with dots. Refer to
153+
{es} {ref}/dot-expand-processor.html[Dot Expander Processor] for tooling to assist in replacing dots in field names.

spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import java.{util => ju}
2929
import java.util.concurrent.TimeUnit
3030
import org.elasticsearch.spark.integration.ScalaUtils.propertiesAsScalaMap
3131
import org.elasticsearch.spark.rdd.JDKCollectionConvertersCompat.Converters._
32+
3233
import scala.collection.Map
3334
import scala.collection.mutable.ArrayBuffer
3435
import org.apache.spark.SparkConf
@@ -92,7 +93,7 @@ import org.apache.spark.sql.SparkSession
9293
import org.elasticsearch.hadoop.EsAssume
9394
import org.elasticsearch.hadoop.TestData
9495
import org.elasticsearch.hadoop.cfg.ConfigurationOptions
95-
import org.elasticsearch.hadoop.rest.RestUtils
96+
import org.elasticsearch.hadoop.rest.{EsHadoopParsingException, RestUtils}
9697
import org.elasticsearch.hadoop.serialization.JsonUtils
9798
import org.elasticsearch.hadoop.util.EsMajorVersion
9899
import org.junit.Assert._
@@ -2541,6 +2542,19 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
25412542
assertThat(head.getString(0), containsString("Chipotle"))
25422543
}
25432544

2545+
/**
2546+
* Dots in field names are supported by Elasticsearch, but not by es-hadoop; reading or writing such fields is expected to fail.
2547+
*/
2548+
@Test(expected = classOf[SparkException])
2549+
def testDotsInFieldNames(): Unit = {
2550+
val index = wrapIndex("dots-in-names-index")
2551+
val typed = "data"
2552+
val (target, docPath) = makeTargets(index, typed)
2553+
RestUtils.postData(docPath, "{\"b\":0,\"e\":{\"f.g\":\"hello\"}}".getBytes("UTF-8"))
2554+
val df = sqc.read.format("es").load(index)
2555+
df.count()
2556+
}
2557+
25442558
/**
25452559
* Take advantage of the fixed method order and clear out all created indices.
25462560
* The indices will last in Elasticsearch for all parameters of this test suite.
@@ -2587,4 +2601,4 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
25872601
val lines = Files.readAllLines(path, StandardCharsets.ISO_8859_1).asScala.toSeq
25882602
sc.parallelize(lines)
25892603
}
2590-
}
2604+
}

spark/sql-30/src/main/scala/org/elasticsearch/spark/sql/RowValueReader.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,12 @@ private[sql] trait RowValueReader extends SettingsAware {
5757
if (pos < 0 || pos >= esRow.values.size) {
5858
// geo types allow fields which are ignored - need to skip these if they are not part of the schema
5959
if (pos >= 0 || !currentFieldIsGeo) {
60-
throw new EsHadoopIllegalStateException(s"Position for '$sparkRowField' not found in row; typically this is caused by a mapping inconsistency")
60+
if (key.toString().contains(".")) {
61+
throw new EsHadoopIllegalStateException(
62+
s"Found field '$sparkRowField'. Fields containing dots ('.') are not supported in es-hadoop")
63+
} else {
64+
throw new EsHadoopIllegalStateException(s"Position for '$sparkRowField' not found in row; typically this is caused by a mapping inconsistency")
65+
}
6166
}
6267
} else {
6368
esRow.values.update(pos, value)

0 commit comments

Comments
 (0)