
Commit a375db5

Merge remote-tracking branch 'origin/add-old-document' into old-doc-and-topic-map
2 parents: ff98321 + 624fd8d

4 files changed: +61 −10 lines changed


source/src/main/java/com/trustpilot/connector/dynamodb/DynamoDBSourceTask.java

Lines changed: 4 additions & 1 deletion
@@ -334,6 +334,7 @@ private List<SourceRecord> sync() throws Exception {
                 ((RecordAdapter) record).getInternalObject();
 
                 Envelope.Operation op = getOperation(dynamoDbRecord.getEventName());
+                String eventId = dynamoDbRecord.getEventID();
 
                 Map<String, AttributeValue> attributes;
                 if (dynamoDbRecord.getDynamodb().getNewImage() != null) {
@@ -344,7 +345,9 @@ private List<SourceRecord> sync() throws Exception {
 
                 SourceRecord sourceRecord = converter.toSourceRecord(sourceInfo,
                         op,
+                        eventId,
                         attributes,
+                        dynamoDbRecord.getDynamodb().getOldImage(),
                         arrivalTimestamp.toInstant(),
                         dynamoDBRecords.getShardId(),
                         record.getSequenceNumber());
@@ -440,4 +443,4 @@ ArrayBlockingQueue<KclRecordsWrapper> getEventsQueue() {
     SourceInfo getSourceInfo() {
         return sourceInfo;
     }
-}
+}
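For context, `getEventID()` and `getDynamodb().getOldImage()` are read off the AWS SDK v1 stream `Record`; DynamoDB only populates the old image when the table's stream view type captures old images (`NEW_AND_OLD_IMAGES` or `OLD_IMAGE`), so the value forwarded to the converter here can legitimately be null. A minimal sketch of enabling the richer view type with the v1 SDK (the table name is hypothetical):

```java
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.amazonaws.services.dynamodbv2.model.StreamSpecification;
import com.amazonaws.services.dynamodbv2.model.StreamViewType;
import com.amazonaws.services.dynamodbv2.model.UpdateTableRequest;

public final class EnableOldImages {
    public static void main(String[] args) {
        AmazonDynamoDB client = AmazonDynamoDBClientBuilder.defaultClient();
        // Capture both new and old images so stream records carry getOldImage().
        client.updateTable(new UpdateTableRequest()
                .withTableName("my-table") // hypothetical table name
                .withStreamSpecification(new StreamSpecification()
                        .withStreamEnabled(true)
                        .withStreamViewType(StreamViewType.NEW_AND_OLD_IMAGES)));
    }
}
```

Without this stream configuration, the `old_document` field introduced below will simply never be set.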

source/src/main/java/com/trustpilot/connector/dynamodb/Envelope.java

Lines changed: 10 additions & 0 deletions
@@ -56,10 +56,18 @@ public static final class FieldName {
          * The {@code after} field is used to store the state of a record after an operation.
          */
         public static final String DOCUMENT = "document";
+        /**
+         * The {@code before} field is used to store the state of a record before the operation.
+         */
+        public static final String OLD_DOCUMENT = "old_document";
         /**
          * The {@code op} field is used to store the kind of operation on a record.
          */
         public static final String OPERATION = "op";
+        /**
+         * The {@code event_id} field is used to store a globally unique identifier for the event that was recorded in this stream record.
+         */
+        public static final String EVENT_ID = "event_id";
         /**
          * The {@code origin} field is used to store the information about the source of a record, including the
          * Kafka Connect partition and offset information.
@@ -73,6 +81,8 @@ public static final class FieldName {
          * variations.
          */
         public static final String TIMESTAMP = "ts_ms";
+
+        public static final String KEY = "key";
     }
 
 }
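A rough sketch of how a downstream consumer might read the three new envelope fields, assuming the field names defined above and a Struct-valued record (the class and method names are illustrative):

```java
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.sink.SinkRecord;

public final class EnvelopeReader {
    public static void describe(SinkRecord record) {
        Struct value = (Struct) record.value();
        String eventId = value.getString("event_id");         // optional: null when not provided
        String oldDocument = value.getString("old_document"); // optional: JSON string of the old image
        Struct key = value.getStruct("key");                  // struct of the table's key attributes
        System.out.printf("event=%s key=%s hasOldImage=%b%n",
                eventId, key, oldDocument != null);
    }
}
```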

source/src/main/java/com/trustpilot/connector/dynamodb/utils/DynamoDbJson.java

Lines changed: 8 additions & 0 deletions
@@ -39,4 +39,12 @@ public static SchemaBuilder builder() {
     public static Schema schema() {
         return builder().build();
     }
+
+    public static Schema optionalSchema() {
+        return SchemaBuilder.string()
+                .name(LOGICAL_NAME + ".optional")
+                .version(1)
+                .optional()
+                .build();
+    }
 }
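`optionalSchema()` mirrors `schema()` but is nullable and carries a distinct logical name; Connect converters generally require two schemas with the same name to be structurally identical, so reusing `LOGICAL_NAME` for both a required and an optional variant could conflict. A sketch contrasting the two (the value of `LOGICAL_NAME` is assumed here):

```java
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;

public final class SchemaDemo {
    // Assumed value; the real constant lives in DynamoDbJson.
    private static final String LOGICAL_NAME = "com.trustpilot.connector.dynamodb.json";

    public static void main(String[] args) {
        Schema required = SchemaBuilder.string().name(LOGICAL_NAME).version(1).build();
        Schema optional = SchemaBuilder.string().name(LOGICAL_NAME + ".optional").version(1).optional().build();
        System.out.println(required.isOptional()); // false
        System.out.println(optional.isOptional()); // true
    }
}
```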

source/src/main/java/com/trustpilot/connector/dynamodb/utils/RecordConverter.java

Lines changed: 39 additions & 9 deletions
@@ -20,6 +20,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
+import java.util.Optional;
 
 import static java.util.stream.Collectors.toList;
 
@@ -41,7 +42,7 @@ public class RecordConverter {
 
     private final TableDescription tableDesc;
     private final String topic_name;
-    private Schema keySchema;
+    private final Schema keySchema;
     private final Schema valueSchema;
 
     private List<String> keys;
@@ -54,13 +55,18 @@ public RecordConverter(TableDescription tableDesc, String topicNamePrefix, Strin
         this.tableDesc = tableDesc;
         this.topic_name = topicNamePrefix + this.getTopicNameSuffix(topicNamespaceMap, tableDesc.getTableName());
 
-        valueSchema = SchemaBuilder.struct()
+        this.keys = tableDesc.getKeySchema().stream().map(this::sanitiseAttributeName).collect(toList());
+        this.keySchema = getKeySchema(keys);
+        this.valueSchema = SchemaBuilder.struct()
                 .name(SchemaNameAdjuster.DEFAULT.adjust("com.trustpilot.connector.dynamodb.envelope"))
                 .field(Envelope.FieldName.VERSION, Schema.STRING_SCHEMA)
+                .field(Envelope.FieldName.EVENT_ID, Schema.OPTIONAL_STRING_SCHEMA)
                 .field(Envelope.FieldName.DOCUMENT, DynamoDbJson.schema())
+                .field(Envelope.FieldName.OLD_DOCUMENT, DynamoDbJson.optionalSchema())
                 .field(Envelope.FieldName.SOURCE, SourceInfo.structSchema())
                 .field(Envelope.FieldName.OPERATION, Schema.STRING_SCHEMA)
                 .field(Envelope.FieldName.TIMESTAMP, Schema.INT64_SCHEMA)
                 .field(Envelope.FieldName.KEY, this.keySchema)
                 .build();
     }
 
@@ -70,6 +76,19 @@ public SourceRecord toSourceRecord(
             Map<String, AttributeValue> attributes,
             Instant arrivalTimestamp,
             String shardId,
+            String sequenceNumber) throws Exception {
+
+        return toSourceRecord(sourceInfo, op, null, attributes, null, arrivalTimestamp, shardId, sequenceNumber);
+    }
+
+    public SourceRecord toSourceRecord(
+            SourceInfo sourceInfo,
+            Envelope.Operation op,
+            String eventId,
+            Map<String, AttributeValue> attributes,
+            Map<String, AttributeValue> oldAttributes,
+            Instant arrivalTimestamp,
+            String shardId,
             String sequenceNumber) throws Exception {
 
         // Sanitise the incoming attributes to remove any invalid Avro characters
@@ -81,18 +100,13 @@ public SourceRecord toSourceRecord(
                         LinkedHashMap::new
                 ));
 
+
         // Leveraging offsets to store shard and sequence number with each item pushed to Kafka.
         // This info will only be used to update `shardRegister` and won't be used to reset state after restart
         Map<String, Object> offsets = SourceInfo.toOffset(sourceInfo);
         offsets.put(SHARD_ID, shardId);
         offsets.put(SHARD_SEQUENCE_NO, sequenceNumber);
 
-        // DynamoDB keys can be changed only by recreating the table
-        if (keySchema == null) {
-            keys = tableDesc.getKeySchema().stream().map(this::sanitiseAttributeName).collect(toList());
-            keySchema = getKeySchema(keys);
-        }
-
         Struct keyData = new Struct(getKeySchema(keys));
         for (String key : keys) {
             AttributeValue attributeValue = sanitisedAttributes.get(key);
@@ -111,7 +125,23 @@ public SourceRecord toSourceRecord(
                 .put(Envelope.FieldName.DOCUMENT, objectMapper.writeValueAsString(sanitisedAttributes))
                 .put(Envelope.FieldName.SOURCE, SourceInfo.toStruct(sourceInfo))
                 .put(Envelope.FieldName.OPERATION, op.code())
-                .put(Envelope.FieldName.TIMESTAMP, arrivalTimestamp.toEpochMilli());
+                .put(Envelope.FieldName.TIMESTAMP, arrivalTimestamp.toEpochMilli())
+                .put(Envelope.FieldName.KEY, keyData);
+
+
+        if (eventId != null) {
+            valueData = valueData.put(Envelope.FieldName.EVENT_ID, eventId);
+        }
+        if (oldAttributes != null) {
+            Map<String, AttributeValue> sanitisedOldAttributes = oldAttributes.entrySet().stream()
+                    .collect(Collectors.toMap(
+                            e -> this.sanitiseAttributeName(e.getKey()),
+                            Map.Entry::getValue,
+                            (u, v) -> u,
+                            LinkedHashMap::new
+                    ));
+            valueData = valueData.put(Envelope.FieldName.OLD_DOCUMENT, objectMapper.writeValueAsString(sanitisedOldAttributes));
+        }
 
         return new SourceRecord(
                 Collections.singletonMap("table_name", sourceInfo.tableName),
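Two things worth noting: the key schema is now computed once in the constructor (safe, since the deleted comment itself observed that DynamoDB keys can only change by recreating the table), and the original six-argument `toSourceRecord` survives as a delegating overload, so existing callers keep working unchanged. A sketch of calling the new overload, mirroring `DynamoDBSourceTask.sync()` (identifiers and literal values are illustrative; `Envelope.Operation.UPDATE` is assumed to exist as a Debezium-style op code):

```java
import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import com.trustpilot.connector.dynamodb.Envelope;
import com.trustpilot.connector.dynamodb.SourceInfo;
import com.trustpilot.connector.dynamodb.utils.RecordConverter;
import org.apache.kafka.connect.source.SourceRecord;

import java.time.Instant;
import java.util.Map;

public final class ConverterUsage {
    public static SourceRecord convert(RecordConverter converter,
                                       SourceInfo sourceInfo,
                                       Map<String, AttributeValue> newImage,
                                       Map<String, AttributeValue> oldImage) throws Exception {
        return converter.toSourceRecord(
                sourceInfo,
                Envelope.Operation.UPDATE,  // assumed enum constant
                "example-event-id",         // eventId; may be null
                newImage,
                oldImage,                   // null unless the stream captures old images
                Instant.now(),
                "shardId-000000000000",     // illustrative shard id
                "49590338271490256608559692538361571095921575989136588898"); // illustrative sequence number
    }
}
```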
