3 files changed, +42 −0 lines:
- ansible/roles/opensearch/tasks
- environments/common/files/filebeat
New tasks file under ansible/roles/opensearch/tasks (referenced below as archive_data.yml); all lines added:

# Remove data which was NOT indexed by Slurm Job ID.
# It will be re-ingested by filebeat from the slurmdbd, with that index.

- name: Ensure opensearch stopped
  systemd:
    name: opensearch
    state: stopped
  register: _opensearch_stop
  until: "_opensearch_stop.status.ActiveState in ['inactive', 'failed']"
  retries: 15
  delay: 5

- name: Archive existing data
  community.general.archive:
    path: "{{ opensearch_data_path }}"
    dest: "{{ opensearch_data_path | dirname }}/data-{{ lookup('pipe', 'date --iso-8601=minutes') }}.tar.gz"
    remove: true
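The archive lands next to the data directory with a timestamped name. Not part of this change, but if that data ever needed to be put back, a minimal restore sketch might look like the following; the timestamp in src is an example, and the assumption that the tarball's paths are rooted at the data directory's parent should be checked against the actual archive contents first:

# Hypothetical restore task, NOT part of this PR. The src timestamp is an
# example; inspect the tarball to confirm the correct dest before running.
- name: Restore archived opensearch data (illustration only)
  ansible.builtin.unarchive:
    src: "{{ opensearch_data_path | dirname }}/data-2024-01-01T00:00+00:00.tar.gz"
    dest: "{{ opensearch_data_path | dirname }}"
    remote_src: true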
Existing opensearch role tasks file under ansible/roles/opensearch/tasks (added lines marked with +):

      path: /etc/systemd/system/opendistro.service
      state: absent

+ - name: Enumerate files in data directory
+   find:
+     path: "{{ opensearch_data_path }}"
+   register: _find_opensearch_data
+
+ - name: Archive incorrectly indexed data
+   import_tasks: archive_data.yml
+   when:
+     - _find_opensearch_data.files | length > 0
+     - "'slurm_jobid_index' not in _find_opensearch_data.files | map(attribute='path') | map('basename')"
+
  - name: Ensure required opensearch host directories exist
    file:
      state: directory
  ...
      - "{{ opensearch_config_path }}"
      - "{{ opensearch_data_path }}"

+ - name: Set indexed data flag
+   copy:
+     dest: "{{ opensearch_data_path }}/slurm_jobid_index"
+     content: |
+       This is a flag file to indicate that filebeat is pushing data
+       indexed by Slurm JobID to prevent duplicate OpenSearch records
+     owner: "{{ opensearch_podman_user }}"
+     group: "{{ opensearch_podman_user }}"
+
  - name: Create certs
    import_tasks: certs.yml
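The archive step is guarded by a flag file inside the data directory: once slurm_jobid_index exists, the data is considered correctly indexed and nothing is archived on subsequent runs. As an illustration of the same guard expressed differently (this is not what the change uses, and it omits the "directory is non-empty" condition), the flag file could equally be tested with the stat module:

# Illustration only: the PR uses find + map('basename'); this expresses the
# same flag-file check with stat. The flag path matches the copy task above.
- name: Check for the slurm_jobid_index flag file
  stat:
    path: "{{ opensearch_data_path }}/slurm_jobid_index"
  register: _jobid_flag

- name: Archive incorrectly indexed data
  import_tasks: archive_data.yml
  when: not _jobid_flag.stat.exists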
Filebeat configuration under environments/common/files/filebeat, hunk @@ -22,6 +22,11 @@ filebeat.inputs:

    fields_under_root: true

  processors:
+   # Want to use the Slurm JobID as the ElasticSearch id to avoid duplicated records
+   # Don't use filebeat.inputs:json.document_id as this removes the JobID from the record
+   - fingerprint:
+       fields: ["json.JobID"]
+       target_field: "@metadata._id"
    - timestamp:
        field: json.End
        layouts:
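Writing the fingerprint to @metadata._id makes filebeat send an explicit document id, so a job record replayed from the slurmdbd overwrites the existing OpenSearch document instead of creating a duplicate, while json.JobID itself remains in the indexed record. A slightly more defensive variant, not part of this change and assuming the processor's documented method and ignore_missing options, would be:

processors:
  # Sketch only: same idea as above, with the hash method spelled out and
  # events that lack json.JobID skipped instead of failing the processor.
  - fingerprint:
      fields: ["json.JobID"]
      target_field: "@metadata._id"
      method: "sha256"
      ignore_missing: true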