Skip to content

Commit 2937725

Browse files
committed
use slurm jobid for opensearch index and archive old data
1 parent 884df2a commit 2937725

File tree

3 files changed

+42
-0
lines changed

3 files changed

+42
-0
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Archive, then remove, data which was NOT indexed by Slurm Job ID
2+
# It will be re-ingested by filebeat from the slurmdbd, this time indexed by Slurm Job ID
3+
4+
# Ask systemd to stop the opensearch unit and keep retrying until the
# registered result reports an ActiveState of 'inactive' or 'failed'.
- name: Ensure opensearch stopped
  ansible.builtin.systemd:
    state: stopped
    name: opensearch
  register: _opensearch_stop
  # Up to 15 attempts, 5 seconds apart.
  retries: 15
  delay: 5
  until: "_opensearch_stop.status.ActiveState in ['inactive', 'failed']"
12+
13+
# Archive the whole OpenSearch data directory into a timestamped .tar.gz
# placed in its parent directory, then delete the original (remove: true).
# The timestamp is produced by running `date` through the pipe lookup, which
# executes on the Ansible controller rather than the target host.
# It deliberately uses the colon-free ISO 8601 basic form, giving names such
# as data-20240131T0945+0000.tar.gz: GNU tar interprets a colon in an archive
# name as "host:path", so a colon would require --force-local when restoring.
- name: Archive existing data
  community.general.archive:
    path: "{{ opensearch_data_path }}"
    dest: "{{ opensearch_data_path | dirname }}/data-{{ lookup('pipe', 'date +%Y%m%dT%H%M%z') }}.tar.gz"
    remove: true

ansible/roles/opensearch/tasks/runtime.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,17 @@
1515
path: /etc/systemd/system/opendistro.service
1616
state: absent
1717

18+
# List the top-level entries of the OpenSearch data directory and register
# the result as _find_opensearch_data for later conditions.
# file_type: any is required because find matches only regular files by
# default (and does not recurse), so a data directory that holds nothing but
# subdirectories would otherwise be reported as empty.
- name: Enumerate files in data directory
  ansible.builtin.find:
    paths: "{{ opensearch_data_path }}"
    file_type: any
  register: _find_opensearch_data
22+
23+
# Pull in the tasks from archive_data.yml. They only run when the enumerated
# data directory has at least one entry and none of those entries is named
# slurm_jobid_index (the flag file marking data indexed by Slurm JobID).
- name: Archive incorrectly indexed data
  ansible.builtin.import_tasks: archive_data.yml
  when:
    # Something is already present in the data directory ...
    - _find_opensearch_data.files | length > 0
    # ... and the flag file is not among the entries found.
    - "'slurm_jobid_index' not in _find_opensearch_data.files | map(attribute='path') | map('basename')"
28+
1829
- name: Ensure required opensearch host directories exist
1930
file:
2031
state: directory
@@ -27,6 +38,15 @@
2738
- "{{ opensearch_config_path }}"
2839
- "{{ opensearch_data_path }}"
2940

41+
# Write the slurm_jobid_index marker file into the OpenSearch data directory,
# owned by the opensearch podman user.
# The mode is set explicitly so the resulting permissions do not depend on
# the umask in effect on the target host.
- name: Set indexed data flag
  ansible.builtin.copy:
    dest: "{{ opensearch_data_path }}/slurm_jobid_index"
    content: |
      This is a flag file to indicate that filebeat is pushing data
      indexed by Slurm JobID to prevent duplicate OpenSearch records
    owner: "{{ opensearch_podman_user }}"
    group: "{{ opensearch_podman_user }}"
    mode: "0644"
49+
3050
- name: Create certs
3151
import_tasks: certs.yml
3252

environments/common/files/filebeat/filebeat.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ filebeat.inputs:
2222
fields_under_root: true
2323

2424
processors:
25+
# Set the document id (@metadata._id) from a fingerprint of the Slurm JobID
# so that the same job is not stored as duplicated records.
# json.document_id under filebeat.inputs is deliberately not used, as it
# removes the JobID from the record.
- fingerprint:
    fields:
      - json.JobID
    target_field: '@metadata._id'
2530
- timestamp:
2631
field: json.End
2732
layouts:

0 commit comments

Comments
 (0)