Elasticsearch keeps migrating shards off one of my data nodes, data-mbesdrtp21. This cluster has a mix of plain indices and data streams. Half of the data nodes are in an availability zone named rtp1 and the other half are in an availability zone named rtp2. Shard allocation awareness is enabled. Data is sent to Elasticsearch via Logstash. Around 10-15 shards get allocated to this node and then they are relocated to other nodes. Any idea what might be happening here?
- Elasticsearch Version: 7.17.1
- VM Size: 14 CPU, 64GB RAM
Cluster Settings:
/_cluster/settings?flat_settings=true
{
"persistent": {
"cluster.routing.allocation.awareness.attributes": "zone",
"cluster.routing.allocation.awareness.force.zone.values": "rtp1,rtp2",
"xpack.monitoring.collection.enabled": "true"
},
"transient": {
"cluster.routing.allocation.cluster_concurrent_rebalance": "10",
"cluster.routing.allocation.enable": "all",
"cluster.routing.allocation.node_concurrent_incoming_recoveries": "4",
"cluster.routing.allocation.node_concurrent_outgoing_recoveries": "4",
"cluster.routing.allocation.node_concurrent_recoveries": "4",
"cluster.routing.allocation.node_initial_primaries_recoveries": "4",
"cluster.routing.rebalance.enable": "all",
"indices.recovery.max_bytes_per_sec": "256mb"
}
}
_cat/allocation
shards disk.indices disk.used disk.avail disk.total disk.percent node
342 5.4tb 5.4tb 2.2tb 7.6tb 71 data-mbesdrtp11
342 5.7tb 5.7tb 1.7tb 7.4tb 76 data-mbesdrtp110
342 5.6tb 5.6tb 1.8tb 7.4tb 74 data-mbesdrtp111
342 5.8tb 5.8tb 1.6tb 7.4tb 78 data-mbesdrtp112
342 5.6tb 5.6tb 1.8tb 7.4tb 75 data-mbesdrtp12
342 5.6tb 5.6tb 1.8tb 7.4tb 75 data-mbesdrtp13
342 5.8tb 5.8tb 1.6tb 7.4tb 78 data-mbesdrtp14
342 5.6tb 5.6tb 1.8tb 7.4tb 75 data-mbesdrtp15
341 5.6tb 5.6tb 1.8tb 7.4tb 75 data-mbesdrtp16
341 5.7tb 5.7tb 1.7tb 7.4tb 76 data-mbesdrtp17
342 5.7tb 5.7tb 1.7tb 7.4tb 76 data-mbesdrtp18
341 5.7tb 5.7tb 1.7tb 7.4tb 76 data-mbesdrtp19
7 150.3gb 156.9gb 7.3tb 7.4tb 2 data-mbesdrtp21
368 6.3tb 6.3tb 1.1tb 7.4tb 85 data-mbesdrtp210
374 6tb 6tb 1.3tb 7.4tb 81 data-mbesdrtp211
367 6.3tb 6.3tb 1.1tb 7.4tb 85 data-mbesdrtp212
373 6tb 6tb 1.4tb 7.4tb 80 data-mbesdrtp22
373 6.1tb 6.1tb 1.3tb 7.4tb 81 data-mbesdrtp23
373 6.2tb 6.2tb 1.2tb 7.4tb 82 data-mbesdrtp24
373 6tb 6tb 1.4tb 7.4tb 80 data-mbesdrtp25
373 6.2tb 6.2tb 1.2tb 7.4tb 83 data-mbesdrtp26
373 6.2tb 6.2tb 1.2tb 7.4tb 83 data-mbesdrtp27
374 6.1tb 6.1tb 1.3tb 7.4tb 82 data-mbesdrtp28
373 6.1tb 6.1tb 1.3tb 7.4tb 81 data-mbesdrtp29
elasticsearch.yml, which is the same for all the data nodes except for node.name and network.host:
cluster.name: metricbeat-prod
node.name: data-mbesdrtp21
node.roles: [ data_hot , data_content ]
path.data: ["/apps/elk/data"]
discovery.seed_hosts: [ "mbesmoma2vo", "mbesmrtp11", "mbesmrtp21" ]
path.logs: /apps/elk/logs
network.host: mbesdrtp21.fmr.com
http.port: 9200
bootstrap.memory_lock: True
script.painless.regex.enabled: True
xpack.graph.enabled: false
xpack.ml.enabled: false
xpack.monitoring.enabled: true
xpack.watcher.enabled: false
xpack.monitoring.collection.enabled: true
thread_pool.write.queue_size : 5000
node.attr.zone: rtp2
node.attr.box_type: hot
thread_pool.search.queue_size : 10000
xpack.security.enabled: False
http.type: ssl_netty4
s3.client.ceph.endpoint : https://xxx.yyy.com
_cluster/health
{
"cluster_name": "metricbeat-prod",
"status": "green",
"timed_out": false,
"number_of_nodes": 29,
"number_of_data_nodes": 24,
"active_primary_shards": 4097,
"active_shards": 8202,
"relocating_shards": 6,
"initializing_shards": 0,
"unassigned_shards": 0,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 0,
"active_shards_percent_as_number": 100.0
}
ILM details
PUT _ilm/policy/metrics
{
"policy": {
"phases": {
"hot": {
"min_age": "0ms",
"actions": {
"rollover": {
"max_primary_shard_size": "25gb",
"max_age": "15d"
}
}
},
"warm": {
"min_age": "15d",
"actions": {
"forcemerge": {
"max_num_segments": 1
},
"set_priority": {
"priority": 50
},
"migrate": {
"enabled": false
}
}
},
"delete": {
"min_age": "45d",
"actions": {
"delete": {
"delete_searchable_snapshot": true
}
}
}
},
"_meta": {
"description": "default policy for the metrics index template installed by x-pack",
"managed": true
}
}
}
_cat/nodeattrs
node host attr value
data-mbesdrtp24 mbesdrtp24.fmr.com box_type hot
data-mbesdrtp24 mbesdrtp24.fmr.com xpack.installed true
data-mbesdrtp24 mbesdrtp24.fmr.com zone rtp2
data-mbesdrtp24 mbesdrtp24.fmr.com transform.node false
data-mbesdrtp26 mbesdrtp26.fmr.com box_type hot
data-mbesdrtp26 mbesdrtp26.fmr.com xpack.installed true
data-mbesdrtp26 mbesdrtp26.fmr.com zone rtp2
data-mbesdrtp26 mbesdrtp26.fmr.com transform.node false
data-mbesdrtp12 mbesdrtp12.fmr.com box_type hot
data-mbesdrtp12 mbesdrtp12.fmr.com xpack.installed true
data-mbesdrtp12 mbesdrtp12.fmr.com zone rtp1
data-mbesdrtp12 mbesdrtp12.fmr.com transform.node false
master-voteonly-mbesmoma2vo mbesmoma2vo.fmr.com xpack.installed true
master-voteonly-mbesmoma2vo mbesmoma2vo.fmr.com transform.node false
data-mbesdrtp22 mbesdrtp22.fmr.com box_type hot
data-mbesdrtp22 mbesdrtp22.fmr.com xpack.installed true
data-mbesdrtp22 mbesdrtp22.fmr.com zone rtp2
data-mbesdrtp22 mbesdrtp22.fmr.com transform.node false
data-mbesdrtp210 mbesdrtp210.fmr.com xpack.installed true
data-mbesdrtp210 mbesdrtp210.fmr.com box_type hot
data-mbesdrtp210 mbesdrtp210.fmr.com zone rtp2
data-mbesdrtp210 mbesdrtp210.fmr.com transform.node false
data-mbesdrtp211 mbesdrtp211.fmr.com xpack.installed true
data-mbesdrtp211 mbesdrtp211.fmr.com box_type hot
data-mbesdrtp211 mbesdrtp211.fmr.com zone rtp2
data-mbesdrtp211 mbesdrtp211.fmr.com transform.node false
data-mbesdrtp212 mbesdrtp212.fmr.com box_type hot
data-mbesdrtp212 mbesdrtp212.fmr.com xpack.installed true
data-mbesdrtp212 mbesdrtp212.fmr.com zone rtp2
data-mbesdrtp212 mbesdrtp212.fmr.com transform.node false
data-mbesdrtp27 mbesdrtp27.fmr.com xpack.installed true
data-mbesdrtp27 mbesdrtp27.fmr.com box_type hot
data-mbesdrtp27 mbesdrtp27.fmr.com zone rtp2
data-mbesdrtp27 mbesdrtp27.fmr.com transform.node false
data-mbesdrtp25 mbesdrtp25.fmr.com box_type hot
data-mbesdrtp25 mbesdrtp25.fmr.com xpack.installed true
data-mbesdrtp25 mbesdrtp25.fmr.com zone rtp2
data-mbesdrtp25 mbesdrtp25.fmr.com transform.node false
data-mbesdrtp111 mbesdrtp111.fmr.com xpack.installed true
data-mbesdrtp111 mbesdrtp111.fmr.com box_type hot
data-mbesdrtp111 mbesdrtp111.fmr.com zone rtp1
data-mbesdrtp111 mbesdrtp111.fmr.com transform.node false
data-mbesdrtp29 mbesdrtp29.fmr.com xpack.installed true
data-mbesdrtp29 mbesdrtp29.fmr.com box_type hot
data-mbesdrtp29 mbesdrtp29.fmr.com zone rtp2
data-mbesdrtp29 mbesdrtp29.fmr.com transform.node false
data-mbesdrtp11 mbesdrtp11.fmr.com box_type hot
data-mbesdrtp11 mbesdrtp11.fmr.com xpack.installed true
data-mbesdrtp11 mbesdrtp11.fmr.com zone rtp1
data-mbesdrtp11 mbesdrtp11.fmr.com transform.node false
data-mbesdrtp110 mbesdrtp110.fmr.com xpack.installed true
data-mbesdrtp110 mbesdrtp110.fmr.com box_type hot
data-mbesdrtp110 mbesdrtp110.fmr.com zone rtp1
data-mbesdrtp110 mbesdrtp110.fmr.com transform.node false
data-mbesdrtp16 mbesdrtp16.fmr.com box_type hot
data-mbesdrtp16 mbesdrtp16.fmr.com xpack.installed true
data-mbesdrtp16 mbesdrtp16.fmr.com zone rtp1
data-mbesdrtp16 mbesdrtp16.fmr.com transform.node false
data-mbesdrtp28 mbesdrtp28.fmr.com box_type hot
data-mbesdrtp28 mbesdrtp28.fmr.com xpack.installed true
data-mbesdrtp28 mbesdrtp28.fmr.com zone rtp2
data-mbesdrtp28 mbesdrtp28.fmr.com transform.node false
data-mbesdrtp21 mbesdrtp21.fmr.com xpack.installed true
data-mbesdrtp21 mbesdrtp21.fmr.com box_type hot
data-mbesdrtp21 mbesdrtp21.fmr.com zone rtp2
data-mbesdrtp21 mbesdrtp21.fmr.com transform.node false
data-mbesdrtp19 mbesdrtp19.fmr.com xpack.installed true
data-mbesdrtp19 mbesdrtp19.fmr.com box_type hot
data-mbesdrtp19 mbesdrtp19.fmr.com zone rtp1
data-mbesdrtp19 mbesdrtp19.fmr.com transform.node false
master-mbesmrtp21 mbesmrtp21.fmr.com xpack.installed true
master-mbesmrtp21 mbesmrtp21.fmr.com transform.node false
client-kibana-mbescrtp21 mbescrtp21.fmr.com xpack.installed true
client-kibana-mbescrtp21 mbescrtp21.fmr.com transform.node false
master-mbesmrtp11 mbesmrtp11.fmr.com xpack.installed true
master-mbesmrtp11 mbesmrtp11.fmr.com transform.node false
data-mbesdrtp14 mbesdrtp14.fmr.com xpack.installed true
data-mbesdrtp14 mbesdrtp14.fmr.com box_type hot
data-mbesdrtp14 mbesdrtp14.fmr.com zone rtp1
data-mbesdrtp14 mbesdrtp14.fmr.com transform.node false
data-mbesdrtp23 mbesdrtp23.fmr.com box_type hot
data-mbesdrtp23 mbesdrtp23.fmr.com xpack.installed true
data-mbesdrtp23 mbesdrtp23.fmr.com zone rtp2
data-mbesdrtp23 mbesdrtp23.fmr.com transform.node false
data-mbesdrtp112 mbesdrtp112.fmr.com box_type hot
data-mbesdrtp112 mbesdrtp112.fmr.com xpack.installed true
data-mbesdrtp112 mbesdrtp112.fmr.com zone rtp1
data-mbesdrtp112 mbesdrtp112.fmr.com transform.node false
client-kibana-mbescrtp11 mbescrtp11.fmr.com xpack.installed true
client-kibana-mbescrtp11 mbescrtp11.fmr.com transform.node false
data-mbesdrtp18 mbesdrtp18.fmr.com xpack.installed true
data-mbesdrtp18 mbesdrtp18.fmr.com box_type hot
data-mbesdrtp18 mbesdrtp18.fmr.com zone rtp1
data-mbesdrtp18 mbesdrtp18.fmr.com transform.node false
data-mbesdrtp17 mbesdrtp17.fmr.com box_type hot
data-mbesdrtp17 mbesdrtp17.fmr.com xpack.installed true
data-mbesdrtp17 mbesdrtp17.fmr.com zone rtp1
data-mbesdrtp17 mbesdrtp17.fmr.com transform.node false
data-mbesdrtp15 mbesdrtp15.fmr.com xpack.installed true
data-mbesdrtp15 mbesdrtp15.fmr.com box_type hot
data-mbesdrtp15 mbesdrtp15.fmr.com zone rtp1
data-mbesdrtp15 mbesdrtp15.fmr.com transform.node false
data-mbesdrtp13 mbesdrtp13.fmr.com xpack.installed true
data-mbesdrtp13 mbesdrtp13.fmr.com box_type hot
data-mbesdrtp13 mbesdrtp13.fmr.com zone rtp1
data-mbesdrtp13 mbesdrtp13.fmr.com transform.node false
cluster.routing settings: _cluster/settings?filter_path=*.cluster.routing.*
{
"persistent": {
"cluster": {
"routing": {
"allocation": {
"awareness": {
"attributes": "zone",
"force": {
"zone": {
"values": "rtp1,rtp2"
}
}
}
}
}
}
},
"transient": {
"cluster": {
"routing": {
"rebalance": {
"enable": "all"
},
"allocation": {
"node_concurrent_incoming_recoveries": "4",
"cluster_concurrent_rebalance": "10",
"node_concurrent_recoveries": "4",
"node_initial_primaries_recoveries": "4",
"enable": "all",
"node_concurrent_outgoing_recoveries": "4"
}
}
}
}
}