I want to perform a shrink operation on an OS index. I have the following code:
import time

# Prepare `index_name` for a shrink and then shrink it into
# `{index_name}_optimized` with `shards_needed` primary shards.
#
# Relies on names supplied by the surrounding code: `self.raw_request`,
# `self.get_random_node_name`, `index_name` and `shards_needed`.

# Pause between polls so the wait loops do not hit the cluster with
# back-to-back requests.
poll_interval_seconds = 1

# Step 1: block writes, require allocation on one named node and drop replicas.
self.raw_request(
    "PUT",
    f"{index_name}/_settings",
    {
        "index": {
            "blocks.write": True,
            "routing.allocation.require._name": self.get_random_node_name(),
            "number_of_replicas": 0,
        }
    },
)

# Step 2: poll the index settings until the write block is reported as "true".
logging.info(f"Waiting for write block on {index_name}...")
while True:
    index_settings = self.raw_request("GET", f"{index_name}/")[index_name]["settings"]["index"]
    # .get() keeps waiting when "blocks" is not present yet instead of
    # aborting the wait with a KeyError.
    if index_settings.get("blocks", {}).get("write") == "true":
        break
    time.sleep(poll_interval_seconds)

# Step 3: let the cluster settle (no initializing/relocating shards, green).
logging.info(f"Waiting for health on {index_name}...")
health = self.raw_request(
    "GET",
    f"_cluster/health/{index_name}",
    params={
        "wait_for_no_initializing_shards": "true",
        "wait_for_no_relocating_shards": "true",
        "wait_for_status": "green",
        "wait_for_active_shards": "all",
        "timeout": "60m",
        # NOTE(review): the health API documents a single priority value for
        # wait_for_events; how raw_request serializes a list is not visible
        # here -- TODO confirm this is sent as intended.
        "wait_for_events": ["immediate", "urgent", "high", "normal", "low", "languid"],
    },
)
# The health API reports an expired wait via "timed_out"; surface it instead
# of silently continuing. Assumes raw_request returns the parsed JSON body, as
# the other calls in this snippet do.
if isinstance(health, dict) and health.get("timed_out"):
    logging.warning(f"Health wait on {index_name} timed out before conditions were met")

# Step 4: wait until the cluster reports no pending tasks.
logging.info("Waiting for tasks...")
while self.raw_request("GET", "_cluster/pending_tasks", None)["tasks"]:
    time.sleep(poll_interval_seconds)

# Step 5: wait until no snapshot is reported as running.
logging.info("Waiting for snapshot completion...")
while self.raw_request("GET", "_snapshot/_status", None)["snapshots"]:
    time.sleep(poll_interval_seconds)

# Step 6: shrink into the "_optimized" index, restoring one replica and
# clearing the node requirement on the target.
# NOTE(review): if this call is rejected with "must be read-only to resize
# index", the write block verified in step 2 is presumably being removed by
# something outside this snippet before this request runs -- TODO confirm
# (e.g. another job or policy updating this index's settings).
logging.info("Moving index to optimized with new number of shards...")
self.raw_request(
    "POST",
    f"{index_name}/_shrink/{index_name}_optimized",
    {
        "settings": {
            "index.number_of_shards": shards_needed,
            "index.number_of_replicas": 1,
            "routing.allocation.require._name": None,
        },
    },
)
This works only about 50% of the time. When it fails, it always fails with:
{"log_id":"20230616171121", "created_date":"2023-06-16T17:19:55.385", "action_text":"Response: {'error': {'root_cause': [{'type': 'illegal_state_exception', 'reason': 'index my_index must be read-only to resize index. use "index.blocks.write=true"'}], 'type': 'illegal_state_exception', 'reason': 'index my_index must be read-only to resize index. use "index.blocks.write=true"'}, 'status': 500}"}
In my code, I am already checking the block status and waiting for snapshots, events, and tasks. What am I doing wrong here? Is there something between the calls that can make the write block go away?