I am using Django + Celery (RabbitMQ). I wrote several tasks that download files and then send a "hey, I got the file" notification request. I want these tasks to run in parallel, so I used a combination of Celery's group
and chain. The main problem: when I run the task it works and all the tasks are called, but I can't detect whether all of them have finished. Is it possible to know when all the relevant tasks have finished?
PS: I don't think Celery beat suits this job, because the current tasks might not be done yet when Celery beat triggers the next run.
Here is my snippet. I call this task first:
@shared_task(
    name="getOrders",
    bind=True,
    retry_backoff=10,
    autoretry_for=(exceptions.RequestException,),
    retry_kwargs={"max_retries": 3, "countdown": 5},
)
def getOrders(self):
    """Fetch the order list and dispatch one ``download_order`` task per order.

    When the response status is OK, the order ids selected by the
    ``order/text()`` XPath are turned into a group of immutable
    ``download_order`` signatures, chained into ``notify`` and sent off
    asynchronously. The task then asks Celery to run it again in 900 seconds.

    Raises:
        Whatever ``self.retry`` raises: Celery's ``Retry`` signal while
        retries remain, or its max-retries error once they are exhausted.
    """
    url = "https://example.com/orders"
    orders = fetchData(url)  # fetch function
    if orders.status_code == requests.codes.ok:
        tree = etree.fromstring(orders.content)
        order_ids = tree.xpath('order/text()')
        jobs = group([download_order.si(order_id) for order_id in order_ids])
        # Dispatch only; nothing here waits for the downloads to finish.
        chain(jobs, notify.s()).apply_async()
    # NOTE(review): the source lost its indentation, so it is unclear whether
    # this retry belongs inside the ``if`` above or at function level. It is
    # assumed here to run regardless of the response status -- confirm.
    # ``retry`` raises rather than returns; the explicit ``raise`` makes it
    # obvious that nothing after this line can run.
    raise self.retry(countdown=900, max_retries=2)
The download task is:
@shared_task(
    name="download_order",
    acks_late=True,
    autoretry_for=(exceptions.RequestException,),
    retry_kwargs={"max_retries": 3, "countdown": 5},
)
def download_order(order_id):
    """Fetch one order's XML and queue a download chain per video file.

    For every ``product`` element under each item yielded by
    ``find_product(tree)``, a ``download`` -> ``orderReceived`` chain is
    dispatched asynchronously. Nothing is dispatched when the order XML
    request does not return an OK status.

    Args:
        order_id: Identifier interpolated into the order URL, the download
            URL and the target file path.

    Note:
        The per-file chains are sent with ``.delay()`` and not awaited, so
        this task completes once they are queued, not once the files are
        downloaded. A callback attached to this task therefore does not wait
        for the downloads themselves.
    """
    print("\n===========\nstarting download on {}\n=========\n".format(order_id))
    order_url = "https://example.com/order/{}.xml".format(order_id)
    get_xml = fetchData(order_url)
    # Check the response that was just fetched, using ``requests.codes`` the
    # same way getOrders does.
    if get_xml.status_code != requests.codes.ok:
        return
    print("\n=======\nxml fetched\n========\n")
    tree = etree.fromstring(get_xml.content)
    file_type = "Video"
    for product in find_product(tree):
        for v in product.iterfind("product"):
            file_id = v.find('files/file/id').text
            download_url = (
                "https://example.com/download?order_id={}&type={}&task_id={}".format(
                    order_id, file_type, file_id
                )
            )
            path = "{}/{}/{}.mp4".format(
                settings.VIDEO_DIRECTORY, order_id, file_id
            )
            # Immutable signatures: orderReceived must not receive the
            # download task's return value as an extra argument.
            chain(
                download.si(download_url, path),
                orderReceived.si(order_id, file_type, file_id),
            ).delay()
@shared_task(name="notify")
def notify(id):
    """Print a "notifying" line followed by the value this task was given."""
    fields = ("notifying", id)
    print(*fields)
@shared_task(name="order_received")
def orderReceived(order_id, ftype, file_id):
    """Print an "order received" line with the order id, file type and file id."""
    details = (order_id, ftype, file_id)
    print("order received", *details)