2

I keep getting the following error even after implementing backoff.

connection -> db.py

import os
import logging
import sys

from gremlin_python.driver import serializer
from gremlin_python.driver.protocol import GremlinServerError
from gremlin_python.process.anonymous_traversal import traversal
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from tornado.websocket import WebSocketClosedError
from gremlin_python.driver import client

# Exception
# Message fragments that cause the shared connection to be rebuilt when they
# appear in an error's text.
reconnectable_err_msgs = [
    'ReadOnlyViolationException',
    'Server disconnected',
    'Connection refused',
]
# Message fragments that make an error retriable: every reconnectable
# fragment plus ConcurrentModificationException.
retriable_err_msgs = ['ConcurrentModificationException', *reconnectable_err_msgs]
# Exception classes that are always treated as reconnectable and retriable,
# regardless of their message.
network_errors = [WebSocketClosedError, OSError]
# Exception classes exposed for the retry decorator to catch.
retriable_errors = [GremlinServerError, *network_errors]


# Connection Retry
def reset_connection_if_connection_issue(params):
    """Rebuild the shared connection when the in-flight error calls for it.

    Looks at the exception currently being handled (``sys.exc_info()``). If it
    is an instance of one of ``network_errors``, or its message contains any
    entry of ``reconnectable_err_msgs``, the module-level ``conn`` is closed
    and both ``conn`` and ``g`` are replaced with freshly created objects.

    Args:
        params: Not read by this function; it is accepted so the function can
            be registered as a retry callback that receives one argument.
    """
    global conn, g

    current_exc = sys.exc_info()[1]
    message = str(current_exc)

    should_reconnect = isinstance(current_exc, tuple(network_errors)) or any(
        fragment in message for fragment in reconnectable_err_msgs
    )
    logging.info("is_reconnectable: {}".format(should_reconnect))

    if not should_reconnect:
        return

    # Drop the old connection before rebinding the module-level handles.
    conn.close()
    conn = create_remote_connection()
    g = create_graph_traversal_source(conn)


def is_retriable_error(e):
    """Report whether ``e`` qualifies for another attempt.

    An error is retriable when it is an instance of one of ``network_errors``
    or when its string form contains any entry of ``retriable_err_msgs``.
    The error and the verdict are printed to stdout.

    Args:
        e: The exception to classify.

    Returns:
        True if the error is retriable, otherwise False.
    """
    message = str(e)

    retriable = isinstance(e, tuple(network_errors)) or any(
        fragment in message for fragment in retriable_err_msgs
    )

    print("error: [{}] {}".format(type(e), message))
    print("is_retriable: {}".format(retriable))

    return retriable


def is_non_retriable_error(e):
    """Return True when ``e`` should not be retried.

    This is the logical negation of ``is_retriable_error(e)``.
    """
    retriable = is_retriable_error(e)
    return not retriable


# Database connection
def create_remote_connection():
    """Create and return a new ``DriverRemoteConnection``.

    The URL comes from ``connection_string()``, the traversal source name is
    ``'g'``, and messages are serialized with GraphSON v2.
    """
    logging.info("Creating remote connection")
    url = connection_string()
    graphson_v2 = serializer.GraphSONSerializersV2d0()
    # The pool is limited to a single connection.
    return DriverRemoteConnection(
        url,
        'g',
        pool_size=1,
        message_serializer=graphson_v2,
    )


def connection_string():
    """Build, log and return the Gremlin websocket URL.

    Host and port are read from the ``neptuneEndpoint`` and ``neptunePort``
    environment variables. A variable that is not set is rendered as the
    text ``None`` in the resulting URL.

    Returns:
        The URL in the form ``wss://<host>:<port>/gremlin``.
    """
    host = os.environ.get('neptuneEndpoint')
    port = os.environ.get('neptunePort')
    url = f'wss://{host}:{port}/gremlin'
    logging.info("Connection String %s", url)
    return url


def create_graph_traversal_source(conn):
    """Return a graph traversal source that executes against ``conn``.

    Args:
        conn: The remote connection the traversal source is bound to.
    """
    logging.info("Connection successful, creating graph traversal")
    source = traversal()
    return source.withRemote(conn)


# Module-level connection and traversal source, created when this module is
# imported. reset_connection_if_connection_issue() rebinds both names when it
# decides a reconnect is needed.
conn = create_remote_connection()
g = create_graph_traversal_source(conn)


def get_cli():
    """Create and return a new ``client.Client`` for the ``'g'`` traversal source."""
    url = connection_string()
    return client.Client(url, 'g')

Query:-

@backoff.on_exception(
    backoff.constant,
    tuple(db.retriable_errors),
    interval=1,
    jitter=None,
    max_tries=5,
    giveup=db.is_non_retriable_error,
    on_backoff=db.reset_connection_if_connection_issue,
)
def is_user_available(event):
    """Return whether the traversal for the user's vertex has a result.

    The call is wrapped in a constant-interval retry (interval 1, no jitter,
    at most 5 tries) over ``db.retriable_errors``. Retrying stops early when
    ``db.is_non_retriable_error`` says so, and
    ``db.reset_connection_if_connection_issue`` runs on each backoff.

    Args:
        event: Not read in the visible body.
    """
    logging.debug("check the user from Neptune")
    # NOTE(review): cognito_username is not defined in this snippet;
    # presumably it is derived from `event` in code not shown here. Confirm.
    user_vertices = db.g.V(cognito_username)
    return user_vertices.hasNext()
 

Error Message:

Connection was already closed.: RuntimeError
Traceback (most recent call last):
  File "/var/task/backoff/_sync.py", line 94, in retry
    ret = target(*args, **kwargs)
  File "/var/task/chalice/app.py", line 1605, in __call__
    return self.handler(event_obj)
  File "/var/task/chalice/app.py", line 1558, in __call__
    return self._original_func(event.to_dict(), event.context)
  File "/var/task/app.py", line 18, in poll_role_handler
    return queries(event, context)
  File "/var/task/app.py", line 35, in queries
    return query.list_poll_role(event, db.g)
  File "/var/task/chalicelib/query.py", line 23, in list_poll_role
    .dedup().hasLabel('user').count().next()
  File "/var/task/gremlin_python/process/traversal.py", line 88, in next
    return self.__next__()
  File "/var/task/gremlin_python/process/traversal.py", line 47, in __next__
    self.traversal_strategies.apply_strategies(self)
  File "/var/task/gremlin_python/process/traversal.py", line 548, in apply_strategies
    traversal_strategy.apply(traversal)
  File "/var/task/gremlin_python/driver/remote_connection.py", line 63, in apply
    remote_traversal = self.remote_connection.submit(traversal.bytecode)
  File "/var/task/gremlin_python/driver/driver_remote_connection.py", line 60, in submit
    results = result_set.all().result()
  File "/var/lang/lib/python3.6/concurrent/futures/_base.py", line 432, in result
    return self.__get_result()
  File "/var/lang/lib/python3.6/concurrent/futures/_base.py", line 384, in __get_result
    raise self._exception
  File "/var/task/gremlin_python/driver/resultset.py", line 90, in cb
    f.result()
  File "/var/lang/lib/python3.6/concurrent/futures/_base.py", line 425, in result
    return self.__get_result()
  File "/var/lang/lib/python3.6/concurrent/futures/_base.py", line 384, in __get_result
    raise self._exception
  File "/var/lang/lib/python3.6/concurrent/futures/thread.py", line 56, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/var/task/gremlin_python/driver/connection.py", line 82, in _receive
    data = self._transport.read()
  File "/var/task/gremlin_python/driver/aiohttp/transport.py", line 104, in read
    raise RuntimeError("Connection was already closed.")
RuntimeError: Connection was already closed.

Connection was already closed.: RuntimeError Traceback (most recent call last): File "/var/task/backoff/_sync.py", line 94, in retry ret = target(*args, **kwargs) File "/var/task/chalice/app.py", line 1605, in __call__ return self.handler(event_obj) File "/var/task/chalice/app.py", line 1558, in __call__ return self._original_func(event.to_dict(), event.context) File "/var/task/app.py", line 18, in poll_role_handler return queries(event, context) File "/var/task/app.py", line 35, in queries return query.list_poll_role(event, db.g) File "/var/task/chalicelib/query.py", line 23, in list_poll_role .dedup().hasLabel('user').count().next() File "/var/task/gremlin_python/process/traversal.py", line 88, in next return self.__next__() File "/var/task/gremlin_python/process/traversal.py", line 47, in __next__ self.traversal_strategies.apply_strategies(self) File "/var/task/gremlin_python/process/traversal.py", line 548, in apply_strategies traversal_strategy.apply(traversal) File "/var/task/gremlin_python/driver/remote_connection.py", line 63, in apply remote_traversal = self.remote_connection.submit(traversal.bytecode) File "/var/task/gremlin_python/driver/driver_remote_connection.py", line 60, in submit results = result_set.all().result() File "/var/lang/lib/python3.6/concurrent/futures/_base.py", line 432, in result return self.__get_result() File "/var/lang/lib/python3.6/concurrent/futures/_base.py", line 384, in __get_result raise self._exception File "/var/task/gremlin_python/driver/resultset.py", line 90, in cb f.result() File "/var/lang/lib/python3.6/concurrent/futures/_base.py", line 425, in result return self.__get_result() File "/var/lang/lib/python3.6/concurrent/futures/_base.py", line 384, in __get_result raise self._exception File "/var/lang/lib/python3.6/concurrent/futures/thread.py", line 56, in run result = self.fn(*self.args, **self.kwargs) File "/var/task/gremlin_python/driver/connection.py", line 82, in _receive data = self._transport.read() File 
"/var/task/gremlin_python/driver/aiohttp/transport.py", line 104, in read raise RuntimeError("Connection was already closed.") RuntimeError: Connection was already closed.

How to fix this connection issue?

Thirumal
  • 8,280
  • 11
  • 53
  • 103
  • There are many reasons a connection might have been closed. It appears for some reason the connection was closed by the time the query tried to run. Connections will close if they have become idle for more than 20 minutes as one example. It will be helpful if you can share more information about your environment as the stack trace alone does not show why the connection had closed. – Kelvin Lawrence Nov 25 '21 at 12:50
  • @KelvinLawrence Added AWS lambda code implemented using aws-chalice – Thirumal Nov 25 '21 at 14:02

1 Answers1

3

Do you see any of the logging in is_retriable_error? If you do, then we know that this logic at least is being triggered, and can probably add 'Connection was already closed.' as a reconnectable error message. If not, try:

# Message fragments that should trigger a reconnect.
reconnectable_err_msgs = [
    'ReadOnlyViolationException',
    'Server disconnected',
    'Connection refused',
    'Connection was already closed.',  # newly added message
]

# Retriable fragments: every reconnectable one plus
# ConcurrentModificationException.
retriable_err_msgs = ['ConcurrentModificationException', *reconnectable_err_msgs]

network_errors = [WebSocketClosedError, OSError]

# RuntimeError is newly added to the retriable exception classes.
retriable_errors = [GremlinServerError, RuntimeError, *network_errors]

Here I've added not only the error message to the reconnectable error messages, but also RuntimeError to the retriable errors.

If this doesn't work, then the issue will likely require a new reproducer for the AIOHTTP transport (the guidance was originally written for the Tornado transport).

Ian Robinson
  • 156
  • 2