0

I'm trying to implement a reverse HTTP proxy with Spray/Akka, but I've run into trouble. I found that under some circumstances, my proxy server keeps receiving data from the upstream server even after the client has disconnected.

Here's how I implement my Spray proxy directive (just a little modification to bthuillier's implementation):

trait ProxyDirectives {

  /**
    * Builds a route that maps every incoming request context to an outgoing
    * request with `f` and hands that request to the `IO(Http)` transport,
    * using the context's responder as the sender of the message.
    *
    * @param f      derives the outgoing request from the request context
    * @param system actor system used to look up the `IO(Http)` extension
    */
  private def sending(f: RequestContext ⇒ HttpRequest)(implicit system: ActorSystem): Route = {
    val httpTransport = IO(Http)(system)
    val route: Route = requestContext ⇒
      httpTransport.tell(f(requestContext), requestContext.responder)
    route
  }

  /**
    * Re-shape the original request, to match the destination server.
    *
    * The request URI is replaced by `uri`, and every `Host` header is rewritten
    * to the host and port of `uri`; all other headers are kept as they are.
    */
  private def reShapeRequest(req: HttpRequest, uri: Uri): HttpRequest = {
    // Built on demand, so nothing is constructed when the request has no Host header.
    def upstreamHost = HttpHeaders.Host(uri.authority.host.address, uri.authority.port)

    val rewrittenHeaders = req.headers.map {
      case _: HttpHeaders.Host => upstreamHost
      case other => other
    }
    req.copy(uri = uri, headers = rewrittenHeaders)
  }

  /**
    * Proxy the request to the specified uri.
    *
    * @param uri    destination the re-shaped request is sent to
    * @param system actor system used to look up the `IO(Http)` extension
    */
  def proxyTo(uri: Uri)(implicit system: ActorSystem): Route = {
    sending { incoming => reShapeRequest(incoming.request, uri) }
  }
}

This reverse proxy will work well if I put one proxy layer between the client and the server (that is, client <-> proxyTo <-> server), but it will have trouble if I put two layers between the client and the server. For example, if I've got the following simple Python HTTP server:

import socket
from threading import Thread, Semaphore
import time

from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
from SocketServer import ThreadingMixIn


class MyHTTPHandler(BaseHTTPRequestHandler):
    """Request handler that answers GET with a slow chunked response."""

    protocol_version = 'HTTP/1.1'

    def do_GET(self):
        """Send a 200 chunked response: 100 chunks, one per second.

        Each chunk carries the decimal counter followed by a newline. The
        response is finished with the zero-length terminating chunk.
        """
        self.send_response(200)
        self.send_header('Transfer-Encoding', 'chunked')
        self.end_headers()

        out = self.wfile
        counter = 0
        while counter < 100:
            payload = ('%s\n' % counter).encode('utf-8')
            # Chunk framing: hex size, CRLF, payload, CRLF.
            size_line = ('%x' % len(payload)).encode('utf-8')
            for piece in (size_line, b'\r\n', payload, b'\r\n'):
                out.write(piece)
            time.sleep(1)
            counter += 1
        out.write(b'0\r\n\r\n')


class MyServer(ThreadingMixIn, HTTPServer):
    """HTTP server combined with ThreadingMixIn, with SO_REUSEADDR enabled."""

    def server_bind(self):
        """Enable SO_REUSEADDR on the listening socket, then bind it.

        The option is set before delegating to HTTPServer.server_bind()
        because a socket option only influences a bind() that happens after
        it has been set.
        """
        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        HTTPServer.server_bind(self)

    def server_close(self):
        """Close the server by delegating to HTTPServer.server_close()."""
        HTTPServer.server_close(self)


if __name__ == '__main__':
    # Start the test server on the loopback interface, port 8080, and block
    # in serve_forever().
    listen_address = ('127.0.0.1', 8080)
    httpd = MyServer(listen_address, MyHTTPHandler)
    httpd.serve_forever()

Which basically does nothing but open a chunked response (long-running, so that we can examine the issue). And if I chain two layers of proxies in the following way:

class TestActor(val target: String)(implicit val system: ActorSystem) extends Actor
  with HttpService
  with ProxyDirectives
{
  // HttpService needs an actorRefFactory; the enclosing actor's context is used.
  def actorRefFactory = context

  // Futures and the scheduler run on the dispatcher of the actor context above.
  implicit private def executionContext = actorRefFactory.dispatcher

  // Every GET request is proxied to `target`.
  val serviceRoute: Route = get(proxyTo(target))

  // Run the route first; anything it does not handle goes to the timeout handler.
  def receive = runRoute(serviceRoute) orElse handleTimeouts

  // Replies to a timed-out request with a 500 response.
  private def handleTimeouts: Receive = {
    case Timedout(_: HttpRequest) =>
      val timeoutResponse = HttpResponse(StatusCodes.InternalServerError, "Request timed out.")
      sender ! timeoutResponse
  }
}

object DebugMain extends App {
  val actorName = "TestActor"
  implicit val system = ActorSystem(actorName)

  // Creates and starts one proxy service actor that forwards to `target`.
  private def spawnProxy(target: String, name: String) =
    system.actorOf(Props { new TestActor(target) }, name)

  // First layer: proxies to the upstream server on port 8080.
  val service = spawnProxy("http://127.0.0.1:8080", s"${actorName}Service")
  // Second layer: proxies to the first layer, which is bound to port 8081 below.
  val service2 = spawnProxy("http://127.0.0.1:8081", s"${actorName}2Service")

  private val httpTransport = IO(Http)
  httpTransport ! Http.Bind(service, "::0", port = 8081)
  httpTransport ! Http.Bind(service2, "::0", port = 8082)
}

Use curl http://localhost:8082 to connect to the proxy server, and you will see that the Akka system keeps transferring data even after curl has been killed (you can enable DEBUG-level logging to see the details).

How can I deal with this problem? Thanks.

平芜泫
  • 91
  • 4

1 Answer

0

Well, it turns out to be a very complex problem, and my solution takes nearly 100 lines of code.

Actually, the problem does not only exist when I'm stacking two layers of proxies. When I'm using a single proxy layer, the problem also exists, but no log is printed, so I wasn't aware of it before.

The key problem is that when we use IO(Http) ! HttpRequest, we are actually using a host-level API from spray-can. The connections of host-level APIs are managed by Spray's HttpManager, which is not accessible from our code. Thus we can do nothing with that connection, unless we send an Http.CloseAll to IO(Http), which will cause all the upstream connections to be closed.

(If anyone knows how to get the connection from HttpManager, please tell me).

We have to use the connection-level APIs from spray-can to handle this situation. So I've come up with something like this:

/**
  * Proxy to upstream server, where the server response may be a long connection.
  *
  * For every request a dedicated actor is created. It asks `IO(Http)` for its
  * own connection with `Http.Connect`, sends the re-shaped request over it and
  * relays what comes back to the client's responder. Because the actor holds
  * the connection reference itself, it can send `Http.Close` to the upstream
  * connection as soon as either side closes or fails.
  *
  * @param uri Target URI, where to proxy to.
  * @param system Akka actor system.
  * @return A route that relays the upstream response to the client.
  */
def proxyToLongConnection(uri: Uri)(implicit system: ActorSystem): Route = {
  val io = IO(Http)(system)

  ctx => {
    val request = reShapeRequest(ctx.request, uri)

    // One relay actor per request; it opens the upstream connection itself.
    actorRefFactory.actorOf {
      Props {
        new Actor with ActorLogging {
          // Upstream connection; empty until Http.Connected has been received.
          private var upstream: Option[ActorRef] = None
          // Set once teardown has run. A plain flag suffices because it is only
          // read and written from this actor's own message handlers.
          private var closed = false

          // Request a connection to the upstream server and wait for the
          // outcome in the `connecting` behavior.
          io ! Http.Connect(
            request.uri.authority.host.toString,
            request.uri.effectivePort,
            sslEncryption = request.uri.scheme == "https"
          )
          context.become(connecting)

          /** Behavior while the upstream connection is being established. */
          def connecting: Receive = {
            case _: Http.Connected =>
              val connection = sender()
              upstream = Some(connection)
              connection ! request
              context.unbecome()  // Restore the context to [[receive]]

            case Http.CommandFailed(Http.Connect(address, _, _, _, _)) =>
              log.warning("Could not connect to {}", address)
              complete(StatusCodes.GatewayTimeout)(ctx)
              closeBothSide()

            case x: Http.ConnectionClosed =>
              closeBothSide()
          }

          /** Behavior once connected: relay upstream messages to the client. */
          override def receive: Receive = {
            // A complete response or the final chunk ends the exchange. It is
            // forwarded with an ack request, and the ContinueSend that comes
            // back triggers the teardown below.
            case x: HttpResponse =>
              ctx.responder ! x.withAck(ContinueSend(0))

            case x: ChunkedMessageEnd =>
              ctx.responder ! x.withAck(ContinueSend(0))

            case x: ContinueSend =>
              closeBothSide()

            case x: Failure =>
              closeBothSide()

            // Either side has closed its connection: tear everything down.
            case x: Http.ConnectionClosed =>
              closeBothSide()

            case x =>
              // Proxy everything else from server to the client.
              ctx.responder ! x
          }

          /**
            * Sends Http.Close to the upstream connection (if one was
            * established) and to the client responder, then stops this actor.
            * Calls after the first one do nothing.
            */
          private def closeBothSide(): Unit = {
            if (!closed) {
              closed = true
              upstream.foreach(_ ! Http.Close)
              ctx.responder ! Http.Close
              context.stop(self)
            }
          }
        } // new Actor
      } // Props
    } // actorOf
  } // (ctx: RequestContext) => Unit
}

The code is a little long, and I suspect there is a cleaner and simpler implementation (actually I'm not familiar with Akka). Nevertheless, this code works, so I'm putting this solution here. Feel free to post your own solution to this problem if you've found a better one.

平芜泫
  • 91
  • 4