2

I am trying to use torpy to query Bitcoin balances over Tor.

from torpy.http.requests import TorRequests
import json

# Addresses whose balances are looked up.
addr1 = r'34xp4vRoCGJym3xR7yCVPFHoCNxv4Twseo'
addr2 = r'bc1qgdjqv0av3q56jvd82tkdjpy7gdp9ut8tlqmgrpmv24sq90ecnvqqjwvw97'

# One TorRequests context and one session are shared by every lookup.
with TorRequests() as tor_requests:
    print("establish circuit")
    with tor_requests.get_session() as sess:
        for addr in (addr1, addr2):
            val = sess.get(f"https://api.blockcypher.com/v1/btc/main/addrs/{addr}") # https://stackoverflow.com/a/71704333
            # The body is parsed as JSON without checking the HTTP status.
            payload = json.loads(val.text)
            # Presumably 'balance' is reported in satoshis (10**8 per BTC) -- confirm against the API docs.
            balance = payload['balance']/(10**8)
            print(f'{addr} balance: {balance:.8f} BTC')

This works the first time I run it; it returns the balances. However, if I run it a second time, I receive a long traceback:

Stream #4: closed (but received b'\xca4{\xee\xa6\xb0K\x8fq\xea\xb9p\x81\tr*\x15\x80\xa9 zA\x08\xe9^u\x9b\xd8V,\xe8\xd2=V\xdd\x12\xe2\x9d\xfdm\xef\xbc\xaf\\9\xeb\xbc\x9f\xaa\xc3XR\x95K\xc9\x0b\xe7\x0bv\xa8:f\xd8\x8cj>\x14\xcao:0XQ\xc8\x7f\xe3{\xfb4`&\xf5\xa6\x9ez\x9e>!\x0c\xa6\xee$&Vs\x1b\x16l\xe7]7\xe4\xb4o\x8f\xcbO\xc5\xd7\xaf\x9f\x8e7\xd8\xe7\xd1\x91\xe0}VBY\xc1W\x1a\xf9)\x04\x0b\x9c\x18\x07~\xc7\x9f\xd8!\xdb^\x8a\xa4h\xb7\xb9\x98\x122\x07\x8ft1\t\x16\xaf\xb2\x05W\xb1U\xd7\xfa[\xcdn\xecR\xd6\xcfo\xd8SgJY\xe4tf~yA\x07f\x83%\xbc\xbd\x04\x92.-\x1dr\xe8\xd4{\xe2|hY\xbf\x00S\xbf\xdd\xdal\x9eY\xa1^\xf42\xc5V\xf4\xa3\x8bd\x90t\xe2m\xbb\x87e0\x956\xb7W\xde\xb1/\xd3\x9e\xf2\xbb4\xd8\x1b\xe3\xd1j8\xf6\x17\xc6^\xcf\nJw\xe0g\xf7\xcb5;\r\x99h\x87\xd2r|\xe7\xc1{\xc1\xc08O-\xc3\xdeo\x7f\xbfc\xcc\x9c\x14\xfa\xd9\x13\xaf0\x1d\xab\x9b\x10\xa75\xd7\xea\x16\x91\xb8l\xb1$\x06nW\xcb\x82\xe3>T\xdf\xc0N\xc9\xc0>\xed\xfaND%\xbe\xbd\xee\xe1\x8don\xc4y\xd8\x9a\x99\xa0\xe1\x8d\n*9n\xaa\xb5/B\xec\xbb\xfbr\x0fK4\xab\xebi,\xcaa\xb1+\xb2RG\xe8\t\xb29w\x1a\xfcC\x91\xb6L\xbd\xa9B\xfc\xf4\x08+\t\xed\x87\xe5\x81 \xad\x9a-\xcaS\x18\xc0\x93\x08]M\x87`\x80?\xc1W\x03\xf1\x94\x01\x17\x8a\x13\xb4\x87\xcd\x99\xf7\xb9\xa2&\x82\xf4\x9b\xf8\x80\xcfc\x02\x16\xf4\x0e\xab\x82\xc9\x0bn\x06U\x10:\x842tRy.\x8eg\x15\x1a\xe1\x89\x00\xd4\xd69\x12\xe5#\x93\xaa\x89\x01Y15YD\x8c/N\xcc\xcf\x97\xfb\x14\x04\x0fe\xc9\xa4)\xee\xe4\x9fO\xd4\xcf\x1ek\x07\x8cq\xf32<m\xa3J\xa7\x80')
Stream #4 is already closed or was never opened (but received CellRelay(inner_cell = CellRelayData(data = ... (498 bytes)), stream_id = 4, digest = b'\xc1\xbb\x92T', circuit_id = 80000001))
Stream #4: closed already
Stream #4: closed already
Traceback (most recent call last):
  File "C:\Program Files\Python\Python38\lib\site-packages\urllib3\response.py", line 443, in _error_catcher
    yield
  File "C:\Program Files\Python\Python38\lib\site-packages\urllib3\response.py", line 818, in read_chunked
    chunk = self._handle_chunk(amt)
  File "C:\Program Files\Python\Python38\lib\site-packages\urllib3\response.py", line 771, in _handle_chunk
    returned_chunk = self._fp._safe_read(self.chunk_left)
  File "C:\Program Files\Python\Python38\lib\http\client.py", line 610, in _safe_read
    raise IncompleteRead(data, amt-len(data))
http.client.IncompleteRead: IncompleteRead(634 bytes read, 3974 more expected)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Program Files\Python\Python38\lib\site-packages\requests\models.py", line 753, in generate
    for chunk in self.raw.stream(chunk_size, decode_content=True):
  File "C:\Program Files\Python\Python38\lib\site-packages\urllib3\response.py", line 623, in stream
    for line in self.read_chunked(amt, decode_content=decode_content):
  File "C:\Program Files\Python\Python38\lib\site-packages\urllib3\response.py", line 844, in read_chunked
    self._original_response.close()
  File "C:\Program Files\Python\Python38\lib\contextlib.py", line 131, in __exit__
    self.gen.throw(type, value, traceback)
  File "C:\Program Files\Python\Python38\lib\site-packages\urllib3\response.py", line 460, in _error_catcher
    raise ProtocolError("Connection broken: %r" % e, e)
urllib3.exceptions.ProtocolError: ('Connection broken: IncompleteRead(634 bytes read, 3974 more expected)', IncompleteRead(634 bytes read, 3974 more expected))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "E:\Examples\Python\torpy\check_btc_addr.py", line 50, in <module>
    val = sess.get(f"https://api.blockcypher.com/v1/btc/main/addrs/{addr}") # https://stackoverflow.com/a/71704333
  File "C:\Program Files\Python\Python38\lib\site-packages\requests\sessions.py", line 555, in get
    return self.request('GET', url, **kwargs)
  File "C:\Program Files\Python\Python38\lib\site-packages\requests\sessions.py", line 542, in request
    resp = self.send(prep, **send_kwargs)
  File "C:\Program Files\Python\Python38\lib\site-packages\requests\sessions.py", line 697, in send
    r.content
  File "C:\Program Files\Python\Python38\lib\site-packages\requests\models.py", line 831, in content
    self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
  File "C:\Program Files\Python\Python38\lib\site-packages\requests\models.py", line 756, in generate
    raise ChunkedEncodingError(e)
requests.exceptions.ChunkedEncodingError: ('Connection broken: IncompleteRead(634 bytes read, 3974 more expected)', IncompleteRead(634 bytes read, 3974 more expected))

I don't really understand why it doesn't work if I run it multiple times. Does anybody know what's going on here, and how to fix it?

Rubén
  • 34,714
  • 9
  • 70
  • 166
joejoejoejoe4
  • 1,206
  • 1
  • 18
  • 38

1 Answer

1

The API is protected by Cloudflare. The 634 bytes, when decoded, give:

<!DOCTYPE html>
<html lang="en-US">
<head>
    <title>Just a moment...</title>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=Edge">
    <meta name="robots" content="noindex,nofollow">
    <meta name="viewport" content="width=device-width,initial-scale=1">
    <link href="/cdn-cgi/styles/challenges.css" rel="stylesheet">
    

</head>
<body class="no-js">
    <div class="main-wrapper" role="main">
    <div class="main-content">
        <h1 class="zone-name-title h1">
            <img class="heading-favicon" src="/favicon.ico"
                 onerror="this.onerror=null;this.parentNode.removeChild(this)">
            api.blockcypher.com
        </h1>
        <h2 class="h2" id="challenge-running">
            Checking if the site connection is secure
        </h2>
        <noscript>
            <div id="challenge-error-title">
                <div class="h2">
                    <span class="icon-wrapper">
                        <div class="heading-icon warning-icon"></div>
                    </span>
                    <span id="challenge-error-text">
                        Enable JavaScript and cookies to continue
                    </span>
                </div>
            </div>
        </noscript>

You can work around it by monkey-patching:

  1. http.client.HTTPResponse._safe_read to handle a truncated response instead of raising IncompleteRead,
  2. urllib3.response.HTTPResponse._update_chunk_length to handle a truncated response instead of raising InvalidChunkLength, and
  3. torpy.http.requests.Session to use requests-html's HTMLSession, so that the page can be rendered when the request is blocked by Cloudflare.
import http.client
def _safe_read(self, amt):
    """Read up to ``amt`` bytes from ``self.fp``, tolerating a truncated body.

    Replacement for ``http.client.HTTPResponse._safe_read``. When the
    underlying file object runs dry before ``amt`` bytes have arrived, the
    bytes received so far are returned instead of raising ``IncompleteRead``.

    Args:
        amt: Number of bytes the caller expects to be available.

    Returns:
        The bytes actually read; shorter than ``amt`` if the stream ended
        early, and ``b""`` if nothing could be read or ``amt`` is not positive.
    """
    # MAXAMOUNT is not guaranteed to exist in every Python release (it appears
    # to have been dropped in 3.8 -- verify), so fall back to 1 MiB per read
    # rather than failing with AttributeError.
    max_chunk = getattr(http.client, "MAXAMOUNT", 1048576)
    pieces = []
    while amt > 0:
        chunk = self.fp.read(min(amt, max_chunk))
        if not chunk:
            # End of stream before amt bytes arrived: return the partial data
            # instead of raising IncompleteRead(b''.join(pieces), amt).
            break
        pieces.append(chunk)
        amt -= len(chunk)
    return b"".join(pieces)


# Install the tolerant reader on every HTTPResponse.
http.client.HTTPResponse._safe_read = _safe_read

import urllib3.response
from urllib3.exceptions import InvalidChunkLength
def _update_chunk_length(self):
    """Set ``self.chunk_left`` to the size of the next chunk, if not yet known.

    Replacement for ``urllib3.response.HTTPResponse._update_chunk_length``.
    A size line is read from ``self._fp.fp`` and anything after the first
    ``;`` is discarded before the remainder is parsed as hexadecimal. When
    nothing at all could be read, the chunk is treated as zero-length instead
    of being reported as an invalid chunk length.

    Raises:
        InvalidChunkLength: The size line is non-empty but not valid hex;
            the response is closed first.
    """
    if self.chunk_left is None:
        size_field = self._fp.fp.readline().split(b";", 1)[0]
        if not size_field:
            # Nothing left to read: treat it as a zero-length chunk.
            self.chunk_left = 0
        else:
            try:
                self.chunk_left = int(size_field, 16)
            except ValueError:
                self.close()
                raise InvalidChunkLength(self, size_field)


# Install the tolerant chunk-length parser on urllib3's HTTPResponse.
urllib3.response.HTTPResponse._update_chunk_length = _update_chunk_length

import torpy.http.requests
from requests_html import HTMLSession
# Swap torpy's Session for HTMLSession so responses offer `.html.render()`, used below.
torpy.http.requests.Session = HTMLSession
from torpy.http.requests import TorRequests
import json

# Addresses whose balances are looked up.
addr1 = r'34xp4vRoCGJym3xR7yCVPFHoCNxv4Twseo'
addr2 = r'bc1qgdjqv0av3q56jvd82tkdjpy7gdp9ut8tlqmgrpmv24sq90ecnvqqjwvw97'

with TorRequests() as tor_requests:
    print("establish circuit")
    with tor_requests.get_session() as sess:
        for addr in (addr1, addr2):
            response = sess.get(f"https://api.blockcypher.com/v1/btc/main/addrs/{addr}")
            status = response.status_code
            if status == 403:
                # Blocked (the Cloudflare challenge page): render it, then
                # read the text of the rendered document.
                response.html.render()
                body = response.html.text
            elif status == 200:
                # Plain JSON answer.
                body = response.text
            else:
                raise NotImplementedError
            account = json.loads(body)
            balance = account['balance']/(10**8)
            print(f'{addr} balance: {balance:.8f} BTC')

Note: print(repr(resp.headers['Server'])) gives 'cloudflare'.

Also note: I tried cloudscraper to no avail.

aaron
  • 39,695
  • 6
  • 46
  • 102