1

I'm quite new to Perl and Python. I have to convert some old functions written in Perl to Python, and I am struggling to find the Python equivalents of calls such as HTML::Form->parse() and $self->{ua}->simple_request().

I have looked at modules like BeautifulSoup, which come in handy for parsing data from an HTML page.

However, I need to use the user-agent object constantly throughout the code, and I am unable to find a suitable alternative in Python.

The code in perl is initialised as follows:

# Constructor: builds the object and the LWP::UserAgent it keeps in $self->{ua}.
#
# Arguments are key => value pairs merged over the defaults below:
#   ip          - proxy host; has no default, so it must be supplied unless
#                 transparent is true (otherwise the proxy URL has an empty host)
#   port        - proxy port (default 443)
#   transparent - when true, no proxy is configured (default 0)
#   logger      - optional object with a Log($message, 4) method; the 4 is
#                 presumably a verbosity level -- confirm against the logger class
#   user_agent  - User-Agent string sent by the user agent
#   ssl_ver     - copied to $ENV{HTTPS_VERSION} and to SSL_version (default '23')
#   ssl_cipher  - optional value for SSL_cipher_list
#   init        - optional hash ref; init->{client_ip} is passed as LocalAddr
#
# Side effects: deletes cookies.txt in the current directory, modifies %ENV,
# and overwrites the global @LWP::Protocol::http::EXTRA_SOCK_OPTS.
#
# Returns the blessed object.
sub new {
    my ($class, %args) = @_;

    # Process-wide: LWP reads this variable as the default for its
    # certificate hostname verification; 0 switches the check off.
    $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;

    my $self = {
        # default args:
#       ip          => '10.10.10.10',
        port        => 443,
        transparent => 0,
#       logger      =>
        user_agent  => "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
#       user_agent  => "mybrowser",
        ssl_ver     => '23',
        %args,                      # caller-supplied values override the defaults
    };

    # The logger is optional (it has no default), so only log when one was
    # supplied instead of calling a method on undef.
    my $log = sub {
        my ($message) = @_;
        $self->{logger}->Log($message, 4) if $self->{logger};
        return;
    };

    # Start every object with an empty cookie file.
    unlink "cookies.txt";

    $self->{ua} = LWP::UserAgent->new(keep_alive => 10);
    $self->{ua}->agent($self->{user_agent});
    Net::SSL::send_useragent_to_proxy(1);
    $self->{ua}->timeout(90 * 1);
#   $self->{ua}->timeout(200 * 1);
    $ENV{'HTTPS_VERSION'} = $self->{ssl_ver};

    my $cookie_jar = HTTP::Cookies->new(
        file         => "cookies.txt",
        hide_cookie2 => 1,
#       autosave     => 1,
    );
    $self->{ua}->cookie_jar($cookie_jar);

    # Set proxy
    if (! $self->{transparent}) {
        my $proxy = 'http://' . $self->{ip} . ':' . $self->{port};  # don't add .'/' !
        $log->("Set UA proxy: $proxy");
        $self->{ua}->proxy('http', $proxy);
        $self->{ua}->proxy('https', $proxy);
#       $ua->proxy('https', $proxy);    # break authentication
        $ENV{'HTTPS_PROXY'} = $proxy;
        $log->("Set HTTPS proxy: $ENV{'HTTPS_PROXY'}");
        $self->{proxy} = $proxy;
    }

    # Disabled alternative: force a default IO::Socket::SSL context.
#   my $context = new IO::Socket::SSL::SSL_Context(
#       SSL_version     => 'TLSv1',
#       SSL_verify_mode => Net::SSLeay::VERIFY_NONE(),
#   );
#   IO::Socket::SSL::set_default_context($context);

    # Global setting: these extra socket options apply to every LWP http
    # connection in the process, not only to this object's user agent.
    @LWP::Protocol::http::EXTRA_SOCK_OPTS = (
        LocalAddr       => $self->{init}->{client_ip},
        SSL_version     => $self->{ssl_ver},
        SSL_cipher_list => $self->{ssl_cipher},
    );

    # bless always returns the (true) reference, so no error check is needed.
    return bless $self, $class;
}

That covers the initialisation part, but the main issue arises when using calls like these:

# Parse the HTML form out of the previous response $res (in scalar context
# HTML::Form->parse is documented to return the first form it finds).
my $form = HTML::Form->parse($res);
if (condition){
      # Build the request object that submitting this form would send.
      $post = $form->make_request;
}
# Send $post through the user agent. Note that $post is only assigned above
# when the condition holds; otherwise it keeps whatever value it had before.
$res = $self->{ua}->simple_request($post);
# Register hosts/domains that the user agent should reach without the proxy.
$self->{ua}->no_proxy("10.x.x.x", "test.com", "10.x.x.x", "10.x.x.x", "10.x.x.x", "tests.com", "dummy.com");

...
# Attach HTTP Basic credentials to the request, then send it through the user agent.
$req->authorization_basic($login,$password);
$res = $self->{ua}->simple_request($req);


....

# Build the request for $url with the object's own GetCommonRequest method
# (not shown here), then add Basic credentials and four explicit headers.
# NOTE(review): the Content_Type key is presumably sent as Content-Type via
# the HTTP::Headers underscore-to-dash convention -- confirm.
$req = $self->GetCommonRequest( $url );
        $req->authorization_basic($login,$password);
        $req->header(Content_Type => 'application/x-www-form-urlencoded',
            Accept => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding' => 'gzip, deflate',
            Host => $host);
...

These are the places where methods of the {ua} object, such as simple_request and no_proxy, and request methods such as authorization_basic are used. I am unable to find the Python equivalents for these.

I would be really grateful if someone could let me know the Python equivalents for these modules.

Big thanks in advance.

juanpa.arrivillaga
  • 88,713
  • 10
  • 131
  • 172
  • This question is too broad for Stack Overflow format and should be closed; but have a look at [`mechanize`](https://mechanize.readthedocs.io/en/latest/) for starters. – Selcuk Nov 11 '19 at 06:42
  • please always use the generic [python] tag when asking Python questions. – juanpa.arrivillaga Nov 11 '19 at 07:17

1 Answer

0

Try to use something like this:

from urllib2 import urlopen, URLError, HTTPError, Request
from httplib import BadStatusLine, IncompleteRead

# url -- the URL you're trying to access
# data -- some params you want to POST
try :
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11',
        'accept'    : 'application/json',
    }
    headers['Authorization'] = 'Bearer %s' % self.auth[nickname]['access_token']

    if data is None :   # GET method
        req = Request( url, None, headers)
    else :  # POST method
        headers['Content-Type'] = 'application/json'
        data = json.dumps(data).encode('utf-8')
        req = Request( url, data, headers)

    result = urlopen( req ).read()

    print result
    return json.loads( result )

except HTTPError, e:
    log( 'HTTP error: ' + str(e.code) )
    result = e.read()
    print result
    return json.loads( result )
except URLError, e:
    log_this( 'unable to reach a server: ' + str(e.reason) )
except BadStatusLine, e:
    log_this( 'Bad Status Line' )
except IncompleteRead, e :
    log_this( 'IncompleteRead: ' + str(e) )
except Exception, e :
    log_this( str(e) + ': ' + url )
    log_this( traceback.format_exc() )
lenik
  • 23,228
  • 4
  • 34
  • 43