-3

This is a proxy grabber and tester script.

I have tried many things. I like Rolling Curl but I can not seem to get it working with this. Is there any way to speed this up or maybe throttle it with JavaScript? The processing time and resources are way too high!

Right now it will work only if there are one or two sources otherwise it just runs forever. There are tons of applications like this in PYTHON and Windows applications that harvest and check thousands of proxies. I just wonder if this is even possible for PHP to do the same.

// Settings
error_reporting(E_ALL);
ini_set('max_execution_time', 0);
require_once ('classes/class.multicurl.php');

set_time_limit(0);
// Short Delay
$delay = rand(2, 4);
// Long Delay
$longdelay = rand(4, 7);
// Checking Proxies
$fileName = "leeched/proxies.txt";
// where to save successful proxies
$success = "goodproxies/success.txt";
$source = file('sources/sources.txt');
// SET Cookie
$c = new Curl;
foreach($source as $sources) {
  // Request To Delete Duplicate Proxies
  $c->addRequest(trim($sources));
}
$c->chunk(25);
$c->perform();
$proxies = array();
foreach($c->results as $url => $res) {
  // REGEX MATCH
  preg_match_all('@[0-9]{1,4}\.[0-9]{1,4}\.[0-9]{1,4}\.[0-9]{1,4}:[0-9]    {1,6}@', $res, $m);
  $eachproxy = stream_get_contents($res);
  $proxies[$url] = $m[0]; {
    while ($proxies == time() && $eachproxy > 4) { // go into "waiting" when       we going to fast
      usleep(100000); // wait .1 second and ask again
    }
    if ($proxies != time()) { // remember to reset this second and the cnt
      $proxies = time();
      $eachproxy = 0;
    }
  }
  foreach($proxies as $url => $parr) {
    $str = implode("\n", $parr);
    file_put_contents('leeched/proxies.txt', $str);
    $k = count($parr);
    $str2 = date('h:i:s d m') . " | \t" . $k . "\t" . $url . "\n";
    file_put_contents('logs/counts.txt', $str2, FILE_APPEND);
  }
  $uar = file('leeched/proxies.txt');
  $uar = array_unique($uar);
  $str = implode("\n", $uar) . "\n";
  $str = preg_replace('/^\h*\v+/m', '', $str);
  file_put_contents('leeched/proxies.txt', $str);
}
// Proxy Testing
if (!is_file($fileName)) die('Proxy file not available');
$proxies = file($fileName);
for ($p = 0; $p < count($proxies); $p++) {
  $ch = curl_init(); //initizlize and set url
  curl_setopt($ch, CURLOPT_URL, "http://www.yordomain.com/check.php");
  curl_setopt($ch, CURLOPT_HEADER, 1);
  curl_setopt($ch, CURLOPT_HTTPGET, 1);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
  curl_setopt($ch, CURLOPT_HEADER, FALSE);
  curl_setopt($ch, CURLOPT_VERBOSE, TRUE);
  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 7);
  curl_setopt($ch, CURLOPT_TIMEOUT, 5);
  curl_setopt($ch, CURLOPT_PROXY, trim($proxies[$p]));
  $data = curl_exec($ch);
  usleep(100000);
  if (strpos($data, 'Anonymous') !== false) {
    usleep(100000);
    echo "<img src=\"images/good.png\">&nbsp;&nbsp;<font  color=\"#7CFC00\"><strong>" . $proxies[$p] . " </font></strong><font color=\"#FFFFE0\"><strong>   THIS IS A WORKING ANONYMOUS PROXY SAVED TO /goodproxies/success.txt</font></strong><font color=\"yellow\"><strong> " . "Total time: " . curl_getinfo($ch, CURLINFO_TOTAL_TIME) . " seconds!</font></strong><img src=\"images/small.png\"> <br/><br/>";
    $f = fopen($success, "a");
    fwrite($f, $proxies[$p]);
    fclose($f);
  }
  elseif (curl_errno($ch)) {
    usleep(100000);
    echo "<img src=\"images/bad.png\">&nbsp;&nbsp;<font color=\"white\"><strong>" . $proxies[$p] . " </font></strong><font color=\"red\"><strong>ERROR:</font></strong><font color=\"#00FFFF\"><strong> " . curl_error($ch) . " </font></strong><img src=\"images/redx.png\"> <br/><br/>";
  }
  else {
    echo "<img src=\"images/warning.png\">&nbsp;&nbsp;<font color=\"#7CFC00\"><strong> " . $proxies[$p] . "   </font></strong><font color=\"white\"><strong> THERE WAS NO ERROR CONNECTING BUT THIS PROXY IS NOT ANONYMOUS!  NOT SAVED</font></strong> <font color=\"#FF69B4\"><strong>(No content from source)</font></strong><img src=\"images/redx.png\"> <br/><br/>";
  }
  flush();
  curl_close($ch);
}
$done = "done";
echo $done;
mplungjan
  • 169,008
  • 28
  • 173
  • 236
  • You want to speed it up or throttle it? Which is it? – AmericanUmlaut Mar 11 '17 at 05:58
  • Right now it will work only if there are one or two sources otherwise it just runs forever. There are tons of applications like this in PYTHON and Windows applications that harvest and check thousands of proxies. I just wonder if this is even possible for PHP to do the same. – Jeff Childers Mar 11 '17 at 06:03

1 Answers1

0

it'd go faster if you'd do curl_setopt($ch, CURLOPT_ENCODING, ''); and your libcurl is compiled with gzip/deflate support, and the target website supports at least 1 of those (which is pretty much always the case)

hanshenrik
  • 19,904
  • 4
  • 43
  • 89