1

I am trying to write my own Python script to find an account's top followed followers, and it seems to work fine. However, after a while, or after running the script more than 1-2 times, Instagram gives me a "try again" error. I've searched and found that this is Instagram temporarily blocking my IP because I have made too many requests at once.

Does anyone know a way to get around this?

MY CODE

"""
WHAT DOES THIS SCRIPT ACTUALLY DO?:
This script enables you to scrape all your followers and then find X top followed followers.
--------------------------------------------------------------------------------------------
NOTICE:
Unfortunately it is very hard nowadays to scrape social media sites, due to
timeout issues, too many requests in a set time and other request restrictions.
So this script can only be run 1-3 times a day.
I've also tried using existing APIs, but all of these are either too slow, or simply
show a '429' too many requests error.
"""

import instaloader
from selenium import webdriver
import time
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from rich.console import Console
from rich.table import Column, Table


# Global vars
# Instaloader session; get_followers() logs in through it and reuses its context.
L = instaloader.Instaloader()
# Profile URL template; scrape_data() fills the placeholder with a username.
URL = "https://www.instagram.com/{}/"
# Credentials typed in by the user in get_followers() and reused by driver_login().
usernameGlobal = None
passwordGlobal = None
# Rich console that output_result() prints the result table to.
console = Console()


def get_followers():
    """Log in through Instaloader and return the usernames of all followers.

    Prompts for the username and password until a login attempt succeeds.
    The accepted credentials are stored in the module-level
    ``usernameGlobal`` / ``passwordGlobal`` so that the browser login can
    reuse them.

    Returns:
        A list with the username of every follower of the logged-in account.
    """
    # Local import keeps this block self-contained; getpass reads the
    # password without echoing it to the terminal.
    import getpass

    global usernameGlobal, passwordGlobal

    # Login
    while True:  # Keep prompting until a login attempt succeeds
        print("\n"+"*-=-*"*5)
        usernameGlobal = input("> Enter your username: ")
        passwordGlobal = getpass.getpass("> Enter your password: ")

        # Only the login call is guarded, and only against Instaloader's own
        # errors, so Ctrl-C / EOF still abort the prompt instead of looping.
        try:
            L.login(usernameGlobal, passwordGlobal)
        except (instaloader.exceptions.BadCredentialsException,
                instaloader.exceptions.InvalidArgumentException):
            print("\n"+"-"*28+"\n> Wrong Username / Password"+"\n"+"-"*28)
            continue
        except instaloader.exceptions.InstaloaderException as err:
            # Not a credentials problem (e.g. connection trouble or a
            # required two-factor code): show the real reason.
            print("\n"+"-"*28+"\n> Login failed: {}".format(err)+"\n"+"-"*28)
            continue

        print("\n"+"-"*28+"\n> Successfully Logged In!")
        print("> Please leave this program running in the background")
        print("> Until you see the 'FINISHED' message'"+"\n"+"-"*28)
        break

    # Obtain profile metadata
    profile = instaloader.Profile.from_username(L.context, usernameGlobal)

    # Collect the username of each follower
    return [follower.username for follower in profile.get_followers()]


def _parse_follower_count(text):
    """Convert a displayed follower count into an int.

    Accepts plain or comma-grouped numbers ("1,234") and abbreviated forms
    with a k/m/b suffix ("10.5k", "1.2m"). Returns 0 when no number can be
    extracted from *text*.
    """
    multipliers = {"k": 1000, "m": 1000000, "b": 1000000000}

    cleaned = text.strip().lower().replace(",", "").replace(" ", "")
    multiplier = 1
    if cleaned and cleaned[-1] in multipliers:
        multiplier = multipliers[cleaned[-1]]
        cleaned = cleaned[:-1]

    try:
        return int(round(float(cleaned) * multiplier))
    except (ValueError, OverflowError):
        # Unexpected format: fall back to keeping only the digits.
        digits = "".join(ch for ch in cleaned if ch.isdigit())
        return int(digits) if digits else 0


def scrape_data(username):
    """Open *username*'s profile in the browser and read its follower count.

    Uses the module-level ``driver`` and ``URL``.

    Args:
        username: Instagram account name whose profile page is loaded.

    Returns:
        The follower count as an int, or 0 when the count could not be found
        on the page (a notice is printed in that case).
    """
    # Local import keeps this block self-contained.
    from selenium.common.exceptions import NoSuchElementException

    driver.get(URL.format(username))

    count_xpaths = (
        '/html/body/div[1]/section/main/div/header/section/ul/li[2]/a/span',
        # For people who you don't follow but follow you and have private accounts
        '/html/body/div[1]/section/main/div/header/section/ul/li[2]/span/span',
    )

    for xpath in count_xpaths:
        try:
            # find_element("xpath", ...) is available in Selenium 3 and 4,
            # unlike the removed find_element_by_xpath helper.
            text = driver.find_element("xpath", xpath).text
        except NoSuchElementException:
            continue
        return _parse_follower_count(text)

    # Neither layout matched: report it and return a neutral count instead of
    # failing on the missing text.
    print("\n"+"-"*28+"\n> Please try this script again later!"+"\n"+"-"*28)
    return 0


def driver_login():
    """Log the Selenium browser in to Instagram.

    Uses the module-level ``driver`` together with the credentials stored in
    ``usernameGlobal`` / ``passwordGlobal``. The fixed pauses give the page
    time to load before and after the form is submitted.
    """
    driver.get("https://www.instagram.com")
    time.sleep(3)

    # find_element("xpath", ...) is available in Selenium 3 and 4, unlike the
    # removed find_element_by_xpath helper.
    username_field = driver.find_element("xpath", "//input[@name='username']")
    username_field.send_keys(usernameGlobal)

    password_field = driver.find_element("xpath", "//input[@name='password']")
    password_field.send_keys(passwordGlobal)

    # Pressing Return in the password field submits the login form.
    password_field.send_keys(Keys.RETURN)

    time.sleep(3)

    # -- This is for if you have two factor authentication enabled --
    # element = driver.find_element("xpath", "//input[@name='verificationCode']")
    # key = input("Enter Activation key: ")
    # element.send_keys(key)

    # element.send_keys(Keys.RETURN)

    # time.sleep(3)


def output_result(size, result):
    """Ask how many top followers to show and print them as a table.

    Args:
        size: Upper bound for the number of rows the user may request.
        result: Mapping of follower username to follower count.

    The user is re-prompted until they enter a whole number between 0 and
    the number of available entries. Rows are printed from the highest
    follower count to the lowest via the module-level ``console``.
    """
    # Never offer more rows than there are entries to show.
    limit = min(size, len(result))

    # Get user to select how many of the top followed followers they want
    while True:
        print("\n"+"*-=-*"*10)
        try:
            n_input = int(input("> How many of your top followed followers do you want to see?\n> E.g 5 for top 5.\n> "))
        except ValueError:
            n_input = None

        if n_input is not None and 0 <= n_input <= limit:
            break

        # Both non-numeric and out-of-range answers explain what went wrong.
        print("\n"+"-"*28+"\n> Invalid input. (Must be a number & less then your follower count)"+"\n"+"-"*28)

    # Make the table for a clean user friendly output and print it out
    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Your Followers", style="dim", width=12)
    table.add_column("There Follower Count")

    # Rank here so the output does not depend on the caller's ordering; the
    # sort is stable, so an already sorted mapping keeps its order.
    ranked = sorted(result.items(), key=lambda item: item[1], reverse=True)

    for name, count in ranked[:n_input]:
        # rich only renders strings/renderables, so the count is converted.
        table.add_row(name, str(count))

    console.print(table)

    

if __name__ == "__main__":
    # Script flow: collect the follower list, look up each follower's own
    # follower count in a browser, then show the top entries.

    list_of_followers = get_followers()

    # Initialize the selenium driver
    # (module-level on purpose: driver_login() and scrape_data() use it)
    driver = webdriver.Chrome(ChromeDriverManager().install())

    result = {}

    try:
        driver_login()

        for follower in list_of_followers:
            result[follower] = scrape_data(follower)
    finally:
        # Always close the browser, even when login or scraping fails.
        driver.quit()

    # Sort the dictionary by descending order
    result = dict(sorted(result.items(), key=lambda x: x[1], reverse=True))
    print("\n> FINISHED")

    output_result(len(list_of_followers), result)

    # SystemExit does not rely on the site-provided exit() helper.
    raise SystemExit(0)
    
  • If you work for Instagram, then have a chat with OPS team, they may give you an optimal solution. if you don't work for Instagram, don't scrape it. – cruisepandey Aug 31 '21 at 09:03

1 Answer

0

You can potentially make unlimited requests if you use proxies. You can buy thousands of proxies from various sites and rotate through them, passing a different one in the proxies dictionary for each request.

Simply add a list of proxies to your GET request and enjoy:

# Map each URL scheme to the proxy that should carry it. http_proxy,
# https_proxy and ftp_proxy are placeholders for your own proxy addresses.
proxyDict = { 
              "http"  : http_proxy, 
              "https" : https_proxy, 
              "ftp"   : ftp_proxy
            }

# Hand the mapping to the request through the proxies argument
# (url and headers are placeholders as well).
r = requests.get(url, headers=headers, proxies=proxyDict)

Also for Selenium, from this answer:

PROXY = "1.111.111.1:8080"  # your proxy, as host:port

# ChromeOptions lives on the webdriver module, not on WebDriverWait.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--proxy-server=%s' % PROXY)

# `options=` replaces the deprecated `chrome_options=` keyword.
chrome = webdriver.Chrome(options=chrome_options)
# get() needs a full URL including the scheme.
chrome.get("https://www.instagram.com")
isopach
  • 1,783
  • 7
  • 31
  • 43
  • So I should use the bottom code and apply it to my Selenium driver, right? Do you know any good free proxy sites? – mathewsjoyy Sep 10 '21 at 18:59
  • @barrybeers1998 yes, just add the proxy argument to your chrome_options. And no I don't know of any, only paid ones for datacenters. – isopach Sep 10 '21 at 19:00
  • when i search free proxy id https://www.google.com/search?q=free+proxy+id&oq=free+pr&aqs=chrome.0.69i59l2j69i57j69i61l3.1638j0j1&sourceid=chrome&ie=UTF-8 It shows a lot of ips and ports can you use these? – mathewsjoyy Sep 10 '21 at 19:37
  • @barrybeers1998 Sure if you don't mind your traffic being seen by the owners of each proxy. – isopach Sep 10 '21 at 19:39
  • @barrybeers1998 You need more reputation to up- or downvote, but if it solves your problem you can accept it (click on the checkmark) and so remove your question from the "unanswered" queue. If you need more help with this issue, please adapt your question accordingly. – isopach Sep 13 '21 at 13:22