3

I'm trying to save a website as a PDF through the print dialog. My code lets me save as PDF, but it asks me to input a filename, and I don't know how to pass a filename to the pop-up box. My code is attached:

import time
from selenium import webdriver
import os

class printing_browser(object):
    """Firefox session whose profile carries silent-printing preferences."""

    def __init__(self):
        """Create the Firefox profile, apply the preferences and start Firefox."""
        self.profile = webdriver.FirefoxProfile()

        # (preference name, value) pairs, applied to the profile in this order.
        profile_prefs = (
            ("services.sync.prefs.sync.browser.download.manager.showWhenStarting", False),
            ("pdfjs.disabled", True),
            ("print.always_print_silent", True),
            ("print.show_print_progress", False),
            ("browser.download.show_plugins_in_list", False),
        )
        for pref_name, pref_value in profile_prefs:
            self.profile.set_preference(pref_name, pref_value)

        geckodriver_path = r'C:\Users\AShen\Documents\Workspace\geckodriver.exe'
        self.driver = webdriver.Firefox(
            executable_path=geckodriver_path,
            firefox_profile=self.profile,
        )
        # Fixed pause after the browser has been launched.
        time.sleep(5)

    def get_page_and_print(self, page):
        """Load ``page`` in the browser and call ``window.print()`` on it.

        Args:
            page: URL handed to the driver's ``get``.
        """
        self.driver.get(page)
        # Fixed pause between requesting the page and printing it.
        time.sleep(5)
        self.driver.execute_script("window.print();")

if __name__ == "__main__":
    # Demo run: start the browser and print a single page.
    target_url = 'http://www.google.com/'
    browser_that_prints = printing_browser()
    browser_that_prints.get_page_and_print(target_url)
Alice Shen
  • 41
  • 1
  • 5
  • Sorry, but could you explain what you mean by `the pop up box`? Maybe explain a little bit further the flow. – guzmonne Jul 19 '19 at 00:02
  • 1
    Since selenium uses the page title as the PDF filename, so just change the page title to the name you want to give your PDF before printing . `driver.execute_script('document.title="{}";'.format(YOUR_PDF_NAME)); driver.execute_script('window.print();')` – iMath May 01 '21 at 09:34

2 Answers2

4

I recently had the same question. I solved it without using pyautogui, because I use different PCs and monitors and I didn't want to depend on the position of the click.

I was able to solve it through about:config, by changing the relevant preferences before each print (to PDF).

The name of my PDF printer in Ubuntu is "Print to File" (it is the value set in print_printer), and the about:config preferences that are changed must be the ones belonging to this printer. For example: print.printer_Print_to_File.print_to_file: true

import os
import time
from selenium import webdriver

class printing_browser(object):
    """Firefox session that prints a page to a file chosen by the caller.

    The target file is selected by rewriting the ``Print to File`` printer
    preferences from the about:config page before every print.
    """

    def __init__(self):
        """Create the Firefox profile, apply the preferences and start Firefox."""
        self.profile = webdriver.FirefoxProfile()
        self.profile.set_preference('services.sync.prefs.sync.browser.download.manager.showWhenStarting', False)
        self.profile.set_preference('pdfjs.disabled', True)
        self.profile.set_preference('print.always_print_silent', True)
        self.profile.set_preference('print.show_print_progress', False)
        self.profile.set_preference('browser.download.show_plugins_in_list', False)

        self.profile.set_preference('browser.download.folderList', 2)
        self.profile.set_preference('browser.download.dir', '')
        self.profile.set_preference('browser.download.manager.showWhenStarting', False)
        self.profile.set_preference('browser.aboutConfig.showWarning', False)

        # Blank out every page header and footer slot.
        self.profile.set_preference('print.print_headerright', '')
        self.profile.set_preference('print.print_headercenter', '')
        self.profile.set_preference('print.print_headerleft', '')
        self.profile.set_preference('print.print_footerright', '')
        self.profile.set_preference('print.print_footercenter', '')
        self.profile.set_preference('print.print_footerleft', '')
        self.profile.set_preference('browser.helperApps.neverAsk.saveToDisk', 'application/octet-stream;application/vnd.ms-excel;text/html')

        foxdriver = r'C:\Users\AShen\Documents\Workspace\geckodriver.exe'
        self.driver = webdriver.Firefox(
            executable_path=foxdriver,
            firefox_profile=self.profile
        )
        time.sleep(1)

    @staticmethod
    def _build_print_prefs_script(filepath):
        """Return the JavaScript that points the file printer at ``filepath``.

        The path is emitted with ``json.dumps`` so that backslashes (Windows
        paths), quotes and non-ASCII characters reach the preference
        unchanged. Pasting the raw path between quotes would let JavaScript
        treat ``\\U``, ``\\n`` and similar sequences as escapes, and a quote
        in the path would break the script.
        """
        import json

        js_filepath = json.dumps(str(filepath))
        return """
        var prefs = Components.classes['@mozilla.org/preferences-service;1'].getService(Components.interfaces.nsIPrefBranch);
        prefs.setBoolPref('print.always_print_silent', true);
        prefs.setCharPref('print_printer', 'Print to File');
        prefs.setBoolPref('print.printer_Print_to_File.print_to_file', true);
        prefs.setCharPref('print.printer_Print_to_File.print_to_filename', {});
        prefs.setBoolPref('print.printer_Print_to_File.show_print_progress', true);
        """.format(js_filepath)

    def get_page_and_print(self, page, filepath):
        """Print ``page`` into the file ``filepath``.

        Args:
            page: URL of the page to print.
            filepath: Path written to the printer's ``print_to_filename``
                preference.
        """
        # The preference script is executed on the about:config page
        # (presumably because the preferences service is only reachable
        # from a privileged page - confirm for your Firefox version).
        self.driver.get('about:config')
        time.sleep(1)

        # Set Configurations
        self.driver.execute_script(self._build_print_prefs_script(filepath))
        time.sleep(1)

        # Get site to print in pdf
        self.driver.get(page)
        time.sleep(2)
        self.driver.execute_script("window.print();")

        

if __name__ == "__main__":
    # Guarded so that importing this module does not launch a browser.
    browser_that_prints = printing_browser()
    # Print the page into mywebpage.pdf inside the current working directory.
    browser_that_prints.get_page_and_print(
        'http://www.google.com',
        os.path.join(os.getcwd(), 'mywebpage.pdf'),
    )
Michel Metran
  • 513
  • 5
  • 15
  • I see you adjusted the margins. Can we also convert a known screen width/height into page size so that the PDF will print in the same size/shape as the webpage without any bleed? – MrChadMWood May 18 '23 at 16:38
0

Oh, it is very easy if you know about pyautogui. This is an amazing module that allows you to automate your cursor and keyboard. Essentially, you need to figure out where on the screen the popup appears and use pyautogui to type the filename and click for you. All you need to add is:

# Pause before typing into the popup.
time.sleep(3)

# Random numeric suffix (0 to 1000 inclusive) appended to the base name.
suffix = random.randint(0, 1000)
file_name = 'name_pdf ' + str(suffix)
print(file_name)

# Type the name, then click at fixed screen coordinates.
click_x, click_y = 512, 449
pyautogui.typewrite(file_name)
pyautogui.click(click_x, click_y)

Entire code structure will look like this:

import os
import random
import time

import pyautogui
from selenium import webdriver

class printing_browser(object):
    """Wrapper around a Firefox driver started with silent-print preferences."""

    def __init__(self):
        """Prepare the profile preferences and launch Firefox via geckodriver."""
        # Preference name -> value; applied in insertion order.
        settings = {
            "services.sync.prefs.sync.browser.download.manager.showWhenStarting": False,
            "pdfjs.disabled": True,
            "print.always_print_silent": True,
            "print.show_print_progress": False,
            "browser.download.show_plugins_in_list": False,
        }
        self.profile = webdriver.FirefoxProfile()
        for key, value in settings.items():
            self.profile.set_preference(key, value)

        driver_binary = r'C:\Users\Pranjal Pathak\Desktop\Titanic Kaggle\geckodriver.exe'
        self.driver = webdriver.Firefox(
            executable_path=driver_binary,
            firefox_profile=self.profile,
        )
        # Fixed pause once the browser has been started.
        time.sleep(5)

    def get_page_and_print(self, page):
        """Open ``page`` and trigger ``window.print()`` in the browser.

        Args:
            page: URL passed to the driver's ``get``.
        """
        self.driver.get(page)
        # Fixed pause before the print call.
        time.sleep(5)
        self.driver.execute_script("window.print();")

if __name__ == "__main__":
    browser_that_prints = printing_browser()
    browser_that_prints.get_page_and_print('http://www.python.org/')

    # The GUI automation below is kept under the guard as well, so that
    # importing this module never types or clicks on the user's screen.

    # Pause before typing into the popup.
    time.sleep(3)

    # Random numeric suffix (0 to 1000 inclusive) so the file name varies
    # between runs.
    i = random.randint(0, 1000)
    file_name = 'name_pdf ' + str(i)
    print(file_name)

    # Type the name, then click at fixed screen coordinates; the coordinates
    # depend on the screen layout and may need adjusting.
    pyautogui.typewrite(file_name)
    pyautogui.click(512, 449)

Note: 1. I build the file name as "name_pdf " plus a random integer between 0 and 1000, so the name changes from run to run. This way the file can be saved every time you run the code, because the name will usually differ from the previous ones.

  2. If this types the name but does not save the file, you might want to change the coordinates of the click. Let me know if that happens.
Pranjal Pathak
  • 28
  • 1
  • 1
  • 7