The premise is:
- List all files.
- If a file has no 'parents' field, it means it's an orphan file.
- So, the script deletes them.
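Before you start, you need:
- A `credentials.json` OAuth client-secrets file in the working directory (the script reads it on the first run).
- The `google-api-python-client` and `google-auth-oauthlib` packages installed.
Ready-to-copy-and-paste demo: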
from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
# OAuth scopes requested when the authorization flow runs.
# If modifying these scopes, delete the file token.pickle: it caches the
# tokens obtained for the previous scopes, so the flow must run again.
SCOPES = ['https://www.googleapis.com/auth/drive']
def callback(request_id, response, exception):
    """Report the failure of a single request inside a batched HTTP call.

    This function is handed to ``service.new_batch_http_request`` and is
    invoked once per queued request after the batch has been executed.

    Args:
        request_id: Identifier of the request within the batch (unused).
        response: Result of the request (unused).
        exception: The error raised for this request, or ``None`` when the
            request succeeded.
    """
    # Successful requests are silent; only failures are printed.
    if exception is not None:
        print("Exception:", exception)
def _load_credentials(token_path='token.pickle',
                      secrets_path='credentials.json'):
    """Return usable Drive credentials, running the OAuth flow if needed.

    Args:
        token_path: File in which the access and refresh tokens are cached.
            It is created automatically when the authorization flow
            completes for the first time.
        secrets_path: OAuth client-secrets file used to start the flow.

    Returns:
        The credentials object to pass to ``build``.
    """
    creds = None
    if os.path.exists(token_path):
        # SECURITY NOTE: unpickling executes whatever the file contains.
        # Only load a token file that this script wrote itself.
        with open(token_path, 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                secrets_path, SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run.
        with open(token_path, 'wb') as token:
            pickle.dump(creds, token)
    return creds


def _list_orphan_ids(service, page_size=100):
    """Return the ids of all listed files that have no 'parents' field.

    Args:
        service: Drive v3 service object.
        page_size: Number of files requested per page.

    Returns:
        A list of file ids, in listing order.
    """
    orphans = []
    # None means "first page"; an empty string is not a valid page token.
    page_token = None
    while True:
        # Only 'id' and 'parents' are read below, so only they are requested.
        page = service.files().list(
            pageToken=page_token,
            pageSize=page_size,
            fields="nextPageToken, files(id, parents)",
        ).execute()
        for item in page.get('files', []):
            # NOTE: a file is an orphan only when the field is absent
            # altogether; a present-but-empty field is left alone.
            if 'parents' not in item:
                print("Orphan file found.")
                orphans.append(item['id'])
            elif item['parents']:
                print("File with a parent found.")
        page_token = page.get('nextPageToken')
        # Exit condition: the last page carries no continuation token.
        if not page_token:
            break
    return orphans


def _delete_in_batches(service, file_ids, on_response, batch_size=100):
    """Delete the given files through batched HTTP requests.

    Args:
        service: Drive v3 service object.
        file_ids: Ids of the files to delete; the list is not modified.
        on_response: Callback invoked once per request after each batch,
            with ``(request_id, response, exception)``.
        batch_size: Maximum number of delete requests per batch. The default
            of 100 is the cap the original loop used (presumably the Drive
            API batch limit - see the batching guide).

    Returns:
        The number of batches that were executed.
    """
    batch_counter = 0
    # Slicing yields a short final chunk instead of indexing past the end
    # of the list when the total is not a multiple of batch_size.
    for start in range(0, len(file_ids), batch_size):
        chunk = file_ids[start:start + batch_size]
        batch = service.new_batch_http_request(callback=on_response)
        for file_id in chunk:
            print("File with id {0} queued for deletion.".format(file_id))
            batch.add(service.files().delete(fileId=file_id))
        batch.execute()
        batch_counter += 1
        # Report the real size of this batch, not the nominal batch size.
        print("BATCH {0} DELETED - {1} FILES DELETED".format(batch_counter,
                                                             len(chunk)))
    return batch_counter


def main():
    """
    Description:
    Shows basic usage of the Drive v3 API to delete orphan files.

    Loads (or obtains) the user's credentials, lists every file that has no
    'parents' field and deletes those files in batches. Progress and
    per-request errors are printed to standard output.
    """
    # --- CHECK CREDENTIALS ---
    creds = _load_credentials()

    # --- OPEN CONNECTION ---
    service = build('drive', 'v3', credentials=creds)

    print("LISTING ORPHAN FILES")
    print("-----------------------------")
    orphans = _list_orphan_ids(service)

    print("DELETING ORPHAN FILES")
    print("-----------------------------")
    _delete_in_batches(service, orphans, callback)
# Run the clean-up only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
This method won't delete files in the root directory, because their 'parents' field is set (it holds the 'root' value), so they are not orphans. If some of your orphan files are not listed, it means they are already being deleted automatically by Google; that process might take up to 24 hours.