Skip to content

Commit

Permalink
Fixed download of files with invalid char in name, correctly set file…
Browse files Browse the repository at this point in the history
… ending for google doc file types, don't enable abuse flag by default allow it to be passed as a parameter. (#7)

closes #5
  • Loading branch information
goldbattle authored Apr 20, 2021
1 parent 729e5f1 commit 129c61c
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 6 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ tokens/
drivedl.egg-info/
dist/
build/
venv/
.idea/

credentials.json
config.json
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ Note that on the first run, you will have to authorize the scope of the applicat
Adding an argument `--skip` to your command will skip existing files and not redownload them.
- By default the behaviour is to download everything without skipping.

## File Abuse

Adding the `--abuse` argument allows downloading files that Google has flagged as "abusive".
By passing it, you acknowledge that you are downloading a file which Google has marked as possible malware or spam.
An example of the error can be found in [this issue](https://github.com/prasmussen/gdrive/issues/182).

## Assigning extra processes:

Adding the `--proc` argument followed by an integer makes the application spawn that number of processes to perform the download. The default process count is 5.
Expand Down
12 changes: 9 additions & 3 deletions drivedl/drivedl.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def main(console_call=True):
search = False
skip = False
noiter = False
abuse = False

# File Listing
if len(sys.argv) < 2:
Expand All @@ -102,6 +103,9 @@ def main(console_call=True):
if '--skip' in sys.argv:
skip = True
sys.argv.remove('--skip')
if '--abuse' in sys.argv:
abuse = True
sys.argv.remove('--abuse')
if '--debug' in sys.argv:
util.DEBUG = True
sys.argv.remove('--debug')
Expand Down Expand Up @@ -134,15 +138,16 @@ def build_files():
path = ["".join([c for c in dirname if c.isalpha() or c.isdigit() or c in [' ', '-', '_', '.', '(', ')', '[', ']']]).rstrip() for dirname in path]
for f in files:
dest = os.path.join(destination, os.path.join(*path))
file_dest.append((service, f, dest, skip))
f['name'] = "".join([c for c in f['name'] if c.isalpha() or c.isdigit() or c in [' ', '-', '_', '.', '(', ')', '[', ']']]).rstrip()
file_dest.append((service, f, dest, skip, abuse))
if file_dest != []:
# First valid account found, break to prevent further searches
return True
except ValueError: # mimetype is not a folder
dlfile = service.files().get(fileId=folderid, supportsAllDrives=True).execute()
print(f"\nNot a valid folder ID. \nDownloading the file : {dlfile['name']}")
# Only use a single process for downloading 1 file
util.download(service, dlfile, destination, skip)
util.download(service, dlfile, destination, skip, abuse)
sys.exit(0)
except HttpError:
print(f"{Fore.RED}File not found in account: {acc}{Style.RESET_ALL}")
Expand Down Expand Up @@ -171,7 +176,8 @@ def build_files():

if service == None:
# No accounts found with access to the drive link, exit gracefully
print("No valid accounts with access to the file/folder. Exiting...")
print("No valid accounts with access to the file/folder.")
print("Have you run the drivedl --add command? Exiting...")
sys.exit(1)
try:
p = Pool(PROCESS_COUNT)
Expand Down
24 changes: 21 additions & 3 deletions drivedl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,25 @@ def querysearch(service, name=None, drive_id=None, is_folder=None, parent=None,
break
return items

def download(service, file, destination, skip=False, noiter=False):
def download(service, file, destination, skip=False, abuse=False, noiter=False):
# add file extension if we don't have one
mimeType = file['mimeType']
if "application/vnd.google-apps" in mimeType:
if "form" in mimeType: return -1
elif "document" in mimeType:
ext_file = '.docx'
elif "spreadsheet" in mimeType:
ext_file = '.xlsx'
elif "presentation" in mimeType:
ext_file = '.pptx'
else:
ext_file = '.pdf'
root, ext = os.path.splitext(file['name'])
if not ext:
file['name'] = file['name'] + ext_file
# file is a dictionary with file id as well as name
if skip and os.path.exists(os.path.join(destination, file['name'])):
return -1
mimeType = file['mimeType']
if "application/vnd.google-apps" in mimeType:
if "form" in mimeType: return -1
elif "document" in mimeType:
Expand All @@ -104,7 +118,7 @@ def download(service, file, destination, skip=False, noiter=False):
else:
dlfile = service.files().export_media(fileId=file['id'], mimeType='application/pdf')
else:
dlfile = service.files().get_media(fileId=file['id'], supportsAllDrives=True, acknowledgeAbuse=True)
dlfile = service.files().get_media(fileId=file['id'], supportsAllDrives=True, acknowledgeAbuse=abuse)
rand_id = str(uuid.uuid4())
os.makedirs('buffer', exist_ok=True)
fh = io.FileIO(os.path.join('buffer', rand_id), 'wb')
Expand All @@ -116,6 +130,10 @@ def download(service, file, destination, skip=False, noiter=False):
try:
status, done = downloader.next_chunk()
except Exception as ex:
if "abuse" in str(ex).lower():
if not noiter: print()
print(f"{Fore.RED}Abuse error for file{Style.RESET_ALL} {file['name']} ...")
rate_limit_count = 21
DEBUG_STATEMENTS.append(f'File Name: {file["name"]}, File ID: {file["id"]}, Exception: {ex}')
rate_limit_count += 1
fh.close()
Expand Down

0 comments on commit 129c61c

Please sign in to comment.