A small Python script that moves files from a local directory to a Google Cloud Storage bucket.
Last updated: October 2019, for Python 3.7
For historians: the old Python 2 variant can be found here.
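Before the full listing: the core of the script is the usual client → bucket → blob → upload_from_file pattern. A minimal sketch of just that step is shown here; the bucket name and paths match the example values used further down, and the file name hello.txt is purely hypothetical.

# Minimal upload sketch (assumption: hello.txt is a placeholder file name;
# credentials are expected via GOOGLE_APPLICATION_CREDENTIALS, see full script below).
from gcloud import storage

client = storage.Client()
bucket = client.get_bucket("example-gcloud-storage-transfer-txdxn7nq")
blob = bucket.blob("example_files/hello.txt")
with open("/tmp/example-files/hello.txt", "rb") as filehandle:
    blob.upload_from_file(filehandle)

The full script below adds option parsing, a size comparison against objects already in the bucket, and optional moving/deleting of files after a successful transfer.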
#!/usr/bin/env python3
"""
gcloud-storage-transfer.py3
2019-10  Hella Breitkopf  https://www.unixwitch.de
for Python3

Transfer files from a local directory to an example google cloud storage bucket.
Authorization for cloud access is expected in ./gcloud-storage-transfer-credentials.json

Usage: gcloud-storage-transfer.py3 [-v][-d][-n<int>|--upload=<integer>]

    -v  verbose (increase verbosity with -vv and -vvv)
    -d  debug
    -n <number> | --upload=<number>
        restrict file upload to a certain number of files
        with -n<number> or --upload=<number>
        (default restriction is 15 files per run of this script)
"""

import os
import sys
import getopt
import errno
import shutil
from gcloud import storage

_DEBUG = False
_VERBOSE = False
_VERBOSE_LEVEL = 0   # for verbosity (also for debugging verbosity)
_MAX_UPLOAD_NO = 15

### LOCAL STORAGE VARIABLES
# files that are not yet transferred are stored here:
LOCAL_DIR = "/tmp/example-files"
# where the files should be stored once the transfer to google cloud is complete
## (if you don't want to keep the files, write "DELETE" as target)
#LOCAL_TRANSFERED_DIR = "/tmp/gcloud-storage-transfer-done"
LOCAL_TRANSFERED_DIR = "DELETE"
# dict of files we have stored locally (in LOCAL_DIR)
LOCAL_FILE_DICT = {}

### GOOGLE CLOUD STORAGE VARIABLES
SCRIPT_DIR = sys.path[0]
# we expect the credentials in the script dir
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = \
    SCRIPT_DIR+"/gcloud-storage-transfer-credentials.json"
GS_BUCKET_NAME = "example-gcloud-storage-transfer-txdxn7nq"
GS_PATH = "example_files"
# dict of files which are already uploaded, so we don't transfer them again
GS_FILE_DICT = {}


def createdir(newdir):
    """create a local directory"""
    try:
        os.mkdir(newdir)
    except OSError as err:
        if err.errno != errno.EEXIST:
            print("Error while creating directory")
            print(str(err))
            sys.exit(5)
        if not os.path.isdir(newdir):
            print("Can't create directory %s, other file is using this path" % newdir)
            print(str(err))
            sys.exit(6)
    return


def fill_transfered_dict():
    """what files are already uploaded -> dict
       include file size
    """
    global GS_FILE_DICT
    global LOCAL_FILE_DICT

    try:
        client = storage.Client()
    except Exception as err:
        print("ERROR connecting to google storage")
        print(str(err))
        sys.exit(11)
    try:
        bucket = client.get_bucket(GS_BUCKET_NAME)
    except Exception as err:
        print("ERROR connecting to google storage bucket")
        print(str(err))
        sys.exit(12)

    # remember name and size of every object already in the bucket
    for blob in bucket.list_blobs(prefix=GS_PATH):
        gsfilename = blob.name.replace(GS_PATH+'/', '')
        gsfilesize = blob.size
        GS_FILE_DICT[gsfilename] = gsfilesize

    # remember name and size of every file in the local directory
    try:
        for local_file in os.listdir(LOCAL_DIR):
            if _DEBUG:
                print(local_file)
            local_filesize = os.path.getsize(LOCAL_DIR+"/"+local_file)
            if _DEBUG:
                print(local_filesize)
            LOCAL_FILE_DICT[local_file] = local_filesize
    except OSError:
        # nothing found, that does not hurt here
        # (maybe the directory is not there, yet)
        pass

    if _VERBOSE:
        print("-> %s files found on google cloud" % len(GS_FILE_DICT))
    return


def push_files_to_bucket():
    """push files from LOCAL_DIR to google cloud bucket"""

    if LOCAL_TRANSFERED_DIR != "DELETE":
        createdir(LOCAL_TRANSFERED_DIR)

    client = storage.Client()
    bucket = client.get_bucket(GS_BUCKET_NAME)

    upload_count = 0
    filelist = os.listdir(LOCAL_DIR)
    for filename in filelist:
        local_file = LOCAL_DIR+"/"+filename
        localsize = os.path.getsize(local_file)

        ### upload only files which are not yet uploaded (or which differ in size)
        if filename in list(GS_FILE_DICT.keys()):
            gssize = GS_FILE_DICT[filename]
            if gssize == localsize:
                if _VERBOSE_LEVEL > 0:
                    print("found on gs storage: "
                          "%s with same size: %s"
                          % (filename, GS_FILE_DICT[filename]))
                # delete/move to "done-dir" and continue with next file:
                if LOCAL_TRANSFERED_DIR != "DELETE":
                    shutil.move(local_file, LOCAL_TRANSFERED_DIR+'/'+filename)
                else:
                    os.remove(local_file)
                continue
            else:
                if _VERBOSE:
                    print("! found on gs storage: "
                          "%s but with different size: %s (local size: %s) !"
                          % (filename, GS_FILE_DICT[filename], localsize))
                # transfer this other version of the file

        # honour the upload limit (-n/--upload, default 15 files per run)
        if upload_count >= _MAX_UPLOAD_NO:
            if _VERBOSE:
                print("upload limit of %s files reached, stopping here" % _MAX_UPLOAD_NO)
            break

        remotefile = GS_PATH+"/"+filename
        try:
            if _DEBUG:
                print("try to read local ", local_file)
            if _DEBUG:
                print("try to open remote 'blob' for ", remotefile)
            newblob = bucket.blob(remotefile)
            if _VERBOSE:
                print("Transfer file %s to google storage" % filename)
            with open(local_file, "rb") as filehandle:
                newblob.upload_from_file(filehandle)
        except Exception as err:
            print("Error while uploading file to gs storage: %s" % filename)
            print(str(err))
            sys.exit(7)
        upload_count = upload_count + 1

        if LOCAL_TRANSFERED_DIR != "DELETE":
            shutil.move(local_file, LOCAL_TRANSFERED_DIR+'/'+filename)
        else:
            os.remove(local_file)

    if _VERBOSE:
        print('New files uploaded to google storage %s/%s'
              % (GS_BUCKET_NAME, GS_PATH))
    return


def increase_verbosity():
    """set verbosity level one up"""
    global _VERBOSE
    global _VERBOSE_LEVEL
    if _VERBOSE:
        _VERBOSE_LEVEL = _VERBOSE_LEVEL+1
    _VERBOSE = True


def print_usage():
    """print how to use this"""
    print(__doc__)


def main():
    """read startup options"""
    opts, _ = getopt.getopt(sys.argv[1:], 'hvdn:', ["help", "upload="])
    global _DEBUG
    global _MAX_UPLOAD_NO

    for option, argument in opts:
        if option in ("-h", "--help"):
            print_usage()
            sys.exit(0)
        elif option == "-v":
            increase_verbosity()
        elif option == "-d":
            _DEBUG = True
        elif option in ("-n", "--upload"):
            try:
                argument = int(argument)
            except ValueError:
                print("the number of files to transfer (-n, --upload=) must be an integer!")
                print_usage()
                sys.exit(9)
            _MAX_UPLOAD_NO = argument
        else:
            print("wrong option %s %s" % (option, argument))
            sys.exit(10)


if __name__ == '__main__':
    main()
    # what files are already uploaded? -> write to dict
    fill_transfered_dict()
    if _VERBOSE_LEVEL > 2:
        if _DEBUG:
            print("files on google storage: ", list(GS_FILE_DICT.keys()))
        if _DEBUG:
            print("files on local drive: ", list(LOCAL_FILE_DICT.keys()))
    push_files_to_bucket()
    sys.exit(0)
    # the end
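To check that a run actually worked, the same API calls the script already uses can be reused for a quick listing of the bucket contents. This is only a sketch; it assumes the same credentials file, bucket name and prefix as above (adjust them to your own setup).

# Quick verification sketch: list name and size of every object under the prefix.
import os
import sys
from gcloud import storage

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = \
    sys.path[0] + "/gcloud-storage-transfer-credentials.json"

client = storage.Client()
bucket = client.get_bucket("example-gcloud-storage-transfer-txdxn7nq")
for blob in bucket.list_blobs(prefix="example_files"):
    print(blob.name, blob.size)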
This applies to all tips, tricks & cheat sheets:
these are simple, sometimes trivial notes for my personal use,
which more or less by accident are also publicly readable here
(maybe they help someone). Use at your own risk.
Bug reports, thank-you notes, etc. please to: web.21@unixwitch.de
More tools / cheat sheets