Kleines Python-Script, um Daten von einem lokalen Verzeichnis in einen Google Cloud Storage Bucket zu verschieben.
Stand: April 2018 (getestet mit Python 2.7)
LEGACY PYTHON
Bei neuen Projekten besser Python 3 verwenden!
→ Dieses Skript in Python 3 ←
#!/usr/bin/env python2
"""
gcloud-storage-transfer.py

2018-05 Hella Breitkopf https://www.unixwitch.de

for Python2

Transfer files from a local directory
to an example google cloud storage bucket.

Authorization for cloud access is expected in
./gcloud-storage-transfer-credentials.json

Usage:
gcloud-storage-transfer.py [-h][-v][-d][-n<int>|--upload=<integer>]

  -h | --help
      show this help text
  -v  verbose (increase verbosity with -vv and -vvv)
  -d  debug
  -n <number> | --upload=<number>
      restrict file upload to a certain number of files
      with -n<number> or --upload=<number>
      (default restriction is 15 files per run of this script)
"""

# NOTE: only constructs that are valid on Python 2.7 *and* Python 3 are used
# below (print with a single parenthesised argument, "except ... as e").

import os
import sys
import getopt
import errno
import shutil

from google.cloud import storage

_DEBUG = False
_VERBOSE = False
_VERBOSE_LEVEL = 0  # for verbosity (also for debugging verbosity)
_MAX_UPLOAD_NO = 15  # maximum number of files uploaded per run (-n / --upload)

### LOCAL STORAGE VARIABLES
# not yet transfered files are stored here:
local_dir = "/tmp/example-files"
# where the files should be stored if the transfer to google cloud is complete
## (if you don't want to keep the files write "DELETE" as target)
#local_transfered_dir="/tmp/gcloud-storage-transfer-done"
local_transfered_dir = "DELETE"

# store list of files we have stored locally (in local_dir): name -> size
LOCAL_FILE_DICT = {}

### GOOGLE CLOUD STORAGE VARIABLES
SCRIPT_DIR = sys.path[0]
# we expect the credentials in the script dir
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = \
    SCRIPT_DIR + "/gcloud-storage-transfer-credentials.json"
GS_BUCKET_NAME = "example"
GS_PATH = "log_files"
# store list of files which are already uploaded, so we don't transfer them
# again: name (relative to GS_PATH) -> size
GS_FILE_DICT = {}


def createdir(dir):
    """Create the local directory *dir* unless it already exists.

    Exits with status 5 if the directory cannot be created and with
    status 6 if the path exists but is not a directory.

    Returns an empty tuple (historical; the callers in this script ignore it).
    """
    try:
        os.mkdir(dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            print("Error while creating directory")
            print(str(e))
            sys.exit(5)
        # EEXIST: fine if it is a directory, fatal if something else is there.
        # Handled inside the except block because "e" is not available
        # after the block on Python 3.
        if not os.path.isdir(dir):
            print("Can't create directory %s, other file is using this path" % dir)
            print(str(e))
            sys.exit(6)
    return ()


def fill_transfered_dict():
    """Record which files exist remotely and locally, including their sizes.

    Fills GS_FILE_DICT with the blobs found below GS_PATH in the bucket
    GS_BUCKET_NAME and LOCAL_FILE_DICT with the entries of local_dir.

    Exits with status 11 if the storage client cannot be created and with
    status 12 if the bucket cannot be opened. A missing or unreadable
    local_dir is tolerated (LOCAL_FILE_DICT simply stays empty).

    Returns an empty tuple (historical; the callers in this script ignore it).
    """
    try:
        client = storage.Client()
    except Exception as e:
        print("ERROR connecting to google storage")
        print(str(e))
        sys.exit(11)
    try:
        bucket = client.get_bucket(GS_BUCKET_NAME)
    except Exception as e:
        print("ERROR connecting to google storage bucket")
        print(str(e))
        sys.exit(12)

    # List with the trailing slash so that sibling paths such as
    # "<GS_PATH>_old/..." are not picked up, and strip only the leading
    # prefix (str.replace would also remove later occurrences).
    prefix = GS_PATH + '/'
    for blob in bucket.list_blobs(prefix=prefix):
        gsfilename = blob.name[len(prefix):]
        if not gsfilename:
            # the "directory" placeholder object itself
            continue
        GS_FILE_DICT[gsfilename] = blob.size

    try:
        local_names = os.listdir(local_dir)
    except OSError:
        # nothing found, that does not hurt here
        # (maybe the directory is not there, yet)
        local_names = []

    for local_file in local_names:
        if _DEBUG:
            print(local_file)
        try:
            local_filesize = os.path.getsize(os.path.join(local_dir, local_file))
        except OSError:
            # entry vanished between listing and stat; ignore it
            continue
        if _DEBUG:
            print(local_filesize)
        LOCAL_FILE_DICT[local_file] = local_filesize

    if _VERBOSE:
        print("-> %s Files found on google cloud" % len(GS_FILE_DICT))

    return ()


def _retire_local_file(local_file, filename):
    """Move a transferred file to local_transfered_dir, or delete it."""
    if local_transfered_dir != "DELETE":
        shutil.move(local_file, local_transfered_dir + '/' + filename)
    else:
        os.remove(local_file)


def push_files_to_bucket():
    """Push files from local_dir to the google cloud bucket.

    Files that are already present in GS_FILE_DICT with the same size are
    not uploaded again; they are only moved/deleted locally. Files that are
    missing remotely or differ in size are uploaded to GS_PATH and then
    moved/deleted. At most _MAX_UPLOAD_NO files are uploaded per call.

    Exits with status 7 if an upload fails.

    Returns an empty tuple (historical; the callers in this script ignore it).
    """
    if local_transfered_dir != "DELETE":
        createdir(local_transfered_dir)

    client = storage.Client()
    bucket = client.get_bucket(GS_BUCKET_NAME)

    try:
        filelist = os.listdir(local_dir)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        # the source directory is not there (yet): nothing to transfer
        if _VERBOSE:
            print("Local directory %s does not exist, nothing to upload" % local_dir)
        return ()

    uploaded = 0
    for filename in filelist:
        local_file = os.path.join(local_dir, filename)
        if not os.path.isfile(local_file):
            # sub-directories and other non-regular entries cannot be uploaded
            continue
        localsize = os.path.getsize(local_file)

        ### upload only files which are not uploaded (or which differ in size)
        if filename in GS_FILE_DICT:
            gssize = GS_FILE_DICT[filename]
            if gssize == localsize:
                if _VERBOSE_LEVEL > 0:
                    print("found on gs storage:"
                          "%s with same size: %s" % (filename, gssize))
                # delete/move to "done-dir" and continue with next file:
                _retire_local_file(local_file, filename)
                continue
            if _VERBOSE:
                print("! found on gs storage: "
                      "%s but with different size: %s (local size: %s) !"
                      % (filename, gssize, localsize))
            # fall through: transfer this other version of the file

        # enforce the -n / --upload restriction on real uploads only
        if uploaded >= _MAX_UPLOAD_NO:
            if _VERBOSE:
                print("Upload limit of %s files reached, "
                      "remaining files are left for the next run" % _MAX_UPLOAD_NO)
            break

        remotefile = GS_PATH + "/" + filename
        try:
            # binary mode: the content must reach the bucket unmodified;
            # "with" closes the handle even when open/upload raises
            with open(local_file, "rb") as fh:
                newblob = bucket.blob(remotefile)
                if _VERBOSE:
                    print("Transfer file %s to google storage" % filename)
                newblob.upload_from_file(fh)
        except Exception as e:
            print("Error while uploading file to gs storage: %s" % filename)
            print(str(e))
            sys.exit(7)

        uploaded += 1
        _retire_local_file(local_file, filename)

    if _VERBOSE:
        print('New files uploaded to google storage %s/%s' % (GS_BUCKET_NAME, GS_PATH))

    return ()


def increase_verbosity():
    """Handle one -v flag: the first enables verbose mode, each further one raises the level."""
    global _VERBOSE
    global _VERBOSE_LEVEL
    if _VERBOSE:
        _VERBOSE_LEVEL = _VERBOSE_LEVEL + 1
    _VERBOSE = True


def print_usage():
    """Print the module docstring as usage text."""
    print(__doc__)


def main():
    """Read the startup options and set the module-level switches.

    Exits with status 0 after printing the help text, with status 9 if the
    upload limit is not an integer and with status 10 on an unknown option.
    """
    global _DEBUG
    global _MAX_UPLOAD_NO

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hvdn:', ["help", "upload="])
    except getopt.GetoptError as e:
        # getopt rejects unknown options itself, so report them here
        print("wrong option: %s" % e)
        print_usage()
        sys.exit(10)

    for o, a in opts:
        if o in ("-h", "--help"):
            print_usage()
            sys.exit(0)
        elif o == "-v":
            increase_verbosity()
        elif o == "-d":
            _DEBUG = True
        elif o in ("-n", "--upload"):
            try:
                a = int(a)
            except ValueError:
                print("the number of files to tranfer (-n,--upload=) must be integer!)")
                print_usage()
                sys.exit(9)
            _MAX_UPLOAD_NO = a
        else:
            print("wrong option %s %s" % (o, a))
            sys.exit(10)


if __name__ == '__main__':

    main()

    # what files are already uploaded? -> write to dict
    fill_transfered_dict()

    if _VERBOSE_LEVEL > 2:
        if _DEBUG:
            print("files on google storage:  %s" % list(GS_FILE_DICT.keys()))
        if _DEBUG:
            print("files on local drive:  %s" % list(LOCAL_FILE_DICT.keys()))

    push_files_to_bucket()

    sys.exit(0)

# the end
gilt für alle Tipps, Tricks & Spickzettel:
dies sind einfache, teils banale Notizen für meinen persönlichen Gebrauch,
die hier eher zufällig auch öffentlich lesbar sind
(vielleicht hilft es ja jemandem weiter). Verwendung auf eigene Gefahr.
Fehler-Hinweise, Dankesschreiben, etc. bitte an: web.21@unixwitch.de
weitere Tools / Spickzettel