Benutzer-Werkzeuge

Webseiten-Werkzeuge


de:sysadmin:tools:gcloud-storage

gcloud-storage-transfer Script (Python2)

Kleines Python-Script, um Daten von einem lokalen Verzeichnis in einen Google Cloud Storage Bucket zu verschieben.

Stand: April 2018 (getestet mit Python 2.7)

LEGACY PYTHON
Bei neuen Projekten besser Python 3 verwenden!
Dieses Skript in Python 3

#!/usr/bin/env python2
"""
gcloud-storage-transfer.py
 
2018-05 Hella Breitkopf https://www.unixwitch.de for Python2
 
Transfer files from a local directory to an
example google cloud storage bucket.
 
Authorization for cloud access is expected in gcloud-storage-transfer-credentials.json in the directory of this script
 
Usage:
    gcloud-storage-transfer.py [-h][-v][-d][-n<int>|--upload=<integer>]
 
    -h | --help  show this help text and exit
    -v verbose (increase verbosity with -vv and -vvv)
    -d debug
    -n <number> | --upload=<number>
 
restrict file upload to a certain number of files with -n<number> or --upload=<number>
(default restriction is 15 files per run of this script)
"""
 
import os
import sys
import getopt
import errno
import shutil
from google.cloud import storage
 
 
# runtime switches, adjusted by the command line options
_DEBUG = False
_VERBOSE = False
_VERBOSE_LEVEL = 0  # extra verbosity steps (also used for debugging verbosity)
_MAX_UPLOAD_NO = 15  # may be overridden with -n / --upload

### LOCAL STORAGE VARIABLES
# directory holding the files which are not transferred yet
local_dir = "/tmp/example-files"
# target directory for files whose transfer to google cloud is complete
## (write "DELETE" as target if the files should not be kept)
#local_transfered_dir="/tmp/gcloud-storage-transfer-done"
local_transfered_dir = "DELETE"
# file name -> size of the files found locally (in local_dir)
LOCAL_FILE_DICT = dict()

### GOOGLE CLOUD STORAGE VARIABLES
SCRIPT_DIR = sys.path[0]  # the credentials are expected in the script dir
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
    SCRIPT_DIR + "/gcloud-storage-transfer-credentials.json"
)
GS_BUCKET_NAME = "example"
GS_PATH = "log_files"
# file name -> size of files already uploaded, so they are not sent again
GS_FILE_DICT = dict()
 
 
def createdir(dir):
    """Make sure the local directory *dir* exists.

    A directory which is already there is accepted silently. Any other
    failure of os.mkdir ends the script with exit code 5; a path which
    exists but is not a directory ends it with exit code 6.

    Returns an empty tuple.
    """
    try:
        os.mkdir(dir)
    except OSError as mkdir_error:
        path_exists = mkdir_error.errno == errno.EEXIST
        if not path_exists:
            print("Error while creating directory")
            print(str(mkdir_error))
            sys.exit(5)
        if os.path.isdir(dir):
            # somebody created the directory before us, nothing left to do
            return ()
        print("Can't create directory %s, other file is using this path" % dir)
        print(str(mkdir_error))
        sys.exit(6)
    return ()
 
 
def fill_transfered_dict():
    """Record which files exist remotely and locally, including their size.

    Fills GS_FILE_DICT with ``name -> size`` for every blob below
    ``GS_PATH/`` in the bucket GS_BUCKET_NAME, and LOCAL_FILE_DICT with
    ``name -> size`` for every entry of local_dir.

    Exits with code 11 if the storage client cannot be created and with
    code 12 if the bucket cannot be opened. A missing or unreadable
    local_dir is not an error here; LOCAL_FILE_DICT is then left with
    whatever could be collected.

    Returns an empty tuple.
    """

    global GS_FILE_DICT
    global LOCAL_FILE_DICT

    try:
        client = storage.Client()
    except Exception as e:
        print("ERROR connecting to google storage")
        print(str(e))
        sys.exit(11)

    try:
        bucket = client.get_bucket(GS_BUCKET_NAME)
    except Exception as e:
        print("ERROR connecting to google storage bucket")
        print(str(e))
        sys.exit(12)

    # List with the trailing slash so sibling prefixes (e.g. "log_files_old/")
    # are not picked up, and cut off only the leading prefix: str.replace
    # would also remove later occurrences inside the object name.
    gs_prefix = GS_PATH + '/'
    for blob in bucket.list_blobs(prefix=gs_prefix):
        GS_FILE_DICT[blob.name[len(gs_prefix):]] = blob.size

    try:
        for local_file in os.listdir(local_dir):
            if _DEBUG: print(local_file)
            # the size has to be read via the full path, not relative to cwd
            local_filesize = os.path.getsize(os.path.join(local_dir, local_file))
            if _DEBUG: print(local_filesize)
            LOCAL_FILE_DICT[local_file] = local_filesize
    except OSError:
        # nothing found, that does not hurt here
        # (maybe the directory is not there, yet)
        pass

    if _VERBOSE: print("-> %s Files found on google cloud" % len(GS_FILE_DICT))

    return ()
 
 
def _retire_local_file(local_file, filename):
    """Move a transferred file to local_transfered_dir, or delete it in "DELETE" mode."""
    if local_transfered_dir == "DELETE":
        os.remove(local_file)
    else:
        shutil.move(local_file, local_transfered_dir + '/' + filename)


def push_files_to_bucket():
    """Push the files from local_dir to the google cloud bucket.

    Files which GS_FILE_DICT already lists with the same size are not
    uploaded again; they are only moved to local_transfered_dir (or
    deleted in "DELETE" mode). All other regular files are uploaded to
    ``GS_PATH/<filename>`` and then moved/deleted the same way. At most
    _MAX_UPLOAD_NO files are uploaded per call; the remaining files stay
    in local_dir for the next run. Entries which are not regular files
    (e.g. sub directories) are skipped.

    Exits with code 7 if reading or uploading a file fails.

    Returns an empty tuple.
    """

    if local_transfered_dir != "DELETE":
        createdir(local_transfered_dir)

    client = storage.Client()
    bucket = client.get_bucket(GS_BUCKET_NAME)

    uploaded = 0

    # sorted, so it is predictable which files are picked when the upload
    # limit is smaller than the number of waiting files
    for filename in sorted(os.listdir(local_dir)):

        local_file = local_dir + "/" + filename
        if not os.path.isfile(local_file):
            # directories and the like can neither be uploaded nor removed
            continue
        localsize = os.path.getsize(local_file)

        ### upload only files which are not uploaded (or which differ in size)
        if filename in GS_FILE_DICT:
            gssize = GS_FILE_DICT[filename]
            if gssize == localsize:
                if _VERBOSE_LEVEL > 0:
                    print("found on gs storage:"
                          "%s with same size: %s" % (filename, gssize))
                # delete/move to "done-dir" and continue with next file:
                _retire_local_file(local_file, filename)
                continue

            if _VERBOSE:
                print("! found on gs storage: "
                      "%s but with different size: %s (local size: %s) !" %
                      (filename, gssize, localsize))
            # fall through: transfer this other version of the file

        if uploaded >= _MAX_UPLOAD_NO:
            if _VERBOSE:
                print("Upload limit of %s files reached" % _MAX_UPLOAD_NO)
            break

        remotefile = GS_PATH + "/" + filename
        try:
            # binary mode: the content must reach the bucket unmodified;
            # "with" closes the handle even if the upload fails
            with open(local_file, "rb") as fh:
                newblob = bucket.blob(remotefile)
                if _VERBOSE:
                    print("Transfer file %s to google storage" % filename)
                newblob.upload_from_file(fh)
        except Exception as e:
            print("Error while uploading file to gs storage: %s" % filename)
            print(str(e))
            sys.exit(7)

        uploaded += 1
        _retire_local_file(local_file, filename)

    if _VERBOSE:
        print('New files uploaded to google storage %s/%s' % (GS_BUCKET_NAME, GS_PATH))

    return ()
 
 
def increase_verbosity():
    """Handle one -v option.

    The first call switches _VERBOSE on; every further call raises
    _VERBOSE_LEVEL by one.
    """
    global _VERBOSE
    global _VERBOSE_LEVEL
    was_verbose = _VERBOSE
    _VERBOSE = True
    if was_verbose:
        _VERBOSE_LEVEL += 1
 
 
def print_usage():
    """Print the module docstring, which holds the usage text."""
    print(__doc__)
 
 
def main():
    """Read the startup options from sys.argv into the module globals.

    -h / --help          print the usage text and exit with code 0
    -v                   increase verbosity (may be repeated)
    -d                   switch debug output on (_DEBUG)
    -n N / --upload=N    set the upload limit (_MAX_UPLOAD_NO)

    Exits with code 9 if the upload limit is not an integer and with
    code 10 if an option is unknown or lacks its argument.
    """
    global _DEBUG
    global _MAX_UPLOAD_NO

    try:
        opts, _args = getopt.getopt(sys.argv[1:], 'hvdn:', ["help", "upload="])
    except getopt.GetoptError as err:
        # getopt rejects unknown options itself; without this handler the
        # user would get a traceback instead of a message
        print("wrong option: %s" % err)
        sys.exit(10)

    for o, a in opts:
        if o in ("-h", "--help"):
            print_usage()
            sys.exit(0)
        elif o == "-v":
            increase_verbosity()
        elif o == "-d":
            _DEBUG = True
        elif o in ("-n", "--upload"):
            try:
                a = int(a)
            except ValueError:
                print("the number of files to tranfer (-n,--upload=) must be integer!)")
                print_usage()
                sys.exit(9)
            _MAX_UPLOAD_NO = a
        else:
            # defensive only: getopt returns nothing but the options above
            print("wrong option %s %s" % (o, a))
            sys.exit(10)
 
if __name__ == '__main__':

    # evaluate the command line options first
    main()

    # collect the already uploaded and the local files in the dicts
    fill_transfered_dict()

    # the file lists are shown only in debug mode with a verbosity level above 2
    if _DEBUG and _VERBOSE_LEVEL > 2:
        print("files on google storage:  %s" % (GS_FILE_DICT.keys(),))
        print("files on local drive:  %s" % (LOCAL_FILE_DICT.keys(),))

    push_files_to_bucket()

    sys.exit(0)

    # the end
de/sysadmin/tools/gcloud-storage.txt · Zuletzt geändert: 2019-10-14 13:27 von hella

Seiten-Werkzeuge

Mastodon Twitter