Benutzer-Werkzeuge

Webseiten-Werkzeuge


de:sysadmin:tools:gcloud-storage-python3

gcloud-storage-transfer Script (Python3)

Kleines Python-Script, um Daten von einem lokalen Verzeichnis in einen Google Cloud Storage Bucket zu verschieben.

Stand: Oktober 2019, für Python 3.7

Für Historiker ;-) : Hier geht es zur alten Python2-Variante

#!/usr/bin/env python3
"""
gcloud-storage-transfer.py3
 
2019-10 Hella Breitkopf https://www.unixwitch.de for Python3
 
Transfer files from a local directory to an
example google cloud storage bucket.
 
Authorization for cloud access is expected in ./gcloud-storage-transfer-credentials.json
 
Usage:
    gcloud-storage-transfer.py3 [-v][-d][-n<int>|--upload=<integer>]
 
    -v verbose (increase verbosity with -vv and -vvv)
    -d debug
    -n <number> | --upload=<number>
 
restrict file upload to a certain number of files with -n<number> or --upload=<number>
(default restriction is 15 files per run of this script)
"""

import os
import sys
import getopt
import errno
import shutil
from gcloud import storage  # third-party: google cloud storage client

# Runtime flags, set from the command line in main().
_DEBUG = False               # -d: extra debugging output
_VERBOSE = False             # -v: progress messages
_VERBOSE_LEVEL = 0   # for verbosity (also for debugging verbosity)
_MAX_UPLOAD_NO = 15          # -n/--upload: max number of files per run

### LOCAL STORAGE VARIABLES
# not yet transferred files are stored here:
LOCAL_DIR = "/tmp/example-files"
# where the files should be stored if the transfer to google cloud is complete
## (if you don't want to keep the files write "DELETE" as target)
#LOCAL_TRANSFERED_DIR = "/tmp/gcloud-storage-transfer-done"
LOCAL_TRANSFERED_DIR = "DELETE"
# store list of files we have stored locally (in LOCAL_DIR)
# mapping: filename -> size in bytes
LOCAL_FILE_DICT = {}

### GOOGLE CLOUD STORAGE VARIABLES
SCRIPT_DIR = sys.path[0]  # we expect the credentials in the script dir
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = \
    SCRIPT_DIR+"/gcloud-storage-transfer-credentials.json"
GS_BUCKET_NAME = "example-gcloud-storage-transfer-txdxn7nq"
GS_PATH = "example_files"
# store list of files which are already uploaded, so we don't transfer them again
# mapping: filename (without GS_PATH prefix) -> size in bytes
GS_FILE_DICT = {}
 
 
def createdir(newdir):
    """Create a local directory; an already existing directory is fine.

    Exits the whole script with status 6 when the path exists but is not
    a directory, and with status 5 on any other OS error.
    """
    try:
        os.mkdir(newdir)
    except FileExistsError as err:
        # Already existing is only acceptable if it really is a directory.
        if not os.path.isdir(newdir):
            print("Can't create directory %s, other file is using this path" % newdir)
            print(str(err))
            sys.exit(6)
    except OSError as err:
        print("Error while creating directory")
        print(str(err))
        sys.exit(5)
    return None
 
 
def fill_transfered_dict():
    """Build the remote and local file inventories.

    Fills GS_FILE_DICT with {filename: size} for every blob below GS_PATH
    in the bucket, and LOCAL_FILE_DICT with {filename: size} for every
    file in LOCAL_DIR, so the uploader can skip already complete files.

    Exits with status 11 when the storage client cannot be created and
    with status 12 when the bucket cannot be opened.
    """

    global GS_FILE_DICT
    global LOCAL_FILE_DICT

    try:
        client = storage.Client()
    except Exception as err:
        print("ERROR connecting to google storage")
        print(str(err))
        sys.exit(11)

    try:
        bucket = client.get_bucket(GS_BUCKET_NAME)
    except Exception as err:
        print("ERROR connecting to google storage bucket")
        print(str(err))
        sys.exit(12)

    # Remote inventory: strip the path prefix so the keys are plain
    # filenames comparable with the local listing.
    for blob in bucket.list_blobs(prefix=GS_PATH):
        gsfilename = blob.name.replace(GS_PATH+'/', '')
        GS_FILE_DICT[gsfilename] = blob.size

    # Local inventory.  A missing LOCAL_DIR is not an error here
    # (maybe the directory is not there, yet).
    try:
        local_names = os.listdir(LOCAL_DIR)
    except FileNotFoundError:
        local_names = []
    for local_file in local_names:
        local_filesize = os.path.getsize(LOCAL_DIR+"/"+local_file)
        if _DEBUG:
            print(local_file)
            print(local_filesize)
        LOCAL_FILE_DICT[local_file] = local_filesize

    if _VERBOSE:
        print("-> %s Files found on google cloud" % len(GS_FILE_DICT))

    return None
 
 
def push_files_to_bucket():
    """Push files from LOCAL_DIR to the google cloud bucket.

    Files that already exist remotely with the same size are skipped (and
    cleaned up locally); files with a different size are re-uploaded.
    After a successful upload the local file is moved to
    LOCAL_TRANSFERED_DIR, or deleted when that constant is "DELETE".
    At most _MAX_UPLOAD_NO files are uploaded per run (the -n/--upload
    option, default 15).  Exits with status 7 on an upload error.
    """

    if LOCAL_TRANSFERED_DIR != ("DELETE"):
        createdir(LOCAL_TRANSFERED_DIR)

    client = storage.Client()
    bucket = client.get_bucket(GS_BUCKET_NAME)

    uploaded = 0
    for filename in os.listdir(LOCAL_DIR):

        local_file = LOCAL_DIR+"/"+filename
        localsize = os.path.getsize(local_file)

        ### upload only files which are not uploaded (or which differ in size)
        if filename in GS_FILE_DICT:
            gssize = GS_FILE_DICT[filename]
            if gssize == localsize:
                if _VERBOSE_LEVEL > 0:
                    print("found on gs storage:" \
                          "%s with same size: %s" % (filename, GS_FILE_DICT[filename]))
                # delete/move to "done-dir" and continue with next file:
                _dispose_local_file(local_file, filename)
                continue

            if _VERBOSE:
                print("! found on gs storage: " \
                      "%s but with different size: %s (local size: %s) !" % \
                      (filename, GS_FILE_DICT[filename], localsize))
            # fall through: transfer this other version of the file

        # enforce the per-run upload limit promised in the usage text
        if uploaded >= _MAX_UPLOAD_NO:
            if _VERBOSE:
                print("Upload limit of %s files reached, stopping." % _MAX_UPLOAD_NO)
            break

        remotefile = GS_PATH+"/"+filename
        try:
            if _DEBUG:
                print("try to read local ", local_file)
            # context manager guarantees the handle is closed on any path
            with open(local_file, "rb") as filehandle:
                if _DEBUG:
                    print("try to open remote 'blob' for ", remotefile)
                newblob = bucket.blob(remotefile)

                if _VERBOSE:
                    print("Transfer file %s to google storage" % filename)
                newblob.upload_from_file(filehandle)

        except Exception as err:
            print("Error while uploading file to gs storage: %s" % filename)
            print(str(err))
            sys.exit(7)

        uploaded += 1
        _dispose_local_file(local_file, filename)

    if _VERBOSE:
        print('New files uploaded to google storage %s/%s' % (GS_BUCKET_NAME, GS_PATH))

    return None


def _dispose_local_file(local_file, filename):
    """Move a handled file to LOCAL_TRANSFERED_DIR, or delete it."""
    if LOCAL_TRANSFERED_DIR != ("DELETE"):
        shutil.move(local_file, LOCAL_TRANSFERED_DIR+'/'+filename)
    else:
        os.remove(local_file)
 
 
def increase_verbosity():
    """Raise verbosity by one step.

    The first call only switches _VERBOSE on; every subsequent call also
    bumps _VERBOSE_LEVEL by one.
    """
    global _VERBOSE
    global _VERBOSE_LEVEL
    _VERBOSE_LEVEL += 1 if _VERBOSE else 0
    _VERBOSE = True
 
 
def print_usage():
    """Show the usage text (this script's module docstring)."""
    print(__doc__)
 
 
def main():
    """Parse the command line options and set the module flags.

    Recognized options: -h/--help, -v (repeatable), -d,
    -n <int> / --upload=<int>.

    Exits with status 0 after --help, 9 for a non-integer upload count,
    and 10 for an unknown option or missing argument.
    """
    global _DEBUG
    global _MAX_UPLOAD_NO

    try:
        opts, _ = getopt.getopt(sys.argv[1:], 'hvdn:', ["help", "upload="])
    except getopt.GetoptError as err:
        # unknown option / missing argument: show usage instead of a traceback
        print(str(err))
        print_usage()
        sys.exit(10)

    for option, argument in opts:
        if option in ("-h", "--help"):
            print_usage()
            sys.exit(0)
        elif option == "-v":
            increase_verbosity()
        elif option == "-d":
            _DEBUG = True
        elif option in ("-n", "--upload"):
            try:
                _MAX_UPLOAD_NO = int(argument)
            except ValueError:
                print("the number of files to transfer (-n,--upload=) must be integer!")
                print_usage()
                sys.exit(9)
 
if __name__ == '__main__':

    # parse command line options first (may exit for -h or bad input)
    main()

    # what files are already uploaded? -> write to dict
    fill_transfered_dict()

    # NOTE(review): this dump only prints when -d is given as well,
    # because both branches are additionally gated on _DEBUG
    if _VERBOSE_LEVEL > 2:
        if _DEBUG:
            print("files on google storage: ", list(GS_FILE_DICT.keys()))
        if _DEBUG:
            print("files on local drive: ", list(LOCAL_FILE_DICT.keys()))

    # upload everything that is not yet (identically) on google storage
    push_files_to_bucket()

    sys.exit(0)

    # the end
de/sysadmin/tools/gcloud-storage-python3.txt · Zuletzt geändert: 2019-10-14 14:32 von hella

Seiten-Werkzeuge

Mastodon Twitter