From 622c27e19a281b344804be038ac2f45d5c9933ae Mon Sep 17 00:00:00 2001 From: Blake Fitch <blake.fitch@tuebingen.mpg.de> Date: Sun, 30 Jun 2024 13:13:30 +0200 Subject: [PATCH] enable removing old confirmed_put files (objects) when skip_on_zero_len and replace_existing --- pymods/irods_utils/irods_utils.py | 49 ++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/pymods/irods_utils/irods_utils.py b/pymods/irods_utils/irods_utils.py index 9fc32f8..a86b158 100644 --- a/pymods/irods_utils/irods_utils.py +++ b/pymods/irods_utils/irods_utils.py @@ -350,12 +350,13 @@ def streaming_transfer_file_to_object( args ): return rc, hash_digest256, hash_digest512, transfer_size # Returns None or the data_object irods ref. -def confirmed_put( irods_sesh, file_pathname, new_obj_ipath, metadata_dict=None, datatype=None, block_size=(2**28), replace_existing=None ): +def confirmed_put( irods_sesh, file_pathname, new_obj_ipath, metadata_dict=None, datatype=None, block_size=(2**28), replace_existing=False, skip_on_zero_len=False ): logging.debug( "file_pathname: " + file_pathname + " new_obj_ipath: " + new_obj_ipath + " datatype: " + datatype + " block_size: " + str( block_size ) - + " replace_existing " + str( replace_existing) ) + + " replace_existing " + str( replace_existing) + + " skip_on_zero_len " + str( skip_on_zero_len ) ) # NOTE: this routine uploades the file as a temporary iRODS object. # Common sense suggests checking if the objct already exsists, a rare condition first. @@ -368,6 +369,34 @@ def confirmed_put( irods_sesh, file_pathname, new_obj_ipath, metadata_dict=None, logging.error( "irods_sesh == None" ) return None + # Figure out if this path in the archive is occupied with pre-existing data. 
+ existing_obj = None + try: + options = {kw.VERIFY_CHKSUM_KW: ''} + existing_obj = irods_sesh.data_objects.get( new_obj_ipath, **options ) + logging.debug( "Object aleady exists at ipath: " + new_obj_ipath ) + except irods_ex.DataObjectDoesNotExist: + logging.debug( "DataObjectDoesNotExist " + new_obj_ipath ) + except irods_ex.OBJ_PATH_DOES_NOT_EXIST: + logging.debug( "OBJ_PATH_DOES_NOT_EXIST " + new_obj_ipath ) + except Exception as ex: + logging.error("Failed using data_onject.get() (but not DataObjectDoesNotExist) ipath: " + new_obj_ipath + " ex: " + str(ex) + " type " + str(type(ex)) ) + return None + + try: + upload_file_size = os.path.getsize( file_pathname ) + except Exception as ex: + logging.error( f"FATAL: Failed to get size of file to upload. file_pathname: {file_pathname} ex: {ex}" ) + raise + + if upload_file_size == 0 and existing_obj != None and replace_existing and skip_on_zero_len : + logging.warning( f"got zero len file with replace_existing -- removing current data object. ipath: {new_obj_ipath}" ) + try: + existing_obj.unlink(force=True) + except Exception as ex: + logging.warning( f"Failed remove zero line file on replace_existing and skip_on_zero_len. ipath: {new_obj_ipath} ex: {ex} " ) + raise + # Make a timestamped partial tmp file name which, if the upload somehow fails here, will be left behind # Hopefully this does not happen much, but we have seen at least on case so far. # Cleanup will need to be done elsewhere. 
@@ -428,20 +457,6 @@ def confirmed_put( irods_sesh, file_pathname, new_obj_ipath, metadata_dict=None, logging.error("metadata AVU dict >" + str( metadata_dict ) + "<" ) return None - existing_obj = None - - try: - options = {kw.VERIFY_CHKSUM_KW: ''} - existing_obj = irods_sesh.data_objects.get( new_obj_ipath, **options ) - logging.debug( "Object aleady exists at ipath: " + new_obj_ipath ) - except irods_ex.DataObjectDoesNotExist: - logging.debug( "DataObjectDoesNotExist " + new_obj_ipath ) - except irods_ex.OBJ_PATH_DOES_NOT_EXIST: - logging.debug( "OBJ_PATH_DOES_NOT_EXIST " + new_obj_ipath ) - except Exception as ex: - logging.error("Failed using data_onject.get() (but not DataObjectDoesNotExist) ipath: " + new_obj_ipath + " ex: " + str(ex) + " type " + str(type(ex)) ) - return None - mismatch = False if existing_obj != None: @@ -512,7 +527,7 @@ def confirmed_put( irods_sesh, file_pathname, new_obj_ipath, metadata_dict=None, dup_obj.unlink(force=True) except Exception as ex: logging.warning( "Failed remove dup when remove_existing flag set. ipath: {dup_ipath} ex: {ex} " ) - + new_obj = None try: new_obj = irods_sesh.data_objects.get( new_obj_ipath ) -- GitLab