diff --git a/src/partisan/irods.py b/src/partisan/irods.py index c9780b3..522ed22 100644 --- a/src/partisan/irods.py +++ b/src/partisan/irods.py @@ -2194,7 +2194,7 @@ def put( against the remote checksum calculated by the iRODS server for data objects. local_checksum: A caller-supplied checksum of the local file. This may be a - string, a path to a file containing a string, or a file name + string, a path to a file containing a string, or a file path transformation function. If the latter, it must accept the local path as its only argument and return a string checksum. Typically, this is useful when this checksum is available from an earlier process that @@ -2239,9 +2239,9 @@ def put( else: raise ValueError( f"Invalid type for local_checksum: {type(local_checksum)}; must be " - "a string or a path of a file containing a string" + "a string, a path of a file containing a string, or a callable " + "taking a path of a file and returning a string" ) - if fill and self.exists() and self.checksum() == chk: log.info( "Data object already exists in iRODS with matching checksum; skipping", @@ -2656,6 +2656,7 @@ def put( recurse=False, calculate_checksum=False, verify_checksum=False, + local_checksum=None, compare_checksums=False, fill=False, force=True, @@ -2673,6 +2674,12 @@ def put( verify_checksum: Verify the local checksum calculated by the iRODS C API against the remote checksum calculated by the iRODS server for data objects. See DataObject.put() for more information. + local_checksum: A callable that returns a checksum for a local file. See + DataObject.put() for more information. This is called for each file + encountered while recursing, with the file path as its argument. + (Also accepts a string or a path to a file containing a string, as does + DataObject.put(), however this is not useful for collections except in + the edge case where all the files have identical contents). 
compare_checksums: Compare caller-supplied local checksums to the remote checksums calculated by the iRODS server after the put operation for data objects. If the checksums do not match, raise an error. See @@ -2707,6 +2714,7 @@ def put( p, calculate_checksum=calculate_checksum, verify_checksum=verify_checksum, + local_checksum=local_checksum, compare_checksums=compare_checksums, fill=fill, force=force, @@ -2726,6 +2734,7 @@ def put( p, calculate_checksum=calculate_checksum, verify_checksum=verify_checksum, + local_checksum=local_checksum, compare_checksums=compare_checksums, force=force, timeout=timeout, diff --git a/tests/test_irods.py b/tests/test_irods.py index 70036f7..f9c2910 100644 --- a/tests/test_irods.py +++ b/tests/test_irods.py @@ -830,6 +830,73 @@ def test_data_object_put_checksum_no_verify(self, simple_collection): assert obj.size() == 555 assert obj.checksum() == "39a4aa291ca849d601e4e5b8ed627a04" + @m.it("Can put from a local file with checksum calculated on the fly") + def test_data_object_put_checksum_calculated(self, simple_collection): + obj = DataObject(simple_collection / "new.txt") + assert not obj.exists() + + local_path = Path("./tests/data/simple/data_object/lorem.txt").absolute() + checksum = "39a4aa291ca849d601e4e5b8ed627a04" + + obj.put(local_path, calculate_checksum=True, compare_checksums=True) + assert obj.exists() + assert obj.checksum() == checksum + + @m.it("Can put from a local file with a supplied local checksum string") + def test_data_object_put_checksum_supplied(self, simple_collection): + obj = DataObject(simple_collection / "new.txt") + assert not obj.exists() + + local_path = Path("./tests/data/simple/data_object/lorem.txt").absolute() + checksum = "39a4aa291ca849d601e4e5b8ed627a04" + obj.put( + local_path, + calculate_checksum=True, + compare_checksums=True, + local_checksum=checksum, + ) + assert obj.exists() + assert obj.checksum() == checksum + + @m.it("Can put from a local file with a supplied local checksum 
callable") + def test_data_object_put_callable_supplied(self, simple_collection): + obj = DataObject(simple_collection / "new.txt") + assert not obj.exists() + + local_path = Path("./tests/data/simple/data_object/lorem.txt").absolute() + checksum = "39a4aa291ca849d601e4e5b8ed627a04" + obj.put( + local_path, + calculate_checksum=True, + compare_checksums=True, + local_checksum=lambda _: checksum, + ) + assert obj.exists() + assert obj.checksum() == checksum + + @m.it("Raises an error if a supplied local checksum callable does not match") + def test_data_object_put_callable_supplied_mismatch(self, simple_collection): + obj = DataObject(simple_collection / "new.txt") + assert not obj.exists() + + local_path = Path("./tests/data/simple/data_object/lorem.txt").absolute() + with pytest.raises(ValueError, match="mismatch"): + obj.put( + local_path, + calculate_checksum=True, + compare_checksums=True, + local_checksum=lambda _: "a bad checksum", + ) + + @m.it("Raises an error if a supplied local checksum string does not match") + def test_data_object_put_checksum_supplied_mismatch(self, simple_collection): + obj = DataObject(simple_collection / "new.txt") + assert not obj.exists() + + local_path = Path("./tests/data/simple/data_object/lorem.txt").absolute() + with pytest.raises(ValueError, match="mismatch"): + obj.put(local_path, compare_checksums=True, local_checksum="a bad checksum") + @m.describe("Operations on an existing DataObject") @m.context("When a DataObject exists") @m.it("Can be detected")